From f7e87b45f15083e5fcdea64821f06ed93ece4c4e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 7 Jun 2016 18:07:28 +0200 Subject: (repeated)crossvalidation plots --- lib/crossvalidation.rb | 35 +++++++++++++++++++++++++++++++++++ lib/nanoparticle.rb | 1 - lib/regression.rb | 2 +- lib/validation-statistics.rb | 6 +++--- 4 files changed, 39 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 15e25a5..7aae3d2 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -71,6 +71,8 @@ module OpenTox class RepeatedCrossValidation < Validation field :crossvalidation_ids, type: Array, default: [] + field :correlation_plot_id, type: BSON::ObjectId + def self.create model, folds=10, repeats=3 repeated_cross_validation = self.new repeats.times do |n| @@ -80,9 +82,42 @@ module OpenTox repeated_cross_validation.save repeated_cross_validation end + def crossvalidations crossvalidation_ids.collect{|id| CrossValidation.find(id)} end + + def correlation_plot format: "png" + #unless correlation_plot_id + feature = Feature.find(crossvalidations.first.model.prediction_feature) + title = feature.name + title += "[#{feature.unit}]" if feature.unit and !feature.unit.blank? + tmpfile = "/tmp/#{id.to_s}_correlation.#{format}" + images = [] + crossvalidations.each_with_index do |cv,i| + x = [] + y = [] + cv.predictions.each do |sid,p| + x << p["value"] + y << p["measurements"].median + end + R.assign "measurement", x + R.assign "prediction", y + R.eval "all = c(measurement,prediction)" + R.eval "range = c(min(all), max(all))" + R.eval "image#{i} = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)" + R.eval "image#{i} = image#{i} + geom_abline(intercept=0, slope=1)" + images << "image#{i}" + end + R.eval "pdf('#{tmpfile}')" + R.eval "grid.arrange(#{images.join ","},ncol=#{images.size})" + R.eval "dev.off()" + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}") + correlation_plot_id = $gridfs.insert_one(file) + update(:correlation_plot_id => correlation_plot_id) + #end + $gridfs.find_one(_id: correlation_plot_id).data + end end end diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index c1bf1b5..d6261ee 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -10,7 +10,6 @@ module OpenTox attr_accessor :scaled_values def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features: - p name dataset = Dataset.find(dataset_id) #relevant_features = {} measurements = [] diff --git a/lib/regression.rb b/lib/regression.rb index c4c83d2..51317ac 100644 --- a/lib/regression.rb +++ b/lib/regression.rb @@ -122,7 +122,7 @@ module OpenTox pc_ids.compact! prediction = r_model_prediction method, data_frame, pc_ids.collect{|i| "\"#{i}\""}, weights, query_descriptors if prediction.nil? - prediction = local_weighted_average substance, neighbors + prediction = local_weighted_average(substance: substance, neighbors: neighbors) prediction[:warning] = "Could not create local PLS model. Using weighted average of similar substances." end p prediction diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index 6b252b1..9aa9cff 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -136,9 +136,9 @@ module OpenTox } end - def correlation_plot + def correlation_plot format: "png" unless correlation_plot_id - tmpfile = "/tmp/#{id.to_s}_correlation.pdf" + tmpfile = "/tmp/#{id.to_s}_correlation.#{format}" x = [] y = [] feature = Feature.find(predictions.first.last["prediction_feature_id"]) @@ -155,7 +155,7 @@ module OpenTox R.eval "image = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)" R.eval "image = image + geom_abline(intercept=0, slope=1)" R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.png") + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}") plot_id = $gridfs.insert_one(file) update(:correlation_plot_id => plot_id) end -- cgit v1.2.3