summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-06-07 18:07:28 +0200
committerChristoph Helma <helma@in-silico.ch>2016-06-07 18:07:28 +0200
commitf7e87b45f15083e5fcdea64821f06ed93ece4c4e (patch)
tree64d07c008eafa06bc8e91998e64edf92086ab852
parent290c7f86950c4051d018b8019ff4e72ec406c58c (diff)
(repeated)crossvalidation plots
-rw-r--r--lib/crossvalidation.rb35
-rw-r--r--lib/nanoparticle.rb1
-rw-r--r--lib/regression.rb2
-rw-r--r--lib/validation-statistics.rb6
-rw-r--r--test/nanoparticles.rb1
5 files changed, 40 insertions, 5 deletions
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 15e25a5..7aae3d2 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -71,6 +71,8 @@ module OpenTox
class RepeatedCrossValidation < Validation
field :crossvalidation_ids, type: Array, default: []
+ field :correlation_plot_id, type: BSON::ObjectId
+
def self.create model, folds=10, repeats=3
repeated_cross_validation = self.new
repeats.times do |n|
@@ -80,9 +82,42 @@ module OpenTox
repeated_cross_validation.save
repeated_cross_validation
end
+
def crossvalidations
crossvalidation_ids.collect{|id| CrossValidation.find(id)}
end
+
+ def correlation_plot format: "png"
+ #unless correlation_plot_id
+ feature = Feature.find(crossvalidations.first.model.prediction_feature)
+ title = feature.name
+ title += "[#{feature.unit}]" if feature.unit and !feature.unit.blank?
+ tmpfile = "/tmp/#{id.to_s}_correlation.#{format}"
+ images = []
+ crossvalidations.each_with_index do |cv,i|
+ x = []
+ y = []
+ cv.predictions.each do |sid,p|
+ x << p["value"]
+ y << p["measurements"].median
+ end
+ R.assign "measurement", x
+ R.assign "prediction", y
+ R.eval "all = c(measurement,prediction)"
+ R.eval "range = c(min(all), max(all))"
+ R.eval "image#{i} = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
+ R.eval "image#{i} = image#{i} + geom_abline(intercept=0, slope=1)"
+ images << "image#{i}"
+ end
+ R.eval "pdf('#{tmpfile}')"
+ R.eval "grid.arrange(#{images.join ","},ncol=#{images.size})"
+ R.eval "dev.off()"
+ file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}")
+ correlation_plot_id = $gridfs.insert_one(file)
+ update(:correlation_plot_id => correlation_plot_id)
+ #end
+ $gridfs.find_one(_id: correlation_plot_id).data
+ end
end
end
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index c1bf1b5..d6261ee 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -10,7 +10,6 @@ module OpenTox
attr_accessor :scaled_values
def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
- p name
dataset = Dataset.find(dataset_id)
#relevant_features = {}
measurements = []
diff --git a/lib/regression.rb b/lib/regression.rb
index c4c83d2..51317ac 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -122,7 +122,7 @@ module OpenTox
pc_ids.compact!
prediction = r_model_prediction method, data_frame, pc_ids.collect{|i| "\"#{i}\""}, weights, query_descriptors
if prediction.nil?
- prediction = local_weighted_average substance, neighbors
+ prediction = local_weighted_average(substance: substance, neighbors: neighbors)
prediction[:warning] = "Could not create local PLS model. Using weighted average of similar substances."
end
p prediction
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 6b252b1..9aa9cff 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -136,9 +136,9 @@ module OpenTox
}
end
- def correlation_plot
+ def correlation_plot format: "png"
unless correlation_plot_id
- tmpfile = "/tmp/#{id.to_s}_correlation.pdf"
+ tmpfile = "/tmp/#{id.to_s}_correlation.#{format}"
x = []
y = []
feature = Feature.find(predictions.first.last["prediction_feature_id"])
@@ -155,7 +155,7 @@ module OpenTox
R.eval "image = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
R.eval "image = image + geom_abline(intercept=0, slope=1)"
R.eval "ggsave(file='#{tmpfile}', plot=image)"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.png")
+ file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}")
plot_id = $gridfs.insert_one(file)
update(:correlation_plot_id => plot_id)
end
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index 3e0316f..0446086 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -6,6 +6,7 @@ class NanoparticleTest < MiniTest::Test
def setup
# TODO: multiple runs create duplicates
+ #$mongo.database.drop
#Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
end