summary refs log tree commit diff
diff options
context:
space:
mode:
author gebele <gebele@in-silico.ch> 2017-05-26 12:52:28 +0000
committer gebele <gebele@in-silico.ch> 2017-05-26 12:52:28 +0000
commit 61a7d994d8f4fbcf25414beea96189bf885ad19d (patch)
tree 6f8c3aa61d8502bd5157ce7c63960a9af62f495e
parent 189ed7d93974f7051e7c34d63415000531719fa4 (diff)
parent 91ad959bdc1d0ebea295e7dc13f2ec80dec04d19 (diff)
Merge branch 'master' of https://github.com/opentox/lazar
-rw-r--r-- lib/lazar.rb 7
-rw-r--r-- lib/validation-statistics.rb 62
2 files changed, 23 insertions, 46 deletions
diff --git a/lib/lazar.rb b/lib/lazar.rb
index f251379..a756742 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -16,16 +16,19 @@ raise "Incorrect lazar environment variable LAZAR_ENV '#{ENV["LAZAR_ENV"]}', ple
ENV["MONGOID_ENV"] = ENV["LAZAR_ENV"]
ENV["RACK_ENV"] = ENV["LAZAR_ENV"] # should set sinatra environment
+# search for a central mongo database in use
+# http://opentox.github.io/installation/2017/03/07/use-central-mongodb-in-docker-environment
+CENTRAL_MONGO_IP = `grep -oP '^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}(?=.*mongodb)' /etc/hosts`.chomp
Mongoid.load_configuration({
:clients => {
:default => {
:database => ENV["LAZAR_ENV"],
- :hosts => ["localhost:27017"],
+ :hosts => (CENTRAL_MONGO_IP.blank? ? ["localhost:27017"] : ["#{CENTRAL_MONGO_IP}:27017"]),
}
}
})
Mongoid.raise_not_found_error = false # return nil if no document is found
-$mongo = Mongo::Client.new("mongodb://127.0.0.1:27017/#{ENV['LAZAR_ENV']}")
+$mongo = Mongo::Client.new("mongodb://#{(CENTRAL_MONGO_IP.blank? ? "127.0.0.1" : CENTRAL_MONGO_IP)}:27017/#{ENV['LAZAR_ENV']}")
$gridfs = $mongo.database.fs
# Logger setup
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 553e6ac..2d522ae 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -179,8 +179,12 @@ module OpenTox
R.assign "prediction", y
R.eval "all = c(measurement,prediction)"
R.eval "range = c(min(all), max(all))"
- title = feature.name
- title += "[#{feature.unit}]" if feature.unit and !feature.unit.blank?
+ if feature.name.match /Net cell association/ # ad hoc fix for awkward units
+ title = "log2(Net cell association [mL/ug(Mg)])"
+ else
+ title = feature.name
+ title += " [#{feature.unit}]" if feature.unit and !feature.unit.blank?
+ end
R.eval "image = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
R.eval "image = image + geom_abline(intercept=0, slope=1)"
R.eval "ggsave(file='#{tmpfile}', plot=image)"
@@ -191,51 +195,21 @@ module OpenTox
$gridfs.find_one(_id: correlation_plot_id).data
end
- # Get predictions with the largest difference between predicted and measured values
- # @params [Fixnum] number of predictions
- # @params [TrueClass,FalseClass,nil] include neighbors
- # @params [TrueClass,FalseClass,nil] show common descriptors
+ # Get predictions with measurements outside of the prediction interval
# @return [Hash]
- def worst_predictions n: 5, show_neigbors: true, show_common_descriptors: false
- worst_predictions = predictions.sort_by{|sid,p| -(p["value"] - p["measurements"].median).abs}[0,n]
- worst_predictions.collect do |p|
- substance = Substance.find(p.first)
- prediction = p[1]
- if show_neigbors
- neighbors = prediction["neighbors"].collect do |n|
- common_descriptors = []
- if show_common_descriptors
- common_descriptors = n["common_descriptors"].collect do |d|
- f=Feature.find(d)
- {
- :id => f.id.to_s,
- :name => "#{f.name} (#{f.conditions})",
- :p_value => d[:p_value],
- :r_squared => d[:r_squared],
- }
- end
- else
- common_descriptors = n["common_descriptors"].size
- end
- {
- :name => Substance.find(n["_id"]).name,
- :id => n["_id"].to_s,
- :common_descriptors => common_descriptors
- }
- end
- else
- neighbors = prediction["neighbors"].size
+ def worst_predictions
+ worst_predictions = predictions.select do |sid,p|
+ p["prediction_interval"] and p["value"] and (p["measurements"].max < p["prediction_interval"][0] or p["measurements"].min > p["prediction_interval"][1])
+ end.compact.to_h
+ worst_predictions.each do |sid,p|
+ p["error"] = (p["value"] - p["measurements"].median).abs
+ if p["measurements"].max < p["prediction_interval"][0]
+ p["distance_prediction_interval"] = (p["measurements"].max - p["prediction_interval"][0]).abs
+ elsif p["measurements"].min > p["prediction_interval"][1]
+ p["distance_prediction_interval"] = (p["measurements"].min - p["prediction_interval"][1]).abs
end
- {
- :id => substance.id.to_s,
- :name => substance.name,
- :feature => Feature.find(prediction["prediction_feature_id"]).name,
- :error => (prediction["value"] - prediction["measurements"].median).abs,
- :prediction => prediction["value"],
- :measurements => prediction["measurements"],
- :neighbors => neighbors
- }
end
+ worst_predictions.sort_by{|sid,p| p["distance_prediction_interval"] }.to_h
end
end
end