summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2017-02-20 09:41:05 +0100
committer Christoph Helma <helma@in-silico.ch> 2017-02-20 09:41:05 +0100
commit c26dc4713bba5de9e38e54f870f01071c2c4c960 (patch)
tree e835baf26545c43045ed6e16157f24ed09e8968c
parent 75408162397b9db75b042fc128e9a01a2832828c (diff)
worst predictions fixed for regression (ref: nano-lazar-paper.revision)
-rw-r--r-- lib/validation-statistics.rb 54
1 files changed, 12 insertions, 42 deletions
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 236a66c..2d522ae 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -195,51 +195,21 @@ module OpenTox
$gridfs.find_one(_id: correlation_plot_id).data
end
- # Get predictions with the largest difference between predicted and measured values
- # @params [Fixnum] number of predictions
- # @params [TrueClass,FalseClass,nil] include neighbors
- # @params [TrueClass,FalseClass,nil] show common descriptors
+ # Get predictions with measurements outside of the prediction interval
# @return [Hash]
- def worst_predictions n: 5, show_neigbors: true, show_common_descriptors: false
- worst_predictions = predictions.sort_by{|sid,p| -(p["value"] - p["measurements"].median).abs}[0,n]
- worst_predictions.collect do |p|
- substance = Substance.find(p.first)
- prediction = p[1]
- if show_neigbors
- neighbors = prediction["neighbors"].collect do |n|
- common_descriptors = []
- if show_common_descriptors
- common_descriptors = n["common_descriptors"].collect do |d|
- f=Feature.find(d)
- {
- :id => f.id.to_s,
- :name => "#{f.name} (#{f.conditions})",
- :p_value => d[:p_value],
- :r_squared => d[:r_squared],
- }
- end
- else
- common_descriptors = n["common_descriptors"].size
- end
- {
- :name => Substance.find(n["_id"]).name,
- :id => n["_id"].to_s,
- :common_descriptors => common_descriptors
- }
- end
- else
- neighbors = prediction["neighbors"].size
+ def worst_predictions
+ worst_predictions = predictions.select do |sid,p|
+ p["prediction_interval"] and p["value"] and (p["measurements"].max < p["prediction_interval"][0] or p["measurements"].min > p["prediction_interval"][1])
+ end.compact.to_h
+ worst_predictions.each do |sid,p|
+ p["error"] = (p["value"] - p["measurements"].median).abs
+ if p["measurements"].max < p["prediction_interval"][0]
+ p["distance_prediction_interval"] = (p["measurements"].max - p["prediction_interval"][0]).abs
+ elsif p["measurements"].min > p["prediction_interval"][1]
+ p["distance_prediction_interval"] = (p["measurements"].min - p["prediction_interval"][1]).abs
end
- {
- :id => substance.id.to_s,
- :name => substance.name,
- :feature => Feature.find(prediction["prediction_feature_id"]).name,
- :error => (prediction["value"] - prediction["measurements"].median).abs,
- :prediction => prediction["value"],
- :measurements => prediction["measurements"],
- :neighbors => neighbors
- }
end
+ worst_predictions.sort_by{|sid,p| p["distance_prediction_interval"] }.to_h
end
end
end