confidence for prediction datasets
author: helma@in-silico.ch <helma@in-silico.ch>
Fri, 16 Nov 2018 21:45:17 +0000 (22:45 +0100)
committer: helma@in-silico.ch <helma@in-silico.ch>
Fri, 16 Nov 2018 21:45:17 +0000 (22:45 +0100)
lib/dataset.rb
lib/feature.rb
lib/model.rb
test/classification-model.rb
test/regression-model.rb

index 3979105..df17569 100644 (file)
@@ -70,6 +70,12 @@ module OpenTox
       features.select{|f| f.is_a?(Warnings)}
     end
 
+    # Get the first Confidence feature found in features
+    # @return [OpenTox::Confidence, nil] confidence feature, nil if none is present
+    def confidence_feature
+      features.select{|f| f.is_a?(Confidence)}.first
+    end
+
     # Get nominal and numeric bioactivity features
     # @return [Array<OpenTox::NominalBioActivity,OpenTox::NumericBioActivity>]
     def bioactivity_features
@@ -392,8 +398,9 @@ module OpenTox
         predictions[s] ||= {}
         prediction_feature = prediction_features.first
         predictions[s][:value] = values(s,prediction_feature).first
-        predictions[s][:warnings] = []
-        warnings_features.each { |w| predictions[s][:warnings] += values(s,w) }
+        #predictions[s][:warnings] = []
+        #warnings_features.each { |w| predictions[s][:warnings] += values(s,w) }
+        predictions[s][:confidence] = values(s,confidence_feature).first
         if predictions[s][:value] and prediction_feature.is_a? NominalLazarPrediction
           prediction_feature.accept_values.each do |v|
             f = LazarPredictionProbability.find_by(:name => v, :model_id => prediction_feature.model_id, :training_feature_id => prediction_feature.training_feature_id)
index b474398..72c26d7 100644 (file)
@@ -15,6 +15,11 @@ module OpenTox
     field :dataset_id, type: BSON::ObjectId
   end
 
+  # Confidence feature; carries the BSON id of a dataset in dataset_id
+  class Confidence < Feature
+    field :dataset_id, type: BSON::ObjectId
+  end
+
   # Categorical variables
   class NominalFeature < Feature
     field :accept_values, type: Array
index 08ca07e..cbfefe3 100644 (file)
@@ -338,7 +338,8 @@ module OpenTox
           return predictions
         elsif object.is_a? Dataset
           d = object.copy
-          warning_feature = Warnings.find_or_create_by(:dataset_id => d.id)
+          #warning_feature = Warnings.find_or_create_by(:dataset_id => d.id)
+          confidence_feature = Confidence.find_or_create_by(:dataset_id => d.id)
           if prediction_feature.is_a? NominalBioActivity
             f = NominalLazarPrediction.find_or_create_by(:name => prediction_feature.name, :accept_values => prediction_feature.accept_values, :model_id => self.id, :training_feature_id => prediction_feature.id)
             probability_features = {}
@@ -356,7 +357,7 @@ module OpenTox
           # add predictions to dataset
           predictions.each do |substance_id,p|
             substance_id = BSON::ObjectId.from_string(substance_id)
-            d.add substance_id,warning_feature,p[:warnings].join(" ") unless p[:warnings].empty?
+            d.add substance_id,confidence_feature,p[:confidence]
             unless p[:value].nil?
               d.add substance_id,f,p[:value]
               p[:probabilities].each {|name,p| d.add substance_id,probability_features[name],p} if p[:probabilities]
index 2032bf8..79ccb98 100644 (file)
@@ -90,6 +90,7 @@ class ClassificationModelTest < MiniTest::Test
     assert_nil result.predictions[result.compounds.first][:value]
     assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value]
     assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["non-carcinogenic"].round(2)
+    assert_match /High/i, result.predictions[result.compounds[1]][:confidence]
   end
 
   def test_carcinogenicity_rf_classification
index b925439..7f667dc 100644 (file)
@@ -179,6 +179,7 @@ class LazarRegressionTest < MiniTest::Test
     assert_equal [1.37], result.values(result.compounds[6], result.bioactivity_features[0]).collect{|v| v.round(2)}
     assert_equal [1.79], result.values(result.compounds[6], result.prediction_features[0]).collect{|v| v.round(2)}
     assert_equal [1.84,1.73], result.values(result.compounds[7], result.bioactivity_features[0]).collect{|v| v.round(2)}
+    assert_match /Low/i, result.predictions[result.compounds[6]][:confidence]
   end
 
 end