summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author helma@in-silico.ch <helma@in-silico.ch> 2018-10-29 20:34:39 +0100
committer helma@in-silico.ch <helma@in-silico.ch> 2018-10-29 20:34:39 +0100
commit d9c9d78e49d886ea91386adbbd2b523347df226e (patch)
tree 6f728c82ca01b6a19d8e4aa1905e26bea9253f33
parent 5e9a08c0b534fa96179fb5c81a9b4193e7b0aad8 (diff)
dataset predictions fixed
-rw-r--r-- lib/dataset.rb 23
-rw-r--r-- lib/feature.rb 12
-rw-r--r-- lib/model.rb 3
-rw-r--r-- lib/validation-statistics.rb 3
-rw-r--r-- test/classification-model.rb 46
-rw-r--r-- test/compound.rb 2
-rw-r--r-- test/dataset.rb 2
7 files changed, 52 insertions, 39 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 41d7b5c..78f5633 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -86,6 +86,10 @@ module OpenTox
features.select{|f| f._type.match("SubstanceProperty")}
end
# Get all features whose _type matches "Prediction"
# NOTE(review): the pattern is unanchored, so it presumably also selects LazarPredictionProbability
# and LazarPredictionInterval features - confirm that callers expect this
# @return [Array] the matching features
+ def prediction_features
+ features.select{|f| f._type.match("Prediction")}
+ end
+
# Writers
# Add a value for a given substance and feature
@@ -352,6 +356,25 @@ module OpenTox
sdf
end
+ def predictions
+ predictions = {}
+ substances.each do |s|
+ predictions[s] ||= {}
+ prediction_feature = prediction_features.first
+ predictions[s][:value] = values(s,prediction_feature).first
+ predictions[s][:warnings] = []
+ warnings_features.each { |w| predictions[s][:warnings] += values(s,w) }
+ if predictions[s][:value] and prediction_feature.is_a? NominalLazarPrediction
+ prediction_feature.accept_values.each do |v|
+ f = LazarPredictionProbability.find_by(:name => v, :model_id => prediction_feature.model_id, :training_feature_id => prediction_feature.training_feature_id)
+ predictions[s][:probabilities] ||= {}
+ predictions[s][:probabilities][v] = values(s,f).first
+ end
+ end
+ end
+ predictions
+ end
+
# Dataset operations
# Merge an array of datasets
diff --git a/lib/feature.rb b/lib/feature.rb
index be07e7a..c18b0b8 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -54,18 +54,30 @@ module OpenTox
class NominalLazarPrediction < NominalFeature
# ids of the model and of the training feature, stored as BSON::ObjectId fields
field :model_id, type: BSON::ObjectId
field :training_feature_id, type: BSON::ObjectId
# Display name: the stored name attribute followed by " Prediction"
# @return [String]
+ def name
+ "#{self[:name]} Prediction"
+ end
end
# Subclass of NominalLazarPrediction with its own display name
class LazarPredictionProbability < NominalLazarPrediction
# Display name: the stored name attribute wrapped as "probability(...)"
# @return [String]
+ def name
+ "probability(#{self[:name]})"
+ end
end
# Numeric lazar prediction
class NumericLazarPrediction < NumericFeature
field :model_id, type: BSON::ObjectId
field :training_feature_id, type: BSON::ObjectId
+ def name
+ "#{name} Prediction"
+ end
end
# Subclass of NumericLazarPrediction with its own display name
class LazarPredictionInterval < NumericLazarPrediction
# Display name: "prediction_interval_" followed by the stored name attribute
# @return [String]
+ def name
+ "prediction_interval_#{self[:name]}"
+ end
end
class NominalSubstanceProperty < NominalFeature
diff --git a/lib/model.rb b/lib/model.rb
index fc98e09..7eaa469 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -346,7 +346,8 @@ module OpenTox
# add predictions to dataset
predictions.each do |substance_id,p|
- d.add substance_id,warning_feature,p[:warnings].join(" ") if p[:warnings]
+ substance_id = BSON::ObjectId.from_string(substance_id)
+ d.add substance_id,warning_feature,p[:warnings].join(" ") unless p[:warnings].empty?
unless p[:value].nil?
d.add substance_id,f,p[:value]
p[:probabilities].each {|name,p| d.add substance_id,probability_features[name],p} if p[:probabilities]
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index e440731..7bae891 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -150,8 +150,7 @@ module OpenTox
y = {:all => [],:without_warnings => []}
self.nr_predictions = {:all =>0,:without_warnings => 0}
predictions.each do |cid,pred|
- p pred
- if pred[:value] and pred[:measurements]
+ !if pred[:value] and pred[:measurements] and !pred[:measurements].empty?
self.nr_predictions[:all] +=1
x[:all] << pred[:measurements].median
y[:all] << pred[:value]
diff --git a/test/classification-model.rb b/test/classification-model.rb
index bfb64db..85668fb 100644
--- a/test/classification-model.rb
+++ b/test/classification-model.rb
@@ -1,6 +1,6 @@
require_relative "setup.rb"
-class LazarClassificationTest < MiniTest::Test
+class ClassificationModelTest < MiniTest::Test
def test_classification_default
algorithms = {
@@ -31,31 +31,6 @@ class LazarClassificationTest < MiniTest::Test
prediction = model.predict example[:compound]
assert_equal example[:prediction], prediction[:value]
end
-
- # make a dataset prediction
- compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call.csv")
- prediction_dataset = model.predict compound_dataset
- puts prediction_dataset.to_csv
- assert_equal compound_dataset.compounds.size, prediction_dataset.compounds.size
- c = Compound.from_smiles "CC(CN(CC(O)C)N=O)O"
- prediction_feature = prediction_dataset.features.select{|f| f.class == NominalLazarPrediction}[0]
- assert_equal ["true"], prediction_dataset.values(c, prediction_feature)
- p_true = LazarPredictionProbability.find_by(:name => "true")
- p_false = LazarPredictionProbability.find_by(:name => "false")
- p p_true
- assert_equal [0.7], prediction_dataset.values(c,p_true)
- assert_equal [0.0], prediction_dataset.values(c,p_false)
- assert_equal 0.0, p_false
-
-# cid = prediction_dataset.compounds[7].id.to_s
-# assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warnings][0]
-# expectations = ["Cannot create prediction: Only one similar compound in the training set.",
-# "Could not find similar substances with experimental data in the training dataset."]
-# prediction_dataset.predictions.each do |cid,pred|
-# assert_includes expectations, pred[:warnings][0] if pred[:value].nil?
-# end
-# cid = Compound.from_smiles("CCOC(=O)N").id.to_s
-# assert_match "excluded", prediction_dataset.predictions[cid][:info]
end
def test_classification_parameters
@@ -81,16 +56,19 @@ class LazarClassificationTest < MiniTest::Test
end
# Dataset prediction test
# New version (+ lines): trains on multi_cell_call.csv, predicts hamster_carcinogenicity.csv and checks the
# feature/compound counts, the stored prediction values and the Dataset#predictions hash
# Removed version (- lines): trained on hamster_carcinogenicity.csv and predicted the training dataset itself
def test_dataset_prediction
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call.csv")
+ test_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
model = Model::Lazar.create training_dataset: training_dataset
- result = model.predict training_dataset
- puts result.to_csv
+ result = model.predict test_dataset
assert_kind_of Dataset, result
- assert 3, result.features.size
- assert 8, result.compounds.size
- assert_equal ["true"], result.values(result.compounds.first, result.features[0])
- assert_equal [0.65], result.values(result.compounds.first, result.features[1])
- assert_equal [0], result.values(result.compounds.first, result.features[2]) # classification returns nil, check if
+ assert_equal 7, result.features.size
+ assert_equal 85, result.compounds.size
+ prediction_feature = result.prediction_features.first
+ assert_equal ["yes"], result.values(result.compounds[1], prediction_feature)
+ assert_equal ["no"], result.values(result.compounds[5], prediction_feature)
+ assert_nil result.predictions[result.compounds.first][:value]
+ assert_equal "yes", result.predictions[result.compounds[1]][:value]
+ assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["no"].round(2)
end
def test_carcinogenicity_rf_classification
diff --git a/test/compound.rb b/test/compound.rb
index 69ad21e..44e47f1 100644
--- a/test/compound.rb
+++ b/test/compound.rb
@@ -74,7 +74,7 @@ class CompoundTest < MiniTest::Test
end
def test_openbabel_segfault
- inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/b13-5-/t11-,12-,14-,18-,19?/m1/s1"
+ inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/t11-,12-,14-,18-,19?/m1/s1"
c = Compound.from_inchi(inchi)
assert_equal inchi, c.inchi
diff --git a/test/dataset.rb b/test/dataset.rb
index c197648..fd6ed52 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -110,7 +110,7 @@ class DatasetTest < MiniTest::Test
assert_match smi, d.warnings.join
end
duplicates.each do |inchi|
- refute_empty d.values(Compound.from_inchi(inchi),d.warnings_feature)
+ refute_empty d.values(Compound.from_inchi(inchi),d.warnings_features.first)
end
d.delete
end