summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-05-24 15:41:24 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-05-24 15:41:24 +0200
commit cc08e6beda7f7d70ebf6c6929a22d1a0cd7c1a20 (patch)
tree cc1c37d5623a72787e0d74b000692ff380bd45f5
parent b2d80ad2e470fcb41af4b747142e5693f2fa4615 (diff)
Tests fixed. DescriptorTest#test_compound_all may fail when run within all.rb.
-rw-r--r-- lib/dataset.rb 5
-rw-r--r-- lib/model.rb 9
-rw-r--r-- lib/validation-statistics.rb 9
-rw-r--r-- test/regression.rb 2
-rw-r--r-- test/validation.rb 14
5 files changed, 25 insertions, 14 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 38a55a8..9138452 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -229,6 +229,11 @@ module OpenTox
save
end
+ def delete
+ compounds.each{|c| c.dataset_ids.delete id.to_s}
+ super
+ end
+
end
# Dataset for lazar predictions
diff --git a/lib/model.rb b/lib/model.rb
index 8baed41..3a178a1 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -69,6 +69,7 @@ module OpenTox
end
def predict_substance substance
+ neighbor_algorithm_parameters = Hash[self.neighbor_algorithm_parameters.map{ |k, v| [k.to_sym, v] }] # convert string keys to symbols
neighbors = substance.send(neighbor_algorithm, neighbor_algorithm_parameters)
database_activities = nil
prediction = {}
@@ -82,22 +83,22 @@ module OpenTox
neighbors.delete_if{|n| n["_id"] == substance.id} # remove query substance for an unbiased prediction (also useful for loo validation)
end
if neighbors.empty?
- prediction.merge!({:value => nil,:confidence => nil,:warning => "Could not find similar substances with experimental data in the training dataset.",:neighbors => []})
+ prediction.merge!({:value => nil,:probabilities => nil,:warning => "Could not find similar substances with experimental data in the training dataset.",:neighbors => []})
elsif neighbors.size == 1
value = nil
tox = neighbors.first["toxicities"]
if tox.size == 1 # single measurement
- value = tox
+ value = tox.first
else # multiple measurement
if tox.collect{|t| t.numeric?}.uniq == [true] # numeric
value = tox.median
elsif tox.uniq.size == 1 # single value
value = tox.first
else # contradictory results
- # TODO add majority vote
+ # TODO add majority vote??
end
end
- prediction.merge!({:value => value, :confidence => nil, :warning => "Only one similar compound in the training set. Predicting median of its experimental values."}) if value
+ prediction.merge!({:value => value, :probabilities => nil, :warning => "Only one similar compound in the training set. Predicting median of its experimental values.", :neighbors => neighbors}) if value
else
# call prediction algorithm
klass,method = prediction_algorithm.split('.')
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 3c52b15..156353a 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -8,10 +8,11 @@ module OpenTox
predictivity = {}
nr_instances = 0
predictions.each do |cid,pred|
- # TODO use measured majority class
- if pred[:measured].uniq.size == 1
+ # TODO
+ # use predictions without probabilities (single neighbor)??
+ # use measured majority class??
+ if pred[:measured].uniq.size == 1 and pred[:probabilities]
m = pred[:measured].first
- #pred[:measured].each do |m|
if pred[:value] == m
if pred[:value] == accept_values[0]
confusion_matrix[0][0] += 1
@@ -63,12 +64,12 @@ module OpenTox
end
def self.regression predictions
+ # TODO: predictions within prediction_interval
rmse = 0
mae = 0
x = []
y = []
predictions.each do |cid,pred|
- p pred
if pred[:value] and pred[:measured]
x << pred[:measured].median
y << pred[:value]
diff --git a/test/regression.rb b/test/regression.rb
index 799650f..c0782c4 100644
--- a/test/regression.rb
+++ b/test/regression.rb
@@ -17,7 +17,7 @@ class LazarRegressionTest < MiniTest::Test
model.neighbor_algorithm_parameters[:type] = "MP2D"
compound = Compound.from_smiles "CCCSCCSCC"
prediction = model.predict compound
- assert_equal 1.26, prediction[:value].round(2)
+ assert_equal 1.37, prediction[:value].round(2)
assert_equal 3, prediction[:neighbors].size
end
diff --git a/test/validation.rb b/test/validation.rb
index ed19fee..39314da 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -34,13 +34,16 @@ class ValidationTest < MiniTest::Test
model.save
cv = ClassificationCrossValidation.create model
params = model.neighbor_algorithm_parameters
- params.delete :training_dataset_id
params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string
cv.validations.each do |validation|
validation_params = validation.model.neighbor_algorithm_parameters
- validation_params.delete "training_dataset_id"
- assert_equal params, validation_params
+ refute_nil params["dataset_id"]
+ refute_nil validation_params[:dataset_id]
+ refute_equal params["dataset_id"], validation_params[:dataset_id]
+ ["min_sim","type","prediction_feature_id"].each do |k|
+ assert_equal params[k], validation_params[k]
+ end
end
end
@@ -55,13 +58,14 @@ class ValidationTest < MiniTest::Test
}
}
model = Model::LazarRegression.create dataset.features.first, dataset, params
- p model
cv = RegressionCrossValidation.create model
cv.validation_ids.each do |vid|
model = Model::Lazar.find(Validation.find(vid).model_id)
assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type]
assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim]
- refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id]
+ refute_nil model[:neighbor_algorithm_parameters][:dataset_id]
+ refute_equal dataset.id, model[:neighbor_algorithm_parameters][:dataset_id]
+ assert_equal model.training_dataset_id, model[:neighbor_algorithm_parameters][:dataset_id]
end
refute_nil cv.rmse