summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author helma@in-silico.ch <helma@in-silico.ch> 2018-10-29 17:58:09 +0100
committer helma@in-silico.ch <helma@in-silico.ch> 2018-10-29 17:58:09 +0100
commit 5e9a08c0b534fa96179fb5c81a9b4193e7b0aad8 (patch)
tree a9d97dc520f51b5bca61b5f0122a1f86817e1ea8 /lib
parent 15f4ad23eb918a91d52779887ccfb51bc6547f1b (diff)
dataset folds fixed
Diffstat (limited to 'lib')
-rw-r--r-- lib/crossvalidation.rb 16
-rw-r--r-- lib/dataset.rb 30
-rw-r--r-- lib/feature.rb 2
-rw-r--r-- lib/model.rb 15
4 files changed, 20 insertions, 43 deletions
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 2e44ff2..4f61ff4 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -29,16 +29,14 @@ module OpenTox
training_dataset = model.training_dataset
training_dataset.folds(n).each_with_index do |fold,fold_nr|
#fork do # parallel execution of validations can lead to Rserve and memory problems
- $logger.debug "Dataset #{training_dataset.name}: Fold #{fold_nr} started"
- t = Time.now
- validation = TrainTest.create(model, fold[0], fold[1])
- cv.validation_ids << validation.id
- cv.nr_instances += validation.nr_instances
- cv.nr_unpredicted += validation.nr_unpredicted
- $logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds"
- #end
+ $logger.debug "Dataset #{training_dataset.name}: Fold #{fold_nr} started"
+ t = Time.now
+ validation = TrainTest.create(model, fold[0], fold[1])
+ cv.validation_ids << validation.id
+ cv.nr_instances += validation.nr_instances
+ cv.nr_unpredicted += validation.nr_unpredicted
+ $logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds"
end
- #Process.waitall
cv.save
$logger.debug "Nr unpredicted: #{cv.nr_unpredicted}"
cv.statistics
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 9611fff..41d7b5c 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -401,7 +401,7 @@ module OpenTox
substance.dataset_ids << dataset.id
substance.dataset_ids.uniq!
substance.save
- dataset.data_entries << data_entries.select{|row| row[0] == substance.id}
+ dataset.data_entries += data_entries.select{|row| row[0] == substance.id}
end
dataset.save
dataset
@@ -433,6 +433,8 @@ module OpenTox
if map
values(c,feature).each { |v| dataset.add c, new_feature, map[v] }
else
+ end
+ end
end
def transform # TODO
@@ -446,30 +448,4 @@ module OpenTox
end
- # Dataset for lazar predictions
- class LazarPrediction < Dataset
- field :creator, type: String
- #field :prediction_feature_id, type: BSON::ObjectId
- field :predictions, type: Hash, default: {}
-
- # Get prediction feature
- # @return [OpenTox::Feature]
- def prediction_feature
- Feature.find prediction_feature_id
- end
-
- def prediction compound
- end
-
- def probability klass
- end
-
- def prediction_interval
- end
-
- def predictions
- end
-
- end
-
end
diff --git a/lib/feature.rb b/lib/feature.rb
index 50dea77..be07e7a 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -65,7 +65,7 @@ module OpenTox
field :training_feature_id, type: BSON::ObjectId
end
- class LazarConfidenceInterval < NumericLazarPrediction
+ class LazarPredictionInterval < NumericLazarPrediction
end
class NominalSubstanceProperty < NominalFeature
diff --git a/lib/model.rb b/lib/model.rb
index 9858949..fc98e09 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -277,7 +277,7 @@ module OpenTox
prediction.merge! result
prediction[:neighbors] = neighbor_ids.collect_with_index{|id,i| {:id => id, :measurement => neighbor_dependent_variables[i], :similarity => neighbor_similarities[i]}}
#if neighbor_similarities.max < algorithms[:similarity][:warn_min]
- #prediction[:warnings] << "Closest neighbor has similarity < #{algorithms[:similarity][:warn_min]}. Prediction may be out of applicability domain."
+ #prediction[:warnings] << "Closest neighbor has similarity #{neighbor_similarities.max} < #{algorithms[:similarity][:warn_min]}. Prediction may be out of applicability domain."
#end
end
if prediction[:warnings].empty? or threshold < algorithms[:similarity][:min] or threshold <= 0.2
@@ -328,7 +328,8 @@ module OpenTox
elsif object.is_a? Array
return predictions
elsif object.is_a? Dataset
- warning_feature = InfoFeature.find_or_create_by(:name => "Warnings")
+ d = object.copy
+ warning_feature = Warnings.find_or_create_by(:dataset_id => d.id)
if prediction_feature.is_a? NominalBioActivity
f = NominalLazarPrediction.find_or_create_by(:name => prediction_feature.name, :accept_values => prediction_feature.accept_values, :model_id => self.id, :training_feature_id => prediction_feature.id)
probability_features = {}
@@ -337,17 +338,19 @@ module OpenTox
end
elsif prediction_feature.is_a? NumericBioActivity
f = NumericLazarPrediction.find_or_create_by(:name => prediction_feature.name, :unit => prediction_feature.unit, :model_id => self.id, :training_feature_id => prediction_feature.id)
- # TODO prediction interval
+ prediction_interval = {}
+ ["lower","upper"].each do |v|
+ prediction_interval[v] = LazarPredictionInterval.find_or_create_by(:name => v, :model_id => self.id, :training_feature_id => prediction_feature.id)
+ end
end
- d = Dataset.new(:name => object.name)
# add predictions to dataset
predictions.each do |substance_id,p|
d.add substance_id,warning_feature,p[:warnings].join(" ") if p[:warnings]
unless p[:value].nil?
d.add substance_id,f,p[:value]
- p[:probabilities].each {|name,p| d.add substance_id,probability_features[name],p}
- # TODO prediction interval
+ p[:probabilities].each {|name,p| d.add substance_id,probability_features[name],p} if p[:probabilities]
+ p[:prediction_interval].each_with_index {|v,i| d.add substance_id, prediction_interval[i], v } if p[:prediction_interval]
end
end
d.save