summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2018-03-15 16:05:55 +0100
committer Christoph Helma <helma@in-silico.ch> 2018-03-15 16:05:55 +0100
commit 719b7fd84a1593fe209c393d5858fce64c7314aa (patch)
tree ef9d3c03f9cf60b5b1c5b1ad819ac85435bb3fa2
parent 4c791043366275f6748a706dd5f73260d0560d41 (diff)
-rw-r--r-- lib/caret.rb 11
-rw-r--r-- lib/dataset.rb 2
-rw-r--r-- lib/model.rb 35
-rw-r--r-- lib/regression.rb 2
4 files changed, 29 insertions, 21 deletions
diff --git a/lib/caret.rb b/lib/caret.rb
index f5c2bde..8bccf74 100644
--- a/lib/caret.rb
+++ b/lib/caret.rb
@@ -22,12 +22,11 @@ module OpenTox
end
if independent_variables.flatten.uniq == ["NA"] or independent_variables.flatten.uniq == []
prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
- prediction[:warning] = "No variables for regression model. Using weighted average of similar substances."
+ prediction[:warnings] << "No variables for regression model. Using weighted average of similar substances."
elsif
dependent_variables.size < 3
prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
- prediction[:warning] = "Insufficient number of neighbors (#{dependent_variables.size}) for regression model. Using weighted average of similar substances."
-
+ prediction[:warnings] << "Insufficient number of neighbors (#{dependent_variables.size}) for regression model. Using weighted average of similar substances."
else
dependent_variables.each_with_index do |v,i|
dependent_variables[i] = to_r(v)
@@ -52,7 +51,7 @@ module OpenTox
$logger.debug dependent_variables
$logger.debug independent_variables
prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
- prediction[:warning] = "R caret model creation error. Using weighted average of similar substances."
+ prediction[:warnings] << "R caret model creation error. Using weighted average of similar substances."
return prediction
end
begin
@@ -73,12 +72,12 @@ module OpenTox
$logger.debug "R caret prediction error for:"
$logger.debug self.inspect
prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
- prediction[:warning] = "R caret prediction error. Using weighted average of similar substances"
+ prediction[:warnings] << "R caret prediction error. Using weighted average of similar substances"
return prediction
end
if prediction.nil? or prediction[:value].nil?
prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
- prediction[:warning] = "Could not create local caret model. Using weighted average of similar substances."
+ prediction[:warnings] << "Empty R caret prediction. Using weighted average of similar substances."
end
end
prediction
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 44690e1..6e7d67f 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -46,7 +46,7 @@ module OpenTox
if data_entries[substance.to_s] and data_entries[substance.to_s][feature.to_s]
data_entries[substance.to_s][feature.to_s]
else
- nil
+ [nil]
end
end
diff --git a/lib/model.rb b/lib/model.rb
index b18610d..56d8665 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -57,7 +57,7 @@ module OpenTox
model.version = {:warning => "git is not installed"}
end
- # set defaults
+ # set defaults#
substance_classes = training_dataset.substances.collect{|s| s.class.to_s}.uniq
bad_request_error "Cannot create models for mixed substance classes '#{substance_classes.join ', '}'." unless substance_classes.size == 1
@@ -70,7 +70,7 @@ module OpenTox
},
:similarity => {
:method => "Algorithm::Similarity.tanimoto",
- :min => 0.1
+ :min => 0.5,
},
:feature_selection => nil
}
@@ -81,7 +81,7 @@ module OpenTox
}
elsif model.class == LazarRegression
model.algorithms[:prediction] = {
- :method => "Algorithm::Caret.pls",
+ :method => "Algorithm::Caret.rf",
}
end
@@ -93,7 +93,7 @@ module OpenTox
},
:similarity => {
:method => "Algorithm::Similarity.weighted_cosine",
- :min => 0.5
+ :min => 0.5,
},
:prediction => {
:method => "Algorithm::Caret.rf",
@@ -191,7 +191,7 @@ module OpenTox
# Predict a substance (compound or nanoparticle)
# @param [OpenTox::Substance]
# @return [Hash]
- def predict_substance substance
+ def predict_substance substance, threshold = self.algorithms[:similarity][:min]
@independent_variables = Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
case algorithms[:similarity][:method]
@@ -221,20 +221,19 @@ module OpenTox
bad_request_error "Unknown descriptor type '#{descriptors}' for similarity method '#{similarity[:method]}'."
end
- prediction = {}
+ prediction = {:warnings => [], :measurements => []}
+ prediction[:warnings] << "Similarity threshold #{threshold} < #{algorithms[:similarity][:min]}, prediction may be out of applicability domain." if threshold < algorithms[:similarity][:min]
neighbor_ids = []
neighbor_similarities = []
neighbor_dependent_variables = []
neighbor_independent_variables = []
- prediction = {}
# find neighbors
substance_ids.each_with_index do |s,i|
# handle query substance
if substance.id.to_s == s
- prediction[:measurements] ||= []
prediction[:measurements] << dependent_variables[i]
- prediction[:warning] = "Substance '#{substance.name}, id:#{substance.id}' has been excluded from neighbors, because it is identical with the query substance."
+ prediction[:info] = "Substance '#{substance.name}, id:#{substance.id}' has been excluded from neighbors, because it is identical with the query substance."
else
if fingerprints?
neighbor_descriptors = fingerprints[i]
@@ -243,7 +242,7 @@ module OpenTox
neighbor_descriptors = scaled_variables.collect{|v| v[i]}
end
sim = Algorithm.run algorithms[:similarity][:method], [similarity_descriptors, neighbor_descriptors, descriptor_weights]
- if sim >= algorithms[:similarity][:min]
+ if sim >= threshold
neighbor_ids << s
neighbor_similarities << sim
neighbor_dependent_variables << dependent_variables[i]
@@ -258,17 +257,27 @@ module OpenTox
measurements = nil
if neighbor_similarities.empty?
- prediction.merge!({:value => nil,:warning => "Could not find similar substances with experimental data in the training dataset.",:neighbors => []})
+ prediction[:value] = nil
+ prediction[:warnings] << "Could not find similar substances with experimental data in the training dataset."
elsif neighbor_similarities.size == 1
- prediction.merge!({:value => dependent_variables.first, :probabilities => nil, :warning => "Only one similar compound in the training set. Predicting its experimental value.", :neighbors => [{:id => neighbor_ids.first, :similarity => neighbor_similarities.first}]})
+ prediction[:value] = nil
+ prediction[:warnings] << "Cannot create prediction: Only one similar compound in the training set."
+ prediction[:neighbors] = [{:id => neighbor_ids.first, :similarity => neighbor_similarities.first}]
else
query_descriptors.collect!{|d| d ? 1 : 0} if algorithms[:feature_selection] and algorithms[:descriptors][:method] == "fingerprint"
# call prediction algorithm
result = Algorithm.run algorithms[:prediction][:method], dependent_variables:neighbor_dependent_variables,independent_variables:neighbor_independent_variables ,weights:neighbor_similarities, query_variables:query_descriptors
prediction.merge! result
prediction[:neighbors] = neighbor_ids.collect_with_index{|id,i| {:id => id, :measurement => neighbor_dependent_variables[i], :similarity => neighbor_similarities[i]}}
+ #if neighbor_similarities.max < algorithms[:similarity][:warn_min]
+ #prediction[:warnings] << "Closest neighbor has similarity < #{algorithms[:similarity][:warn_min]}. Prediction may be out of applicability domain."
+ #end
+ end
+ if prediction[:warnings].empty? or threshold < algorithms[:similarity][:min]
+ prediction
+ else # try again with a lower threshold
+ predict_substance substance, 0.2
end
- prediction
end
# Predict a substance (compound or nanoparticle), an array of substances or a dataset
diff --git a/lib/regression.rb b/lib/regression.rb
index fd2855f..25c0732 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -17,7 +17,7 @@ module OpenTox
sim_sum += weights[i]
end if dependent_variables
sim_sum == 0 ? prediction = nil : prediction = weighted_sum/sim_sum
- {:value => prediction}
+ {:value => prediction, :warnings => ["Weighted average prediction, no prediction interval available."]}
end
end