summary | refs | log | tree | commit | diff
path: root/lib/model.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-10-05 13:22:12 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-10-05 13:22:12 +0200
commit 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f (patch)
tree bbae8f77dbb2ac85053f1253ab518c3076e0d176 /lib/model.rb
parent adefea0e78a4f05a2c9537e643873ad61fc22a0a (diff)
substance and nanoparticle model creation and predictions
Diffstat (limited to 'lib/model.rb')
-rw-r--r-- lib/model.rb 135
1 files changed, 65 insertions, 70 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 749611e..a272580 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -28,101 +28,91 @@ module OpenTox
when /Regression/
model = LazarRegression.new
end
+
# guess model type
elsif prediction_feature.numeric?
model = LazarRegression.new
else
model = LazarClassification.new
end
+
# set defaults
- if model.class == LazarClassification
+ substance_classes = training_dataset.substances.collect{|s| s.class.to_s}.uniq
+ bad_request_error "Cannot create models for mixed substance classes '#{substance_classes.join ', '}'." unless substance_classes.size == 1
+
+ if substance_classes.first == "OpenTox::Compound"
+
model.algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => 'MP2D',
+ },
:similarity => {
- :descriptors => "fingerprint['MP2D']",
:method => "Algorithm::Similarity.tanimoto",
:min => 0.1
},
- :prediction => {
- :descriptors => "fingerprint['MP2D']",
- :method => "Algorithm::Classification.weighted_majority_vote",
- },
- :feature_selection => nil,
+ :feature_selection => nil
}
- elsif model.class == LazarRegression
+
+ if model.class == LazarClassification
+ model.algorithms[:prediction] = {
+ :method => "Algorithm::Classification.weighted_majority_vote",
+ }
+ elsif model.class == LazarRegression
+ model.algorithms[:prediction] = {
+ :method => "Algorithm::Regression.caret",
+ :parameters => "pls",
+ }
+ end
+
+ elsif substance_classes.first == "OpenTox::Nanoparticle"
model.algorithms = {
+ :descriptors => {
+ :method => "properties",
+ #:types => ["P-CHEM","Proteomics"],
+ :types => ["P-CHEM"],
+ },
:similarity => {
- :descriptors => "fingerprint['MP2D']",
- :method => "Algorithm::Similarity.tanimoto",
- :min => 0.1
+ :method => "Algorithm::Similarity.weighted_cosine",
+ :min => 0.5
},
:prediction => {
- :descriptors => "fingerprint['MP2D']",
- :method => "Algorithm::Regression.local_caret",
- :parameters => "pls",
+ :method => "Algorithm::Regression.caret",
+ :parameters => "rf",
+ },
+ :feature_selection => {
+ :method => "Algorithm::FeatureSelection.correlation_filter",
},
- :feature_selection => nil,
}
+ else
+ bad_request_error "Cannot create models for #{substance_classes.first}."
end
- # overwrite defaults
+ # overwrite defaults with explicit parameters
algorithms.each do |type,parameters|
- parameters.each do |p,v|
- model.algorithms[type][p] = v
- end if parameters
+ if parameters and parameters.is_a? Hash
+ parameters.each do |p,v|
+ model.algorithms[type] ||= {}
+ model.algorithms[type][p] = v
+ end
+ else
+ model.algorithms[type] = parameters
+ end
end
- # set defaults for empty parameters
model.prediction_feature_id = prediction_feature.id
model.training_dataset_id = training_dataset.id
model.name = "#{training_dataset.name} #{prediction_feature.name}"
- #send(feature_selection_algorithm.to_sym) if feature_selection_algorithm
+ if model.algorithms[:feature_selection] and model.algorithms[:feature_selection][:method]
+ model.relevant_features = Algorithm.run model.algorithms[:feature_selection][:method], dataset: training_dataset, prediction_feature: prediction_feature, types: model.algorithms[:descriptors][:types]
+ end
model.save
- p model
model
end
- def correlation_filter
- # TODO: speedup, single assignment of all features to R+ parallel computation of significance?
- self.relevant_features = {}
- measurements = []
- substances = []
- training_dataset.substances.each do |s|
- training_dataset.values(s,prediction_feature_id).each do |act|
- measurements << act
- substances << s
- end
- end
- R.assign "tox", measurements
- feature_ids = training_dataset.substances.collect{ |s| s["physchem_descriptors"].keys}.flatten.uniq
- feature_ids.select!{|fid| Feature.find(fid).category == feature_selection_algorithm_parameters[:category]} if feature_selection_algorithm_parameters[:category]
- feature_ids.each do |feature_id|
- feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
- unless feature_values.uniq.size == 1
- R.assign "feature", feature_values
- begin
- R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
- pvalue = R.eval("cor$p.value").to_ruby
- if pvalue <= 0.05
- r = R.eval("cor$estimate").to_ruby
- self.relevant_features[feature_id] = {}
- self.relevant_features[feature_id]["pvalue"] = pvalue
- self.relevant_features[feature_id]["r"] = r
- self.relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
- self.relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
- end
- rescue
- warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed."
- end
- end
- end
- self.relevant_features = self.relevant_features.sort{|a,b| a[1]["pvalue"] <=> b[1]["pvalue"]}.to_h
- end
-
def predict_substance substance
- neighbor_algorithm_parameters = Hash[self.neighbor_algorithm_parameters.map{ |k, v| [k.to_sym, v] }] # convert string keys to symbols
- neighbor_algorithm_parameters[:relevant_features] = self.relevant_features if self.relevant_features
- neighbors = substance.send(neighbor_algorithm, neighbor_algorithm_parameters)
+ neighbors = substance.neighbors dataset_id: training_dataset_id, prediction_feature_id: prediction_feature_id, descriptors: algorithms[:descriptors], similarity: algorithms[:similarity], relevant_features: relevant_features
measurements = nil
prediction = {}
# handle query substance
@@ -153,9 +143,17 @@ module OpenTox
prediction.merge!({:value => value, :probabilities => nil, :warning => "Only one similar compound in the training set. Predicting median of its experimental values.", :neighbors => neighbors}) if value
else
# call prediction algorithm
- klass,method = prediction_algorithm.split('.')
- params = prediction_algorithm_parameters.merge({:substance => substance, :neighbors => neighbors})
- result = Object.const_get(klass).send(method,params)
+ case algorithms[:descriptors][:method]
+ when "fingerprint"
+ descriptors = substance.fingerprints[algorithms[:descriptors][:type]]
+ when "properties"
+ descriptors = substance.properties
+ else
+ bad_request_error "Descriptor method '#{algorithms[:descriptors][:method]}' not available."
+ end
+ params = algorithms[:prediction].merge({:descriptors => descriptors, :neighbors => neighbors})
+ params.delete :method
+ result = Algorithm.run algorithms[:prediction][:method], params
prediction.merge! result
prediction[:neighbors] = neighbors
prediction[:neighbors] ||= []
@@ -176,7 +174,7 @@ module OpenTox
elsif object.is_a? Dataset
substances = object.substances
else
- bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter."
+ bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Substances or an OpenTox::Dataset as parameter."
end
# make predictions
@@ -194,7 +192,6 @@ module OpenTox
elsif object.is_a? Array
return predictions
elsif object.is_a? Dataset
- #predictions.each{|cid,p| p.delete(:neighbors)}
# prepare prediction dataset
measurement_feature = Feature.find prediction_feature_id
@@ -205,8 +202,6 @@ module OpenTox
:prediction_feature_id => prediction_feature.id,
:predictions => predictions
)
-
- #prediction_dataset.save
return prediction_dataset
end
@@ -314,7 +309,7 @@ module OpenTox
:feature_selection_algorithm_parameters => {:category => category},
:neighbor_algorithm => "physchem_neighbors",
:neighbor_algorithm_parameters => {:min_sim => 0.5},
- :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression",
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.physchem_regression",
:prediction_algorithm_parameters => {:method => 'rf'}, # random forests
}
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")