summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-06-02 19:01:18 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-06-02 19:01:18 +0200
commit 128fd36b2531756c15a93776871e80eb44e524f1 (patch)
tree ead8f7b83c825d0df8c857866b8d5686d7082927
parent eec5bddbd35c9ecee8021128508d8718bccb4fe3 (diff)
proteomics regression validation
-rw-r--r-- lib/model.rb 30
-rw-r--r-- lib/nanoparticle.rb 28
-rw-r--r-- test/nanoparticles.rb 4
3 files changed, 22 insertions, 40 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 3482aee..277bca3 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -31,7 +31,7 @@ module OpenTox
self.neighbor_algorithm_parameters ||= {}
self.neighbor_algorithm_parameters[:dataset_id] = training_dataset.id
- #send(feature_selection_algorithm.to_sym) if feature_selection_algorithm
+ send(feature_selection_algorithm.to_sym) if feature_selection_algorithm
save
end
@@ -49,25 +49,31 @@ module OpenTox
feature_ids = training_dataset.substances.collect{ |s| s["physchem_descriptors"].keys}.flatten.uniq
feature_ids.each do |feature_id|
feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
- R.assign "feature", feature_values
- begin
- R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
- pvalue = R.eval("cor$p.value").to_ruby
- if pvalue <= 0.05
- r = R.eval("cor$estimate").to_ruby
- self.relevant_features[feature_id] = {}
- self.relevant_features[feature_id]["pvalue"] = pvalue
- self.relevant_features[feature_id]["r"] = r
+ unless feature_values.uniq.size == 1
+ R.assign "feature", feature_values
+ begin
+ R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
+ pvalue = R.eval("cor$p.value").to_ruby
+ if pvalue <= 0.05
+ r = R.eval("cor$estimate").to_ruby
+ self.relevant_features[feature_id] = {}
+ self.relevant_features[feature_id]["pvalue"] = pvalue
+ self.relevant_features[feature_id]["r"] = r
+ self.relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
+ self.relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
+ end
+ rescue
+ warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed."
end
- rescue
- warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed."
end
end
self.relevant_features = self.relevant_features.sort{|a,b| a[1]["pvalue"] <=> b[1]["pvalue"]}.to_h
+ p self.relevant_features
end
def predict_substance substance
neighbor_algorithm_parameters = Hash[self.neighbor_algorithm_parameters.map{ |k, v| [k.to_sym, v] }] # convert string keys to symbols
+ neighbor_algorithm_parameters[:relevant_features] = self.relevant_features if self.relevant_features
neighbors = substance.send(neighbor_algorithm, neighbor_algorithm_parameters)
measurements = nil
prediction = {}
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 3e29ae1..c1bf1b5 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -9,10 +9,10 @@ module OpenTox
attr_accessor :scaled_values
- def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:
+ def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
p name
dataset = Dataset.find(dataset_id)
- relevant_features = {}
+ #relevant_features = {}
measurements = []
substances = []
# TODO: exclude query activities!!!
@@ -24,30 +24,6 @@ module OpenTox
end
end
end
- R.assign "tox", measurements
- feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature}
- # identify relevant features
- feature_ids.each do |feature_id|
- feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
- unless feature_values.uniq.size == 1
- R.assign "feature", feature_values
- begin
- R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
- p_value = R.eval("cor$p.value").to_ruby
- if p_value <= 0.05
- r = R.eval("cor$estimate").to_ruby
- relevant_features[feature_id] = {}
- relevant_features[feature_id]["p_value"] = p_value
- relevant_features[feature_id]["r"] = r
- relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
- relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
- end
- rescue
- warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed."
- end
- end
- end
- #p relevant_features.keys.collect{|i| Feature.find(i).name}
neighbors = []
substances.each do |substance|
values = dataset.values(substance,prediction_feature_id)
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index 227f7db..e5d1973 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -54,7 +54,7 @@ class NanoparticleTest < MiniTest::Test
#feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "physchem_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :feature_selection_algorithm => :correlation_filter, :neighbor_algorithm => "physchem_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
cv = RegressionCrossValidation.create model
p cv.rmse
p cv.r_squared
@@ -67,7 +67,7 @@ class NanoparticleTest < MiniTest::Test
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "physchem_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :feature_selection_algorithm => :correlation_filter, :neighbor_algorithm => "physchem_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
cv = RegressionCrossValidation.create model
p cv.rmse
p cv.r_squared