From 128fd36b2531756c15a93776871e80eb44e524f1 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 2 Jun 2016 19:01:18 +0200 Subject: proteomics regression validation --- lib/model.rb | 30 ++++++++++++++++++------------ lib/nanoparticle.rb | 28 ++-------------------------- 2 files changed, 20 insertions(+), 38 deletions(-) (limited to 'lib') diff --git a/lib/model.rb b/lib/model.rb index 3482aee..277bca3 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -31,7 +31,7 @@ module OpenTox self.neighbor_algorithm_parameters ||= {} self.neighbor_algorithm_parameters[:dataset_id] = training_dataset.id - #send(feature_selection_algorithm.to_sym) if feature_selection_algorithm + send(feature_selection_algorithm.to_sym) if feature_selection_algorithm save end @@ -49,25 +49,31 @@ module OpenTox feature_ids = training_dataset.substances.collect{ |s| s["physchem_descriptors"].keys}.flatten.uniq feature_ids.each do |feature_id| feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]} - R.assign "feature", feature_values - begin - R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')" - pvalue = R.eval("cor$p.value").to_ruby - if pvalue <= 0.05 - r = R.eval("cor$estimate").to_ruby - self.relevant_features[feature_id] = {} - self.relevant_features[feature_id]["pvalue"] = pvalue - self.relevant_features[feature_id]["r"] = r + unless feature_values.uniq.size == 1 + R.assign "feature", feature_values + begin + R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')" + pvalue = R.eval("cor$p.value").to_ruby + if pvalue <= 0.05 + r = R.eval("cor$estimate").to_ruby + self.relevant_features[feature_id] = {} + self.relevant_features[feature_id]["pvalue"] = pvalue + self.relevant_features[feature_id]["r"] = r + self.relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby + self.relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby + end + rescue + warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed." end - rescue - warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed." end end self.relevant_features = self.relevant_features.sort{|a,b| a[1]["pvalue"] <=> b[1]["pvalue"]}.to_h + p self.relevant_features end def predict_substance substance neighbor_algorithm_parameters = Hash[self.neighbor_algorithm_parameters.map{ |k, v| [k.to_sym, v] }] # convert string keys to symbols + neighbor_algorithm_parameters[:relevant_features] = self.relevant_features if self.relevant_features neighbors = substance.send(neighbor_algorithm, neighbor_algorithm_parameters) measurements = nil prediction = {} diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 3e29ae1..c1bf1b5 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -9,10 +9,10 @@ module OpenTox attr_accessor :scaled_values - def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id: + def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features: p name dataset = Dataset.find(dataset_id) - relevant_features = {} + #relevant_features = {} measurements = [] substances = [] # TODO: exclude query activities!!! @@ -24,30 +24,6 @@ module OpenTox end end end - R.assign "tox", measurements - feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature} - # identify relevant features - feature_ids.each do |feature_id| - feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]} - unless feature_values.uniq.size == 1 - R.assign "feature", feature_values - begin - R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')" - p_value = R.eval("cor$p.value").to_ruby - if p_value <= 0.05 - r = R.eval("cor$estimate").to_ruby - relevant_features[feature_id] = {} - relevant_features[feature_id]["p_value"] = p_value - relevant_features[feature_id]["r"] = r - relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby - relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby - end - rescue - warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed." - end - end - end - #p relevant_features.keys.collect{|i| Feature.find(i).name} neighbors = [] substances.each do |substance| values = dataset.values(substance,prediction_feature_id) -- cgit v1.2.3