first reasonable results for nanoparticle crossvalidation

author: Christoph Helma <helma@in-silico.ch> 2016-05-04 19:24:42 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-05-04 19:24:42 +0200
commit: 05386e748270c337c66f6f379317ea4b25905236 (patch)
tree: 4b62f1c9449dded9cd7670715a5735f5bc93dd2e /lib/model.rb
parent: 79238bddb59607aa9f759caa9e3c8db176709703 (diff)
1 files changed, 44 insertions, 57 deletions
diff --git a/lib/model.rb b/lib/model.rb
index f61368e..841ab20 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -2,7 +2,7 @@ module OpenTox
 
   module Model
 
-    class Model
+    class Lazar 
       include OpenTox
       include Mongoid::Document
       include Mongoid::Timestamps
@@ -10,27 +10,13 @@ module OpenTox
 
       field :name, type: String
       field :creator, type: String, default: __FILE__
-      # datasets
       field :training_dataset_id, type: BSON::ObjectId
-      # algorithms
       field :prediction_algorithm, type: String
-      # prediction feature
       field :prediction_feature_id, type: BSON::ObjectId
-
-      def training_dataset
-        Dataset.find(training_dataset_id)
-      end
-
-      def prediction_feature
-        Feature.find(prediction_feature_id)
-      end
-    end
-
-    class Lazar < Model
-
-      # algorithms
       field :neighbor_algorithm, type: String
       field :neighbor_algorithm_parameters, type: Hash, default: {}
+      field :feature_selection_algorithm, type: String
+      field :relevant_features, type: Hash
 
       # Create a lazar model from a training_dataset and a feature_dataset
       # @param [OpenTox::Dataset] training_dataset
@@ -45,10 +31,43 @@ module OpenTox
         self.name ||= "#{training_dataset.name} #{prediction_feature.name}" 
         self.neighbor_algorithm_parameters ||= {}
         self.neighbor_algorithm_parameters[:training_dataset_id] = training_dataset.id
+
+        Algorithm.run(feature_selection_algorithm, self) if feature_selection_algorithm
         save
         self
       end
 
+      # Select physchem descriptors whose values correlate with the toxicities.
+      # Runs a Pearson cor.test in R for every descriptor and keeps those with
+      # p <= 0.05, stored in relevant_features ordered by ascending p-value.
+      # @return [Hash] feature id => {"pvalue" => ..., "r" => ...}
+      def correlation_filter
+        toxicities = []
+        substances = []
+        training_dataset.substances.each do |s|
+          s["toxicities"][prediction_feature_id].each do |act|
+            toxicities << act
+            substances << s # repeated once per measurement to stay aligned with toxicities
+          end
+        end
+        R.assign "tox", toxicities
+        selected = {} # local accumulator: the relevant_features field declares no default
+        feature_ids = training_dataset.substances.collect{ |s| s["physchem_descriptors"].keys}.flatten.uniq
+        feature_ids.each do |feature_id|
+          feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id]}
+          R.assign "feature", feature_values
+          begin
+            #R.eval "cor <- cor.test(-log(tox),-log(feature),use='complete')"
+            R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='complete')"
+            pvalue = R.eval("cor$p.value").to_ruby
+            selected[feature_id] = {"pvalue" => pvalue, "r" => R.eval("cor$estimate").to_ruby} if pvalue <= 0.05
+          rescue
+            warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{toxicities}) failed."
+          end
+        end
+        self.relevant_features = selected.sort_by{|_,v| v["pvalue"]}.to_h
+      end
+
       def predict_compound compound
         neighbors = compound.send(neighbor_algorithm, neighbor_algorithm_parameters)
         # remove neighbors without prediction_feature
@@ -63,7 +82,6 @@ module OpenTox
           prediction[:warning] = "#{database_activities.size} compounds have been removed from neighbors, because they have the same structure as the query compound."
           neighbors.delete_if{|n| n["_id"] == compound.id}
         end
-        #neighbors.delete_if{|n| n['toxicities'].empty? or n['toxicities'][prediction_feature.id.to_s] == [nil] }
         if neighbors.empty?
           prediction.merge!({:value => nil,:confidence => nil,:warning => "Could not find similar compounds with experimental data in the training dataset.",:neighbors => []})
         else
@@ -123,6 +141,14 @@ module OpenTox
 
       end
 
+      def training_dataset # looks up the Dataset whose id is stored in training_dataset_id
+        Dataset.find(training_dataset_id)
+      end
+
+      def prediction_feature # looks up the Feature whose id is stored in prediction_feature_id
+        Feature.find(prediction_feature_id)
+      end
+
     end
 
     class LazarClassification < Lazar
@@ -229,45 +255,6 @@ module OpenTox
       end
     end
 
-    class NanoLazar
-      include OpenTox
-      include Mongoid::Document
-      include Mongoid::Timestamps
-      store_in collection: "models"
-
-      field :name, type: String
-      field :creator, type: String, default: __FILE__
-      # datasets
-      field :training_dataset_id, type: BSON::ObjectId
-      # algorithms
-      field :prediction_algorithm, type: String
-      # prediction feature
-      field :prediction_feature_id, type: BSON::ObjectId
-      field :training_particle_ids, type: Array
-
-      def self.create_all
-        nanoparticles = Nanoparticle.all
-        toxfeatures = Nanoparticle.all.collect{|np| np.toxicities.keys}.flatten.uniq.collect{|id| Feature.find id}
-        tox = {}
-        toxfeatures.each do |t|
-          tox[t] = nanoparticles.select{|np| np.toxicities.keys.include? t.id.to_s}
-        end
-        tox.select!{|t,nps| nps.size > 50}
-        tox.collect do |t,nps|
-          find_or_create_by(:prediction_feature_id => t.id, :training_particle_ids => nps.collect{|np| np.id})
-        end
-      end
-
-      def predict nanoparticle
-        training = training_particle_ids.collect{|id| Nanoparticle.find id}
-        training_features = training.collect{|t| t.physchem_descriptors.keys}.flatten.uniq
-        query_features = nanoparticle.physchem_descriptors.keys
-        common_features = (training_features & query_features)
-        #p common_features
-      end
-
-    end
-
   end
 
 end
author	Christoph Helma <helma@in-silico.ch>	2016-05-04 19:24:42 +0200
committer	Christoph Helma <helma@in-silico.ch>	2016-05-04 19:24:42 +0200
commit	05386e748270c337c66f6f379317ea4b25905236 (patch)
tree	4b62f1c9449dded9cd7670715a5735f5bc93dd2e /lib/model.rb
parent	79238bddb59607aa9f759caa9e3c8db176709703 (diff)