From 32d767ee7cfcc19337892551906950621f348174 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 28 Apr 2016 08:11:12 +0200 Subject: nanoparticle crossvalidation technically working --- lib/crossvalidation.rb | 2 +- lib/regression.rb | 14 +++++++------- lib/validation.rb | 6 +++--- test/nanoparticles.rb | 10 +++++++++- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 50afb6f..0ae36c4 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -44,7 +44,7 @@ module OpenTox $logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds" #end end - Process.waitall + #Process.waitall cv.validation_ids = Validation.where(:crossvalidation_id => cv.id).distinct(:_id) cv.validations.each do |validation| nr_instances += validation.nr_instances diff --git a/lib/regression.rb b/lib/regression.rb index 5610a77..3a59c14 100644 --- a/lib/regression.rb +++ b/lib/regression.rb @@ -9,6 +9,7 @@ module OpenTox neighbors = params[:neighbors] neighbors.each do |row| sim = row["tanimoto"] + sim ||= 1 # TODO: sim f nanoparticles if row["toxicities"][params[:prediction_feature_id].to_s] row["toxicities"][params[:prediction_feature_id].to_s].each do |act| weighted_sum += sim*Math.log10(act) @@ -120,7 +121,7 @@ module OpenTox result[:warning] = "No variables for regression model. Using weighted average of similar compounds." return result else - query_descriptors = pc_ids.collect{|i| compound.physchem_descriptors[i].for_R} + query_descriptors = pc_ids.collect{|i| compound.physchem_descriptors[i].for_R if compound.physchem_descriptors[i]}.compact remove_idx = [] query_descriptors.each_with_index do |v,i| remove_idx << i if v == "NA" @@ -172,13 +173,9 @@ rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R")) R.eval "data <- #{r_data_frame}" R.assign "features", training_features - R.eval "names(data) <- append(c('activities'),features)" # - #begin + begin + R.eval "names(data) <- append(c('activities'),features)" # R.eval "model <- train(activities ~ ., data = data, method = '#{method}', na.action = na.pass)" - #rescue - #return nil - #end - p query_feature_values R.eval "fingerprint <- data.frame(rbind(c(#{query_feature_values.join ','})))" R.eval "names(fingerprint) <- features" R.eval "prediction <- predict(model,fingerprint)" @@ -187,6 +184,9 @@ rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R")) :rmse => R.eval("getTrainPerf(model)$TrainRMSE").to_f, :r_squared => R.eval("getTrainPerf(model)$TrainRsquared").to_f, } + rescue + return nil + end end end diff --git a/lib/validation.rb b/lib/validation.rb index 6b515e4..68cb1a1 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -24,12 +24,12 @@ module OpenTox def self.create model, training_set, test_set, crossvalidation=nil - atts = model.attributes.dup # do not modify attributes from original model + atts = model.attributes.dup # do not modify attributes of the original model atts["_id"] = BSON::ObjectId.new atts[:training_dataset_id] = training_set.id validation_model = model.class.create model.prediction_feature, training_set, atts validation_model.save - predictions = validation_model.predict test_set.compounds + predictions = validation_model.predict test_set.substances predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 predictions.each do |cid,prediction| @@ -43,7 +43,7 @@ module OpenTox validation = self.new( :model_id => validation_model.id, :test_dataset_id => test_set.id, - :nr_instances => test_set.compounds.size, + :nr_instances => test_set.substances.size, :nr_unpredicted => nr_unpredicted, :predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence ) diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb index 31bb903..46c6620 100644 --- a/test/nanoparticles.rb +++ b/test/nanoparticles.rb @@ -60,7 +60,15 @@ class NanoparticleTest < MiniTest::Test prediction = model.predict nanoparticle p prediction #p prediction - #refute_nil prediction[:value] + refute_nil prediction[:value] + end + + def test_validate_model + training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles") + feature = Feature.find_or_create_by(name: "7.99 Toxicity (other) ICP-AES", category: "TOX", unit: "mL/ug(Mg)") + model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"}) + cv = RegressionCrossValidation.create model + p cv end end -- cgit v1.2.3