From 66c34e4682965272060a121b6e362af67ed4be5f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 27 Aug 2015 20:28:25 +0200 Subject: Initial GUI for Nestec models --- lib/compound.rb | 2 +- lib/crossvalidation.rb | 24 +++++++++++++++++++++--- lib/dataset.rb | 2 +- lib/feature.rb | 5 ----- lib/model.rb | 32 ++++++++++++++++++++++++++------ lib/regression.rb | 5 +++-- 6 files changed, 52 insertions(+), 18 deletions(-) diff --git a/lib/compound.rb b/lib/compound.rb index a819f56..8f393f5 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -133,7 +133,7 @@ module OpenTox if self.svg_id.nil? svg = obconversion(smiles,"smi","svg") file = Mongo::Grid::File.new(svg, :filename => "#{id}.svg", :content_type => "image/svg") - update(:image_id => $gridfs.insert_one(file)) + update(:svg_id => $gridfs.insert_one(file)) end $gridfs.find_one(_id: self.svg_id).data diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 58a9664..a10dc1d 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -105,9 +105,9 @@ module OpenTox field :mae, type: Float field :weighted_rmse, type: Float field :weighted_mae, type: Float - field :weighted_mae, type: Float field :r_squared, type: Float field :correlation_plot_id, type: BSON::ObjectId + field :confidence_plot_id, type: BSON::ObjectId def self.create model, n=10 cv = self.new @@ -147,7 +147,7 @@ module OpenTox predictions.each do |pred| compound_id,activity,prediction,confidence = pred if activity and prediction - error = Math.log(prediction)-Math.log(activity) + error = Math.log10(prediction)-Math.log10(activity) rmse += error**2 weighted_rmse += confidence*error**2 mae += error.abs @@ -224,9 +224,27 @@ module OpenTox end end + def confidence_plot + tmpfile = "/tmp/#{id.to_s}_confidence.svg" + sorted_predictions = predictions.sort{|a,b| b[3]<=>a[3]}.collect{|p| [(Math.log10(p[1])-Math.log10(p[2]))**2,p[3]]} + R.assign "error", sorted_predictions.collect{|p| p[0]} + #R.assign "p", predictions.collect{|p| p[2]} + R.assign "confidence", predictions.collect{|p| p[2]} + #R.eval "diff = log(m)-log(p)" + R.eval "library(ggplot2)" + R.eval "svg(filename='#{tmpfile}')" + R.eval "image = qplot(confidence,error)"#,main='#{self.name}',asp=1,xlim=range, ylim=range)" + R.eval "ggsave(file='#{tmpfile}', plot=image)" + R.eval "dev.off()" + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg") + plot_id = $gridfs.insert_one(file) + update(:confidence_plot_id => plot_id) + $gridfs.find_one(_id: confidence_plot_id).data + end + def correlation_plot unless correlation_plot_id - tmpfile = "/tmp/#{id.to_s}.svg" + tmpfile = "/tmp/#{id.to_s}_correlation.svg" x = predictions.collect{|p| p[1]} y = predictions.collect{|p| p[2]} attributes = Model::Lazar.find(self.model_id).attributes diff --git a/lib/dataset.rb b/lib/dataset.rb index 979753c..28d2120 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -118,7 +118,7 @@ module OpenTox def density_plot # TODO: create/store svg R.assign "acts", data_entries.collect{|r| r.first }#.compact - R.eval "plot(density(log(acts),na.rm= TRUE), main='log(#{features.first.name})')" + R.eval "plot(density(-log(acts),na.rm= TRUE), main='-log(#{features.first.name})')" end # Serialisation diff --git a/lib/feature.rb b/lib/feature.rb index 9521597..6fc2c06 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -2,7 +2,6 @@ module OpenTox # Basic feature class class Feature - field :name, type: String field :nominal, type: Boolean field :numeric, type: Boolean field :measured, type: Boolean @@ -85,14 +84,10 @@ module OpenTox # Feature for categorical bioassay results class NominalBioAssay < NominalFeature - # TODO: needed? move to dataset? - field :description, type: String end # Feature for quantitative bioassay results class NumericBioAssay < NumericFeature - # TODO: needed? move to dataset? - field :description, type: String end end diff --git a/lib/model.rb b/lib/model.rb index 418ec18..aed789c 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -86,7 +86,7 @@ module OpenTox acts.empty? ? nil : n << acts end neighbors.compact! # remove neighbors without training activities - predictions << Algorithm.run(prediction_algorithm, compound, neighbors) + predictions << Algorithm.run(prediction_algorithm, compound, {:neighbors => neighbors,:training_dataset_size => training_dataset.data_entries.size}) end # serialize result @@ -138,7 +138,6 @@ module OpenTox end class LazarFminerClassification < LazarClassification - def self.create training_dataset model = super(training_dataset) model.update "_type" => self.to_s # adjust class @@ -155,14 +154,12 @@ module OpenTox end class LazarRegression < Lazar - def initialize super self.neighbor_algorithm = "OpenTox::Algorithm::Neighbor.fingerprint_similarity" self.prediction_algorithm = "OpenTox::Algorithm::Regression.weighted_average" self.neighbor_algorithm_parameters = {:min_sim => 0.7} end - end class Prediction @@ -179,15 +176,38 @@ module OpenTox field :crossvalidation_id, type: BSON::ObjectId def predict object - Model::Lazar.find(model_id).predict object + Lazar.find(model_id).predict object + end + + def training_dataset + model.training_dataset + end + + def model + Lazar.find model_id end def crossvalidation CrossValidation.find crossvalidation_id end + + def self.from_csv_file file + p file + metadata_file = file.sub(/csv$/,"json") + p metadata_file + bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file + prediction_model = self.new JSON.parse(File.read(metadata_file)) + training_dataset = Dataset.from_csv_file file + # TODO classification + model = LazarRegression.create training_dataset + cv = RegressionCrossValidation.create model + prediction_model[:model_id] = model.id + prediction_model[:crossvalidation_id] = cv.id + prediction_model.save + prediction_model + end end end end - diff --git a/lib/regression.rb b/lib/regression.rb index 020bb3a..2580a1e 100644 --- a/lib/regression.rb +++ b/lib/regression.rb @@ -19,9 +19,10 @@ module OpenTox class Regression - def self.weighted_average compound, neighbors + def self.weighted_average compound, params weighted_sum = 0.0 sim_sum = 0.0 + neighbors = params[:neighbors] neighbors.each do |row| n,sim,acts = row acts.each do |act| @@ -29,7 +30,7 @@ module OpenTox sim_sum += sim end end - confidence = sim_sum/neighbors.size.to_f + confidence = sim_sum*neighbors.size.to_f/params[:training_dataset_size] sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum) {:value => prediction,:confidence => confidence} end -- cgit v1.2.3