summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-08-27 20:28:25 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-08-27 20:28:25 +0200
commit 66c34e4682965272060a121b6e362af67ed4be5f (patch)
tree 61350c4ada8cbb6ca724f3af221f229ffcd70560
parent d542e9fe92567c54423f39904111bd5293236416 (diff)
Initial GUI for Nestec models
-rw-r--r-- lib/compound.rb 2
-rw-r--r-- lib/crossvalidation.rb 24
-rw-r--r-- lib/dataset.rb 2
-rw-r--r-- lib/feature.rb 5
-rw-r--r-- lib/model.rb 32
-rw-r--r-- lib/regression.rb 5
6 files changed, 52 insertions, 18 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index a819f56..8f393f5 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -133,7 +133,7 @@ module OpenTox
if self.svg_id.nil?
svg = obconversion(smiles,"smi","svg")
file = Mongo::Grid::File.new(svg, :filename => "#{id}.svg", :content_type => "image/svg")
- update(:image_id => $gridfs.insert_one(file))
+ update(:svg_id => $gridfs.insert_one(file))
end
$gridfs.find_one(_id: self.svg_id).data
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 58a9664..a10dc1d 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -105,9 +105,9 @@ module OpenTox
field :mae, type: Float
field :weighted_rmse, type: Float
field :weighted_mae, type: Float
- field :weighted_mae, type: Float
field :r_squared, type: Float
field :correlation_plot_id, type: BSON::ObjectId
+ field :confidence_plot_id, type: BSON::ObjectId
def self.create model, n=10
cv = self.new
@@ -147,7 +147,7 @@ module OpenTox
predictions.each do |pred|
compound_id,activity,prediction,confidence = pred
if activity and prediction
- error = Math.log(prediction)-Math.log(activity)
+ error = Math.log10(prediction)-Math.log10(activity)
rmse += error**2
weighted_rmse += confidence*error**2
mae += error.abs
@@ -224,9 +224,27 @@ module OpenTox
end
end
+ def confidence_plot
+ tmpfile = "/tmp/#{id.to_s}_confidence.svg"
+ sorted_predictions = predictions.sort{|a,b| b[3]<=>a[3]}.collect{|p| [(Math.log10(p[1])-Math.log10(p[2]))**2,p[3]]}
+ R.assign "error", sorted_predictions.collect{|p| p[0]}
+ #R.assign "p", predictions.collect{|p| p[2]}
+ R.assign "confidence", predictions.collect{|p| p[2]}
+ #R.eval "diff = log(m)-log(p)"
+ R.eval "library(ggplot2)"
+ R.eval "svg(filename='#{tmpfile}')"
+ R.eval "image = qplot(confidence,error)"#,main='#{self.name}',asp=1,xlim=range, ylim=range)"
+ R.eval "ggsave(file='#{tmpfile}', plot=image)"
+ R.eval "dev.off()"
+ file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg")
+ plot_id = $gridfs.insert_one(file)
+ update(:confidence_plot_id => plot_id)
+ $gridfs.find_one(_id: confidence_plot_id).data
+ end
+
def correlation_plot
unless correlation_plot_id
- tmpfile = "/tmp/#{id.to_s}.svg"
+ tmpfile = "/tmp/#{id.to_s}_correlation.svg"
x = predictions.collect{|p| p[1]}
y = predictions.collect{|p| p[2]}
attributes = Model::Lazar.find(self.model_id).attributes
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 979753c..28d2120 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -118,7 +118,7 @@ module OpenTox
def density_plot
# TODO: create/store svg
R.assign "acts", data_entries.collect{|r| r.first }#.compact
- R.eval "plot(density(log(acts),na.rm= TRUE), main='log(#{features.first.name})')"
+ R.eval "plot(density(-log(acts),na.rm= TRUE), main='-log(#{features.first.name})')"
end
# Serialisation
diff --git a/lib/feature.rb b/lib/feature.rb
index 9521597..6fc2c06 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -2,7 +2,6 @@ module OpenTox
# Basic feature class
class Feature
- field :name, type: String
field :nominal, type: Boolean
field :numeric, type: Boolean
field :measured, type: Boolean
@@ -85,14 +84,10 @@ module OpenTox
# Feature for categorical bioassay results
class NominalBioAssay < NominalFeature
- # TODO: needed? move to dataset?
- field :description, type: String
end
# Feature for quantitative bioassay results
class NumericBioAssay < NumericFeature
- # TODO: needed? move to dataset?
- field :description, type: String
end
end
diff --git a/lib/model.rb b/lib/model.rb
index 418ec18..aed789c 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -86,7 +86,7 @@ module OpenTox
acts.empty? ? nil : n << acts
end
neighbors.compact! # remove neighbors without training activities
- predictions << Algorithm.run(prediction_algorithm, compound, neighbors)
+ predictions << Algorithm.run(prediction_algorithm, compound, {:neighbors => neighbors,:training_dataset_size => training_dataset.data_entries.size})
end
# serialize result
@@ -138,7 +138,6 @@ module OpenTox
end
class LazarFminerClassification < LazarClassification
-
def self.create training_dataset
model = super(training_dataset)
model.update "_type" => self.to_s # adjust class
@@ -155,14 +154,12 @@ module OpenTox
end
class LazarRegression < Lazar
-
def initialize
super
self.neighbor_algorithm = "OpenTox::Algorithm::Neighbor.fingerprint_similarity"
self.prediction_algorithm = "OpenTox::Algorithm::Regression.weighted_average"
self.neighbor_algorithm_parameters = {:min_sim => 0.7}
end
-
end
class Prediction
@@ -179,15 +176,38 @@ module OpenTox
field :crossvalidation_id, type: BSON::ObjectId
def predict object
- Model::Lazar.find(model_id).predict object
+ Lazar.find(model_id).predict object
+ end
+
+ def training_dataset
+ model.training_dataset
+ end
+
+ def model
+ Lazar.find model_id
end
def crossvalidation
CrossValidation.find crossvalidation_id
end
+
+ def self.from_csv_file file
+ p file
+ metadata_file = file.sub(/csv$/,"json")
+ p metadata_file
+ bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file
+ prediction_model = self.new JSON.parse(File.read(metadata_file))
+ training_dataset = Dataset.from_csv_file file
+ # TODO classification
+ model = LazarRegression.create training_dataset
+ cv = RegressionCrossValidation.create model
+ prediction_model[:model_id] = model.id
+ prediction_model[:crossvalidation_id] = cv.id
+ prediction_model.save
+ prediction_model
+ end
end
end
end
-
diff --git a/lib/regression.rb b/lib/regression.rb
index 020bb3a..2580a1e 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -19,9 +19,10 @@ module OpenTox
class Regression
- def self.weighted_average compound, neighbors
+ def self.weighted_average compound, params
weighted_sum = 0.0
sim_sum = 0.0
+ neighbors = params[:neighbors]
neighbors.each do |row|
n,sim,acts = row
acts.each do |act|
@@ -29,7 +30,7 @@ module OpenTox
sim_sum += sim
end
end
- confidence = sim_sum/neighbors.size.to_f
+ confidence = sim_sum*neighbors.size.to_f/params[:training_dataset_size]
sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
{:value => prediction,:confidence => confidence}
end