summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-11-10 15:27:26 +0100
committerChristoph Helma <helma@in-silico.ch>2016-11-10 15:27:26 +0100
commit9a06f2ff5ae6bdbe7dc90555599e186f1585e0d2 (patch)
treec9cbb63f398c2937f3cba78a9976c7356a3f79a4
parent85ef2c4982f72c811d5e9fa4ce22e238c512fe6e (diff)
Model::NanoPrediction parameters
-rw-r--r--lib/caret.rb2
-rw-r--r--lib/import.rb7
-rw-r--r--lib/model.rb51
-rw-r--r--lib/similarity.rb4
-rw-r--r--test/model-nanoparticle.rb30
-rw-r--r--test/nanomaterial-prediction-models.rb60
-rw-r--r--test/validation-nanoparticle.rb19
7 files changed, 139 insertions, 34 deletions
diff --git a/lib/caret.rb b/lib/caret.rb
index 18bfc41..7e4f771 100644
--- a/lib/caret.rb
+++ b/lib/caret.rb
@@ -12,7 +12,7 @@ module OpenTox
independent_variables.delete_at i
query_variables.delete_at i
end
- if independent_variables.flatten.uniq == ["NA"]
+ if independent_variables.flatten.uniq == ["NA"] or independent_variables.flatten.uniq == []
prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
prediction[:warning] = "No variables for regression model. Using weighted average of similar substances."
elsif
diff --git a/lib/import.rb b/lib/import.rb
index 541c9b5..8f640b1 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -5,7 +5,12 @@ module OpenTox
class Enanomapper
include OpenTox
- def self.mirror dir="."
+ def self.mirror dir=nil
+ # clean download dir
+ dir ||= File.join(File.dirname(__FILE__),"..","data","enm")
+ FileUtils.rm_rf dir
+ FileUtils.mkdir_p dir
+
#get list of bundle URIs
bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
diff --git a/lib/model.rb b/lib/model.rb
index 549cbd2..809dc48 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -106,7 +106,7 @@ module OpenTox
else
model.algorithms[type] = parameters
end
- end
+ end if algorithms
# parse dependent_variables from training dataset
training_dataset.substances.each do |substance|
@@ -249,6 +249,7 @@ module OpenTox
elsif neighbor_similarities.size == 1
prediction.merge!({:value => dependent_variables.first, :probabilities => nil, :warning => "Only one similar compound in the training set. Predicting its experimental value.", :neighbors => [{:id => neighbor_ids.first, :similarity => neighbor_similarities.first}]})
else
+ query_descriptors.collect!{|d| d ? 1 : 0} if independent_variables[0][0].numeric?
# call prediction algorithm
result = Algorithm.run algorithms[:prediction][:method], dependent_variables:neighbor_dependent_variables,independent_variables:neighbor_independent_variables ,weights:neighbor_similarities, query_variables:query_descriptors
prediction.merge! result
@@ -343,7 +344,7 @@ module OpenTox
field :unit, type: String
field :model_id, type: BSON::ObjectId
field :repeated_crossvalidation_id, type: BSON::ObjectId
- field :leave_one_out_validation_id, type: BSON::ObjectId
+ #field :leave_one_out_validation_id, type: BSON::ObjectId
def predict object
model.predict object
@@ -398,42 +399,28 @@ module OpenTox
class NanoPrediction < Prediction
- def self.from_json_dump dir, category
- Import::Enanomapper.import dir
- training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
- unless training_dataset
- Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
+ def self.create training_dataset: nil, prediction_feature:nil, algorithms: nil
+
+ # find/import training_dataset
+ training_dataset ||= Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ unless training_dataset # try to import from json dump
+ Import::Enanomapper.import
training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ unless training_dataset
+ Import::Enanomapper.mirror
+ Import::Enanomapper.import
+ training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ bad_request_error "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset
+ end
end
- prediction_model = self.new(
- :endpoint => "log2(Net cell association)",
- :source => "https://data.enanomapper.net/",
- :species => "A549 human lung epithelial carcinoma cells",
- :unit => "log2(ug/Mg)"
- )
- prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first
- model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset)
- prediction_model[:model_id] = model.id
- repeated_cv = Validation::RepeatedCrossValidation.create model
- prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
- #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id
- prediction_model.save
- prediction_model
- end
+ prediction_feature ||= Feature.where(name: "log2(Net cell association)", category: "TOX").first
- def self.create dir: dir, algorithms: algorithms
- training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
- unless training_dataset
- Import::Enanomapper.import dir
- training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
- end
prediction_model = self.new(
- :endpoint => "log2(Net cell association)",
- :source => "https://data.enanomapper.net/",
+ :endpoint => prediction_feature.name,
+ :source => prediction_feature.source,
:species => "A549 human lung epithelial carcinoma cells",
- :unit => "log2(ug/Mg)"
+ :unit => prediction_feature.unit
)
- prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first
model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms)
prediction_model[:model_id] = model.id
repeated_cv = Validation::RepeatedCrossValidation.create model
diff --git a/lib/similarity.rb b/lib/similarity.rb
index 772e812..0901936 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -19,6 +19,10 @@ module OpenTox
( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
end
+ #def self.weighted_tanimoto fingerprints
+ #( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
+ #end
+
def self.euclid scaled_properties
sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2}
Math.sqrt(sq.inject(0) {|s,c| s + c})
diff --git a/test/model-nanoparticle.rb b/test/model-nanoparticle.rb
index 88032bc..c5f3223 100644
--- a/test/model-nanoparticle.rb
+++ b/test/model-nanoparticle.rb
@@ -61,6 +61,36 @@ class NanoparticleModelTest < MiniTest::Test
model.delete
end
+ def test_nanoparticle_fingerprint_model_with_feature_selection
+ assert true, @prediction_feature.measured
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D",
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1
+ },
+ }
+ model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature, algorithms: algorithms
+ refute_empty model.algorithms[:feature_selection]
+ refute_empty model.dependent_variables
+ refute_empty model.descriptor_ids
+ refute_empty model.independent_variables
+ assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ nanoparticle = @training_dataset.nanoparticles[-34]
+ assert_includes nanoparticle.dataset_ids, @training_dataset.id
+ prediction = model.predict nanoparticle
+ refute_nil prediction[:value]
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+ prediction = model.predict @training_dataset.substances[14]
+ refute_nil prediction[:value]
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+ model.delete
+ end
+
def test_nanoparticle_calculated_properties_model
skip "Nanoparticle calculate_properties similarity not yet implemented"
assert true, @prediction_feature.measured
diff --git a/test/nanomaterial-prediction-models.rb b/test/nanomaterial-prediction-models.rb
new file mode 100644
index 0000000..b0c05f3
--- /dev/null
+++ b/test/nanomaterial-prediction-models.rb
@@ -0,0 +1,60 @@
+require_relative "setup.rb"
+
+class NanomaterialPredictionModelTest < MiniTest::Test
+
+ def setup
+ @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ unless @training_dataset
+ Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
+ @training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ end
+ @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
+ end
+
+ def test_default_nanomaterial_prediction_model
+ prediction_model = Model::NanoPrediction.create
+ p prediction_model
+ [:endpoint,:species,:source].each do |p|
+ refute_empty prediction_model[p]
+ end
+ assert prediction_model.regression?
+ refute prediction_model.classification?
+ prediction_model.crossvalidations.each do |cv|
+ refute_nil cv.r_squared
+ refute_nil cv.rmse
+ end
+ nanoparticle = @training_dataset.nanoparticles[-34]
+ assert_includes nanoparticle.dataset_ids, @training_dataset.id
+ prediction = prediction_model.predict nanoparticle
+ refute_nil prediction[:value]
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+ prediction_model.delete
+ end
+
+ def test_nanomaterial_prediction_model_parameters
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D",
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1
+ },
+ :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
+ :feature_selection => nil
+ }
+ prediction_model = Model::NanoPrediction.create algorithms: algorithms
+ assert prediction_model.regression?
+ refute prediction_model.classification?
+ prediction_model.crossvalidations.each do |cv|
+ refute_nil cv.r_squared
+ refute_nil cv.rmse
+ end
+ nanoparticle = @training_dataset.nanoparticles[-34]
+ assert_includes nanoparticle.dataset_ids, @training_dataset.id
+ prediction = prediction_model.predict nanoparticle
+ refute_nil prediction[:value]
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+ end
+end
diff --git a/test/validation-nanoparticle.rb b/test/validation-nanoparticle.rb
index 7391f21..5ed70f2 100644
--- a/test/validation-nanoparticle.rb
+++ b/test/validation-nanoparticle.rb
@@ -113,4 +113,23 @@ class NanoparticleValidationTest < MiniTest::Test
refute_nil cv.rmse
end
+ def test_nanoparticle_fingerprint_model_with_feature_selection
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D",
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1
+ },
+ }
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+ cv = CrossValidation.create model
+ p cv.rmse
+ p cv.r_squared
+ refute_nil cv.r_squared
+ refute_nil cv.rmse
+ end
+
end