summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-10-13 17:34:31 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-10-13 17:34:31 +0200
commit ad7ec6a1e33f69557fe64371581d5f42a65ecaa8 (patch)
tree 7bb819b950790d34fb4bc9746f67b71298f2d31c
parent 9e99495ecbff147218023c136bade9e56a502fed (diff)
classification fixed
-rw-r--r-- lib/model.rb 63
-rw-r--r-- test/model-classification.rb (renamed from test/classification.rb) 16
-rw-r--r-- test/model.rb 7
-rw-r--r-- test/nanoparticles.rb 13
4 files changed, 60 insertions, 39 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 7029c31..b949042 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -57,7 +57,10 @@ module OpenTox
if substance_classes.first == "OpenTox::Compound"
model.algorithms = {
- :descriptors => ['MP2D'],
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D",
+ },
:similarity => {
:method => "Algorithm::Similarity.tanimoto",
:min => 0.1
@@ -77,7 +80,10 @@ module OpenTox
elsif substance_classes.first == "OpenTox::Nanoparticle"
model.algorithms = {
- :descriptors => ["P-CHEM"],
+ :descriptors => {
+ :method => "properties",
+ :category => "P-CHEM",
+ },
#:descriptors => ["P-CHEM","Proteomics"],
:similarity => {
:method => "Algorithm::Similarity.weighted_cosine",
@@ -115,34 +121,41 @@ module OpenTox
end if values
end
+ descriptor_method = model.algorithms[:descriptors][:method]
+ case descriptor_method
# parse fingerprints
- if model.fingerprints?
- model.algorithms[:descriptors].each do |type|
- model.substances.each_with_index do |s,i|
- model.fingerprints[i] ||= []
- model.fingerprints[i] += s.fingerprint(type)
- model.fingerprints[i].uniq!
- end
+ when "fingerprint"
+ type = model.algorithms[:descriptors][:type]
+ model.substances.each_with_index do |s,i|
+ model.fingerprints[i] ||= []
+ model.fingerprints[i] += s.fingerprint(type)
+ model.fingerprints[i].uniq!
end
model.descriptor_ids = model.fingerprints.flatten.uniq
model.descriptor_ids.each do |d|
- # resulting model may break BSON size limit (e.g. f Kazius dataset
+ # resulting model may break BSON size limit (e.g. f Kazius dataset)
model.independent_variables << model.substance_ids.collect_with_index{|s,i| model.fingerprints[i].include? d} if model.algorithms[:prediction][:method].match /Caret/
end
- else
- # parse independent_variables
- if (model.algorithms[:descriptors] & [PhysChem::OPENBABEL,PhysChem::CDK,PhysChem::JOELIB]).empty?
- properties = model.substances.collect { |s| s.properties }
- all_property_ids = properties.collect{|p| p.keys}.flatten.uniq
- model.descriptor_ids = all_property_ids.select{|id| model.algorithms[:descriptors].include? Feature.find(id).category }
- model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}}
-
- # calculate physchem properties
- else
- properties = model.substances.collect { |s| s.calculate_properties(model.algorithms[:descriptors]) }
- model.descriptor_ids = properties.collect{|p| p.keys}.flatten.uniq
- model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i]}}
+ # calculate physchem properties
+ when "calculate_properties"
+ features = model.algorithms[:descriptors][:features]
+ model.descriptor_ids = features.collect{|f| f.id.to_s}
+ model.algorithms[:descriptors].delete(:features)
+ model.algorithms[:descriptors].delete(:type)
+ model.substances.each_with_index do |s,i|
+ s.calculate_properties(features).each_with_index do |v,j|
+ model.independent_variables[j] ||= []
+ model.independent_variables[j][i] = v
+ end
end
+ # parse independent_variables
+ when "properties"
+ properties = model.substances.collect { |s| s.properties }
+ all_property_ids = properties.collect{|p| p.keys}.flatten.uniq
+ model.descriptor_ids = all_property_ids.select{|id| model.algorithms[:descriptors].include? Feature.find(id).category }
+ model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}}
+ else
+ bad_request_error "Descriptor method '#{descriptor_method}' not implemented."
end
if model.algorithms[:feature_selection] and model.algorithms[:feature_selection][:method]
@@ -165,7 +178,7 @@ module OpenTox
case algorithms[:similarity][:method]
when /tanimoto/ # binary features
- similarity_descriptors = algorithms[:descriptors].collect{|type| substance.fingerprint(type)}.flatten.uniq
+ similarity_descriptors = substance.fingerprint algorithms[:descriptors][:type]
# TODO this excludes descriptors only present in the query substance
query_descriptors = descriptor_ids.collect{|id| similarity_descriptors.include? id}
when /euclid|cosine/ # quantitative features
@@ -295,7 +308,7 @@ module OpenTox
end
def fingerprints?
- algorithms[:similarity][:method].match("tanimoto") ? true : false
+ algorithms[:descriptors][:method] == "fingerprint" ? true : false
end
end
diff --git a/test/classification.rb b/test/model-classification.rb
index c670bb5..1424f6a 100644
--- a/test/classification.rb
+++ b/test/model-classification.rb
@@ -4,7 +4,10 @@ class LazarClassificationTest < MiniTest::Test
def test_classification_default
algorithms = {
- :descriptors => [ "MP2D" ],
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D"
+ },
:similarity => {
:method => "Algorithm::Similarity.tanimoto",
:min => 0.1
@@ -55,7 +58,10 @@ class LazarClassificationTest < MiniTest::Test
def test_classification_parameters
algorithms = {
- :descriptors => ['MACCS'],
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MACCS"
+ },
:similarity => {
:min => 0.4
},
@@ -86,7 +92,11 @@ class LazarClassificationTest < MiniTest::Test
training_dataset.delete
end
- def test_fingerprint_feature_selection
+ def test_caret_classification
+ skip
+ end
+
+ def test_fingerprint_chisq_feature_selection
skip
end
diff --git a/test/model.rb b/test/model.rb
index 322ad90..027efe4 100644
--- a/test/model.rb
+++ b/test/model.rb
@@ -49,7 +49,10 @@ class ModelTest < MiniTest::Test
def test_physchem_regression
algorithms = {
- :descriptors => [PhysChem::OPENBABEL],
+ :descriptors => {
+ :method => "calculate_properties",
+ :features => PhysChem.openbabel_descriptors,
+ },
:similarity => {
:method => "Algorithm::Similarity.cosine",
}
@@ -60,9 +63,9 @@ class ModelTest < MiniTest::Test
assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
assert_equal "Algorithm::Similarity.cosine", model.algorithms[:similarity][:method]
assert_equal 0.1, model.algorithms[:similarity][:min]
+ algorithms[:descriptors].delete :features
assert_equal algorithms[:descriptors], model.algorithms[:descriptors]
prediction = model.predict training_dataset.substances[10]
- p prediction
refute_nil prediction[:value]
# TODO test predictin
end
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index c489cb7..9a67e63 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -1,6 +1,5 @@
require_relative "setup.rb"
-
class NanoparticleTest < MiniTest::Test
include OpenTox::Validation
@@ -13,7 +12,7 @@ class NanoparticleTest < MiniTest::Test
@prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
end
- def test_create_model
+ def test_nanoparticle_model
model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
nanoparticle = @training_dataset.nanoparticles[-34]
prediction = model.predict nanoparticle
@@ -23,6 +22,8 @@ class NanoparticleTest < MiniTest::Test
model.delete
end
+ # validations
+
def test_validate_default_nanoparticle_model
model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
cv = CrossValidation.create model
@@ -77,15 +78,9 @@ class NanoparticleTest < MiniTest::Test
refute_nil cv.rmse
end
- def test_export
- skip
- Dataset.all.each do |d|
- puts d.to_csv
- end
- end
def test_import_ld
- skip
+ skip # Ambit JSON-LD export defunct
dataset_ids = Import::Enanomapper.import_ld
end
end