summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-08-01 18:02:45 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-08-01 18:02:45 +0200
commit d0df314e5f78214917fd0ea8ed3b213872c2a4d2 (patch)
tree e8c3266602c025cd8bb42f1d92f51924e56e592c
parent 8edf76d23b169387b4652c5dd1571ba60b040f96 (diff)
50 times faster bbrc setup by eliminating @fminer.add_fminer_data
-rw-r--r-- standalone.rb 7
-rw-r--r-- test/lazar-fminer.rb 27
-rw-r--r-- test/lazar-long.rb 41
3 files changed, 39 insertions, 36 deletions
diff --git a/standalone.rb b/standalone.rb
index 907fa2c..5c0897f 100644
--- a/standalone.rb
+++ b/standalone.rb
@@ -1,5 +1,6 @@
require 'minitest/autorun'
[
+ "feature",
"algorithm",
"compound",
"dataset-long",
@@ -7,13 +8,13 @@ require 'minitest/autorun'
"descriptor-long",
"descriptor",
"edit_objects",
- #"error",
+ "error",
"fminer",
"lazar-fminer",
"lazar-long",
#"lazar-models",
- #"lazar-physchem-long",
- #"lazar-physchem-short",
+ "lazar-physchem-long",
+ "lazar-physchem-short",
#"lazarweb",
#"task",
#"validation-long",
diff --git a/test/lazar-fminer.rb b/test/lazar-fminer.rb
index 6c0ee41..04d1704 100644
--- a/test/lazar-fminer.rb
+++ b/test/lazar-fminer.rb
@@ -6,7 +6,6 @@ class LazarFminerTest < MiniTest::Test
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
feature_dataset = Algorithm::Fminer.bbrc(training_dataset)
model = Model::Lazar.create training_dataset, feature_dataset
- #feature_dataset = OpenTox::Dataset.find model.feature_dataset_id
assert_equal training_dataset.compounds.size, feature_dataset.compounds.size
assert_equal 54, feature_dataset.features.size
feature_dataset.data_entries.each do |e|
@@ -17,25 +16,33 @@ class LazarFminerTest < MiniTest::Test
[ {
:compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
:prediction => "false",
- :confidence => 0.25281385281385277
+ :confidence => 0.25281385281385277,
+ :nr_neighbors => 11
},{
:compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
:prediction => "false",
- :confidence => 0.3639589577089577
- } ].each do |example|
- prediction_dataset = model.predict :compound => example[:compound]
+ :confidence => 0.3639589577089577,
+ :nr_neighbors => 14
+ }, {
+ :compound => Compound.from_smiles('OCCCCCCCC\C=C/CCCCCCCC'),
+ :prediction => "false",
+ :confidence => 0.5555555555555556,
+ :nr_neighbors => 1
+ }].each do |example|
+ prediction = model.predict example[:compound]
- prediction = prediction_dataset.data_entries.first.first
- confidence = prediction_dataset.data_entries.first.last
- assert_equal example[:prediction], prediction
- assert_equal example[:confidence], confidence
+ assert_equal example[:prediction], prediction[:value]
+ assert_equal example[:confidence], prediction[:confidence]
+ assert_equal example[:nr_neighbors], prediction[:neighbors].size
end
# make a dataset prediction
compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
- prediction = model.predict :dataset => compound_dataset
+ prediction = model.predict compound_dataset
assert_equal compound_dataset.compounds, prediction.compounds
+ assert_match /No neighbors/, prediction.data_entries[7][2]
+ assert_equal "measured", prediction.data_entries[14][1]
# cleanup
[training_dataset,model,feature_dataset,compound_dataset].each{|o| o.delete}
end
diff --git a/test/lazar-long.rb b/test/lazar-long.rb
index 487a015..b69adfa 100644
--- a/test/lazar-long.rb
+++ b/test/lazar-long.rb
@@ -3,30 +3,27 @@ require_relative "setup.rb"
class LazarExtendedTest < MiniTest::Test
def test_lazar_bbrc_ham_minfreq
- dataset = OpenTox::MeasuredDataset.new
- dataset.upload File.join(DATA_DIR,"hamster_carcinogenicity.csv")
- model = OpenTox::Model::Lazar.create OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 5)
- feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = OpenTox::Model::Lazar.create dataset, OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 5)
+ feature_dataset = OpenTox::Dataset.find model.feature_dataset_id
assert_equal dataset.compounds.size, feature_dataset.compounds.size
assert_equal 41, feature_dataset.features.size
- assert_equal '[#7&A]-[#6&A]=[#7&A]', feature_dataset.features.first.title
+ assert_equal 'N-C=N', feature_dataset.features.first.smarts
compound = OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H")
- prediction_dataset = model.predict :compound => compound
- prediction = prediction_dataset.data_entries.first
- assert_equal "false", prediction.first
- assert_equal 0.12380952380952381, prediction.last
+ prediction = model.predict compound
+ assert_equal "false", prediction[:value]
+ assert_equal 0.12380952380952381, prediction[:confidence]
dataset.delete
model.delete
feature_dataset.delete
- prediction_dataset.delete
end
def test_lazar_bbrc_large_ds
# TODO fminer crashes with these settings
- dataset = OpenTox::MeasuredDataset.new
- dataset.upload File.join(DATA_DIR,"multi_cell_call_no_dup.csv")
- feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15)
- model = OpenTox::Model::Lazar.create feature_dataset
+ skip "it seems that fminer aborts without further notice"
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call_no_dup.csv")
+ feature_dataset = OpenTox::Algorithm::Fminer.bbrc dataset#, :min_frequency => 15)
+ model = OpenTox::Model::Lazar.create dataset, feature_dataset
model.save
p model.id
feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id
@@ -34,7 +31,7 @@ class LazarExtendedTest < MiniTest::Test
assert_equal 52, feature_dataset.features.size
assert_equal '[#17&A]-[#6&A]', feature_dataset.features.first.title
compound = OpenTox::Compound.from_inchi("InChI=1S/C10H9NO2S/c1-8-2-4-9(5-3-8)13-6-10(12)11-7-14/h2-5H,6H2,1H3")
- prediction_dataset = model.predict :compound => compound
+ prediction_dataset = model.predict compound
prediction = prediction_dataset.data_entries.first
assert_in_delta 0.025, prediction[:confidence], 0.001
#assert_equal 0.025885845574483608, prediction[:confidence]
@@ -56,15 +53,13 @@ class LazarExtendedTest < MiniTest::Test
p model.id
#prediction_times = []
2.times do
- compound = Compound.from_smiles("Clc1ccccc1NN")
- prediction = model.predict :compound => compound
- p prediction.data_entries
- assert_equal "1", prediction.data_entries.first.first
- assert_in_delta 0.019858401199860445, prediction.data_entries.first.last, 0.001
+ compound = Compound.from_smiles("Clc1ccccc1NN")
+ prediction = model.predict compound
+ assert_equal "1", prediction[:value]
+ assert_in_delta 0.019858401199860445, prediction[:confidence], 0.001
end
-
- #dataset.delete
- #feature_dataset.delete
+ dataset.delete
+ feature_dataset.delete
end
end