From d0df314e5f78214917fd0ea8ed3b213872c2a4d2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 1 Aug 2015 18:02:45 +0200 Subject: 50 times faster bbrc setup by eliminating @fminer.add_fminer_data --- standalone.rb | 7 ++++--- test/lazar-fminer.rb | 27 +++++++++++++++++---------- test/lazar-long.rb | 41 ++++++++++++++++++----------------------- 3 files changed, 39 insertions(+), 36 deletions(-) diff --git a/standalone.rb b/standalone.rb index 907fa2c..5c0897f 100644 --- a/standalone.rb +++ b/standalone.rb @@ -1,5 +1,6 @@ require 'minitest/autorun' [ + "feature", "algorithm", "compound", "dataset-long", @@ -7,13 +8,13 @@ require 'minitest/autorun' "descriptor-long", "descriptor", "edit_objects", - #"error", + "error", "fminer", "lazar-fminer", "lazar-long", #"lazar-models", - #"lazar-physchem-long", - #"lazar-physchem-short", + "lazar-physchem-long", + "lazar-physchem-short", #"lazarweb", #"task", #"validation-long", diff --git a/test/lazar-fminer.rb b/test/lazar-fminer.rb index 6c0ee41..04d1704 100644 --- a/test/lazar-fminer.rb +++ b/test/lazar-fminer.rb @@ -6,7 +6,6 @@ class LazarFminerTest < MiniTest::Test training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") feature_dataset = Algorithm::Fminer.bbrc(training_dataset) model = Model::Lazar.create training_dataset, feature_dataset - #feature_dataset = OpenTox::Dataset.find model.feature_dataset_id assert_equal training_dataset.compounds.size, feature_dataset.compounds.size assert_equal 54, feature_dataset.features.size feature_dataset.data_entries.each do |e| @@ -17,25 +16,33 @@ class LazarFminerTest < MiniTest::Test [ { :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"), :prediction => "false", - :confidence => 0.25281385281385277 + :confidence => 0.25281385281385277, + :nr_neighbors => 11 },{ :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"), :prediction => "false", - :confidence => 0.3639589577089577 - } ].each do |example| - prediction_dataset = model.predict :compound => example[:compound] + :confidence => 0.3639589577089577, + :nr_neighbors => 14 + }, { + :compound => Compound.from_smiles('OCCCCCCCC\C=C/CCCCCCCC'), + :prediction => "false", + :confidence => 0.5555555555555556, + :nr_neighbors => 1 + }].each do |example| + prediction = model.predict example[:compound] - prediction = prediction_dataset.data_entries.first.first - confidence = prediction_dataset.data_entries.first.last - assert_equal example[:prediction], prediction - assert_equal example[:confidence], confidence + assert_equal example[:prediction], prediction[:value] + assert_equal example[:confidence], prediction[:confidence] + assert_equal example[:nr_neighbors], prediction[:neighbors].size end # make a dataset prediction compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") - prediction = model.predict :dataset => compound_dataset + prediction = model.predict compound_dataset assert_equal compound_dataset.compounds, prediction.compounds + assert_match /No neighbors/, prediction.data_entries[7][2] + assert_equal "measured", prediction.data_entries[14][1] # cleanup [training_dataset,model,feature_dataset,compound_dataset].each{|o| o.delete} end diff --git a/test/lazar-long.rb b/test/lazar-long.rb index 487a015..b69adfa 100644 --- a/test/lazar-long.rb +++ b/test/lazar-long.rb @@ -3,30 +3,27 @@ require_relative "setup.rb" class LazarExtendedTest < MiniTest::Test def test_lazar_bbrc_ham_minfreq - dataset = OpenTox::MeasuredDataset.new - dataset.upload File.join(DATA_DIR,"hamster_carcinogenicity.csv") - model = OpenTox::Model::Lazar.create OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 5) - feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") + model = OpenTox::Model::Lazar.create dataset, OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 5) + feature_dataset = OpenTox::Dataset.find model.feature_dataset_id assert_equal dataset.compounds.size, feature_dataset.compounds.size assert_equal 41, feature_dataset.features.size - assert_equal '[#7&A]-[#6&A]=[#7&A]', feature_dataset.features.first.title + assert_equal 'N-C=N', feature_dataset.features.first.smarts compound = OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H") - prediction_dataset = model.predict :compound => compound - prediction = prediction_dataset.data_entries.first - assert_equal "false", prediction.first - assert_equal 0.12380952380952381, prediction.last + prediction = model.predict compound + assert_equal "false", prediction[:value] + assert_equal 0.12380952380952381, prediction[:confidence] dataset.delete model.delete feature_dataset.delete - prediction_dataset.delete end def test_lazar_bbrc_large_ds # TODO fminer crashes with these settings - dataset = OpenTox::MeasuredDataset.new - dataset.upload File.join(DATA_DIR,"multi_cell_call_no_dup.csv") - feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15) - model = OpenTox::Model::Lazar.create feature_dataset + skip "it seems that fminer aborts without further notice" + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call_no_dup.csv") + feature_dataset = OpenTox::Algorithm::Fminer.bbrc dataset#, :min_frequency => 15) + model = OpenTox::Model::Lazar.create dataset, feature_dataset model.save p model.id feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id @@ -34,7 +31,7 @@ class LazarExtendedTest < MiniTest::Test assert_equal 52, feature_dataset.features.size assert_equal '[#17&A]-[#6&A]', feature_dataset.features.first.title compound = OpenTox::Compound.from_inchi("InChI=1S/C10H9NO2S/c1-8-2-4-9(5-3-8)13-6-10(12)11-7-14/h2-5H,6H2,1H3") - prediction_dataset = model.predict :compound => compound + prediction_dataset = model.predict compound prediction = prediction_dataset.data_entries.first assert_in_delta 0.025, prediction[:confidence], 0.001 #assert_equal 0.025885845574483608, prediction[:confidence] @@ -56,15 +53,13 @@ class LazarExtendedTest < MiniTest::Test p model.id #prediction_times = [] 2.times do - compound = Compound.from_smiles("Clc1ccccc1NN") - prediction = model.predict :compound => compound - p prediction.data_entries - assert_equal "1", prediction.data_entries.first.first - assert_in_delta 0.019858401199860445, prediction.data_entries.first.last, 0.001 + compound = Compound.from_smiles("Clc1ccccc1NN") + prediction = model.predict compound + assert_equal "1", prediction[:value] + assert_in_delta 0.019858401199860445, prediction[:confidence], 0.001 end - - #dataset.delete - #feature_dataset.delete + dataset.delete + feature_dataset.delete end end -- cgit v1.2.3