From 6cd7c5eb7eab41c65f2af27d37f05bb61570c58c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 29 Jul 2015 17:10:16 +0200 Subject: kazius lazar predictions working --- test/data_entry.rb | 6 ++++-- test/dataset-long.rb | 18 ++++++------------ test/fminer-long.rb | 13 ++++++------- test/fminer.rb | 21 ++++----------------- test/lazar-fminer.rb | 23 +++++++++-------------- test/lazar-long.rb | 21 +++++++++++++++------ 6 files changed, 44 insertions(+), 58 deletions(-) diff --git a/test/data_entry.rb b/test/data_entry.rb index 7c9e49e..c956352 100644 --- a/test/data_entry.rb +++ b/test/data_entry.rb @@ -43,10 +43,12 @@ class DataEntryTest < MiniTest::Test feature = d.features.first assert_kind_of NumericBioAssay, feature assert_match "EPAFHM.mini.csv", feature.source - assert_equal 0.0113, DataEntry[d.compounds.first, feature] assert_equal 0.0113, d[d.compounds.first, feature] - assert_equal 0.00323, DataEntry[d.compounds[5], feature] assert_equal 0.00323, d[d.compounds[5], feature] + d2 = Dataset.find d.id + p d2.data_entries + assert_equal 0.0113, d2[d.compounds.first, feature] + assert_equal 0.00323, d2[d.compounds[5], feature] end def test_upload_kazius diff --git a/test/dataset-long.rb b/test/dataset-long.rb index cba84dd..fbdeeb9 100644 --- a/test/dataset-long.rb +++ b/test/dataset-long.rb @@ -12,8 +12,7 @@ class DatasetLongTest < MiniTest::Test def test_01_upload_epafhm f = File.join DATA_DIR, "EPAFHM.csv" - d = OpenTox::Dataset.new - d.upload f + d = OpenTox::Dataset.from_csv_file f csv = CSV.read f assert_equal csv.size-1, d.compounds.size assert_equal csv.first.size-1, d.features.size @@ -33,8 +32,7 @@ class DatasetLongTest < MiniTest::Test ] errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ] f = File.join DATA_DIR, "multi_cell_call.csv" - d = OpenTox::Dataset.new - d.upload f + d = OpenTox::Dataset.from_csv_file f csv = CSV.read f assert_equal true, d.features.first.nominal assert_nil d["index"] @@ -49,8 +47,7 @@ class DatasetLongTest < MiniTest::Test def test_03_upload_isscan f = File.join DATA_DIR, "ISSCAN-multi.csv" - d = OpenTox::Dataset.new - d.upload f + d = OpenTox::Dataset.from_csv_file f csv = CSV.read f assert_equal csv.size-1, d.compounds.size assert_equal csv.first.size-1, d.features.size @@ -63,8 +60,7 @@ class DatasetLongTest < MiniTest::Test threads = [] 3.times do |t| threads << Thread.new(t) do |up| - d = OpenTox::Dataset.new - d.upload "#{DATA_DIR}/hamster_carcinogenicity.csv" + d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" assert_equal OpenTox::Dataset, d.class assert_equal 1, d.features.size assert_equal 85, d.compounds.size @@ -81,8 +77,7 @@ class DatasetLongTest < MiniTest::Test def test_05_upload_kazius f = File.join DATA_DIR, "kazius.csv" - d = OpenTox::Dataset.new - d.upload f + d = OpenTox::Dataset.from_csv_file f csv = CSV.read f assert_equal csv.size-1, d.compounds.size assert_equal csv.first.size-1, d.features.size @@ -93,8 +88,7 @@ class DatasetLongTest < MiniTest::Test def test_06_upload_feature_dataset t1 = Time.now f = File.join DATA_DIR, "rat_feature_dataset.csv" - d = OpenTox::Dataset.new - d.upload f + d = OpenTox::Dataset.from_csv_file f t2 = Time.now p "Upload: #{t2-t1}" d2 = OpenTox::Dataset.find d.id diff --git a/test/fminer-long.rb b/test/fminer-long.rb index 187b344..a5027d6 100644 --- a/test/fminer-long.rb +++ b/test/fminer-long.rb @@ -7,14 +7,14 @@ class FminerTest < MiniTest::Test dataset = OpenTox::Dataset.new #multi_cell_call.csv dataset.upload File.join(DATA_DIR,"multi_cell_call.csv") - feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15) + feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset)#, :min_frequency => 15) dataset.delete feature_dataset.delete end def test_fminer_isscan dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"ISSCAN-multi.csv") - feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15) + feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset)#, :min_frequency => 15) assert_equal feature_dataset.compounds.size, dataset.compounds.size p feature_dataset dataset.delete @@ -23,12 +23,11 @@ class FminerTest < MiniTest::Test def test_fminer_kazius dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv") - feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 50) - #feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset, :min_frequency => 200) - #feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15) + # TODO reactivate default settings + feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 20) assert_equal feature_dataset.compounds.size, dataset.compounds.size - p feature_dataset.compounds.size - p feature_dataset.features.size + feature_dataset = Dataset.find feature_dataset.id + assert feature_dataset.data_entries.size, dataset.compounds.size dataset.delete feature_dataset.delete end diff --git a/test/fminer.rb b/test/fminer.rb index e319b5e..649a78e 100644 --- a/test/fminer.rb +++ b/test/fminer.rb @@ -5,29 +5,16 @@ class FminerTest < MiniTest::Test def test_fminer_bbrc dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") refute_nil dataset.id - feature_dataset = OpenTox::Algorithm::Fminer.bbrc :dataset => dataset + feature_dataset = OpenTox::Algorithm::Fminer.bbrc dataset + feature_dataset = Dataset.find feature_dataset.id assert_equal dataset.compounds.size, feature_dataset.compounds.size assert_equal 54, feature_dataset.features.size assert_equal "C-C-C=C", feature_dataset.features.first.smarts compounds = feature_dataset.compounds smarts = feature_dataset.features.collect{|f| f.smarts} match = OpenTox::Algorithm::Descriptor.smarts_count compounds, smarts - p smarts - compounds.each_with_index do |c,i| - p c.smiles - p match[i] - p feature_dataset.feature_values(c) - smarts.each_with_index do |s,j| - #unless match[i][j] == DataEntry.where(:dataset_id => feature_dataset.id, :compound_id => c.id, :feature_id => feature_dataset.features[j]).distinct(:value).first - #p c - #p s - #p feature_dataset.features[j] - #p match[i][j] - #p DataEntry.where(:dataset_id => feature_dataset.id, :compound_id => c.id, :feature_id => feature_dataset.features[j]).distinct(:value) - #end - - #assert_equal match[i][j], feature_dataset[c,feature_dataset.features[j]] - end + feature_dataset.data_entries.each_with_index do |fingerprint,i| + assert_equal match[i], fingerprint end dataset.delete diff --git a/test/lazar-fminer.rb b/test/lazar-fminer.rb index 11fef72..6c0ee41 100644 --- a/test/lazar-fminer.rb +++ b/test/lazar-fminer.rb @@ -3,18 +3,16 @@ require_relative "setup.rb" class LazarFminerTest < MiniTest::Test def test_lazar_fminer - training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") - feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => training_dataset) - #p feature_dataset - model = OpenTox::Model::Lazar.create training_dataset, feature_dataset + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") + feature_dataset = Algorithm::Fminer.bbrc(training_dataset) + model = Model::Lazar.create training_dataset, feature_dataset #feature_dataset = OpenTox::Dataset.find model.feature_dataset_id - p model assert_equal training_dataset.compounds.size, feature_dataset.compounds.size assert_equal 54, feature_dataset.features.size - #feature_dataset.data_entries.each do |e| - #assert_equal e.size, feature_dataset.features.size - #end - assert_equal '[#6&A]-[#6&A]-[#6&A]=[#6&A]', feature_dataset.features.first.smarts + feature_dataset.data_entries.each do |e| + assert_equal e.size, feature_dataset.features.size + end + assert_equal 'C-C-C=C', feature_dataset.features.first.smarts [ { :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"), @@ -34,14 +32,11 @@ class LazarFminerTest < MiniTest::Test end # make a dataset prediction - compound_dataset = OpenTox::Dataset.from_sdf File.join(DATA_DIR,"EPAFHM.mini.csv") - #assert_equal compound_dataset.uri.uri?, true + compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") prediction = model.predict :dataset => compound_dataset assert_equal compound_dataset.compounds, prediction.compounds - #prediction = OpenTox::Dataset.new prediction_uri - #assert_equal prediction.uri.uri?, true # cleanup - [dataset,model,feature_dataset,compound_dataset].each{|o| o.delete} + [training_dataset,model,feature_dataset,compound_dataset].each{|o| o.delete} end end diff --git a/test/lazar-long.rb b/test/lazar-long.rb index 8f59ec3..487a015 100644 --- a/test/lazar-long.rb +++ b/test/lazar-long.rb @@ -5,7 +5,7 @@ class LazarExtendedTest < MiniTest::Test def test_lazar_bbrc_ham_minfreq dataset = OpenTox::MeasuredDataset.new dataset.upload File.join(DATA_DIR,"hamster_carcinogenicity.csv") - model = OpenTox::Model::Lazar.create OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset, :min_frequency => 5) + model = OpenTox::Model::Lazar.create OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 5) feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id assert_equal dataset.compounds.size, feature_dataset.compounds.size assert_equal 41, feature_dataset.features.size @@ -48,13 +48,22 @@ class LazarExtendedTest < MiniTest::Test end def test_lazar_kazius - # TODO find a solution for feature datasets > 16M (size limit in mongodb) - dataset = OpenTox::MeasuredDataset.from_csv_file File.join(DATA_DIR,"kazius.csv") - feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset, :min_frequency => 100) + dataset = Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv") + feature_dataset = Algorithm::Fminer.bbrc(dataset, :min_frequency => 100) assert_equal feature_dataset.compounds.size, dataset.compounds.size - model = OpenTox::Model::Lazar.create feature_dataset + model = Model::Lazar.create dataset, feature_dataset + #model = Model::Lazar.find('55b8e9c07a78383f6700017e') p model.id - dataset.delete + #prediction_times = [] + 2.times do + compound = Compound.from_smiles("Clc1ccccc1NN") + prediction = model.predict :compound => compound + p prediction.data_entries + assert_equal "1", prediction.data_entries.first.first + assert_in_delta 0.019858401199860445, prediction.data_entries.first.last, 0.001 + end + + #dataset.delete #feature_dataset.delete end -- cgit v1.2.3