From ca2bb0f90335b1f2c4ecc28ee423e85b281ffcf0 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 4 Nov 2015 17:50:17 +0100 Subject: neighbor search delegated to database backend --- test/compound.rb | 14 ++++++++++++-- test/dataset-long.rb | 1 + test/dataset.rb | 6 ++---- test/fminer-long.rb | 3 +++ test/lazar-classification.rb | 42 ++++++++++++++++++++++++++++++++++++++++++ test/lazar-fminer.rb | 1 + test/lazar-long.rb | 23 ++++++++++++++++++++++- test/lazar-regression.rb | 4 ++-- test/prediction_models.rb | 11 +---------- test/validation.rb | 26 ++++++++++++++++---------- 10 files changed, 102 insertions(+), 29 deletions(-) create mode 100644 test/lazar-classification.rb (limited to 'test') diff --git a/test/compound.rb b/test/compound.rb index 22c152b..ff20c1c 100644 --- a/test/compound.rb +++ b/test/compound.rb @@ -162,7 +162,7 @@ print c.sdf end def test_fingerprint_db_neighbors - skip + #skip training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv") [ "CC(=O)CC(C)C#N", @@ -170,8 +170,18 @@ print c.sdf "C(=O)CC(C)C#N", ].each do |smi| c = OpenTox::Compound.from_smiles smi + t = Time.now neighbors = c.db_neighbors(:training_dataset_id => training_dataset.id, :min_sim => 0.2) - p neighbors + p Time.now - t + t = Time.now + neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :training_dataset_id => training_dataset.id, :min_sim => 0.2}) + p Time.now - t + p neighbors.size + p neighbors2.size + #p neighbors + #p neighbors2 + #p neighbors2 - neighbors + #assert_equal neighbors, neighbors2 end end end diff --git a/test/dataset-long.rb b/test/dataset-long.rb index 5c8dfb8..49b61df 100644 --- a/test/dataset-long.rb +++ b/test/dataset-long.rb @@ -86,6 +86,7 @@ class DatasetLongTest < MiniTest::Test end def test_upload_feature_dataset + skip t = Time.now f = File.join DATA_DIR, "rat_feature_dataset.csv" d = Dataset.from_csv_file f diff --git a/test/dataset.rb b/test/dataset.rb index 4f1e885..1814081 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -127,7 +127,7 @@ class DatasetTest < MiniTest::Test original_csv.shift csv.each_with_index do |row,i| compound = Compound.from_smiles row.shift - original_compound = Compound.from_smiles original_csv[i].shift + original_compound = Compound.from_smiles original_csv[i].shift.strip assert_equal original_compound.inchi, compound.inchi row.each_with_index do |v,j| if v.numeric? @@ -142,7 +142,6 @@ class DatasetTest < MiniTest::Test def test_from_csv d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - p d assert_equal Dataset, d.class assert_equal 1, d.features.size assert_equal 85, d.compounds.size @@ -170,8 +169,7 @@ class DatasetTest < MiniTest::Test def test_from_csv2 File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") } dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv" - p dataset.warnings - assert_equal "Cannot parse SMILES compound ' ' at position 3, all entries are ignored.", dataset.warnings.join + assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join File.delete "#{DATA_DIR}/temp_test.csv" dataset.features.each{|f| feature = Feature.find f.id; feature.delete} dataset.delete diff --git a/test/fminer-long.rb b/test/fminer-long.rb index 0f202b4..845ed71 100644 --- a/test/fminer-long.rb +++ b/test/fminer-long.rb @@ -3,6 +3,7 @@ require_relative "setup.rb" class FminerTest < MiniTest::Test def test_fminer_multicell + skip #skip "multicell segfaults" # TODO aborts, probably fminer # or OpenBabel segfault @@ -15,6 +16,7 @@ class FminerTest < MiniTest::Test end def test_fminer_isscan + skip dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"ISSCAN-multi.csv") feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset)#, :min_frequency => 15) assert_equal feature_dataset.compounds.size, dataset.compounds.size @@ -25,6 +27,7 @@ class FminerTest < MiniTest::Test end def test_fminer_kazius + skip dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv") # TODO reactivate default settings feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 20) diff --git a/test/lazar-classification.rb b/test/lazar-classification.rb new file mode 100644 index 0000000..e8b2181 --- /dev/null +++ b/test/lazar-classification.rb @@ -0,0 +1,42 @@ +require_relative "setup.rb" + +class LazarClassificationTest < MiniTest::Test + + def test_lazar_classification + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") + model = Model::LazarClassification.create training_dataset#, feature_dataset + #assert_equal 'C-C-C=C', feature_dataset.features.first.smarts + + [ { + :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"), + :prediction => "false", + :confidence => 0.25281385281385277, + :nr_neighbors => 11 + },{ + :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"), + :prediction => "false", + :confidence => 0.3639589577089577, + :nr_neighbors => 14 + } ].each do |example| + prediction = model.predict example[:compound] + assert_equal example[:prediction], prediction[:value] + #assert_equal example[:confidence], prediction[:confidence] + #assert_equal example[:nr_neighbors], prediction[:neighbors].size + end + + compound = Compound.from_smiles "CCO" + prediction = model.predict compound + assert_equal ["false"], prediction[:database_activities] + assert_equal "true", prediction[:value] + + # make a dataset prediction + compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") + prediction = model.predict compound_dataset + assert_equal compound_dataset.compounds, prediction.compounds + + assert_equal "Cound not find similar compounds.", prediction.data_entries[7][2] + assert_equal "measured", prediction.data_entries[14][1] + # cleanup + [training_dataset,model,compound_dataset].each{|o| o.delete} + end +end diff --git a/test/lazar-fminer.rb b/test/lazar-fminer.rb index 41e1071..9e024a1 100644 --- a/test/lazar-fminer.rb +++ b/test/lazar-fminer.rb @@ -3,6 +3,7 @@ require_relative "setup.rb" class LazarFminerTest < MiniTest::Test def test_lazar_fminer + skip training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") model = Model::LazarFminerClassification.create training_dataset#, feature_dataset feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id] diff --git a/test/lazar-long.rb b/test/lazar-long.rb index 92d7d5a..525b96e 100644 --- a/test/lazar-long.rb +++ b/test/lazar-long.rb @@ -3,6 +3,7 @@ require_relative "setup.rb" class LazarExtendedTest < MiniTest::Test def test_lazar_bbrc_ham_minfreq + skip dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") model = Model::LazarFminerClassification.create(dataset, :min_frequency => 5) feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id] @@ -21,6 +22,7 @@ class LazarExtendedTest < MiniTest::Test end def test_lazar_bbrc_large_ds + skip dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call_no_dup.csv") model = Model::LazarFminerClassification.create dataset feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id] @@ -44,7 +46,8 @@ class LazarExtendedTest < MiniTest::Test feature_dataset.delete end - def test_lazar_kazius + def test_lazar_fminer_kazius + skip t = Time.now dataset = Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv") p "Dataset upload: #{Time.now-t}" @@ -68,4 +71,22 @@ class LazarExtendedTest < MiniTest::Test #feature_dataset.delete end + def test_lazar_kazius + t = Time.now + dataset = Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv") + p "Dataset upload: #{Time.now-t}" + t = Time.now + model = Model::LazarClassification.create(dataset) + p "Feature mining: #{Time.now-t}" + t = Time.now + 2.times do + compound = Compound.from_smiles("Clc1ccccc1NN") + prediction = model.predict compound + #p prediction + assert_equal "1", prediction[:value] + #assert_in_delta 0.019858401199860445, prediction[:confidence], 0.001 + end + dataset.delete + end + end diff --git a/test/lazar-regression.rb b/test/lazar-regression.rb index 4f5a332..c1dc9b9 100644 --- a/test/lazar-regression.rb +++ b/test/lazar-regression.rb @@ -8,7 +8,7 @@ class LazarRegressionTest < MiniTest::Test compound = Compound.from_smiles "CC(C)(C)CN" prediction = model.predict compound assert_equal 7.2, prediction[:value].round(1) - assert_equal 91, prediction[:neighbors].size + assert_equal 88, prediction[:neighbors].size end def test_mpd_fingerprints @@ -17,7 +17,7 @@ class LazarRegressionTest < MiniTest::Test model.neighbor_algorithm_parameters[:type] = "MP2D" compound = Compound.from_smiles "CCCSCCSCC" prediction = model.predict compound - assert_equal 0.02, prediction[:value].round(2) + assert_equal 0.04, prediction[:value].round(2) assert_equal 3, prediction[:neighbors].size end diff --git a/test/prediction_models.rb b/test/prediction_models.rb index 1b9e788..067c3c8 100644 --- a/test/prediction_models.rb +++ b/test/prediction_models.rb @@ -4,22 +4,13 @@ class PredictionModelTest < MiniTest::Test def test_prediction_model pm = Model::Prediction.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - #dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - #model = Model::LazarFminerClassification.create dataset - #cv = ClassificationCrossValidation.create model - #metadata = JSON.parse(File.read("#{DATA_DIR}/hamster_carcinogenicity.json")) - - #metadata[:model_id] = model.id - #metadata[:crossvalidation_id] = cv.id - #pm = Model::Prediction.new(metadata) - #pm.save [:endpoint,:species,:source].each do |p| refute_empty pm[p] end assert pm.classification? refute pm.regression? pm.crossvalidations.each do |cv| - assert cv.accuracy > 0.75 + assert cv.accuracy > 0.75, "Crossvalidation accuracy (#{cv.accuracy}) should be larger than 0.75. This may happen due to an unfavorable training/test set split." end prediction = pm.predict Compound.from_smiles("CCCC(NN)C") assert_equal "true", prediction[:value] diff --git a/test/validation.rb b/test/validation.rb index 6764a32..7de944c 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -3,6 +3,7 @@ require_relative "setup.rb" class ValidationTest < MiniTest::Test def test_fminer_crossvalidation + skip dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::LazarFminerClassification.create dataset cv = ClassificationCrossValidation.create model @@ -15,12 +16,13 @@ class ValidationTest < MiniTest::Test dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::LazarClassification.create dataset#, features cv = ClassificationCrossValidation.create model - assert cv.accuracy > 0.7 - File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot} - `inkview tmp.svg` + #p cv + assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7" + #File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot} + #`inkview tmp.svg` p cv.nr_unpredicted p cv.accuracy - #assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy should be larger than unweighted accuracy." + assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) should be larger than unweighted accuracy (#{cv.accuracy}) ." end def test_default_regression_crossvalidation @@ -28,11 +30,11 @@ class ValidationTest < MiniTest::Test model = Model::LazarRegression.create dataset cv = RegressionCrossValidation.create model #cv = RegressionCrossValidation.find '561503262b72ed54fd000001' - p cv.id - File.open("tmp.svg","w+"){|f| f.puts cv.correlation_plot} - `inkview tmp.svg` - File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot} - `inkview tmp.svg` + #p cv.id + #File.open("tmp.svg","w+"){|f| f.puts cv.correlation_plot} + #`inkview tmp.svg` + #File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot} + #`inkview tmp.svg` #puts cv.misclassifications.to_yaml p cv.rmse @@ -91,9 +93,13 @@ class ValidationTest < MiniTest::Test model.save cv = ClassificationCrossValidation.create model params = model.neighbor_algorithm_parameters + params.delete :training_dataset_id params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string + cv.validations.each do |validation| - assert_equal params, validation.model.neighbor_algorithm_parameters + validation_params = validation.model.neighbor_algorithm_parameters + validation_params.delete "training_dataset_id" + assert_equal params, validation_params end end -- cgit v1.2.3