diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/classification.rb | 12 | ||||
-rw-r--r-- | test/dataset.rb | 50 | ||||
-rw-r--r-- | test/nanoparticles.rb | 34 | ||||
-rw-r--r-- | test/prediction_models.rb | 1 | ||||
-rw-r--r-- | test/validation.rb | 22 |
5 files changed, 78 insertions, 41 deletions
diff --git a/test/classification.rb b/test/classification.rb index bedbe14..7412714 100644 --- a/test/classification.rb +++ b/test/classification.rb @@ -30,12 +30,14 @@ class LazarClassificationTest < MiniTest::Test # make a dataset prediction compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") - prediction = model.predict compound_dataset - assert_equal compound_dataset.compounds, prediction.compounds + prediction_dataset = model.predict compound_dataset + assert_equal compound_dataset.compounds, prediction_dataset.compounds - assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3] - assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3] + cid = prediction_dataset.compounds[7].id.to_s + assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warning] + cid = prediction_dataset.compounds[9].id.to_s + assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction_dataset.predictions[cid][:warning] # cleanup - [training_dataset,model,compound_dataset].each{|o| o.delete} + [training_dataset,model,compound_dataset,prediction_dataset].each{|o| o.delete} end end diff --git a/test/dataset.rb b/test/dataset.rb index 297251e..a7b8769 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -36,38 +36,34 @@ class DatasetTest < MiniTest::Test assert_equal Dataset, d.class d.name = "Create dataset test" - # features not set - # << operator was removed for efficiency reasons (CH) - #assert_raises BadRequestError do - # d << [Compound.from_smiles("c1ccccc1NN"), 1,2] - #end - # add data entries - d.features = ["test1", "test2"].collect do |title| + features = ["test1", "test2"].collect do |title| f = Feature.new f.name = title f.numeric = true f.save f end - - # wrong feature size - # << operator was removed for efficiency reasons (CH) - #assert_raises BadRequestError do - # d << [Compound.from_smiles("c1ccccc1NN"), 1,2,3] - #end # manual low-level insertions without consistency checks for runtime efficiency + compounds = ["c1ccccc1NN", "CC(C)N", "C1C(C)CCCC1"].collect do |smi| + Compound.from_smiles smi + end data_entries = [] - d.compound_ids << Compound.from_smiles("c1ccccc1NN").id data_entries << [1,2] - d.compound_ids << Compound.from_smiles("CC(C)N").id data_entries << [4,5] - d.compound_ids << Compound.from_smiles("C1C(C)CCCC1").id data_entries << [6,7] - d.data_entries = data_entries + compounds.each_with_index do |c,i| + features.each_with_index do |f,j| + d.data_entries[c.id.to_s] ||= {} + d.data_entries[c.id.to_s][f.id.to_s] ||= [] + d.data_entries[c.id.to_s][f.id.to_s] << data_entries[i][j] + end + end + assert_equal 3, d.compounds.size assert_equal 2, d.features.size + p d.data_entries assert_equal [[1,2],[4,5],[6,7]], d.data_entries d.save # check if dataset has been saved correctly @@ -89,8 +85,14 @@ class DatasetTest < MiniTest::Test assert_equal "multicolumn", new_dataset.name # get features assert_equal 6, new_dataset.features.size - assert_equal 7, new_dataset.compounds.size - assert_equal ["1", nil, "false", nil, nil, 1.0], new_dataset.data_entries.last + assert_equal 5, new_dataset.compounds.size + de = new_dataset.data_entries[new_dataset.compounds.last.id.to_s] + fid = new_dataset.features.first.id.to_s + assert_equal ["1"], de[fid] + fid = new_dataset.features.last.id.to_s + assert_equal [1.0], de[fid] + fid = new_dataset.features[2].id.to_s + assert_equal ["false"], de[fid] d.delete end @@ -117,7 +119,7 @@ class DatasetTest < MiniTest::Test assert d.warnings.grep(/Duplicate compound/) assert d.warnings.grep(/3, 5/) assert_equal 6, d.features.size - assert_equal 7, d.compounds.size + assert_equal 5, d.compounds.size assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7] @@ -195,7 +197,7 @@ class DatasetTest < MiniTest::Test assert_match "EPAFHM.mini.csv", d.source assert_equal 1, d.features.size feature = d.features.first - assert_kind_of NumericBioAssay, feature + assert_kind_of NumericFeature, feature assert_equal 0.0113, d.data_entries[0][0] assert_equal 0.00323, d.data_entries[5][0] d2 = Dataset.find d.id @@ -207,10 +209,10 @@ class DatasetTest < MiniTest::Test dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv") dataset.folds(10).each do |fold| fold.each do |d| - assert_equal d.data_entries.size, d.compound_ids.size - assert_operator d.compound_ids.size, :>=, d.compound_ids.uniq.size + assert_equal d.data_entries.size, d.compounds.size + assert_equal d.compounds.size, :>=, d.compounds.uniq.size end - assert_operator fold[0].compound_ids.uniq.size, :>=, fold[1].compound_ids.uniq.size + assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size end #puts dataset.folds 10 end diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb new file mode 100644 index 0000000..46073a9 --- /dev/null +++ b/test/nanoparticles.rb @@ -0,0 +1,34 @@ +require_relative "setup.rb" + +class NanoparticleTest < MiniTest::Test + + def test_import + dataset_ids = Import::Enanomapper.import + assert_operator Nanoparticle.count , :>, 570, "Only #{Nanoparticle.count} nanoparticles imported" + assert_operator dataset_ids.size, :>, 8, "Only #{dataset_ids.size} bundles imported" + assert dataset_ids.collect{|d| Dataset.find(d).name}.include? ("NanoWiki") + assert dataset_ids.collect{|d| Dataset.find(d).name}.include? ("Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles") + p dataset_ids.collect{|d| {d => Dataset.find(d).name}} + dataset_ids.collect do |d| + d = Dataset.find(d) + p d.name + puts d.to_csv + end + end + + def test_export + Dataset.all.each do |d| + puts d.to_csv + end + end + + def test_create_model + training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles") + model = Model::LazarRegression.create(training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors") + nanoparticle = training_dataset.nanoparticles[-34] + prediction = model.predict nanoparticle + p prediction + refute_nil prediction[:value] + end + +end diff --git a/test/prediction_models.rb b/test/prediction_models.rb index a2e5fe2..49a2472 100644 --- a/test/prediction_models.rb +++ b/test/prediction_models.rb @@ -10,7 +10,6 @@ class PredictionModelTest < MiniTest::Test assert pm.classification? refute pm.regression? pm.crossvalidations.each do |cv| - p cv assert cv.accuracy > 0.74, "Crossvalidation accuracy (#{cv.accuracy}) should be larger than 0.75. This may happen due to an unfavorable training/test set split." end prediction = pm.predict Compound.from_smiles("CCCC(NN)C") diff --git a/test/validation.rb b/test/validation.rb index d8eea59..baee2d1 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -6,17 +6,17 @@ class ValidationTest < MiniTest::Test def test_default_classification_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset + model = Model::LazarClassification.create dataset.features.first, dataset cv = ClassificationCrossValidation.create model - assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7" + assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split" end def test_default_regression_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - model = Model::LazarRegression.create dataset + model = Model::LazarRegression.create dataset.features.first, dataset cv = RegressionCrossValidation.create model - assert cv.rmse < 1.5, "RMSE > 1.5" - assert cv.mae < 1 + assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be larger than 1.5, this may occur due to an unfavorable training/test set split" + assert cv.mae < 1, "MAE #{cv.mae} should be larger than 1, this may occur due to an unfavorable training/test set split" end # parameters @@ -30,7 +30,7 @@ class ValidationTest < MiniTest::Test :type => "FP3" } } - model = Model::LazarClassification.create dataset, params + model = Model::LazarClassification.create dataset.features.first, dataset, params model.save cv = ClassificationCrossValidation.create model params = model.neighbor_algorithm_parameters @@ -54,7 +54,7 @@ class ValidationTest < MiniTest::Test :min_sim => 0.7, } } - model = Model::LazarRegression.create dataset, params + model = Model::LazarRegression.create dataset.features.first, dataset, params cv = RegressionCrossValidation.create model cv.validation_ids.each do |vid| model = Model::Lazar.find(Validation.find(vid).model_id) @@ -70,7 +70,7 @@ class ValidationTest < MiniTest::Test def test_physchem_regression_crossvalidation training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") - model = Model::LazarRegression.create(training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") + model = Model::LazarRegression.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") cv = RegressionCrossValidation.create model refute_nil cv.rmse refute_nil cv.mae @@ -80,7 +80,7 @@ class ValidationTest < MiniTest::Test def test_classification_loo_validation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset + model = Model::LazarClassification.create dataset.features.first, dataset loo = ClassificationLeaveOneOutValidation.create model assert_equal 14, loo.nr_unpredicted refute_empty loo.confusion_matrix @@ -89,7 +89,7 @@ class ValidationTest < MiniTest::Test def test_regression_loo_validation dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") - model = Model::LazarRegression.create dataset + model = Model::LazarRegression.create dataset.features.first, dataset loo = RegressionLeaveOneOutValidation.create model assert loo.r_squared > 0.34 end @@ -98,7 +98,7 @@ class ValidationTest < MiniTest::Test def test_repeated_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset + model = Model::LazarClassification.create dataset.features.first, dataset repeated_cv = RepeatedCrossValidation.create model repeated_cv.crossvalidations.each do |cv| assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" |