summaryrefslogtreecommitdiff
path: root/test/model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'test/model.rb')
-rw-r--r--test/model.rb177
1 files changed, 177 insertions, 0 deletions
diff --git a/test/model.rb b/test/model.rb
new file mode 100644
index 0000000..563d081
--- /dev/null
+++ b/test/model.rb
@@ -0,0 +1,177 @@
+require_relative "setup.rb"
+
+class ModelTest < MiniTest::Test
+
+ def test_default_regression
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D"
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1
+ },
+ :prediction => {
+ :method => "Algorithm::Regression.caret",
+ :parameters => "pls",
+ },
+ :feature_selection => nil,
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset
+ assert_kind_of Model::LazarRegression, model
+ assert_equal algorithms, model.algorithms
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+ end
+
+ def test_regression_parameters
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D"
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.3
+ },
+ :prediction => {
+ :method => "Algorithm::Regression.weighted_average",
+ :parameters => "rf",
+ },
+ :feature_selection => nil,
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarRegression, model
+ assert_equal "Algorithm::Regression.weighted_average", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+ assert_equal algorithms[:prediction][:parameters], model.algorithms[:prediction][:parameters]
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_equal 0.83, prediction[:value].round(2)
+ end
+
+ def test_physchem_regression
+ algorithms = {
+ :descriptors => "physchem",
+ :similarity => {
+ :method => "Algorithm::Similarity.weighted_cosine",
+ }
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarRegression, model
+ assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method]
+ assert_equal 0.1, model.algorithms[:similarity][:min]
+ assert_equal algorithms[:descriptors], model.algorithms[:descriptors]
+ end
+
+ def test_nanoparticle_default
+ training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ unless training_dataset
+ Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
+ training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ end
+ model = Model::Lazar.create training_dataset: training_dataset
+ assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "rf", model.algorithms[:prediction][:parameters]
+ assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method]
+ prediction = model.predict training_dataset.substances[14]
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+
+ end
+
+ def test_nanoparticle_parameters
+ end
+
+ def test_regression_with_feature_selection
+ algorithms = {
+ :feature_selection => {
+ :method => "Algorithm::FeatureSelection.correlation_filter",
+ },
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarRegression, model
+ assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ assert_equal 0.1, model.algorithms[:similarity][:min]
+ assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method]
+ end
+
+ def test_caret_parameters
+ end
+
+ def test_default_classification
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => 'MP2D',
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1
+ },
+ :prediction => {
+ :method => "Algorithm::Classification.weighted_majority_vote",
+ },
+ :feature_selection => nil,
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::Lazar.create training_dataset: training_dataset
+ assert_kind_of Model::LazarClassification, model
+ assert_equal algorithms, model.algorithms
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_equal "false", prediction[:value]
+ end
+
+ def test_classification_parameters
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => 'MACCS',
+ },
+ :similarity => {
+ :min => 0.4
+ },
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarClassification, model
+ assert_equal "Algorithm::Classification.weighted_majority_vote", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_equal "false", prediction[:value]
+ assert_equal 4, prediction[:neighbors].size
+ end
+
+=begin
+ def test_physchem_description
+ assert_equal 355, PhysChem.descriptors.size
+ assert_equal 15, PhysChem.openbabel_descriptors.size
+ assert_equal 295, PhysChem.cdk_descriptors.size
+ assert_equal 45, PhysChem.joelib_descriptors.size
+ assert_equal 310, PhysChem.unique_descriptors.size
+ end
+
+ def test_physchem
+ assert_equal 355, PhysChem.descriptors.size
+ c = Compound.from_smiles "CC(=O)CC(C)C"
+ logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
+ assert_equal 1.6215, logP.calculate(c)
+ jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP"
+ assert_equal 3.5951, jlogP.calculate(c)
+ alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
+ assert_equal 0.35380000000000034, alogP.calculate(c)
+ end
+=end
+
+end