summary refs log tree commit diff
path: root/test
diff options
context:
space:
mode:
author helma@in-silico.ch <helma@in-silico.ch> 2018-10-24 18:21:34 +0200
committer helma@in-silico.ch <helma@in-silico.ch> 2018-10-24 18:21:34 +0200
commit 1652fd5df948da7ace622c73d158010add656b9f (patch)
tree 49e7eef3c6cdaaaadc38742e56996aaa145ac3de /test
parent 9d17895ab9e8cd31e0f32e8e622e13612ea5ff77 (diff)
dataset mapfeature_classes
Diffstat (limited to 'test')
-rw-r--r-- test/classification-model.rb 47
-rw-r--r-- test/dataset.rb 89
2 files changed, 82 insertions, 54 deletions
diff --git a/test/classification-model.rb b/test/classification-model.rb
index b94b5e6..7a2a64f 100644
--- a/test/classification-model.rb
+++ b/test/classification-model.rb
@@ -22,37 +22,40 @@ class LazarClassificationTest < MiniTest::Test
assert_kind_of Model::LazarClassification, model
assert_equal algorithms, model.algorithms
[ {
- :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
+ :compound => OpenTox::Compound.from_smiles("OCC(CN(CC(O)C)N=O)O"),
:prediction => "false",
},{
- :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
- :prediction => "false",
+ :compound => OpenTox::Compound.from_smiles("O=CNc1scc(n1)c1ccc(o1)[N+](=O)[O-]"),
+ :prediction => "true",
} ].each do |example|
prediction = model.predict example[:compound]
- p example[:compound]
- p prediction
- #assert_equal example[:prediction], prediction[:value]
+ assert_equal example[:prediction], prediction[:value]
end
- compound = Compound.from_smiles "CCO"
- prediction = model.predict compound
- assert_equal "true", prediction[:value]
- assert_equal ["false"], prediction[:measurements]
-
# make a dataset prediction
- compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
+ compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call.csv")
prediction_dataset = model.predict compound_dataset
- assert_equal compound_dataset.compounds, prediction_dataset.compounds
+ puts prediction_dataset.to_csv
+ assert_equal compound_dataset.compounds.size, prediction_dataset.compounds.size
+ c = Compound.from_smiles "CC(CN(CC(O)C)N=O)O"
+ prediction_feature = prediction_dataset.features.select{|f| f.class == NominalLazarPrediction}[0]
+ assert_equal ["true"], prediction_dataset.values(c, prediction_feature)
+ p_true = LazarPredictionProbability.find_by(:name => "true")
+ p_false = LazarPredictionProbability.find_by(:name => "false")
+ p p_true
+ assert_equal [0.7], prediction_dataset.values(c,p_true)
+ assert_equal [0.0], prediction_dataset.values(c,p_false)
+ assert_equal 0.0, p_false
- cid = prediction_dataset.compounds[7].id.to_s
- assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warnings][0]
- expectations = ["Cannot create prediction: Only one similar compound in the training set.",
- "Could not find similar substances with experimental data in the training dataset."]
- prediction_dataset.predictions.each do |cid,pred|
- assert_includes expectations, pred[:warnings][0] if pred[:value].nil?
- end
- cid = Compound.from_smiles("CCOC(=O)N").id.to_s
- assert_match "excluded", prediction_dataset.predictions[cid][:info]
+# cid = prediction_dataset.compounds[7].id.to_s
+# assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warnings][0]
+# expectations = ["Cannot create prediction: Only one similar compound in the training set.",
+# "Could not find similar substances with experimental data in the training dataset."]
+# prediction_dataset.predictions.each do |cid,pred|
+# assert_includes expectations, pred[:warnings][0] if pred[:value].nil?
+# end
+# cid = Compound.from_smiles("CCOC(=O)N").id.to_s
+# assert_match "excluded", prediction_dataset.predictions[cid][:info]
end
def test_classification_parameters
diff --git a/test/dataset.rb b/test/dataset.rb
index 2b439bb..163f178 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -26,8 +26,8 @@ class DatasetTest < MiniTest::Test
def test_import_pubchem
d = Dataset.from_pubchem_aid 1191
assert_equal 87, d.compounds.size
- assert_equal 2, d.features.size
- assert_equal ["Active"], d.values(d.compounds[10],d.features[1])
+ assert_equal 3, d.features.size
+ assert_equal ["Active"], d.values(d.compounds[10],d.features[2])
# TODO endpoint name
# TODO regression import
end
@@ -35,9 +35,9 @@ class DatasetTest < MiniTest::Test
def test_import_csv_with_id
d = Dataset.from_csv_file "#{DATA_DIR}/input_53.csv"
assert_equal 53, d.compounds.size
- assert_equal 1, d.features.size
- f = d.features[0]
- assert_equal "input_53.ID", f.name
+ assert_equal 2, d.features.size
+ f = d.features[1]
+ assert_equal "ID", f.name
assert_equal OriginalId, f.class
assert_equal ["123-30-8"], d.values(d.compounds.first,f)
end
@@ -45,16 +45,16 @@ class DatasetTest < MiniTest::Test
def test_import_tsv_with_id
d = Dataset.from_csv_file "#{DATA_DIR}/input_53.tsv"
assert_equal 53, d.compounds.size
- assert_equal 1, d.features.size
- f = d.features[0]
- assert_equal "input_53.ID", f.name
+ assert_equal 2, d.features.size
+ f = d.features[1]
+ assert_equal "ID", f.name
assert_equal OriginalId, f.class
assert_equal ["123-30-8"], d.values(d.compounds.first,f)
end
def test_import_sdf
d = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf"
- assert_equal 35, d.features.size
+ assert_equal 37, d.features.size
assert_kind_of NumericSubstanceProperty, d.features[1]
assert_equal NominalSubstanceProperty, d.features.last.class
assert_equal 602, d.compounds.size
@@ -64,7 +64,7 @@ class DatasetTest < MiniTest::Test
def test_import_hamster
d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
assert_equal Dataset, d.class
- assert_equal 1, d.features.size
+ assert_equal 3, d.features.size
assert_equal 85, d.compounds.size
assert_equal NominalBioActivity, d.features.first.class
csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
@@ -81,7 +81,7 @@ class DatasetTest < MiniTest::Test
d = OpenTox::Dataset.from_csv_file f
csv = CSV.read f
assert_equal csv.size-1, d.compounds.size
- assert_equal csv.first.size-1, d.features.size
+ assert_equal csv.first.size+1, d.features.size
assert_empty d.warnings
# 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1
c = d.compounds[491]
@@ -121,8 +121,9 @@ class DatasetTest < MiniTest::Test
d = OpenTox::Dataset.from_csv_file f
csv = CSV.read f
assert_equal csv.size-1, d.compounds.size
- assert_equal csv.first.size-1, d.features.size
- d.delete
+ assert_equal csv.first.size+1, d.features.size
+ # TODO fix csv output (headers, column order)
+ puts d.to_csv
end
def test_import_epafhm
@@ -131,7 +132,7 @@ class DatasetTest < MiniTest::Test
assert_equal Dataset, d.class
csv = CSV.read f
assert_equal csv.size-1, d.compounds.size
- assert_equal csv.first.size-1, d.features.size
+ assert_equal csv.first.size+1, d.features.size
assert_match "EPAFHM_log10.csv", d.source
assert_equal "EPAFHM_log10", d.name
feature = d.features.first
@@ -168,23 +169,6 @@ class DatasetTest < MiniTest::Test
# dataset operations
- def test_merge
- skip # TODO use new Features
- source_feature = Feature.where(:name => "Ames test categorisation").first
- target_feature = Feature.where(:name => "Mutagenicity").first
- kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
- hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
- efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
- d = Dataset.merge [kazius,hansen,efsa], {source_feature => target_feature}, {1 => "mutagen", 0 => "nonmutagen"}
- #File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
- assert_equal 8281, d.compounds.size
- c = Compound.from_smiles("C/C=C/C=O")
- assert_equal ["mutagen"], d.values(c,target_feature)
- assert_equal "/home/ist/lazar/test/data/cas_4337.sdf, /home/ist/lazar/test/data/hansen.csv, /home/ist/lazar/test/data/efsa.csv", d.source
- p d.features
- assert_equal 4, d.features.size
- end
-
def test_folds
dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
dataset.folds(10).each do |fold|
@@ -197,10 +181,48 @@ class DatasetTest < MiniTest::Test
end
end
+ def test_copy
+ d = Dataset.from_csv_file("#{DATA_DIR}/hamster_carcinogenicity.csv")
+ copy = d.copy
+ assert_equal d.data_entries, copy.data_entries
+ assert_equal d.name, copy.name
+ assert_equal d.id.to_s, copy.source
+ end
+
+ def test_map
+ d = Dataset.from_csv_file("#{DATA_DIR}/hamster_carcinogenicity.csv")
+ assert_equal 1, d.bioactivity_features.size
+ map = {"true" => "carcinogen", "false" => "non-carcinogen"}
+ mapped = d.map(d.bioactivity_features.first, map)
+ c = d.compounds.sample
+ assert_equal d.values(c,d.bioactivity_features.first).collect{|v| map[v]}, mapped.values(c,mapped.transformed_bioactivity_features.first)
+ assert_equal d.original_id(c), mapped.original_id(c)
+ assert_equal d.bioactivity_features.first.name, mapped.bioactivity_features.first.name
+ assert_equal ["carcinogen","non-carcinogen"], mapped.transformed_bioactivity_features.first.accept_values
+ end
+
+ def test_merge
+ kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
+ hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
+ efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
+ hansen_mapped = hansen.map hansen.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
+ efsa_mapped = efsa.map efsa.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
+ datasets = [kazius,hansen_mapped,efsa_mapped]
+ d = Dataset.merge datasets, datasets.collect{|d| d.bioactivity_features}.flatten.uniq
+ File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
+ assert_equal 8281, d.compounds.size
+ c = Compound.from_smiles("C/C=C/C=O")
+ assert_equal ["mutagen"], d.values(c,d.bioactivity_features.first)
+ assert_equal "/home/ist/lazar/test/data/cas_4337.sdf, /home/ist/lazar/test/data/hansen.csv, /home/ist/lazar/test/data/efsa.csv", d.source
+ p d.features
+ assert_equal 4, d.features.size
+ end
+
# serialisation
def test_to_csv
d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
+ # TODO warnings
refute_nil d.warnings
assert d.warnings.grep(/Duplicate compound/)
assert d.warnings.grep(/3, 5/)
@@ -268,6 +290,7 @@ class DatasetTest < MiniTest::Test
def test_create_from_file_with_wrong_smiles_compound_entries
d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
+ p d.to_csv
refute_nil d.warnings
assert_match /2|3|4|5|6|7|8/, d.warnings.join
d.delete
@@ -289,6 +312,8 @@ class DatasetTest < MiniTest::Test
def test_from_csv2
File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
+ p dataset
+ p dataset.to_csv
assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.", dataset.warnings.join
File.delete "#{DATA_DIR}/temp_test.csv"
dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
@@ -313,7 +338,7 @@ class DatasetTest < MiniTest::Test
threads << Thread.new(t) do |up|
d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
assert_equal OpenTox::Dataset, d.class
- assert_equal 1, d.features.size
+ assert_equal 3, d.features.size
assert_equal 85, d.compounds.size
csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
csv.shift