From 84222bae2bbb9fb3e0ce3e65de1be8e7f94d2147 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 12 Apr 2016 12:37:37 +0200 Subject: new dataset structure --- test/dataset.rb | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index 297251e..a7b8769 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -36,38 +36,34 @@ class DatasetTest < MiniTest::Test assert_equal Dataset, d.class d.name = "Create dataset test" - # features not set - # << operator was removed for efficiency reasons (CH) - #assert_raises BadRequestError do - # d << [Compound.from_smiles("c1ccccc1NN"), 1,2] - #end - # add data entries - d.features = ["test1", "test2"].collect do |title| + features = ["test1", "test2"].collect do |title| f = Feature.new f.name = title f.numeric = true f.save f end - - # wrong feature size - # << operator was removed for efficiency reasons (CH) - #assert_raises BadRequestError do - # d << [Compound.from_smiles("c1ccccc1NN"), 1,2,3] - #end # manual low-level insertions without consistency checks for runtime efficiency + compounds = ["c1ccccc1NN", "CC(C)N", "C1C(C)CCCC1"].collect do |smi| + Compound.from_smiles smi + end data_entries = [] - d.compound_ids << Compound.from_smiles("c1ccccc1NN").id data_entries << [1,2] - d.compound_ids << Compound.from_smiles("CC(C)N").id data_entries << [4,5] - d.compound_ids << Compound.from_smiles("C1C(C)CCCC1").id data_entries << [6,7] - d.data_entries = data_entries + compounds.each_with_index do |c,i| + features.each_with_index do |f,j| + d.data_entries[c.id.to_s] ||= {} + d.data_entries[c.id.to_s][f.id.to_s] ||= [] + d.data_entries[c.id.to_s][f.id.to_s] << data_entries[i][j] + end + end + assert_equal 3, d.compounds.size assert_equal 2, d.features.size + p d.data_entries assert_equal [[1,2],[4,5],[6,7]], d.data_entries d.save # check if dataset has been saved correctly @@ -89,8 +85,14 @@ class DatasetTest < MiniTest::Test assert_equal "multicolumn", new_dataset.name # get features assert_equal 6, new_dataset.features.size - assert_equal 7, new_dataset.compounds.size - assert_equal ["1", nil, "false", nil, nil, 1.0], new_dataset.data_entries.last + assert_equal 5, new_dataset.compounds.size + de = new_dataset.data_entries[new_dataset.compounds.last.id.to_s] + fid = new_dataset.features.first.id.to_s + assert_equal ["1"], de[fid] + fid = new_dataset.features.last.id.to_s + assert_equal [1.0], de[fid] + fid = new_dataset.features[2].id.to_s + assert_equal ["false"], de[fid] d.delete end @@ -117,7 +119,7 @@ class DatasetTest < MiniTest::Test assert d.warnings.grep(/Duplicate compound/) assert d.warnings.grep(/3, 5/) assert_equal 6, d.features.size - assert_equal 7, d.compounds.size + assert_equal 5, d.compounds.size assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7] @@ -195,7 +197,7 @@ class DatasetTest < MiniTest::Test assert_match "EPAFHM.mini.csv", d.source assert_equal 1, d.features.size feature = d.features.first - assert_kind_of NumericBioAssay, feature + assert_kind_of NumericFeature, feature assert_equal 0.0113, d.data_entries[0][0] assert_equal 0.00323, d.data_entries[5][0] d2 = Dataset.find d.id @@ -207,10 +209,10 @@ class DatasetTest < MiniTest::Test dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv") dataset.folds(10).each do |fold| fold.each do |d| - assert_equal d.data_entries.size, d.compound_ids.size - assert_operator d.compound_ids.size, :>=, d.compound_ids.uniq.size + assert_equal d.data_entries.size, d.compounds.size + assert_equal d.compounds.size, :>=, d.compounds.uniq.size end - assert_operator fold[0].compound_ids.uniq.size, :>=, fold[1].compound_ids.uniq.size + assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size end #puts dataset.folds 10 end -- cgit v1.2.3 From 51f57e2858b60bed74ebcc97189b2188c900c283 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 6 May 2016 12:49:28 +0200 Subject: dataset tests cleanup --- test/dataset.rb | 364 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 242 insertions(+), 122 deletions(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index a7b8769..f028dbe 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -4,6 +4,15 @@ require_relative "setup.rb" class DatasetTest < MiniTest::Test + # basics + + def test_create_empty + d = Dataset.new + assert_equal Dataset, d.class + refute_nil d.id + assert_kind_of BSON::ObjectId, d.id + end + def test_all d1 = Dataset.new d1.save @@ -12,70 +21,160 @@ class DatasetTest < MiniTest::Test d1.delete end + # real datasets + + def test_upload_hamster + d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + assert_equal Dataset, d.class + assert_equal 1, d.features.size + assert_equal 85, d.compounds.size + csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv") + csv.shift + csv.each do |row| + c = Compound.from_smiles row.shift + assert_equal c.toxicities[d.feature_ids.first.to_s], row + end + d.delete + end + + def test_upload_kazius + f = File.join DATA_DIR, "kazius.csv" + d = OpenTox::Dataset.from_csv_file f + csv = CSV.read f + assert_equal csv.size-1, d.compounds.size + assert_equal csv.first.size-1, d.features.size + assert_empty d.warnings + # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1 + c = d.compounds[491] + assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC" + assert_equal c.toxicities[d.feature_ids.first.to_s][0], "1" + d.delete + end + + def test_upload_multicell + duplicates = [ + "InChI=1S/C6HCl5O/c7-1-2(8)4(10)6(12)5(11)3(1)9/h12H", + "InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2", + "InChI=1S/C2HCl3/c3-1-2(4)5/h1H", + "InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2", + "InChI=1S/C4H7Cl/c1-4(2)3-5/h1,3H2,2H3", + "InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3", + "InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3", + ].collect{|inchi| Compound.from_inchi(inchi).smiles} + errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ] + f = File.join DATA_DIR, "multi_cell_call.csv" + d = OpenTox::Dataset.from_csv_file f + csv = CSV.read f + assert_equal true, d.features.first.nominal + assert_equal csv.size-1-errors.size, d.compounds.size + assert_equal csv.first.size-1, d.features.size + puts d.warnings.to_yaml + errors.each do |smi| + refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}} + end + duplicates.each do |smi| + refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}} + end + d.delete + end + + def test_upload_isscan + f = File.join DATA_DIR, "ISSCAN-multi.csv" + d = OpenTox::Dataset.from_csv_file f + csv = CSV.read f + assert_equal csv.size-1, d.compounds.size + assert_equal csv.first.size-1, d.features.size + d.delete + end + + def test_upload_epafhm + f = File.join DATA_DIR, "EPAFHM.csv" + d = OpenTox::Dataset.from_csv_file f + assert_equal Dataset, d.class + csv = CSV.read f + assert_equal csv.size-1, d.compounds.size + assert_equal csv.first.size-1, d.features.size + assert_match "EPAFHM.csv", d.source + assert_equal "EPAFHM", d.name + refute_nil d.warnings + assert_equal 74, d.warnings.size + feature = d.features.first + assert_kind_of NumericFeature, feature + assert_match /row 13/, d.warnings.join + assert_equal 0.0113, d.compounds.first.toxicities[feature.id.to_s].first + assert_equal 0.00323, d.compounds[5].toxicities[feature.id.to_s].first + d2 = Dataset.find d.id + assert_equal 0.0113, d2.compounds[0].toxicities[feature.id.to_s].first + assert_equal 0.00323, d2.compounds[5].toxicities[feature.id.to_s].first + d.delete + end + + # batch predictions + def test_create_without_features_smiles_and_inchi ["smiles", "inchi"].each do |type| d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv") assert_equal Dataset, d.class refute_nil d.id dataset = Dataset.find d.id - #p dataset.compounds assert_equal 3, d.compounds.size.to_i d.delete end end - def test_create_empty - d = Dataset.new - assert_equal Dataset, d.class - refute_nil d.id - assert_kind_of BSON::ObjectId, d.id + # dataset operations + + def test_folds + dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv") + dataset.folds(10).each do |fold| + fold.each do |d| + assert_operator d.compounds.size, :>=, d.compounds.uniq.size + end + assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size + assert_equal dataset.substance_ids.size, fold.first.substance_ids.size + fold.last.substance_ids.size + assert_empty (fold.first.substance_ids & fold.last.substance_ids) + end end - def test_client_create - d = Dataset.new - assert_equal Dataset, d.class - d.name = "Create dataset test" + # serialisation - # add data entries - features = ["test1", "test2"].collect do |title| - f = Feature.new - f.name = title - f.numeric = true - f.save - f + def test_to_csv + d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv" + refute_nil d.warnings + assert d.warnings.grep(/Duplicate compound/) + assert d.warnings.grep(/3, 5/) + assert_equal 6, d.features.size + assert_equal 5, d.compounds.uniq.size + assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size + csv = CSV.parse(d.to_csv) + original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv") + csv.shift + original_csv.shift + original = {} + original_csv.each do |row| + c = Compound.from_smiles row.shift.strip + original[c.inchi] = row.collect{|v| v.strip} end - - # manual low-level insertions without consistency checks for runtime efficiency - compounds = ["c1ccccc1NN", "CC(C)N", "C1C(C)CCCC1"].collect do |smi| - Compound.from_smiles smi + serialized = {} + csv.each do |row| + c = Compound.from_smiles row.shift + serialized[c.inchi] = row end - data_entries = [] - data_entries << [1,2] - data_entries << [4,5] - data_entries << [6,7] - compounds.each_with_index do |c,i| - features.each_with_index do |f,j| - d.data_entries[c.id.to_s] ||= {} - d.data_entries[c.id.to_s][f.id.to_s] ||= [] - d.data_entries[c.id.to_s][f.id.to_s] << data_entries[i][j] + original.each do |inchi,row| + row.each_with_index do |v,i| + if v.numeric? + assert_equal v.to_f, serialized[inchi][i].to_f + else + assert_equal v, serialized[inchi][i] + end end - end - assert_equal 3, d.compounds.size - assert_equal 2, d.features.size - p d.data_entries - assert_equal [[1,2],[4,5],[6,7]], d.data_entries - d.save - # check if dataset has been saved correctly - new_dataset = Dataset.find d.id - assert_equal 3, new_dataset.compounds.size - assert_equal 2, new_dataset.features.size - assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries - d.delete - assert_nil Dataset.find d.id - assert_nil Dataset.find new_dataset.id + end + d.delete end + # special cases/details + def test_dataset_accessors d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv" # create empty dataset @@ -85,8 +184,8 @@ class DatasetTest < MiniTest::Test assert_equal "multicolumn", new_dataset.name # get features assert_equal 6, new_dataset.features.size - assert_equal 5, new_dataset.compounds.size - de = new_dataset.data_entries[new_dataset.compounds.last.id.to_s] + assert_equal 5, new_dataset.compounds.uniq.size + de = new_dataset.compounds.last.toxicities fid = new_dataset.features.first.id.to_s assert_equal ["1"], de[fid] fid = new_dataset.features.last.id.to_s @@ -96,16 +195,6 @@ class DatasetTest < MiniTest::Test d.delete end - def test_create_from_file - d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") - assert_equal Dataset, d.class - refute_nil d.warnings - assert_match "EPAFHM.mini.csv", d.source - assert_equal "EPAFHM.mini.csv", d.name - d.delete - #assert_equal false, URI.accessible?(d.uri) - end - def test_create_from_file_with_wrong_smiles_compound_entries d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv") refute_nil d.warnings @@ -113,56 +202,14 @@ class DatasetTest < MiniTest::Test d.delete end - def test_multicolumn_csv - d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv" - refute_nil d.warnings - assert d.warnings.grep(/Duplicate compound/) - assert d.warnings.grep(/3, 5/) - assert_equal 6, d.features.size - assert_equal 5, d.compounds.size - assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size - assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries - assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7] - csv = CSV.parse(d.to_csv) - original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv") - csv.shift - original_csv.shift - csv.each_with_index do |row,i| - compound = Compound.from_smiles row.shift - original_compound = Compound.from_smiles original_csv[i].shift.strip - assert_equal original_compound.inchi, compound.inchi - row.each_with_index do |v,j| - if v.numeric? - assert_equal original_csv[i][j].strip.to_f, row[j].to_f - else - assert_equal original_csv[i][j].strip, row[j].to_s - end - end - end - d.delete - end - - def test_from_csv - d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - assert_equal Dataset, d.class - assert_equal 1, d.features.size - assert_equal 85, d.compounds.size - assert_equal 85, d.data_entries.size - csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv") - csv.shift - assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten - d.delete - #assert_equal false, URI.accessible?(d.uri) - end - def test_from_csv_classification ["int", "float", "string"].each do |mode| d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv" csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv") csv.shift - entries = d.data_entries.flatten - csv.each_with_index do |r, i| - assert_equal r[1].to_s, entries[i] + csv.each do |row| + c = Compound.from_smiles row.shift + assert_equal c.toxicities[d.feature_ids.first.to_s], row end d.delete end @@ -189,32 +236,105 @@ class DatasetTest < MiniTest::Test datasets.each{|d| d.delete} end - def test_create_from_file - d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") - assert_equal Dataset, d.class - refute_nil d.warnings - assert_match /row 13/, d.warnings.join - assert_match "EPAFHM.mini.csv", d.source - assert_equal 1, d.features.size - feature = d.features.first - assert_kind_of NumericFeature, feature - assert_equal 0.0113, d.data_entries[0][0] - assert_equal 0.00323, d.data_entries[5][0] + # skips, may be removed in the future + + def test_simultanous_upload + skip + threads = [] + 3.times do |t| + threads << Thread.new(t) do |up| + d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + assert_equal OpenTox::Dataset, d.class + assert_equal 1, d.features.size + assert_equal 85, d.compounds.size + csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv") + csv.shift + csv.each do |row| + c = Compound.from_smiles(row.shift) + p row + p c.toxicities + p d.feature_ids.first.to_s + assert_equal row, c.toxicities[d.feature_ids.first.to_s] + end + d.delete + end + end + threads.each {|aThread| aThread.join} + end + + def test_upload_feature_dataset + skip + t = Time.now + f = File.join DATA_DIR, "rat_feature_dataset.csv" + d = Dataset.from_csv_file f + assert_equal 458, d.features.size + d.save + #p "Upload: #{Time.now-t}" d2 = Dataset.find d.id - assert_equal 0.0113, d2.data_entries[0][0] - assert_equal 0.00323, d2.data_entries[5][0] + t = Time.now + assert_equal d.features.size, d2.features.size + csv = CSV.read f + csv.shift # remove header + assert_empty d2.warnings + assert_equal csv.size, d2.compounds.size + assert_equal csv.first.size-1, d2.features.size + d2.compounds.each_with_index do |compound,i| + row = csv[i] + row.shift # remove compound + assert_equal row, d2.data_entries[i] + end + #p "Dowload: #{Time.now-t}" + d2.delete + assert_nil Dataset.find d.id end - def test_folds - dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv") - dataset.folds(10).each do |fold| - fold.each do |d| - assert_equal d.data_entries.size, d.compounds.size - assert_equal d.compounds.size, :>=, d.compounds.uniq.size + def test_client_create + skip + d = Dataset.new + assert_equal Dataset, d.class + d.name = "Create dataset test" + + # add data entries + features = ["test1", "test2"].collect do |title| + f = Feature.new + f.name = title + f.numeric = true + f.save + f + end + + # manual low-level insertions without consistency checks for runtime efficiency + compounds = ["c1ccccc1NN", "CC(C)N", "C1C(C)CCCC1"].collect do |smi| + Compound.from_smiles smi + end + data_entries = [] + data_entries << [1,2] + data_entries << [4,5] + data_entries << [6,7] + compounds.each_with_index do |c,i| + features.each_with_index do |f,j| + d.substance_ids << c.id + d.feature_ids << f.id + c.toxicities[f.id.to_s] = data_entries[i][j] end - assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size end - #puts dataset.folds 10 + + assert_equal 3, d.compounds.size + assert_equal 2, d.features.size + #assert_equal [[1,2],[4,5],[6,7]], d.data_entries + d.save + # check if dataset has been saved correctly + new_dataset = Dataset.find d.id + assert_equal 3, new_dataset.compounds.size + assert_equal 2, new_dataset.features.size + new_dataset.compounds.each_with_index do |c,i| + new_dataset.features.each_with_index do |f,j| + assert_equal data_entries[i][j], c.toxicities[f.id.to_s].first + end + end + d.delete + assert_nil Dataset.find d.id + assert_nil Dataset.find new_dataset.id end end -- cgit v1.2.3 From 48234554ea99b972a01718ac36c4e8332dd9159b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 7 May 2016 10:34:03 +0200 Subject: -log10 for regression datasets, test cleanups --- test/dataset.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index f028dbe..d167558 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -32,7 +32,7 @@ class DatasetTest < MiniTest::Test csv.shift csv.each do |row| c = Compound.from_smiles row.shift - assert_equal c.toxicities[d.feature_ids.first.to_s], row + assert_equal row, c.toxicities[d.feature_ids.first.to_s] end d.delete end @@ -88,14 +88,14 @@ class DatasetTest < MiniTest::Test end def test_upload_epafhm - f = File.join DATA_DIR, "EPAFHM.csv" + f = File.join DATA_DIR, "EPAFHM_log10.csv" d = OpenTox::Dataset.from_csv_file f assert_equal Dataset, d.class csv = CSV.read f assert_equal csv.size-1, d.compounds.size assert_equal csv.first.size-1, d.features.size - assert_match "EPAFHM.csv", d.source - assert_equal "EPAFHM", d.name + assert_match "EPAFHM_log10.csv", d.source + assert_equal "EPAFHM_log10", d.name refute_nil d.warnings assert_equal 74, d.warnings.size feature = d.features.first @@ -209,7 +209,7 @@ class DatasetTest < MiniTest::Test csv.shift csv.each do |row| c = Compound.from_smiles row.shift - assert_equal c.toxicities[d.feature_ids.first.to_s], row + assert_equal row, c.toxicities[d.feature_ids.first.to_s] end d.delete end -- cgit v1.2.3 From 06fc914653face2c58fd4e6c47161cb03e217582 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 8 May 2016 12:22:58 +0200 Subject: default validations fixed --- test/dataset.rb | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index d167558..9bb3409 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -1,5 +1,3 @@ -# TODO; check compound/data_entry sequences with missing and duplicated values - require_relative "setup.rb" class DatasetTest < MiniTest::Test @@ -32,7 +30,7 @@ class DatasetTest < MiniTest::Test csv.shift csv.each do |row| c = Compound.from_smiles row.shift - assert_equal row, c.toxicities[d.feature_ids.first.to_s] + assert_equal row, c.toxicities[d.features.first.id.to_s][d.id.to_s] end d.delete end @@ -47,7 +45,7 @@ class DatasetTest < MiniTest::Test # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1 c = d.compounds[491] assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC" - assert_equal c.toxicities[d.feature_ids.first.to_s][0], "1" + assert_equal c.toxicities[d.feature_ids.first.to_s][d.id.to_s][0], "1" d.delete end @@ -97,15 +95,16 @@ class DatasetTest < MiniTest::Test assert_match "EPAFHM_log10.csv", d.source assert_equal "EPAFHM_log10", d.name refute_nil d.warnings - assert_equal 74, d.warnings.size + #p d.warnings + #assert_equal 74, d.warnings.size feature = d.features.first assert_kind_of NumericFeature, feature assert_match /row 13/, d.warnings.join - assert_equal 0.0113, d.compounds.first.toxicities[feature.id.to_s].first - assert_equal 0.00323, d.compounds[5].toxicities[feature.id.to_s].first + assert_equal -Math.log10(0.0113), d.compounds.first.toxicities[feature.id.to_s][d.id.to_s].first + assert_equal -Math.log10(0.00323), d.compounds[5].toxicities[feature.id.to_s][d.id.to_s].first d2 = Dataset.find d.id - assert_equal 0.0113, d2.compounds[0].toxicities[feature.id.to_s].first - assert_equal 0.00323, d2.compounds[5].toxicities[feature.id.to_s].first + assert_equal -Math.log10(0.0113), d2.compounds[0].toxicities[feature.id.to_s][d.id.to_s].first + assert_equal -Math.log10(0.00323), d2.compounds[5].toxicities[feature.id.to_s][d.id.to_s].first d.delete end @@ -187,11 +186,11 @@ class DatasetTest < MiniTest::Test assert_equal 5, new_dataset.compounds.uniq.size de = new_dataset.compounds.last.toxicities fid = new_dataset.features.first.id.to_s - assert_equal ["1"], de[fid] + assert_equal ["1"], de[fid][d.id.to_s] fid = new_dataset.features.last.id.to_s - assert_equal [1.0], de[fid] + assert_equal [1.0], de[fid][d.id.to_s] fid = new_dataset.features[2].id.to_s - assert_equal ["false"], de[fid] + assert_equal ["false"], de[fid][d.id.to_s] d.delete end @@ -209,7 +208,7 @@ class DatasetTest < MiniTest::Test csv.shift csv.each do |row| c = Compound.from_smiles row.shift - assert_equal row, c.toxicities[d.feature_ids.first.to_s] + assert_equal row, c.toxicities[d.feature_ids.first.to_s][d.id.to_s] end d.delete end @@ -254,7 +253,7 @@ class DatasetTest < MiniTest::Test p row p c.toxicities p d.feature_ids.first.to_s - assert_equal row, c.toxicities[d.feature_ids.first.to_s] + assert_equal row, c.toxicities[d.feature_ids.first.to_s][d.id.to_s] end d.delete end -- cgit v1.2.3 From b2d80ad2e470fcb41af4b747142e5693f2fa4615 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 24 May 2016 13:05:53 +0200 Subject: dataset tests fixed --- test/dataset.rb | 98 +++++++++++++-------------------------------------------- 1 file changed, 21 insertions(+), 77 deletions(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index 9bb3409..7ec9973 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -30,7 +30,7 @@ class DatasetTest < MiniTest::Test csv.shift csv.each do |row| c = Compound.from_smiles row.shift - assert_equal row, c.toxicities[d.features.first.id.to_s][d.id.to_s] + assert_equal row, d.values(c,d.features.first) end d.delete end @@ -45,7 +45,7 @@ class DatasetTest < MiniTest::Test # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1 c = d.compounds[491] assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC" - assert_equal c.toxicities[d.feature_ids.first.to_s][d.id.to_s][0], "1" + assert_equal ["1"], d.values(c,d.features.first) d.delete end @@ -64,9 +64,8 @@ class DatasetTest < MiniTest::Test d = OpenTox::Dataset.from_csv_file f csv = CSV.read f assert_equal true, d.features.first.nominal - assert_equal csv.size-1-errors.size, d.compounds.size + assert_equal 1056, d.compounds.size assert_equal csv.first.size-1, d.features.size - puts d.warnings.to_yaml errors.each do |smi| refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}} end @@ -94,17 +93,13 @@ class DatasetTest < MiniTest::Test assert_equal csv.first.size-1, d.features.size assert_match "EPAFHM_log10.csv", d.source assert_equal "EPAFHM_log10", d.name - refute_nil d.warnings - #p d.warnings - #assert_equal 74, d.warnings.size feature = d.features.first assert_kind_of NumericFeature, feature - assert_match /row 13/, d.warnings.join - assert_equal -Math.log10(0.0113), d.compounds.first.toxicities[feature.id.to_s][d.id.to_s].first - assert_equal -Math.log10(0.00323), d.compounds[5].toxicities[feature.id.to_s][d.id.to_s].first + assert_equal -Math.log10(0.0113), d.values(d.compounds.first,feature).first + assert_equal -Math.log10(0.00323), d.values(d.compounds[4],feature).first d2 = Dataset.find d.id - assert_equal -Math.log10(0.0113), d2.compounds[0].toxicities[feature.id.to_s][d.id.to_s].first - assert_equal -Math.log10(0.00323), d2.compounds[5].toxicities[feature.id.to_s][d.id.to_s].first + assert_equal -Math.log10(0.0113), d2.values(d2.compounds[0],feature).first + assert_equal -Math.log10(0.00323), d2.values(d2.compounds[4],feature).first d.delete end @@ -112,11 +107,11 @@ class DatasetTest < MiniTest::Test def test_create_without_features_smiles_and_inchi ["smiles", "inchi"].each do |type| - d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv") + d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv"), true assert_equal Dataset, d.class refute_nil d.id dataset = Dataset.find d.id - assert_equal 3, d.compounds.size.to_i + assert_equal 3, d.compounds.size d.delete end end @@ -130,8 +125,8 @@ class DatasetTest < MiniTest::Test assert_operator d.compounds.size, :>=, d.compounds.uniq.size end assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size - assert_equal dataset.substance_ids.size, fold.first.substance_ids.size + fold.last.substance_ids.size - assert_empty (fold.first.substance_ids & fold.last.substance_ids) + assert_equal dataset.substances.size, fold.first.substances.size + fold.last.substances.size + assert_empty (fold.first.substances & fold.last.substances) end end @@ -184,13 +179,13 @@ class DatasetTest < MiniTest::Test # get features assert_equal 6, new_dataset.features.size assert_equal 5, new_dataset.compounds.uniq.size - de = new_dataset.compounds.last.toxicities - fid = new_dataset.features.first.id.to_s - assert_equal ["1"], de[fid][d.id.to_s] - fid = new_dataset.features.last.id.to_s - assert_equal [1.0], de[fid][d.id.to_s] - fid = new_dataset.features[2].id.to_s - assert_equal ["false"], de[fid][d.id.to_s] + c = new_dataset.compounds.last + f = new_dataset.features.first + assert_equal ["1"], new_dataset.values(c,f) + f = new_dataset.features.last.id.to_s + assert_equal [1.0], new_dataset.values(c,f) + f = new_dataset.features[2] + assert_equal ["false"], new_dataset.values(c,f) d.delete end @@ -208,7 +203,7 @@ class DatasetTest < MiniTest::Test csv.shift csv.each do |row| c = Compound.from_smiles row.shift - assert_equal row, c.toxicities[d.feature_ids.first.to_s][d.id.to_s] + assert_equal row, d.values(c,d.features.first) end d.delete end @@ -217,7 +212,7 @@ class DatasetTest < MiniTest::Test def test_from_csv2 File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") } dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv" - assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join + assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.", dataset.warnings.join File.delete "#{DATA_DIR}/temp_test.csv" dataset.features.each{|f| feature = Feature.find f.id; feature.delete} dataset.delete @@ -251,9 +246,7 @@ class DatasetTest < MiniTest::Test csv.each do |row| c = Compound.from_smiles(row.shift) p row - p c.toxicities - p d.feature_ids.first.to_s - assert_equal row, c.toxicities[d.feature_ids.first.to_s][d.id.to_s] + assert_equal row, d.values(c,d.features.first) end d.delete end @@ -287,54 +280,5 @@ class DatasetTest < MiniTest::Test assert_nil Dataset.find d.id end - def test_client_create - skip - d = Dataset.new - assert_equal Dataset, d.class - d.name = "Create dataset test" - - # add data entries - features = ["test1", "test2"].collect do |title| - f = Feature.new - f.name = title - f.numeric = true - f.save - f - end - - # manual low-level insertions without consistency checks for runtime efficiency - compounds = ["c1ccccc1NN", "CC(C)N", "C1C(C)CCCC1"].collect do |smi| - Compound.from_smiles smi - end - data_entries = [] - data_entries << [1,2] - data_entries << [4,5] - data_entries << [6,7] - compounds.each_with_index do |c,i| - features.each_with_index do |f,j| - d.substance_ids << c.id - d.feature_ids << f.id - c.toxicities[f.id.to_s] = data_entries[i][j] - end - end - - assert_equal 3, d.compounds.size - assert_equal 2, d.features.size - #assert_equal [[1,2],[4,5],[6,7]], d.data_entries - d.save - # check if dataset has been saved correctly - new_dataset = Dataset.find d.id - assert_equal 3, new_dataset.compounds.size - assert_equal 2, new_dataset.features.size - new_dataset.compounds.each_with_index do |c,i| - new_dataset.features.each_with_index do |f,j| - assert_equal data_entries[i][j], c.toxicities[f.id.to_s].first - end - end - d.delete - assert_nil Dataset.find d.id - assert_nil Dataset.find new_dataset.id - end - end -- cgit v1.2.3 From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 27 May 2016 19:16:16 +0200 Subject: first correlation of nanoparticle predictions --- test/dataset.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index 7ec9973..e59441b 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -154,6 +154,7 @@ class DatasetTest < MiniTest::Test c = Compound.from_smiles row.shift serialized[c.inchi] = row end + #puts serialized.to_yaml original.each do |inchi,row| row.each_with_index do |v,i| if v.numeric? -- cgit v1.2.3 From 458a2d753551ea607f2ed5efdd0ac0a02d55d673 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 1 Jun 2016 12:46:03 +0200 Subject: all tests fixed --- test/dataset.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index e59441b..05759a7 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -246,7 +246,6 @@ class DatasetTest < MiniTest::Test csv.shift csv.each do |row| c = Compound.from_smiles(row.shift) - p row assert_equal row, d.values(c,d.features.first) end d.delete -- cgit v1.2.3 From 398a59885845a49cfda4b37b7058f8a47d11c6d2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 7 Oct 2016 13:34:22 +0200 Subject: remaining tests fixed --- test/dataset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index 05759a7..2c0aa01 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -63,7 +63,7 @@ class DatasetTest < MiniTest::Test f = File.join DATA_DIR, "multi_cell_call.csv" d = OpenTox::Dataset.from_csv_file f csv = CSV.read f - assert_equal true, d.features.first.nominal + assert_equal true, d.features.first.nominal? assert_equal 1056, d.compounds.size assert_equal csv.first.size-1, d.features.size errors.each do |smi| -- cgit v1.2.3 From 09452bba5c407c27721223d126e3f45c12b20a0c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 13 Oct 2016 22:59:45 +0200 Subject: tests pass --- test/dataset.rb | 3 --- 1 file changed, 3 deletions(-) (limited to 'test/dataset.rb') diff --git a/test/dataset.rb b/test/dataset.rb index 2c0aa01..e91e65a 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -231,10 +231,7 @@ class DatasetTest < MiniTest::Test datasets.each{|d| d.delete} end - # skips, may be removed in the future - def test_simultanous_upload - skip threads = [] 3.times do |t| threads << Thread.new(t) do |up| -- cgit v1.2.3