diff options
author | helma@in-silico.ch <helma@in-silico.ch> | 2018-10-05 17:06:46 +0200 |
---|---|---|
committer | helma@in-silico.ch <helma@in-silico.ch> | 2018-10-05 17:06:46 +0200 |
commit | e718cf76f32fb29d6c7c3732ec82f35b0da49122 (patch) | |
tree | d72323ef501ed3122a11fbca9e2bb8b653e29f23 /test | |
parent | ea0864ae89d57839177c850e3b473f0aa5987474 (diff) |
sdf import, csv files with id column
Diffstat (limited to 'test')
-rw-r--r-- | test/data/input_53.csv | 54 | ||||
-rw-r--r-- | test/data/input_53.tsv | 54 | ||||
-rw-r--r-- | test/dataset.rb | 52 | ||||
-rw-r--r-- | test/setup.rb | 6 |
4 files changed, 163 insertions, 3 deletions
diff --git a/test/data/input_53.csv b/test/data/input_53.csv new file mode 100644 index 0000000..b213027 --- /dev/null +++ b/test/data/input_53.csv @@ -0,0 +1,54 @@ +ID,SMILES
+123-30-8,Oc1ccc(N)cc1
+68391-25-3,OC(COc1ccccc1)CNc2ccc(cc2)Cc3ccc(N)cc3
+62-53-3,Nc1ccccc1
+123-98-8,O=C(CCCCCCCC(=O)Cl)Cl
+106-51-4,O=C1C=CC(=O)C=C1
+7144-65-2,O(c1ccccc1c2ccccc2)CC3OC3
+3130-19-6,O=C(OCC1CCC2OC2(C1))CCCCC(=O)OCC3CCC4OC4(C3)
+140-95-4,O=C(NCO)NCO
+2778-42-9,O=C=NC(c1cccc(c1)C(N=C=O)(C)C)(C)C
+593-60-2,C=CBr
+75-25-2,C(Br)(Br)Br
+1852-16-0,O=C(C=C)NCOCCCC
+107-58-4,O=C(C=C)NC(C)(C)C
+592-35-8,O=C(OCCCC)N
+2426-08-6,O(CCCC)CC1OC1
+79-07-2,O=C(N)CCl
+110-75-8,O(C=C)CCCl
+67-66-3,C(Cl)(Cl)Cl
+26172-55-4,O=C1C=C(Cl)SN1C
+598-09-4,O1CC1(C)CCl
+2556-36-7,O=C=NC1CCC(N=C=O)CC1
+3271-22-5,n1c(nc(nc1OC)c2ccc3ccc4cccc5ccc2c3c45)OC
+2680-03-7,O=C(C=C)N(C)C
+13036-41-4,O=C(C=C)NCOCC
+556-52-5,OCC1OC1
+2530-83-8,O(CCC[Si](OC)(OC)OC)CC1OC1
+106-90-1,O=C(OCC1OC1)C=C
+26761-45-5,O=C(OCC1OC1)C(C)(C)CCCCCC
+122-60-1,O(c1ccccc1)CC2OC2
+2210-79-9,O(c1ccccc1C)CC2OC2
+2461-15-6,O(CC1OC1)CC(CC)CCCC
+75-02-5,FC=C
+98-01-1,O=Cc1occc1
+111-30-8,O=CCCCC=O
+107-22-2,O=CC=O
+78-84-2,O=CC(C)C
+11087-88-0,O=C(OCCCCCC(C)C)CCCCCCCC1OC1(CCCCCCCC)
+3644-11-9,O=C(C=C)NCOC
+1187-59-3,O=C(C=C)NC
+54208-63-8,O(c1ccccc1Cc3ccccc3(OCC2OC2))CC4OC4
+110-26-9,O=C(C=C)NCNC(=O)C=C
+1208-52-2,Nc1ccc(cc1)Cc2ccccc2(N)
+71033-08-4,O(c1ccc(cc1)C(c3ccc(OCC(OCC2OC2)COCCCC)cc3)(C)C)CC(OCC4OC4)COCCCC
+5165-97-9,O=C(C=C)NC(C)(C)CS(=O)(=O)O
+34813-62-2,O=C=NCCCC(C)CN=C=O
+16669-59-3,O=C(C=C)NCOCC(C)C
+80-48-8,O=S(=O)(OC)c1ccc(cc1)C
+2386-87-0,O=C(OCC1CCC2OC2(C1))C3CCC4OC4(C3)
+104-49-4,O=C=Nc1ccc(N=C=O)cc1
+103-71-9,O=C=Nc1ccccc1
+111-19-3,O=C(CCCCCCCCC(=O)Cl)Cl
+7320-37-8,O1CC1CCCCCCCCCCCCCC
+2451-62-9,O=C1N(C(=O)N(C(=O)N1CC2OC2)CC3OC3)CC4OC4
diff --git a/test/data/input_53.tsv b/test/data/input_53.tsv new file mode 100644 index 0000000..c46fdd4 --- /dev/null +++ b/test/data/input_53.tsv @@ -0,0 +1,54 @@ +Id Smiles
+123-30-8 Oc1ccc(N)cc1
+68391-25-3 OC(COc1ccccc1)CNc2ccc(cc2)Cc3ccc(N)cc3
+62-53-3 Nc1ccccc1
+123-98-8 O=C(CCCCCCCC(=O)Cl)Cl
+106-51-4 O=C1C=CC(=O)C=C1
+7144-65-2 O(c1ccccc1c2ccccc2)CC3OC3
+3130-19-6 O=C(OCC1CCC2OC2(C1))CCCCC(=O)OCC3CCC4OC4(C3)
+140-95-4 O=C(NCO)NCO
+2778-42-9 O=C=NC(c1cccc(c1)C(N=C=O)(C)C)(C)C
+593-60-2 C=CBr
+75-25-2 C(Br)(Br)Br
+1852-16-0 O=C(C=C)NCOCCCC
+107-58-4 O=C(C=C)NC(C)(C)C
+592-35-8 O=C(OCCCC)N
+2426-08-6 O(CCCC)CC1OC1
+79-07-2 O=C(N)CCl
+110-75-8 O(C=C)CCCl
+67-66-3 C(Cl)(Cl)Cl
+26172-55-4 O=C1C=C(Cl)SN1C
+598-09-4 O1CC1(C)CCl
+2556-36-7 O=C=NC1CCC(N=C=O)CC1
+3271-22-5 n1c(nc(nc1OC)c2ccc3ccc4cccc5ccc2c3c45)OC
+2680-03-7 O=C(C=C)N(C)C
+13036-41-4 O=C(C=C)NCOCC
+556-52-5 OCC1OC1
+2530-83-8 O(CCC[Si](OC)(OC)OC)CC1OC1
+106-90-1 O=C(OCC1OC1)C=C
+26761-45-5 O=C(OCC1OC1)C(C)(C)CCCCCC
+122-60-1 O(c1ccccc1)CC2OC2
+2210-79-9 O(c1ccccc1C)CC2OC2
+2461-15-6 O(CC1OC1)CC(CC)CCCC
+75-02-5 FC=C
+98-01-1 O=Cc1occc1
+111-30-8 O=CCCCC=O
+107-22-2 O=CC=O
+78-84-2 O=CC(C)C
+11087-88-0 O=C(OCCCCCC(C)C)CCCCCCCC1OC1(CCCCCCCC)
+3644-11-9 O=C(C=C)NCOC
+1187-59-3 O=C(C=C)NC
+54208-63-8 O(c1ccccc1Cc3ccccc3(OCC2OC2))CC4OC4
+110-26-9 O=C(C=C)NCNC(=O)C=C
+1208-52-2 Nc1ccc(cc1)Cc2ccccc2(N)
+71033-08-4 O(c1ccc(cc1)C(c3ccc(OCC(OCC2OC2)COCCCC)cc3)(C)C)CC(OCC4OC4)COCCCC
+5165-97-9 O=C(C=C)NC(C)(C)CS(=O)(=O)O
+34813-62-2 O=C=NCCCC(C)CN=C=O
+16669-59-3 O=C(C=C)NCOCC(C)C
+80-48-8 O=S(=O)(OC)c1ccc(cc1)C
+2386-87-0 O=C(OCC1CCC2OC2(C1))C3CCC4OC4(C3)
+104-49-4 O=C=Nc1ccc(N=C=O)cc1
+103-71-9 O=C=Nc1ccccc1
+111-19-3 O=C(CCCCCCCCC(=O)Cl)Cl
+7320-37-8 O1CC1CCCCCCCCCCCCCC
+2451-62-9 O=C1N(C(=O)N(C(=O)N1CC2OC2)CC3OC3)CC4OC4
diff --git a/test/dataset.rb b/test/dataset.rb index 055a029..11a4697 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -1,6 +1,21 @@ +# batch class + require_relative "setup.rb" class DatasetTest < MiniTest::Test + + # TODO + def test_from_pubchem + d = Dataset.from_pubchem 1190 + end + + def test_merge + skip "TODO" + end + + def test_to_sdf + skip "TODO" + end # basics @@ -21,6 +36,34 @@ class DatasetTest < MiniTest::Test # real datasets + def test_upload_csv_with_id + d = Dataset.from_csv_file "#{DATA_DIR}/input_53.csv" + assert_equal 53, d.compounds.size + assert_equal 1, d.features.size + f = d.features[0] + assert_equal "original_id", f.name + assert_equal ["123-30-8"], d.values(d.compounds.first,f) + end + + def test_upload_tsv_with_id + d = Dataset.from_csv_file "#{DATA_DIR}/input_53.tsv" + assert_equal 53, d.compounds.size + assert_equal 1, d.features.size + assert_equal 1, d.features.size + f = d.features[0] + assert_equal "original_id", f.name + assert_equal ["123-30-8"], d.values(d.compounds.first,f) + end + + def test_upload_sdf + #d = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" + d = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" + assert_equal Compound.from_smiles("C[C@H]1C(=O)O[C@@H]2CCN3[C@@H]2C(=CC3)COC(=O)[C@]([C@]1(C)O)(C)O").smiles, d.compounds.first.smiles + f = Feature.find_by(:name => "original_id") + assert_equal 35, d.features.size + assert_equal ["9415"], d.values(d.compounds.first,f) + end + def test_upload_hamster d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" assert_equal Dataset, d.class @@ -103,6 +146,15 @@ class DatasetTest < MiniTest::Test d.delete end + def test_multiple_uploads + datasets = [] + 2.times do + d = Dataset.from_csv_file("#{DATA_DIR}/hamster_carcinogenicity.csv") + datasets << d + end + assert_equal datasets[0],datasets[1] + end + # batch predictions def test_create_without_features_smiles_and_inchi diff --git a/test/setup.rb b/test/setup.rb index 4a11aa0..c4c04cb 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -5,8 +5,8 @@ require_relative '../lib/lazar.rb' include OpenTox #$mongo.database.drop #$gridfs = $mongo.database.fs # recreate GridFS indexes -PhysChem.descriptors +#PhysChem.descriptors TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first -Import::Enanomapper.import unless training_dataset +#training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first +#Import::Enanomapper.import unless training_dataset |