summary refs log tree commit diff
path: root/test
diff options
context:
space:
mode:
author helma@in-silico.ch <helma@in-silico.ch> 2018-10-25 18:58:19 +0200
committer helma@in-silico.ch <helma@in-silico.ch> 2018-10-25 18:58:19 +0200
commit 15f4ad23eb918a91d52779887ccfb51bc6547f1b (patch)
tree ace5a31be2d7e0155c1f3e5e292bbe7c3a9c2084 /test
parent 24e5f9cc16ba164f860620184dc39b024bc3d384 (diff)
dataset merge
Diffstat (limited to 'test')
-rw-r--r-- test/dataset.rb 44
-rw-r--r-- test/use_cases.rb 50
2 files changed, 66 insertions, 28 deletions
diff --git a/test/dataset.rb b/test/dataset.rb
index 5a620dd..0beea2d 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -1,5 +1,3 @@
-# batch class
-
require_relative "setup.rb"
class DatasetTest < MiniTest::Test
@@ -123,8 +121,6 @@ class DatasetTest < MiniTest::Test
csv = CSV.read f
assert_equal csv.size-1, d.compounds.size
assert_equal csv.first.size+1, d.features.size
- # TODO fix csv output (headers, column order)
- #puts d.to_csv
end
def test_import_epafhm
@@ -197,48 +193,40 @@ class DatasetTest < MiniTest::Test
mapped = d.map(d.bioactivity_features.first, map)
c = d.compounds.sample
assert_equal d.values(c,d.bioactivity_features.first).collect{|v| map[v]}, mapped.values(c,mapped.transformed_bioactivity_features.first)
- assert_equal d.original_id(c), mapped.original_id(c)
+ assert_equal d.values(c,d.original_id_features.first), mapped.values(c,mapped.original_id_features.first)
assert_equal d.bioactivity_features.first.name, mapped.bioactivity_features.first.name
assert_equal ["carcinogen","non-carcinogen"], mapped.transformed_bioactivity_features.first.accept_values
end
def test_merge
- skip
kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
- hansen_mapped = hansen.map hansen.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
- efsa_mapped = efsa.map efsa.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
- datasets = [kazius,hansen_mapped,efsa_mapped]
- d = Dataset.merge datasets, datasets.collect{|d| d.bioactivity_features}.flatten.uniq
- File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
+ #p "mapping hansen"
+ #hansen_mapped = hansen.map hansen.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
+ #p "mapping efsa"
+ #efsa_mapped = efsa.map efsa.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
+ #datasets = [kazius,hansen_mapped,efsa_mapped]
+ datasets = [kazius,hansen,efsa]
+ d = Dataset.merge datasets#, datasets.collect{|d| d.bioactivity_features}.flatten.uniq
assert_equal 8281, d.compounds.size
c = Compound.from_smiles("C/C=C/C=O")
assert_equal ["mutagen"], d.values(c,d.bioactivity_features.first)
- assert_equal "/home/ist/lazar/test/data/cas_4337.sdf, /home/ist/lazar/test/data/hansen.csv, /home/ist/lazar/test/data/efsa.csv", d.source
- assert_equal 4, d.features.size
+ assert_equal datasets.collect{|d| d.id.to_s}.join(", "), d.source
+ assert_equal 8, d.features.size
+ p "serializing"
+ File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
end
# serialisation
def test_to_csv
- # TODO
- skip
d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
csv = CSV.parse(d.to_csv)
- original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv")
- header = csv.shift
- original_header = original_csv.shift.collect{|h| h.strip}
- #p header, original_header
- original_header.each_with_index do |name,i|
- name = "Original SMILES" if name == "SMILES"
- j = header.index name
- original_csv.each_with_index do |row,k|
- row.collect!{|c| c.strip}
- assert_equal csv[k][j], original_csv[k][i]
- end
- end
- d.delete
+ assert_equal "3 5", csv[3][0]
+ assert_match "3, 5", csv[3][9]
+ assert_match "Duplicate", csv[3][9]
+ assert_equal '7,c1nccc1,[N]1C=CC=C1,1,,false,,,1.0,', csv[5].join(",")
end
def test_to_sdf
diff --git a/test/use_cases.rb b/test/use_cases.rb
new file mode 100644
index 0000000..d9ae78b
--- /dev/null
+++ b/test/use_cases.rb
@@ -0,0 +1,50 @@
+require_relative "setup.rb"
+
+class UseCasesTest < MiniTest::Test
+
+ def test_PA
+ kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
+ hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
+ efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
+ datasets = [kazius,hansen,efsa]
+ training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true
+ model = Model::Validation.create training_dataset: training_dataset, species: "Salmonella typhimurium", endpoint: "Mutagenicity"
+ pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf"
+ prediction_dataset = model.predict pa
+ puts prediction_dataset.to_csv
+ assert_equal 8281, d.compounds.size
+ end
+
+ def test_public_models
+ skip
+=begin
+ #classification
+ aids = [
+ 1205, #Rodents (multiple species/sites)
+ 1208, # rat carc
+ 1199 # mouse
+ # Mutagenicity
+
+
+ 1195 #MRDD
+ 1188 #FHM
+ 1208, # rat carc td50
+ 1199 # mouse td50
+
+ # daphnia
+ # Blood Brain Barrier Penetration
+ # Lowest observed adverse effect level (LOAEL)
+
+ # 1204 estrogen receptor
+ # 1259408, # GENE-TOX
+ # 1159563 HepG2 cytotoxicity assay
+ # 588209 hepatotoxicity
+ # 1259333 cytotoxicity
+ # 1159569 HepG2 cytotoxicity counterscreen Measured in Cell-Based System Using Plate Reader - 2153-03_Inhibitor_Dose_DryPowder_Activity
+ # 2122 HTS Counterscreen for Detection of Compound Cytotoxicity in MIN6 Cells
+ # 116724 Acute toxicity determined after intravenal administration in mice
+ # 1148549 Toxicity in po dosed mouse assessed as mortality after 7 days
+=end
+
+ end
+end