summary refs log tree commit diff
path: root/test
diff options
context:
space:
mode:
author helma@in-silico.ch <helma@in-silico.ch> 2018-10-30 21:11:04 +0100
committer helma@in-silico.ch <helma@in-silico.ch> 2018-10-30 21:11:04 +0100
commit 2d4ce39cb1b489e26b0d6d96026054566a4f77b9 (patch)
tree 84de198d7c9113fbefef97ef4aa8acd5594b4129 /test
parent d61f78093f4ddf03c27a2c8ae0bab9c1f10c80f5 (diff)
dataset merge
Diffstat (limited to 'test')
-rw-r--r-- test/dataset.rb 31
-rw-r--r-- test/use_cases.rb 17
2 files changed, 18 insertions, 30 deletions
diff --git a/test/dataset.rb b/test/dataset.rb
index 8018dd2..70d26d2 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -190,37 +190,18 @@ class DatasetTest < MiniTest::Test
assert_equal d.id.to_s, copy.source
end
- def test_map
- skip
- d = Dataset.from_csv_file("#{DATA_DIR}/hamster_carcinogenicity.csv")
- assert_equal 1, d.bioactivity_features.size
- map = {"true" => "carcinogen", "false" => "non-carcinogen"}
- mapped = d.map(d.bioactivity_features.first, map)
- c = d.compounds.sample
- assert_equal d.values(c,d.bioactivity_features.first).collect{|v| map[v]}, mapped.values(c,mapped.transformed_bioactivity_features.first)
- assert_equal d.values(c,d.original_id_features.first), mapped.values(c,mapped.original_id_features.first)
- assert_equal d.bioactivity_features.first.name, mapped.bioactivity_features.first.name
- assert_equal ["carcinogen","non-carcinogen"], mapped.transformed_bioactivity_features.first.accept_values
- end
-
def test_merge
- skip
kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
- #p "mapping hansen"
- #hansen_mapped = hansen.map hansen.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
- #p "mapping efsa"
- #efsa_mapped = efsa.map efsa.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
- #datasets = [kazius,hansen_mapped,efsa_mapped]
datasets = [kazius,hansen,efsa]
- d = Dataset.merge datasets#, datasets.collect{|d| d.bioactivity_features}.flatten.uniq
- assert_equal 8281, d.compounds.size
+ map = {"1" => "mutagen", "0" => "nonmutagen"}
+ dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: true, remove_duplicates: false
+ assert_equal 8281, dataset.compounds.size
+ assert_equal 9, dataset.features.size
c = Compound.from_smiles("C/C=C/C=O")
- assert_equal ["mutagen"], d.values(c,d.bioactivity_features.first)
- assert_equal datasets.collect{|d| d.id.to_s}.join(", "), d.source
- assert_equal 8, d.features.size
- File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
+ assert_equal ["mutagen"], dataset.values(c,dataset.merged_features.first)
+ #File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
end
# serialisation
diff --git a/test/use_cases.rb b/test/use_cases.rb
index 15e65a3..4959f16 100644
--- a/test/use_cases.rb
+++ b/test/use_cases.rb
@@ -3,18 +3,25 @@ require_relative "setup.rb"
class UseCasesTest < MiniTest::Test
def test_PA
- skip
kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
datasets = [kazius,hansen,efsa]
- map = {"true" => "carcinogen", "false" => "non-carcinogen"}
+ map = {"1" => "mutagen", "0" => "nonmutagen"}
+ p "merging"
training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true
- model = Model::Validation.create training_dataset: training_dataset, species: "Salmonella typhimurium", endpoint: "Mutagenicity"
+ assert_equal 8281, training_dataset.compounds.size
+ p training_dataset.features.size
+ p training_dataset.id
+ training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b')
+ p "create model_validation"
+ model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity"
+ p model_validation.id
+ p "predict"
pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf"
- prediction_dataset = model.predict pa
+ prediction_dataset = model_dataset.predict pa
+ p prediction_dataset.id
puts prediction_dataset.to_csv
- assert_equal 8281, d.compounds.size
end
def test_public_models