From 2d4ce39cb1b489e26b0d6d96026054566a4f77b9 Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch"
Date: Tue, 30 Oct 2018 21:11:04 +0100
Subject: dataset merge

---
Review notes (patch commentary, not part of the commit message):
  * test/dataset.rb: drops the skipped test_map and re-enables test_merge,
    which now calls Dataset.merge with keyword arguments and checks
    merged_features instead of bioactivity_features.
  * test/use_cases.rb: re-enables test_PA. The prediction is made through
    model_validation, the local assigned from Model::Validation.from_dataset;
    the patch as originally posted called an undefined local, model_dataset.
  * NOTE(review): test_PA re-assigns training_dataset from a hard-coded
    Dataset.find id right after the merge. This looks like a debugging
    leftover tied to one database -- confirm before relying on the test.
  * Ruby indentation inside the hunks is restored as 2 spaces (assumed).

 test/dataset.rb   | 31 ++++++-------------------------
 test/use_cases.rb | 17 ++++++++++++-----
 2 files changed, 18 insertions(+), 30 deletions(-)

(limited to 'test')

diff --git a/test/dataset.rb b/test/dataset.rb
index 8018dd2..70d26d2 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -190,37 +190,18 @@ class DatasetTest < MiniTest::Test
     assert_equal d.id.to_s, copy.source
   end
 
-  def test_map
-    skip
-    d = Dataset.from_csv_file("#{DATA_DIR}/hamster_carcinogenicity.csv")
-    assert_equal 1, d.bioactivity_features.size
-    map = {"true" => "carcinogen", "false" => "non-carcinogen"}
-    mapped = d.map(d.bioactivity_features.first, map)
-    c = d.compounds.sample
-    assert_equal d.values(c,d.bioactivity_features.first).collect{|v| map[v]}, mapped.values(c,mapped.transformed_bioactivity_features.first)
-    assert_equal d.values(c,d.original_id_features.first), mapped.values(c,mapped.original_id_features.first)
-    assert_equal d.bioactivity_features.first.name, mapped.bioactivity_features.first.name
-    assert_equal ["carcinogen","non-carcinogen"], mapped.transformed_bioactivity_features.first.accept_values
-  end
-
   def test_merge
-    skip
     kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
     hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
     efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
-    #p "mapping hansen"
-    #hansen_mapped = hansen.map hansen.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
-    #p "mapping efsa"
-    #efsa_mapped = efsa.map efsa.bioactivity_features.first, {"1" => "mutagen", "0" => "nonmutagen"}
-    #datasets = [kazius,hansen_mapped,efsa_mapped]
     datasets = [kazius,hansen,efsa]
-    d = Dataset.merge datasets#, datasets.collect{|d| d.bioactivity_features}.flatten.uniq
-    assert_equal 8281, d.compounds.size
+    map = {"1" => "mutagen", "0" => "nonmutagen"}
+    dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: true, remove_duplicates: false
+    assert_equal 8281, dataset.compounds.size
+    assert_equal 9, dataset.features.size
     c = Compound.from_smiles("C/C=C/C=O")
-    assert_equal ["mutagen"], d.values(c,d.bioactivity_features.first)
-    assert_equal datasets.collect{|d| d.id.to_s}.join(", "), d.source
-    assert_equal 8, d.features.size
-    File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
+    assert_equal ["mutagen"], dataset.values(c,dataset.merged_features.first)
+    #File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
   end
 
   # serialisation
diff --git a/test/use_cases.rb b/test/use_cases.rb
index 15e65a3..4959f16 100644
--- a/test/use_cases.rb
+++ b/test/use_cases.rb
@@ -3,18 +3,25 @@ require_relative "setup.rb"
 class UseCasesTest < MiniTest::Test
 
   def test_PA
-    skip
     kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
     hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
     efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
     datasets = [kazius,hansen,efsa]
-    map = {"true" => "carcinogen", "false" => "non-carcinogen"}
+    map = {"1" => "mutagen", "0" => "nonmutagen"}
+    p "merging"
     training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true
-    model = Model::Validation.create training_dataset: training_dataset, species: "Salmonella typhimurium", endpoint: "Mutagenicity"
+    assert_equal 8281, training_dataset.compounds.size
+    p training_dataset.features.size
+    p training_dataset.id
+    training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b')
+    p "create model_validation"
+    model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity"
+    p model_validation.id
+    p "predict"
     pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf"
-    prediction_dataset = model.predict pa
+    prediction_dataset = model_validation.predict pa
+    p prediction_dataset.id
     puts prediction_dataset.to_csv
-    assert_equal 8281, d.compounds.size
   end
 
   def test_public_models
-- 
cgit v1.2.3