From 7aac1c36369b41501edfc261e4f7ad77dec6b2a1 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 21 Jun 2019 10:45:59 +0200 Subject: test_from_csv2 fixed, prefer merged_feature and transformed_feature in to_training_csv --- lib/dataset.rb | 15 +++++++++------ test/dataset.rb | 10 ++++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 596c53c..fb1afd2 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -107,7 +107,7 @@ module OpenTox end # Get nominal and numeric merged features - # @return [Array] + # @return [Array] def merged_features features.select{|f| f._type.match("Merged")} end @@ -380,17 +380,20 @@ module OpenTox # @return [String] def to_training_csv - p features - p bioactivity_features + export_features = merged_features + export_features = transformed_bioactivity_features if export_features.empty? + export_features = bioactivity_features if export_features.empty? + export_feature = export_features.first + header = ["Canonical SMILES"] - header << bioactivity_features[0].name + header << bioactivity_features.first.name # use original bioactivity name instead of long merged name csv = [header] substances.each do |substance| nr_activities = values(substance,bioactivity_features.first).size (0..nr_activities-1).each do |n| # new row for each value row = [substance.smiles] - row << values(substance,bioactivity_features[0])[n] + row << values(substance,export_feature)[n] csv << row end end @@ -502,7 +505,7 @@ module OpenTox merged_feature = nil if feature_classes.size == 1 if features.first.kind_of? NominalFeature - merged_feature = MergedNominalBioActivity.find_or_create_by(:name => features.collect{|f| f.name}.uniq.join(", ") + " merged", :original_feature_ids => features.collect{|f| f.id}, :transformations => value_maps) + merged_feature = MergedNominalBioActivity.find_or_create_by(:name => features.collect{|f| f.name}.uniq.join(" and ") + " merged", :original_feature_ids => features.collect{|f| f.id}, :transformations => value_maps) else merged_feature = MergedNumericBioActivity.find_or_create_by(:name => features.collect{|f| f.name} + " merged", :original_feature_ids => features.collect{|f| f.id}) # TODO: regression transformations end diff --git a/test/dataset.rb b/test/dataset.rb index b978512..cee958a 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -189,6 +189,7 @@ class DatasetTest < MiniTest::Test # serialisation def test_to_csv + skip "to_csv was substituted with to_training_csv and to_prediction_csv" d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv" csv = CSV.parse(d.to_csv) assert_equal "3 5", csv[3][0] @@ -260,10 +261,11 @@ class DatasetTest < MiniTest::Test end def test_from_csv2 - File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") } - dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv" - assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.", dataset.warnings.last - File.delete "#{DATA_DIR}/temp_test.csv" + csv = File.join DATA_DIR,"temp_test.csv" + File.open(csv, "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") } + dataset = Dataset.from_csv_file csv + assert_equal "Cannot parse SMILES compound '' at line 3 of #{csv}, all entries are ignored.", dataset.warnings.last + File.delete csv end def test_same_feature -- cgit v1.2.3