test_from_csv2 fixed, prefer merged_feature and transformed_feature in to_training_csv
author Christoph Helma <helma@in-silico.ch>
Fri, 21 Jun 2019 08:45:59 +0000 (10:45 +0200)
committer Christoph Helma <helma@in-silico.ch>
Fri, 21 Jun 2019 08:45:59 +0000 (10:45 +0200)
lib/dataset.rb
test/dataset.rb

index 596c53c..fb1afd2 100644 (file)
@@ -107,7 +107,7 @@ module OpenTox
     end
 
     # Get nominal and numeric merged features
-    # @return [Array<OpenTox::NominalLazarPrediction,OpenTox::NumericLazarPrediction>]
+    # @return [Array<OpenTox::MergedNominalBioActivity,OpenTox::MergedNumericBioActivity>]
     def merged_features
       features.select{|f| f._type.match("Merged")}
     end
@@ -380,17 +380,20 @@ module OpenTox
     # @return [String]
     def to_training_csv 
       
-      p features
-      p bioactivity_features
+      export_features = merged_features
+      export_features = transformed_bioactivity_features if export_features.empty? 
+      export_features = bioactivity_features if export_features.empty? 
+      export_feature = export_features.first
+
       header = ["Canonical SMILES"]
-      header << bioactivity_features[0].name
+      header << bioactivity_features.first.name # use original bioactivity name instead of long merged name
       csv = [header]
 
       substances.each do |substance|
         nr_activities = values(substance,bioactivity_features.first).size
         (0..nr_activities-1).each do |n| # new row for each value
           row = [substance.smiles]
-          row << values(substance,bioactivity_features[0])[n] 
+          row << values(substance,export_feature)[n] 
           csv << row
         end
       end
@@ -502,7 +505,7 @@ module OpenTox
       merged_feature = nil
       if feature_classes.size == 1
         if features.first.kind_of? NominalFeature
-          merged_feature = MergedNominalBioActivity.find_or_create_by(:name => features.collect{|f| f.name}.uniq.join(", ") + " merged", :original_feature_ids => features.collect{|f| f.id}, :transformations => value_maps)
+          merged_feature = MergedNominalBioActivity.find_or_create_by(:name => features.collect{|f| f.name}.uniq.join(" and ") + " merged", :original_feature_ids => features.collect{|f| f.id}, :transformations => value_maps)
         else
           merged_feature = MergedNumericBioActivity.find_or_create_by(:name => features.collect{|f| f.name} + " merged", :original_feature_ids => features.collect{|f| f.id}) # TODO: regression transformations 
         end
index b978512..cee958a 100644 (file)
@@ -189,6 +189,7 @@ class DatasetTest < MiniTest::Test
   # serialisation
 
   def test_to_csv
+    skip "to_csv was substituted with to_training_csv and to_prediction_csv"
     d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
     csv = CSV.parse(d.to_csv)
     assert_equal "3 5", csv[3][0]
@@ -260,10 +261,11 @@ class DatasetTest < MiniTest::Test
   end
 
   def test_from_csv2
-    File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
-    dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
-    assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.",  dataset.warnings.last
-    File.delete "#{DATA_DIR}/temp_test.csv"
+    csv = File.join DATA_DIR,"temp_test.csv"
+    File.open(csv, "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
+    dataset = Dataset.from_csv_file csv
+    assert_equal "Cannot parse SMILES compound '' at line 3 of #{csv}, all entries are ignored.",  dataset.warnings.last
+    File.delete csv
   end
 
   def test_same_feature