summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2019-06-21 10:45:59 +0200
committer Christoph Helma <helma@in-silico.ch> 2019-06-21 10:45:59 +0200
commit 7aac1c36369b41501edfc261e4f7ad77dec6b2a1 (patch)
tree 19f8578f5cd9009178e7ab7becaaf8c91a2e4703
parent 455da06aa6459da0d25b286ca6cb866ff64c4c34 (diff)
test_from_csv2 fixed, prefer merged_feature and transformed_feature in to_training_csv
-rw-r--r-- lib/dataset.rb 15
-rw-r--r-- test/dataset.rb 10
2 files changed, 15 insertions, 10 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 596c53c..fb1afd2 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -107,7 +107,7 @@ module OpenTox
end
# Get nominal and numeric merged features
- # @return [Array<OpenTox::NominalLazarPrediction,OpenTox::NumericLazarPrediction>]
+ # @return [Array<OpenTox::MergedNominalBioActivity,OpenTox::MergedNumericBioActivity>]
def merged_features
features.select{|f| f._type.match("Merged")}
end
@@ -380,17 +380,20 @@ module OpenTox
# @return [String]
def to_training_csv
- p features
- p bioactivity_features
+ export_features = merged_features
+ export_features = transformed_bioactivity_features if export_features.empty?
+ export_features = bioactivity_features if export_features.empty?
+ export_feature = export_features.first
+
header = ["Canonical SMILES"]
- header << bioactivity_features[0].name
+ header << bioactivity_features.first.name # use original bioactivity name instead of long merged name
csv = [header]
substances.each do |substance|
nr_activities = values(substance,bioactivity_features.first).size
(0..nr_activities-1).each do |n| # new row for each value
row = [substance.smiles]
- row << values(substance,bioactivity_features[0])[n]
+ row << values(substance,export_feature)[n]
csv << row
end
end
@@ -502,7 +505,7 @@ module OpenTox
merged_feature = nil
if feature_classes.size == 1
if features.first.kind_of? NominalFeature
- merged_feature = MergedNominalBioActivity.find_or_create_by(:name => features.collect{|f| f.name}.uniq.join(", ") + " merged", :original_feature_ids => features.collect{|f| f.id}, :transformations => value_maps)
+ merged_feature = MergedNominalBioActivity.find_or_create_by(:name => features.collect{|f| f.name}.uniq.join(" and ") + " merged", :original_feature_ids => features.collect{|f| f.id}, :transformations => value_maps)
else
merged_feature = MergedNumericBioActivity.find_or_create_by(:name => features.collect{|f| f.name} + " merged", :original_feature_ids => features.collect{|f| f.id}) # TODO: regression transformations
end
diff --git a/test/dataset.rb b/test/dataset.rb
index b978512..cee958a 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -189,6 +189,7 @@ class DatasetTest < MiniTest::Test
# serialisation
def test_to_csv
+ skip "to_csv was substituted with to_training_csv and to_prediction_csv"
d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
csv = CSV.parse(d.to_csv)
assert_equal "3 5", csv[3][0]
@@ -260,10 +261,11 @@ class DatasetTest < MiniTest::Test
end
def test_from_csv2
- File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
- dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
- assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.", dataset.warnings.last
- File.delete "#{DATA_DIR}/temp_test.csv"
+ csv = File.join DATA_DIR,"temp_test.csv"
+ File.open(csv, "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
+ dataset = Dataset.from_csv_file csv
+ assert_equal "Cannot parse SMILES compound '' at line 3 of #{csv}, all entries are ignored.", dataset.warnings.last
+ File.delete csv
end
def test_same_feature