summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2019-08-18 18:58:25 +0200
committer Christoph Helma <helma@in-silico.ch> 2019-08-18 18:58:25 +0200
commit d87996495e118223419a83dc22cbac097253be7e (patch)
tree 81ef8e18177a78d1c2e5a8d9314f9aed48c4e455
parent 22ff252ddcb8da087b50fc23ee22a857eb202738 (diff)
fingerprint csv export
-rw-r--r-- lib/dataset.rb 33
1 files changed, 31 insertions, 2 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index fb1afd2..49208ce 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -349,6 +349,30 @@ module OpenTox
# Serialisation
+ # Convert dataset into csv formatted training data (one line per value: SMILES of the substance, then the value)
+ # @return [String] header line followed by the data lines; fields are joined with "," and lines with "\n"
+ def to_training_csv
+ # choose the feature to export: merged features first, then transformed bioactivity features, then plain bioactivity features
+ export_features = merged_features
+ export_features = transformed_bioactivity_features if export_features.empty?
+ export_features = bioactivity_features if export_features.empty?
+ export_feature = export_features.first
+ # only the first of the chosen features is exported, so the header always has two columns
+ header = ["Canonical SMILES"]
+ header << bioactivity_features.first.name # use original bioactivity name instead of long merged name
+ csv = [header]
+ # NOTE(review): rows are counted from the first bioactivity feature but the values are read from export_feature; presumably both hold the same number of values per substance - TODO confirm
+ substances.each do |substance|
+ nr_activities = values(substance,bioactivity_features.first).size
+ (0..nr_activities-1).each do |n| # new row for each value
+ row = [substance.smiles]
+ row << values(substance,export_feature)[n]
+ csv << row
+ end
+ end
+ csv.collect{|r| r.join(",")}.join("\n") # plain join: fields are not quoted or escaped
+ end
+
# Convert lazar prediction dataset to csv format
# @return [String]
def to_prediction_csv
@@ -376,16 +400,18 @@ module OpenTox
csv.collect{|r| r.join(",")}.join("\n")
end
- # Convert dataset into csv formatted training data
+ # Export fingerprints in csv format
# @return [String]
- def to_training_csv
+ def to_fingerprint_csv type=Compound::DEFAULT_FINGERPRINT
+ fingerprints = substances.collect{|s| s.fingerprints[type]}.flatten.sort.uniq
export_features = merged_features
export_features = transformed_bioactivity_features if export_features.empty?
export_features = bioactivity_features if export_features.empty?
export_feature = export_features.first
header = ["Canonical SMILES"]
+ header += fingerprints
header << bioactivity_features.first.name # use original bioactivity name instead of long merged name
csv = [header]
@@ -393,6 +419,9 @@ module OpenTox
nr_activities = values(substance,bioactivity_features.first).size
(0..nr_activities-1).each do |n| # new row for each value
row = [substance.smiles]
+ fingerprints.each do |f|
+ substance.fingerprints[type].include?(f) ? row << 1 : row << 0
+ end
row << values(substance,export_feature)[n]
csv << row
end