summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/dataset.rb 50
1 files changed, 40 insertions, 10 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index fb1afd2..7037679 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -349,6 +349,30 @@ module OpenTox
# Serialisation
+ # Convert dataset into CSV-formatted training data: a header row, then one "SMILES,value" row per activity value of each substance
+ # @return [String] rows joined with "\n", fields joined with "," (no quoting or escaping is applied)
+ def to_training_csv
+ # Choose the feature to export: merged features first, then transformed bioactivities, then the original bioactivities
+ export_features = merged_features
+ export_features = transformed_bioactivity_features if export_features.empty?
+ export_features = bioactivity_features if export_features.empty?
+ export_feature = export_features.first # only the first candidate feature is exported
+ # NOTE(review): presumably bioactivity_features.first is nil when the dataset has no bioactivity features, in which case .name below would raise - confirm callers guarantee at least one
+ header = ["Canonical SMILES"]
+ header << bioactivity_features.first.name # use original bioactivity name instead of long merged name
+ csv = [header] # rows accumulate here, header first
+
+ substances.each do |substance|
+ nr_activities = values(substance,bioactivity_features.first).size # row count is taken from the original bioactivity feature
+ (0..nr_activities-1).each do |n| # new row for each value
+ row = [substance.smiles]
+ row << values(substance,export_feature)[n] # NOTE(review): value is read from export_feature, which may differ from the feature counted above - assumes both hold the same number of values; confirm
+ csv << row
+ end
+ end
+ csv.collect{|r| r.join(",")}.join("\n") # no trailing newline is appended
+ end
+
# Convert lazar prediction dataset to csv format
# @return [String]
def to_prediction_csv
@@ -376,16 +400,18 @@ module OpenTox
csv.collect{|r| r.join(",")}.join("\n")
end
- # Convert dataset into csv formatted training data
+ # Export fingerprints in csv format
# @return [String]
- def to_training_csv
+ def to_fingerprint_csv type=Compound::DEFAULT_FINGERPRINT
+ fingerprints = substances.collect{|s| s.fingerprints[type]}.flatten.sort.uniq
export_features = merged_features
export_features = transformed_bioactivity_features if export_features.empty?
export_features = bioactivity_features if export_features.empty?
export_feature = export_features.first
header = ["Canonical SMILES"]
+ header += fingerprints
header << bioactivity_features.first.name # use original bioactivity name instead of long merged name
csv = [header]
@@ -393,6 +419,9 @@ module OpenTox
nr_activities = values(substance,bioactivity_features.first).size
(0..nr_activities-1).each do |n| # new row for each value
row = [substance.smiles]
+ fingerprints.each do |f|
+ substance.fingerprints[type].include?(f) ? row << 1 : row << 0
+ end
row << values(substance,export_feature)[n]
csv << row
end
@@ -403,19 +432,20 @@ module OpenTox
# Convert dataset to SDF format
# @return [String] SDF string
def to_sdf
+
+ export_features = merged_features
+ export_features = transformed_bioactivity_features if export_features.empty?
+ export_features = bioactivity_features if export_features.empty?
+ export_feature = export_features.first
+
sdf = ""
compounds.each do |compound|
sdf_lines = compound.sdf.sub(/\$\$\$\$\n/,"").split("\n")
sdf_lines[0] = compound.smiles
sdf += sdf_lines.join("\n")
- bioactivity_features.each do |f|
- v = values(compound,f)
- unless v.empty?
- sdf += "\n> <#{f.name}>\n"
- sdf += v.uniq.join ","
- sdf += "\n"
- end
- end
+ sdf += "\n> <#{export_feature.name}>\n"
+ sdf += values(compound,export_feature).uniq.join ","
+ sdf += "\n"
sdf += "\n$$$$\n"
end
sdf