From d87996495e118223419a83dc22cbac097253be7e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 18 Aug 2019 18:58:25 +0200 Subject: fingerprint csv export --- lib/dataset.rb | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index fb1afd2..49208ce 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -349,6 +349,30 @@ module OpenTox # Serialisation + # Convert dataset into csv formatted training data + # @return [String] + def to_training_csv + + export_features = merged_features + export_features = transformed_bioactivity_features if export_features.empty? + export_features = bioactivity_features if export_features.empty? + export_feature = export_features.first + + header = ["Canonical SMILES"] + header << bioactivity_features.first.name # use original bioactivity name instead of long merged name + csv = [header] + + substances.each do |substance| + nr_activities = values(substance,bioactivity_features.first).size + (0..nr_activities-1).each do |n| # new row for each value + row = [substance.smiles] + row << values(substance,export_feature)[n] + csv << row + end + end + csv.collect{|r| r.join(",")}.join("\n") + end + # Convert lazar prediction dataset to csv format # @return [String] def to_prediction_csv @@ -376,16 +400,18 @@ module OpenTox csv.collect{|r| r.join(",")}.join("\n") end - # Convert dataset into csv formatted training data + # Export fingerprints in csv format # @return [String] - def to_training_csv + def to_fingerprint_csv type=Compound::DEFAULT_FINGERPRINT + fingerprints = substances.collect{|s| s.fingerprints[type]}.flatten.sort.uniq export_features = merged_features export_features = transformed_bioactivity_features if export_features.empty? export_features = bioactivity_features if export_features.empty? export_feature = export_features.first header = ["Canonical SMILES"] + header += fingerprints header << bioactivity_features.first.name # use original bioactivity name instead of long merged name csv = [header] @@ -393,6 +419,9 @@ module OpenTox nr_activities = values(substance,bioactivity_features.first).size (0..nr_activities-1).each do |n| # new row for each value row = [substance.smiles] + fingerprints.each do |f| + substance.fingerprints[type].include?(f) ? row << 1 : row << 0 + end row << values(substance,export_feature)[n] csv << row end -- cgit v1.2.3