diff options
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r-- | lib/dataset.rb | 50 |
1 files changed, 40 insertions, 10 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb index fb1afd2..7037679 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -349,6 +349,30 @@ module OpenTox # Serialisation + # Convert dataset into csv formatted training data + # @return [String] + def to_training_csv + + export_features = merged_features + export_features = transformed_bioactivity_features if export_features.empty? + export_features = bioactivity_features if export_features.empty? + export_feature = export_features.first + + header = ["Canonical SMILES"] + header << bioactivity_features.first.name # use original bioactivity name instead of long merged name + csv = [header] + + substances.each do |substance| + nr_activities = values(substance,bioactivity_features.first).size + (0..nr_activities-1).each do |n| # new row for each value + row = [substance.smiles] + row << values(substance,export_feature)[n] + csv << row + end + end + csv.collect{|r| r.join(",")}.join("\n") + end + # Convert lazar prediction dataset to csv format # @return [String] def to_prediction_csv @@ -376,16 +400,18 @@ module OpenTox csv.collect{|r| r.join(",")}.join("\n") end - # Convert dataset into csv formatted training data + # Export fingerprints in csv format # @return [String] - def to_training_csv + def to_fingerprint_csv type=Compound::DEFAULT_FINGERPRINT + fingerprints = substances.collect{|s| s.fingerprints[type]}.flatten.sort.uniq export_features = merged_features export_features = transformed_bioactivity_features if export_features.empty? export_features = bioactivity_features if export_features.empty? export_feature = export_features.first header = ["Canonical SMILES"] + header += fingerprints header << bioactivity_features.first.name # use original bioactivity name instead of long merged name csv = [header] @@ -393,6 +419,9 @@ module OpenTox nr_activities = values(substance,bioactivity_features.first).size (0..nr_activities-1).each do |n| # new row for each value row = [substance.smiles] + fingerprints.each do |f| + substance.fingerprints[type].include?(f) ? row << 1 : row << 0 + end row << values(substance,export_feature)[n] csv << row end @@ -403,19 +432,20 @@ module OpenTox # Convert dataset to SDF format # @return [String] SDF string def to_sdf + + export_features = merged_features + export_features = transformed_bioactivity_features if export_features.empty? + export_features = bioactivity_features if export_features.empty? + export_feature = export_features.first + sdf = "" compounds.each do |compound| sdf_lines = compound.sdf.sub(/\$\$\$\$\n/,"").split("\n") sdf_lines[0] = compound.smiles sdf += sdf_lines.join("\n") - bioactivity_features.each do |f| - v = values(compound,f) - unless v.empty? - sdf += "\n> <#{f.name}>\n" - sdf += v.uniq.join "," - sdf += "\n" - end - end + sdf += "\n> <#{export_feature.name}>\n" + sdf += values(compound,export_feature).uniq.join "," + sdf += "\n" sdf += "\n$$$$\n" end sdf |