summary refs log tree commit diff
path: root/lib/dataset.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r-- lib/dataset.rb 75
1 files changed, 51 insertions, 24 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index df17569..596c53c 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -96,8 +96,14 @@ module OpenTox
# Get nominal and numeric prediction features
# @return [Array<OpenTox::NominalLazarPrediction,OpenTox::NumericLazarPrediction>]
- def prediction_features
- features.select{|f| f._type.match("Prediction")}
+ def prediction_feature
+ features.select{|f| f._type.match(/Prediction$/)}.first
+ end
+
+ # Get supporting nominal and numeric prediction features (class probabilities, prediction interval)
+ # @return [Array<OpenTox::LazarPredictionProbability,OpenTox::LazarPredictionInterval>]
+ def prediction_supporting_features
+ features.select{|f| f.is_a?(LazarPredictionProbability) or f.is_a?(LazarPredictionInterval)}
end
# Get nominal and numeric merged features
@@ -259,7 +265,7 @@ module OpenTox
feature_names = table.shift.collect{|f| f.strip}
raise ArgumentError, "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size
- if feature_names[0] =~ /ID/i # check ID column
+ if feature_names[0] !~ /SMILES|InChI/i # check ID column
original_id = OriginalId.find_or_create_by(:dataset_id => self.id,:name => feature_names.shift)
else
original_id = OriginalId.find_or_create_by(:dataset_id => self.id,:name => "LineID")
@@ -343,30 +349,52 @@ module OpenTox
# Serialisation
- # Convert dataset to csv format
+ # Convert lazar prediction dataset to csv format
# @return [String]
- def to_csv #inchi=false
- CSV.generate() do |csv|
-
- compound = substances.first.is_a? Compound
- f = features - original_id_features - original_smiles_features - warnings_features
- header = original_id_features.collect{|f| "ID "+Dataset.find(f.dataset_id).name}
- header += original_smiles_features.collect{|f| "SMILES "+Dataset.find(f.dataset_id).name} if compound
- compound ? header << "Canonical SMILES" : header << "Name"
- header += f.collect{|f| f.name}
- header += warnings_features.collect{|f| "Warnings "+Dataset.find(f.dataset_id).name}
- csv << header
-
- substances.each do |substance|
- row = original_id_features.collect{|f| values(substance,f).join(" ")}
- row += original_smiles_features.collect{|f| values(substance,f).join(" ")} if compound
- compound ? row << substance.smiles : row << substance.name
- row += f.collect{|f| values(substance,f).join(" ")}
- row += warnings_features.collect{|f| values(substance,f).uniq.join(" ")}
+ def to_prediction_csv
+
+ compound = substances.first.is_a? Compound
+ header = ["ID"]
+ header << "Original SMILES" if compound
+ compound ? header << "Canonical SMILES" : header << "Name"
+ header << "Prediction" if prediction_feature
+ header << "Confidence" if confidence_feature
+ header += prediction_supporting_features.collect{|f| f.name}
+ header << "Measurements"
+ csv = [header]
+
+ substances.each do |substance|
+ row = original_id_features.collect{|f| values(substance,f).join(" ")}
+ row += original_smiles_features.collect{|f| values(substance,f).join(" ")} if compound
+ compound ? row << substance.smiles : row << substance.name
+ row << values(substance,prediction_feature).join(" ")
+ row << values(substance,confidence_feature).join(" ")
+ row += prediction_supporting_features.collect{|f| values(substance,f).join(" ")}
+ row << values(substance,bioactivity_features[0]).join(" ")
+ csv << row
+ end
+ csv.collect{|r| r.join(",")}.join("\n")
+ end
+
+ # Convert dataset into csv formatted training data
+ # @return [String]
+ def to_training_csv
+
+ p features
+ p bioactivity_features
+ header = ["Canonical SMILES"]
+ header << bioactivity_features[0].name
+ csv = [header]
+
+ substances.each do |substance|
+ nr_activities = values(substance,bioactivity_features.first).size
+ (0..nr_activities-1).each do |n| # new row for each value
+ row = [substance.smiles]
+ row << values(substance,bioactivity_features[0])[n]
csv << row
end
-
end
+ csv.collect{|r| r.join(",")}.join("\n")
end
# Convert dataset to SDF format
@@ -396,7 +424,6 @@ module OpenTox
predictions = {}
substances.each do |s|
predictions[s] ||= {}
- prediction_feature = prediction_features.first
predictions[s][:value] = values(s,prediction_feature).first
#predictions[s][:warnings] = []
#warnings_features.each { |w| predictions[s][:warnings] += values(s,w) }