summary refs log tree commit diff
path: root/lib/dataset.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r-- lib/dataset.rb 41
1 files changed, 38 insertions, 3 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 95c1918..c916722 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -197,7 +197,12 @@ module OpenTox
accept_values
end
- # Detect feature type(s) in the dataset
+ # Detect feature type (reduced to one across all features)
+ # Classification takes precedence over regression
+ # DEPRECATED --
+ # MAKES NO SENSE FOR DATASETS WITH MORE THAN 1 FEATURE
+ # FEATURES CAN HAVE MULTIPLE TYPES
+ # Replacement: see feature_types()
# @return [String] "classification", "regression", "mixed" or "unknown"
def feature_type(subjectid=nil)
load_features(subjectid)
@@ -210,6 +215,24 @@ module OpenTox
"unknown"
end
end
+
+
+ # Detect feature types. A feature can have multiple types.
+ # Returns types hashed by feature URI, with missing features omitted.
+ # Example (YAML):
+ # http://toxcreate3.in-silico.ch:8082/dataset/152/feature/nHal:
+ # - http://www.opentox.org/api/1.1#NumericFeature
+ # - http://www.opentox.org/api/1.1#NominalFeature
+ # ...
+ #
+ # @return [Hash] Keys: feature URIs, Values: Array of types
+ def feature_types(subjectid=nil)
+ load_features(subjectid)
+ @features.inject({}){ |h,(f,metadata)|
+ h[f]=metadata[RDF.type] unless metadata[RDF.type][0].include? "MissingFeature"
+ h
+ }
+ end
=begin
=end
@@ -316,11 +339,14 @@ module OpenTox
end
# Complete feature values by adding zeroes
- def complete_data_entries
+ # @param [Hash] compound_sizes key: compound, value: number of times the zero is added for that compound
+ def complete_data_entries(compound_sizes)
all_features = @features.keys
@data_entries.each { |c, e|
(Set.new(all_features.collect)).subtract(Set.new e.keys).to_a.each { |f|
- self.add(c,f,0)
+ compound_sizes[c].times {
+ self.add(c,f,0)
+ }
}
}
end
@@ -454,6 +480,14 @@ module OpenTox
end
end
+ def value_map(prediction_feature_uri)
+ training_classes = accept_values(prediction_feature_uri).sort
+ value_map=Hash.new
+ training_classes.each_with_index { |c,i| value_map[i+1] = c }
+ value_map
+ end
+
+
private
# Copy a dataset (rewrites URI)
def copy(dataset)
@@ -504,6 +538,7 @@ module OpenTox
@data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
end
+
# def errors(compound)
# features = @data_entries[compound.uri].keys
# features.collect{|f| @features[f][OT.error]}.join(" ") if features