summaryrefslogtreecommitdiff
path: root/lib/utils/shims/dataset.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/utils/shims/dataset.rb')
-rw-r--r--lib/utils/shims/dataset.rb98
1 files changed, 98 insertions, 0 deletions
diff --git a/lib/utils/shims/dataset.rb b/lib/utils/shims/dataset.rb
index 912510c..f72ff1b 100644
--- a/lib/utils/shims/dataset.rb
+++ b/lib/utils/shims/dataset.rb
@@ -21,7 +21,104 @@ module OpenTox
ds.get
ds
end
+
+ # Checks whether a dataset can be loaded from the given URI.
+ # @param uri URI of the dataset to probe
+ # @param subjectid handed to the dataset constructor (defaults to nil)
+ # @return [Boolean] true if get_metadata succeeds, false if it raises a StandardError
+ def self.exist?(uri, subjectid=nil)
+   probe = OpenTox::Dataset.new(uri, subjectid)
+   # only the metadata request is guarded; errors raised by the constructor still propagate
+   begin
+     probe.get_metadata
+   rescue
+     return false
+   end
+   true
+ end
+
+ # Creates a new dataset that holds the selected compounds of this dataset together
+ # with their values for the selected features, and stores it via put.
+ # @param [Array<Integer>] compound_indices indices into this dataset's compounds (must not be empty)
+ # @param [Array<OpenTox::Feature>, nil] feats features of the new dataset; nil selects all features of this dataset
+ # @param metadata metadata assigned to the new dataset
+ # @param subjectid passed on to the new dataset and to put (defaults to nil)
+ # @return [OpenTox::Dataset] the newly created dataset
+ # @raise [RuntimeError] if the arguments are malformed or a requested feature is not part of this dataset
+ def split( compound_indices, feats, metadata, subjectid=nil)
+
+   # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and removed in Ruby 3.2
+   raise "Dataset.split : pls give compounds as indices" if compound_indices.size==0 || !compound_indices[0].is_a?(Integer)
+   raise "Dataset.split : pls give features as feature objects (given: #{feats})" if feats!=nil && feats.size>0 && !feats[0].is_a?(OpenTox::Feature)
+   $logger.debug "split dataset using #{compound_indices.size}/#{@compounds.size} compounds"
+
+   dataset = OpenTox::Dataset.new(nil, subjectid)
+   dataset.metadata = metadata
+   dataset.features = (feats ? feats : self.features)
+
+   # The values of a compound are read from self.data_entries by column index. Without
+   # feats the columns are simply 0...n (as before). With feats, the position of a feature
+   # inside feats need not be its column in this dataset (subset / different order), so
+   # every feature is resolved to its own column via its URI.
+   if feats
+     own_uris = self.features.collect{|f| f.uri}
+     columns = dataset.features.each_with_index.collect do |f, f_idx|
+       # keep the positional column whenever it already belongs to this feature
+       column = (own_uris[f_idx]==f.uri ? f_idx : own_uris.index(f.uri))
+       raise "Dataset.split : feature #{f.uri} is not part of dataset #{uri}" unless column
+       column
+     end
+   else
+     columns = (0...dataset.features.size).to_a
+   end
+
+   # one row per selected compound: the compound itself followed by its feature values
+   compound_indices.each do |c_idx|
+     dataset << [ self.compounds[c_idx] ] + columns.collect{|column| self.data_entries[c_idx][column]}
+   end
+
+   dataset.put subjectid
+   dataset
+ end
+
+
+ # maps a compound-index from another dataset to a compound-index from this dataset
+ # mapping works as follows:
+ # (compound c is the compound identified by the compound-index of the other dataset)
+ # * c occurs only once in this dataset? map compound-index of other dataset to index in this dataset
+ # * c occurs more than once in this dataset?
+ # ** number of occurrences is equal in both datasets? assume order is preserved(!) and map accordingly
+ # ** number of occurrences is not equal in both datasets? cannot map, raise error
+ # @param [OpenTox::Dataset] dataset that should be mapped to this dataset (fully loaded)
+ # @param [Fixnum] compound_index, corresponding to dataset
+ def compound_index( dataset, compound_index )
+   # the mapping is computed once per source dataset and cached under that dataset's uri
+   unless defined?(@index_map) and @index_map[dataset.uri]
+     mapping = {}
+     other_uris = dataset.compounds.collect{|c| c.uri}.uniq
+     other_uris.each do |compound|
+       own = compound_indices(compound)
+       # compounds that do not occur in this dataset get no entry in the mapping
+       next unless own
+       theirs = dataset.compound_indices(compound)
+       if own.size==1
+         # single occurrence here: every occurrence in the other dataset maps to it
+         theirs.each{|idx| mapping[idx] = own[0]}
+       elsif own.size==theirs.size
+         # we do assume that the order is preserved!
+         theirs.each_with_index{|idx, pos| mapping[idx] = own[pos]}
+       else
+         raise "cannot map compound #{compound} from dataset #{dataset.uri} to dataset #{uri}, "+
+           "compound occurs #{theirs.size} times and #{own.size} times"
+       end
+     end
+     @index_map = {} unless defined?(@index_map)
+     @index_map[dataset.uri] = mapping
+   end
+   @index_map[dataset.uri][compound_index]
+ end
+
+ # Looks up the positions at which a compound occurs in @compounds.
+ # The lookup table (compound uri => array of indices) is rebuilt from @compounds
+ # whenever it does not exist yet or does not contain the requested key.
+ # @param compound key to look up (the table is keyed by the uri of each compound)
+ # @return [Array, nil] ascending indices into @compounds, nil if the compound does not occur
+ def compound_indices( compound )
+   cached = defined?(@cmp_indices) && @cmp_indices.has_key?(compound)
+   unless cached
+     @cmp_indices = {}
+     @compounds.each_with_index do |cmp, pos|
+       (@cmp_indices[cmp.uri] ||= []) << pos
+     end
+   end
+   @cmp_indices[compound]
+ end
+
+ # Returns the value stored for one compound and one feature.
+ # @param compound_index row index into @data_entries
+ # @param feature_uri key used to look up the column in @feature_positions
+ # @return the entry found at that row and column
+ def data_entry_value(compound_index, feature_uri)
+   # the uri => column lookup is created on first use
+   build_feature_positions if !@feature_positions
+   row = @data_entries[compound_index]
+   column = @feature_positions[feature_uri]
+   row[column]
+ end
### Index Structures
@@ -30,6 +127,7 @@ module OpenTox
# @return [Hash] A hash with keys 1...feature.training_classes.size and values training classes
def value_map(feature)
+ # the training classes are taken from the accept values of the feature
training_classes = feature.accept_values
+ # fail with a descriptive message here rather than with a NoMethodError on nil below
+ raise "no accept values for feature #{feature.uri} in dataset #{uri}" unless training_classes
+ # number the classes starting at 1: {1 => first class, 2 => second class, ...}
training_classes.each_index.inject({}) { |h,idx| h[idx+1]=training_classes[idx]; h }
end