diff options
Diffstat (limited to 'lib/utils/shims/dataset.rb')
-rw-r--r-- | lib/utils/shims/dataset.rb | 201 |
1 files changed, 0 insertions, 201 deletions
=begin
* Name: dataset.rb
* Description: Dataset shims
* Author: Andreas Maunz <andreas@maunz.de>
* Date: 10/2012
=end

module OpenTox

  # Shims for the Dataset Class
  class Dataset

    attr_accessor :feature_positions, :compound_positions

    # Load a dataset from URI
    # @param [String] uri Dataset URI
    # @param [Object] subjectid passed through to the Dataset constructor
    # @return [OpenTox::Dataset, nil] Dataset object, or nil when no URI is given
    def self.find(uri, subjectid=nil)
      return nil unless uri
      ds = OpenTox::Dataset.new uri, subjectid
      ds.get
      ds
    end

    # Check whether the metadata of a dataset can be fetched
    # @param [String] uri Dataset URI
    # @param [Object] subjectid passed through to the Dataset constructor
    # @return [Boolean] true if get_metadata succeeds, false if it raises a StandardError
    def self.exist?(uri, subjectid=nil)
      ds = OpenTox::Dataset.new uri, subjectid
      begin
        ds.get_metadata
        true
      rescue
        false
      end
    end

    # Create (and put) a new dataset holding a subset of this dataset's compounds
    # and, optionally, a subset of its features
    # @param [Array<Integer>] compound_indices positions of the compounds to keep
    # @param [Array<OpenTox::Feature>, nil] feats features to keep, nil keeps all features of this dataset
    # @param [Object] metadata assigned to the new dataset
    # @param [Object] subjectid passed to the constructor and to put
    # @return [OpenTox::Dataset] the new dataset
    # @raise [RuntimeError] if no integer compound indices or no feature objects are given
    def split( compound_indices, feats, metadata, subjectid=nil)

      # Integer rather than Fixnum: Fixnum was merged into Integer in Ruby 2.4 and removed in Ruby 3.2
      raise "Dataset.split : pls give compounds as indices" if compound_indices.empty? || !compound_indices[0].is_a?(Integer)
      raise "Dataset.split : pls give features as feature objects (given: #{feats})" if feats && !feats.empty? && !feats[0].is_a?(OpenTox::Feature)
      $logger.debug "split dataset using "+compound_indices.size.to_s+"/"+@compounds.size.to_s+" compounds"

      dataset = OpenTox::Dataset.new(nil, subjectid)
      dataset.metadata = metadata
      dataset.features = (feats ? feats : self.features)

      # Resolve, for every feature of the new dataset, its column in the rows of THIS dataset.
      # A given feature list may be a subset or a reordering of self.features, so the position
      # within the new feature list must not be used to index self.data_entries.
      columns = if feats
        own_columns = {}
        self.features.each_with_index { |f, idx| own_columns[f.uri] ||= idx }
        feats.collect { |f| own_columns[f.uri] }
      else
        (0...dataset.features.size).to_a
      end

      compound_indices.each do |c_idx|
        row = self.data_entries[c_idx]
        # features unknown to this dataset yield nil values
        dataset << [ self.compounds[c_idx] ] + columns.collect { |col| col.nil? ? nil : row[col] }
      end

      dataset.put subjectid
      dataset
    end


    # maps a compound-index from another dataset to a compound-index from this dataset
    # mapping works as follows:
    # (compound c is the compound identified by the compound-index of the other dataset)
    # * c occurs only once in this dataset? map compound-index of other dataset to index in this dataset
    # * c occurs >1 in this dataset?
    # ** number of occurrences is equal in both datasets? assume order is preserved(!) and map accordingly
    # ** number of occurrences is not equal in both datasets? cannot map, raise error
    # The mapping is computed once per dataset URI and cached.
    # @param [OpenTox::Dataset] dataset that should be mapped to this dataset (fully loaded)
    # @param [Integer] compound_index, corresponding to dataset
    # @return [Integer, nil] index in this dataset, nil if the compound does not occur in this dataset
    def compound_index( dataset, compound_index )
      @index_map ||= {}
      unless @index_map[dataset.uri]
        map = {}
        dataset.compounds.collect{|c| c.uri}.uniq.each do |compound|
          self_indices = compound_indices(compound)
          next unless self_indices
          dataset_indices = dataset.compound_indices(compound)
          if self_indices.size==1
            dataset_indices.each { |d_idx| map[d_idx] = self_indices[0] }
          elsif self_indices.size==dataset_indices.size
            # we do assume that the order is preserved!
            dataset_indices.each_with_index { |d_idx, i| map[d_idx] = self_indices[i] }
          else
            raise "cannot map compound #{compound} from dataset #{dataset.uri} to dataset #{uri}, "+
              "compound occurs #{dataset_indices.size} times and #{self_indices.size} times"
          end
        end
        @index_map[dataset.uri] = map
      end
      @index_map[dataset.uri][compound_index]
    end

    # All positions at which a compound occurs in this dataset
    # The URI index is (re)built whenever the requested compound is not in the cached index.
    # @param [String] compound Compound URI
    # @return [Array<Integer>, nil] positions of the compound, nil if it does not occur
    def compound_indices( compound )
      unless @cmp_indices && @cmp_indices.has_key?(compound)
        @cmp_indices = {}
        @compounds.each_with_index do |c, i|
          (@cmp_indices[c.uri] ||= []) << i
        end
      end
      @cmp_indices[compound]
    end

    # Value of a feature for the compound at a given position
    # @param [Integer] compound_index position of the compound (row of data_entries)
    # @param [String] feature_uri Feature URI
    # @return [Object] the stored data entry
    def data_entry_value(compound_index, feature_uri)
      build_feature_positions unless @feature_positions
      @data_entries[compound_index][@feature_positions[feature_uri]]
    end

    ### Index Structures

    # Create value map
    # @param [OpenTox::Feature] A feature
    # @return [Hash] A hash with keys 1...feature.training_classes.size and values training classes
    # @raise [RuntimeError] if the feature has no accept values
    def value_map(feature)
      training_classes = feature.accept_values
      raise "no accept values for feature #{feature.uri} in dataset #{uri}" unless training_classes
      training_classes.each_with_index.each_with_object({}) { |(value, idx), h| h[idx+1] = value }
    end

    # Create feature positions map (built only once, then cached in @feature_positions)
    # Calls internal_server_error when a feature URI occurs more than once.
    # @return [Hash] A hash with keys feature uris and values feature positions
    def build_feature_positions
      unless @feature_positions
        @feature_positions = @features.each_with_index.each_with_object({}) { |(feature, idx), h|
          internal_server_error "Duplicate Feature '#{@features[idx].uri}' in dataset '#{@uri}'" if h[feature.uri]
          h[feature.uri] = idx
        }
      end
    end

    # Create compounds positions map (built only once, then cached in @compound_positions)
    # Keys are the InChI strings obtained from OpenTox::Compound for each compound URI;
    # positions are only recorded for keys matching /InChI/.
    # @return [Hash] A hash with keys compound InChIs and values compound position arrays
    def build_compound_positions
      unless @compound_positions
        @compound_positions = @compounds.each_with_index.each_with_object({}) { |(compound, idx), h|
          inchi = OpenTox::Compound.new(compound.uri).inchi
          h[inchi] ||= []
          h[inchi] << idx if inchi =~ /InChI/
        }
      end
    end


    ### Associative Search Operations

    # Search a dataset for a feature given its URI
    # @param [String] Feature URI
    # @return [OpenTox::Feature] Feature object, or nil if not present
    def find_feature(uri)
      build_feature_positions
      position = @feature_positions[uri]
      position ? @features[position] : nil
    end

    # Search a dataset for a compound given its URI
    # @param [String] Compound URI
    # @return [Array<OpenTox::Compound>] Array of compound objects, or nil if not present
    def find_compound(uri)
      build_compound_positions
      inchi = OpenTox::Compound.new(uri).inchi
      positions = @compound_positions[inchi] if inchi =~ /InChI/
      return nil unless positions
      # positions is an array of indices, so fetch every occurrence rather than indexing with the array
      @compounds.values_at(*positions)
    end

    # Search a dataset for a data entry given compound URI and feature URI
    # @param [String] Compound URI
    # @param [String] Feature URI
    # @return [Object] Data entry, or nil if not present
    def find_data_entry(compound_uri, feature_uri)
      build_compound_positions
      build_feature_positions
      inchi = OpenTox::Compound.new(compound_uri).inchi
      rows = @compound_positions[inchi]
      column = @feature_positions[feature_uri]
      return nil unless rows && column
      rows.collect { |idx| data_entries[idx][column] }
    end

  end


end