summaryrefslogtreecommitdiff
path: root/lib/utils/shims/dataset.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/utils/shims/dataset.rb')
-rw-r--r--lib/utils/shims/dataset.rb201
1 files changed, 0 insertions, 201 deletions
diff --git a/lib/utils/shims/dataset.rb b/lib/utils/shims/dataset.rb
deleted file mode 100644
index f72ff1b..0000000
--- a/lib/utils/shims/dataset.rb
+++ /dev/null
@@ -1,201 +0,0 @@
-=begin
-* Name: dataset.rb
-* Description: Dataset shims
-* Author: Andreas Maunz <andreas@maunz.de>
-* Date: 10/2012
-=end
-
-module OpenTox
-
- # Shims for the Dataset Class
- class Dataset
-
- attr_accessor :feature_positions, :compound_positions
-
- # Load a dataset from URI
- # @param [String] Dataset URI
- # @return [OpenTox::Dataset] Dataset object
- def self.find(uri, subjectid=nil)
- return nil unless uri
- ds = OpenTox::Dataset.new uri, subjectid
- ds.get
- ds
- end
-
- def self.exist?(uri, subjectid=nil)
- ds = OpenTox::Dataset.new uri, subjectid
- begin
- ds.get_metadata
- true
- rescue
- false
- end
- end
-
- def split( compound_indices, feats, metadata, subjectid=nil)
-
- raise "Dataset.split : pls give compounds as indices" if compound_indices.size==0 or !compound_indices[0].is_a?(Fixnum)
- raise "Dataset.split : pls give features as feature objects (given: #{feats})" if feats!=nil and feats.size>0 and !feats[0].is_a?(OpenTox::Feature)
- $logger.debug "split dataset using "+compound_indices.size.to_s+"/"+@compounds.size.to_s+" compounds"
-
- dataset = OpenTox::Dataset.new(nil, subjectid)
- dataset.metadata = metadata
- dataset.features = (feats ? feats : self.features)
- compound_indices.each do |c_idx|
- dataset << [ self.compounds[c_idx] ] + dataset.features.each_with_index.collect{|f,f_idx| self.data_entries[c_idx][f_idx]}
- end
-
- #compound_indices.each do |c_idx|
- # c = @compounds[c_idx]
- # dataset.add_compound(c)
- # if @data_entries[c]
- # features.each do |f|
- # if @data_entries[c][f]
- # dataset.add_data_entry c,f,@data_entries[c][f][entry_index(c_idx)]
- # else
- # dataset.add_data_entry c,f,nil
- # end
- # end
- # end
- # end
-
- dataset.put subjectid
- dataset
- end
-
-
- # maps a compound-index from another dataset to a compound-index from this dataset
- # mapping works as follows:
- # (compound c is the compound identified by the compound-index of the other dataset)
- # * c occurs only once in this dataset? map compound-index of other dataset to index in this dataset
- # * c occurs >1 in this dataset?
- # ** number of occurences is equal in both datasets? assume order is preserved(!) and map accordingly
- # ** number of occurences is not equal in both datasets? cannot map, raise error
- # @param [OpenTox::Dataset] dataset that should be mapped to this dataset (fully loaded)
- # @param [Fixnum] compound_index, corresponding to dataset
- def compound_index( dataset, compound_index )
- unless defined?(@index_map) and @index_map[dataset.uri]
- map = {}
- dataset.compounds.collect{|c| c.uri}.uniq.each do |compound|
- self_indices = compound_indices(compound)
- next unless self_indices
- dataset_indices = dataset.compound_indices(compound)
- if self_indices.size==1
- dataset_indices.size.times do |i|
- map[dataset_indices[i]] = self_indices[0]
- end
- elsif self_indices.size==dataset_indices.size
- # we do assume that the order is preseverd!
- dataset_indices.size.times do |i|
- map[dataset_indices[i]] = self_indices[i]
- end
- else
- raise "cannot map compound #{compound} from dataset #{dataset.uri} to dataset #{uri}, "+
- "compound occurs #{dataset_indices.size} times and #{self_indices.size} times"
- end
- end
- @index_map = {} unless defined?(@index_map)
- @index_map[dataset.uri] = map
- end
- @index_map[dataset.uri][compound_index]
- end
-
- def compound_indices( compound )
- unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound)
- @cmp_indices = {}
- @compounds.size.times do |i|
- c = @compounds[i].uri
- if @cmp_indices[c]==nil
- @cmp_indices[c] = [i]
- else
- @cmp_indices[c] = @cmp_indices[c]+[i]
- end
- end
- end
- @cmp_indices[compound]
- end
-
- def data_entry_value(compound_index, feature_uri)
- build_feature_positions unless @feature_positions
- @data_entries[compound_index][@feature_positions[feature_uri]]
- end
-
- ### Index Structures
-
- # Create value map
- # @param [OpenTox::Feature] A feature
- # @return [Hash] A hash with keys 1...feature.training_classes.size and values training classes
- def value_map(feature)
- training_classes = feature.accept_values
- raise "no accept values for feature #{feature.uri} in dataset #{uri}" unless training_classes
- training_classes.each_index.inject({}) { |h,idx| h[idx+1]=training_classes[idx]; h }
- end
-
- # Create feature positions map
- # @return [Hash] A hash with keys feature uris and values feature positions
- def build_feature_positions
- unless @feature_positions
- @feature_positions = @features.each_index.inject({}) { |h,idx|
- internal_server_error "Duplicate Feature '#{@features[idx].uri}' in dataset '#{@uri}'" if h[@features[idx].uri]
- h[@features[idx].uri] = idx
- h
- }
- end
- end
-
- # Create compounds positions map
- # @return [Hash] A hash with keys compound uris and values compound position arrays
- def build_compound_positions
- unless @compound_positions
- @compound_positions = @compounds.each_index.inject({}) { |h,idx|
- inchi=OpenTox::Compound.new(@compounds[idx].uri).inchi
- h[inchi] = [] unless h[inchi]
- h[inchi] << idx if inchi =~ /InChI/
- h
- }
- end
- end
-
-
- ### Associative Search Operations
-
- # Search a dataset for a feature given its URI
- # @param [String] Feature URI
- # @return [OpenTox::Feature] Feature object, or nil if not present
- def find_feature(uri)
- build_feature_positions
- res = @features[@feature_positions[uri]] if @feature_positions[uri]
- res
- end
-
- # Search a dataset for a compound given its URI
- # @param [String] Compound URI
- # @return [OpenTox::Compound] Array of compound objects, or nil if not present
- def find_compound(uri)
- build_compound_positions
- inchi = OpenTox::Compound.new(uri).inchi
- res = @compounds[@compound_positions[inchi]] if inchi =~ /InChI/ and @compound_positions[inchi]
- res
- end
-
- # Search a dataset for a data entry given compound URI and feature URI
- # @param [String] Compound URI
- # @param [String] Feature URI
- # @return [Object] Data entry, or nil if not present
- def find_data_entry(compound_uri, feature_uri)
- build_compound_positions
- build_feature_positions
- inchi = OpenTox::Compound.new(compound_uri).inchi
- if @compound_positions[inchi] && @feature_positions[feature_uri]
- res = []
- @compound_positions[inchi].each { |idx|
- res << data_entries[idx][@feature_positions[feature_uri]]
- }
- end
- res
- end
-
- end
-
-
-end