summaryrefslogtreecommitdiff
path: root/lib/algorithm/feature_values.rb
blob: 284fae1b5f03a18b180e123eeca830467884e6c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# feature_values.rb
# Feature Value library
# Author: Andreas Maunz

module OpenTox
  class Algorithm

    # Computes per-compound feature values, either by SMARTS substructure
    # matching or by looking values up in (or requesting them for) a dataset.
    # All public methods share the signature (params, subjectid).
    class FeatureValues
      # Substructure matching
      # @param [Hash] params keys: :compound (object responding to #match),
      #   :feature_dataset (object whose #features are indexable by DC.title;
      #   the titles are handed to the compound as the patterns to match)
      # @param [String] subjectid not used by this method
      # @return [Array] whatever params[:compound].match returns (matching Smarts)
      def self.match(params, subjectid)
        features = params[:feature_dataset].features.collect { |f| f[DC.title] }
        params[:compound].match(features)
      end

      # Substructure matching with number of non-unique hits
      # @param [Hash] params keys: :compound (object responding to #match_hits),
      #   :feature_dataset (object whose #features are indexable by DC.title)
      # @param [String] subjectid not used by this method
      # @return [Hash] whatever params[:compound].match_hits returns
      #   (matching Smarts and number of hits)
      def self.match_hits(params, subjectid)
        # No trailing comma here: with one, Ruby parses the two lines as a
        # multiple-value assignment, calls match_hits(nil) and returns an Array.
        features = params[:feature_dataset].features.collect { |f| f[DC.title] }
        params[:compound].match_hits(features)
      end

      # PC descriptor calculation
      #
      # If the compound (compared by InChI) has rows in the feature dataset,
      # the values of those rows are aggregated per feature: median for
      # features typed OT.NumericFeature, mode for all others. A feature with
      # no non-nil value maps to nil. Otherwise the values are requested from
      # the compound service.
      #
      # @param [Hash] params keys: :compound, :feature_dataset, plus any
      #   further keys (e.g. :pc_type, :lib) that are forwarded to the compound
      #   service when a remote lookup is needed. The hash is not modified.
      # @param [String] subjectid passed to OpenTox::Dataset.new and sent as
      #   :subjectid with the remote request
      # @return [Hash] Hash with feature title as key and value as value
      def self.lookup(params, subjectid)
        ds = params[:feature_dataset]
        ds.build_feature_positions
        cmpd_inchi = params[:compound].inchi
        cmpd_idxs = ds.compounds.each_with_index.select { |cmpd, _idx|
          cmpd.inchi == cmpd_inchi
        }.map { |_cmpd, idx| idx }

        # No rows for this compound: we need a remote lookup
        return remote_lookup(params, cmpd_inchi, subjectid) if cmpd_idxs.empty?

        cmpd_data_entries = cmpd_idxs.map { |idx| ds.data_entries[idx] }
        numeric_f, other_f = ds.features.partition { |f|
          f[RDF.type].include?(OT.NumericFeature)
        }

        # Numeric features are filled in first, then the others, so a title
        # shared by both kinds ends up with the non-numeric value (as before).
        cmpd_fingerprints = {}
        numeric_f.each { |f|
          values = column_values(ds, cmpd_data_entries, f).map { |val| val.to_f }
          cmpd_fingerprints[f.title] = values.empty? ? nil : values.to_scale.median # AM: median for numeric features
        }
        other_f.each { |f|
          values = column_values(ds, cmpd_data_entries, f)
          # Same empty guard as the numeric branch, so an all-nil column yields
          # nil instead of asking the vector for the mode of nothing.
          cmpd_fingerprints[f.title] = values.empty? ? nil : values.to_scale.mode # AM: mode for the others
        }
        cmpd_fingerprints
      end

      # Non-nil values of one feature across the given data entries.
      def self.column_values(dataset, entries, feature)
        position = dataset.feature_positions[feature.uri]
        entries.map { |entry| entry[position] }.compact
      end

      # Requests the descriptor values for a single compound from the compound
      # service and returns them as a Hash keyed by feature title.
      def self.remote_lookup(params, cmpd_inchi, subjectid)
        # Work on a shallow copy so the caller's hash keeps its entries.
        request = params.dup
        request[:subjectid] = subjectid
        [:compound, :feature_dataset].each { |key| request.delete(key) }
        # Blank optional parameters are dropped rather than sent as "".
        [:pc_type, :lib].each { |key| request.delete(key) if request[key] == "" }

        single_cmpd_ds = OpenTox::Dataset.new(nil, subjectid)
        single_cmpd_ds.parse_rdfxml(RestClient.post("#{$compound[:uri]}/#{cmpd_inchi}/pc", request, {:accept => "application/rdf+xml"}))
        single_cmpd_ds.get(true)
        single_cmpd_ds.build_feature_positions
        single_cmpd_ds.features.inject({}) { |h, f|
          # Only the first data entry is read (the dataset was requested for one compound).
          h[f.title] = single_cmpd_ds.data_entries[0][single_cmpd_ds.feature_positions[f.uri]]
          h
        }
      end

      private_class_method :column_values, :remote_lookup
    end

  end
end