# path: root/lib/bbrc.rb
# blob: f7d29f9037bd9c8e1cbb7f8c09b92d862fc77198 (plain)
# Switch on the fminer options named below by setting each environment
# variable to the string 'true' when this file is loaded.
# NOTE(review): presumably read by the native fminer/BBRC bindings -- confirm.
%w[
  FMINER_SMARTS
  FMINER_NO_AROMATIC
  FMINER_PVALUES
  FMINER_SILENT
  FMINER_NR_HITS
].each { |flag| ENV[flag] = 'true' }

module OpenTox
  module Algorithm
    class Fminer
      # Mine BBRC substructure features for a training dataset.
      #
      # Runs synchronously: the mined features are stored in a new
      # OpenTox::Dataset, which is saved and returned. Timing information and
      # debug dumps are written to stdout.
      #
      # The resulting data_entries matrix is addressed as
      # data_entries[compound_idx][feature_idx], where compound_idx is the
      # position of the compound in params[:dataset].compounds and feature_idx
      # is the position of the feature in the returned dataset's features.
      # Cells that were never hit are left nil.
      #
      # @param [Hash] params BBRC parameters. Keys read by this method:
      #   - :dataset Training dataset object (must respond to #id and #compounds)
      #   - :prediction_feature Prediction feature object, i.e. dependent variable (must respond to #id)
      #   - :feature_type Pass "paths" to call SetType(1); any other value leaves the library default
      #   - :backbone Pass "false" to switch off mining for BBRC representatives
      #   - :min_chisq_significance Significance threshold, converted with #to_f
      #   - :nr_hits Pass "true" to store hit counts instead of presence (1)
      #   NOTE(review): min_frequency is presumably evaluated by #check_params
      #   (called with 5, presumably the default) -- confirm there.
      #   NOTE(review): get_target is only read by the disabled legacy block
      #   below and currently has no effect.
      # @return [OpenTox::Dataset] The saved feature dataset
      def self.bbrc(params)
        @fminer = OpenTox::Algorithm::Fminer.new
        @fminer.check_params(params, 5)

        time = Time.now

        @bbrc = Bbrc::Bbrc.new
        @bbrc.Reset
        value_map = nil # stays nil for regression
        if @fminer.prediction_feature.feature_type == "regression"
          @bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
        else
          unless @fminer.prediction_feature.accept_values
            bad_request_error "No accept values for "\
                              "dataset '#{@fminer.training_dataset.id}' and "\
                              "feature '#{@fminer.prediction_feature.id}'"
          end
          value_map = @fminer.prediction_feature.value_map
        end
        @bbrc.SetMinfreq(@fminer.minfreq)
        @bbrc.SetType(1) if params[:feature_type] == "paths"
        @bbrc.SetBackbone(false) if params[:backbone] == "false"
        @bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
        @bbrc.SetConsoleOut(false)

        feature_dataset = OpenTox::Dataset.new
        feature_dataset.title = "BBRC representatives"
        feature_dataset.creator = __FILE__
        feature_dataset.parameters = [
          { "title" => "dataset_id", "paramValue" => params[:dataset].id },
          { "title" => "prediction_feature", "paramValue" => params[:prediction_feature].id },
          { "title" => "min_frequency", "paramValue" => @fminer.minfreq },
          { "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") },
          { "title" => "backbone", "paramValue" => (params[:backbone] == "false" ? "false" : "true") }
        ]

        @fminer.compounds = []
        @fminer.db_class_sizes = Array.new # AM: effect
        @fminer.all_activities = Hash.new # DV: for effect calculation in regression part
        @fminer.smi = [] # AM LAST: needed for matching the patterns back

        # Add data to fminer
        @fminer.add_fminer_data(@bbrc, value_map)
        g_median = @fminer.all_activities.values.to_scale.median

        # Queried once, at the position of the original first call, and reused
        # as the loop bound below.
        root_node_count = @bbrc.GetNoRootNodes
        # Loop invariants for the per-hit loop below.
        dataset_compounds = params[:dataset].compounds
        store_hit_counts = params[:nr_hits] == "true"
        features = []
        data_entries = [[]]

        puts "Setup: #{Time.now-time}"
        time = Time.now
        ftime = 0

        # run @bbrc
        root_node_count.times do |j|
          @bbrc.MineRoot(j).each do |result|
            # Each result is a YAML document whose first element is
            # [smarts, p_value, hits...].
            f = YAML.load(result)[0]
            smarts = f[0]
            p_value = f[1]

            if @bbrc.GetRegression
              # regression part: f[2] is a list of { id => hit count } hashes
              id_arrs = f[2]
              # DV: effect calculation
              # extract id from each hit count hash and look up its activity
              f_arr = f[2].map { |hit| @fminer.all_activities[hit.keys[0]] }
              f_median = f_arr.to_scale.median
              effect = g_median >= f_median ? 'activating' : 'deactivating'
            else
              # classification part: f[2..-1] holds one hit list per class
              id_arrs = f[2..-1].flatten
              max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @fminer.db_class_sizes) # f needs reversal for bbrc
              effect = max+1
            end

            ft = Time.now
            feature = OpenTox::Feature.find_or_create_by({
              "title" => smarts.dup,
              "numeric" => true,
              "substructure" => true,
              "smarts" => smarts.dup,
              "pValue" => p_value.to_f.abs.round(5),
              "effect" => effect
            })
            features << feature
            features.uniq!
            ftime += Time.now - ft

            # The column is the same for every hit of this feature, so it is
            # looked up once instead of once per hit.
            feature_idx = features.index feature

            id_arrs.each do |id_count_hash|
              id = id_count_hash.keys[0].to_i
              count = id_count_hash.values[0].to_i
              compound_idx = dataset_compounds.index @fminer.compounds[id]
              data_entries[compound_idx] ||= []
              data_entries[compound_idx][feature_idx] = store_hit_counts ? count : 1
            end
          end
        end
        # NOTE(review): debug dump of the full matrix; consider removing or
        # routing through a logger.
        p data_entries

        puts "Fminer: #{Time.now-time} (find/create Features: #{ftime})"
        time = Time.now

        # Disabled legacy result assembly, kept for reference. It expects a
        # `fminer_results` hash that this method does not build.
=begin
          fminer_compounds = @fminer.training_dataset.compounds
          prediction_feature_idx = @fminer.training_dataset.features.index @fminer.prediction_feature
          prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| 
            @fminer.training_dataset.data_entries[idx][prediction_feature_idx] 
          }
          fminer_noact_compounds = fminer_compounds - @fminer.compounds

          feature_dataset.features = features
          feature_dataset.features = [ @fminer.prediction_feature ] + feature_dataset.features if params[:get_target] == "true"
          feature_dataset.compounds = fminer_compounds
          fminer_compounds.each_with_index { |c,idx|
            # TODO: reenable option
            #if (params[:get_target] == "true")
              #row = row + [ prediction_feature_all_acts[idx] ]
            #end
            features.each { |f|
              v = fminer_results[c][f] if fminer_results[c] 
              unless fminer_noact_compounds.include? c
                v = 0 if v.nil?
              end
              feature_dataset.add_data_entry c, f, v.to_i
            }
          }
=end
        feature_dataset.compounds = dataset_compounds
        feature_dataset.features = features
        feature_dataset.data_entries = data_entries

        puts "Prepare save: #{Time.now-time}"
        time = Time.now
        feature_dataset.save

        puts "Save: #{Time.now-time}"
        # NOTE(review): debug dump of the saved dataset.
        p feature_dataset
        feature_dataset
      end
    end
  end
end