diff options
Diffstat (limited to 'lib/bbrc.rb')
-rw-r--r-- | lib/bbrc.rb | 284 |
1 files changed, 127 insertions, 157 deletions
diff --git a/lib/bbrc.rb b/lib/bbrc.rb index 40de186..2c2b8a2 100644 --- a/lib/bbrc.rb +++ b/lib/bbrc.rb @@ -18,178 +18,148 @@ module OpenTox @fminer=OpenTox::Algorithm::Fminer.new @fminer.check_params(params,5) - - # TODO introduce task again - #task = OpenTox::Task.run("Mining BBRC features", __FILE__ ) do |task| - time = Time.now + time = Time.now - @bbrc = Bbrc::Bbrc.new - @bbrc.Reset - if @fminer.prediction_feature.feature_type == "regression" - @bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! - else - bad_request_error "No accept values for "\ - "dataset '#{@fminer.training_dataset.id}' and "\ - "feature '#{@fminer.prediction_feature.id}'" unless - @fminer.prediction_feature.accept_values - value_map=@fminer.prediction_feature.value_map - end - @bbrc.SetMinfreq(@fminer.minfreq) - @bbrc.SetType(1) if params[:feature_type] == "paths" - @bbrc.SetBackbone(false) if params[:backbone] == "false" - @bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] - @bbrc.SetConsoleOut(false) + @bbrc = Bbrc::Bbrc.new + @bbrc.Reset + if @fminer.prediction_feature.feature_type == "regression" + @bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! + else + bad_request_error "No accept values for "\ + "dataset '#{@fminer.training_dataset.id}' and "\ + "feature '#{@fminer.prediction_feature.id}'" unless + @fminer.prediction_feature.accept_values + value_map=@fminer.prediction_feature.value_map + end + @bbrc.SetMinfreq(@fminer.minfreq) + @bbrc.SetType(1) if params[:feature_type] == "paths" + @bbrc.SetBackbone(false) if params[:backbone] == "false" + @bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] + @bbrc.SetConsoleOut(false) - feature_dataset = OpenTox::Dataset.new - feature_dataset.title = "BBRC representatives" - feature_dataset.creator = __FILE__ - feature_dataset.parameters = [ - { "title" => "dataset_id", "paramValue" => params[:dataset].id }, - { "title" => "prediction_feature_id", "paramValue" => params[:prediction_feature].id }, - { "title" => "min_frequency", "paramValue" => @fminer.minfreq }, - { "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") }, - { "title" => "backbone", "paramValue" => (params[:backbone] == "false" ? "false" : "true") } - ] + feature_dataset = OpenTox::CalculatedDataset.new + feature_dataset.title = "BBRC representatives" + feature_dataset.creator = __FILE__ + feature_dataset.parameters = [ + { "title" => "dataset_id", "paramValue" => params[:dataset].id }, + { "title" => "prediction_feature_id", "paramValue" => params[:prediction_feature].id }, + { "title" => "min_frequency", "paramValue" => @fminer.minfreq }, + { "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") }, + { "title" => "backbone", "paramValue" => (params[:backbone] == "false" ? "false" : "true") } + ] - @fminer.compounds = [] - @fminer.db_class_sizes = Array.new # AM: effect - @fminer.all_activities = Hash.new # DV: for effect calculation in regression part - @fminer.smi = [] # AM LAST: needed for matching the patterns back - - # Add data to fminer - @fminer.add_fminer_data(@bbrc, value_map) - g_median=@fminer.all_activities.values.to_scale.median + @fminer.compounds = [] + @fminer.db_class_sizes = Array.new # AM: effect + @fminer.all_activities = Hash.new # DV: for effect calculation in regression part + @fminer.smi = [] # AM LAST: needed for matching the patterns back + + # Add data to fminer + @fminer.add_fminer_data(@bbrc, value_map) + g_median=@fminer.all_activities.values.to_scale.median - #task.progress 10 - step_width = 80 / @bbrc.GetNoRootNodes().to_f - #features_smarts = Set.new - features = [] - data_entries = Array.new(params[:dataset].compounds.size) {[]} + #task.progress 10 + step_width = 80 / @bbrc.GetNoRootNodes().to_f + #features_smarts = Set.new + features = [] + data_entries = Array.new(params[:dataset].compounds.size) {[]} - puts "Setup: #{Time.now-time}" - time = Time.now - ftime = 0 - - # run @bbrc - - fminer_results = {} + $logger.debug "Setup: #{Time.now-time}" + time = Time.now + ftime = 0 + + # run @bbrc + + fminer_results = {} - (0 .. @bbrc.GetNoRootNodes()-1).each do |j| - results = @bbrc.MineRoot(j) - #task.progress 10+step_width*(j+1) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - - if (!@bbrc.GetRegression) - id_arrs = f[2..-1].flatten - max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @fminer.db_class_sizes) # f needs reversal for bbrc - effect = max+1 - else #regression part - id_arrs = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - id=id.keys[0] # extract id from hit count hash - f_arr.push(@fminer.all_activities[id]) - end - f_median=f_arr.to_scale.median - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end + (0 .. @bbrc.GetNoRootNodes()-1).each do |j| + results = @bbrc.MineRoot(j) + #task.progress 10+step_width*(j+1) + results.each do |result| + f = YAML.load(result)[0] + smarts = f[0] + p_value = f[1] + + if (!@bbrc.GetRegression) + id_arrs = f[2..-1].flatten + max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @fminer.db_class_sizes) # f needs reversal for bbrc + effect = max+1 + else #regression part + id_arrs = f[2] + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + id=id.keys[0] # extract id from hit count hash + f_arr.push(@fminer.all_activities[id]) end - - ft = Time.now - feature = OpenTox::Feature.find_or_create_by({ - "title" => smarts.dup, - "numeric" => true, - "substructure" => true, - "smarts" => smarts.dup, - "pValue" => p_value.to_f.abs.round(5), - "effect" => effect, - "parameters" => [ - { "title" => "dataset_id", "paramValue" => params[:dataset].id }, - { "title" => "prediction_feature_id", "paramValue" => params[:prediction_feature].id } - ] - }) - features << feature - ftime += Time.now - ft + f_median=f_arr.to_scale.median + if g_median >= f_median + effect = 'activating' + else + effect = 'deactivating' + end + end + + ft = Time.now + feature = OpenTox::Feature.find_or_create_by({ + "title" => smarts.dup, + "numeric" => true, + "substructure" => true, + "smarts" => smarts.dup, + "pValue" => p_value.to_f.abs.round(5), + "effect" => effect, + "parameters" => [ + { "title" => "dataset_id", "paramValue" => params[:dataset].id }, + { "title" => "prediction_feature_id", "paramValue" => params[:prediction_feature].id } + ] + }) + features << feature + ftime += Time.now - ft - id_arrs.each { |id_count_hash| - id=id_count_hash.keys[0].to_i - count=id_count_hash.values[0].to_i - fminer_results[@fminer.compounds[id]] || fminer_results[@fminer.compounds[id]] = {} - compound_idx = params[:dataset].compounds.index @fminer.compounds[id] - feature_idx = features.index feature - data_entries[compound_idx] ||= [] - if params[:nr_hits] == "true" - fminer_results[@fminer.compounds[id]][feature] = count - data_entries[compound_idx][feature_idx] = count - else - fminer_results[@fminer.compounds[id]][feature] = 1 - data_entries[compound_idx][feature_idx] = 1 - end - } - - end # end of - end # feature parsing + id_arrs.each { |id_count_hash| + id=id_count_hash.keys[0].to_i + count=id_count_hash.values[0].to_i + fminer_results[@fminer.compounds[id]] || fminer_results[@fminer.compounds[id]] = {} + compound_idx = params[:dataset].compounds.index @fminer.compounds[id] + feature_idx = features.index feature + data_entries[compound_idx] ||= [] + if params[:nr_hits] == "true" + fminer_results[@fminer.compounds[id]][feature] = count + data_entries[compound_idx][feature_idx] = count + else + fminer_results[@fminer.compounds[id]][feature] = 1 + data_entries[compound_idx][feature_idx] = 1 + end + } + + end # end of + end # feature parsing - puts "Fminer: #{Time.now-time} (find/create Features: #{ftime})" - time = Time.now + $logger.debug "Fminer: #{Time.now-time} (find/create Features: #{ftime})" + time = Time.now - # convert nil entries to 0 - data_entries.collect! do |r| - if r.empty? - Array.new(features.size,0) - else - r[features.size-1] = 0 if r.size < features.size # grow array to match feature size - r.collect!{|c| c.nil? ? 0 : c} # remove nils - end + # convert nil entries to 0 + data_entries.collect! do |r| + if r.empty? + Array.new(features.size,0) + else + r[features.size-1] = 0 if r.size < features.size # grow array to match feature size + r.collect!{|c| c.nil? ? 0 : c} # remove nils end + end -=begin - # This part increases runtime by a factor of ~65 - # TODO: check if any information is lost due to simplification - fminer_compounds = @fminer.training_dataset.compounds - prediction_feature_idx = @fminer.training_dataset.features.index @fminer.prediction_feature - prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| - @fminer.training_dataset.data_entries[idx][prediction_feature_idx] - } - fminer_noact_compounds = fminer_compounds - @fminer.compounds - - feature_dataset.features = features - feature_dataset.features = [ @fminer.prediction_feature ] + feature_dataset.features if params[:get_target] == "true" - feature_dataset.compounds = fminer_compounds - fminer_compounds.each_with_index { |c,idx| - # TODO: reenable option - #if (params[:get_target] == "true") - #row = row + [ prediction_feature_all_acts[idx] ] - #end - features.each { |f| - v = fminer_results[c][f] if fminer_results[c] - unless fminer_noact_compounds.include? c - v = 0 if v.nil? - end - feature_dataset.add_data_entry c, f, v.to_i - } - } -=end - feature_dataset.compounds = params[:dataset].compounds - feature_dataset.features = features - feature_dataset.data_entries = data_entries + feature_dataset.compounds = params[:dataset].compounds + feature_dataset.features = features + feature_dataset.data_entries = data_entries - puts "Prepare save: #{Time.now-time}" - time = Time.now - feature_dataset.save + $logger.debug "Prepare save: #{Time.now-time}" + time = Time.now + #File.open("kazius.json","w+"){|f| f.puts feature_dataset.inspect} + feature_dataset.save - puts "Save: #{Time.now-time}" - feature_dataset - - #end + $logger.debug "Save: #{Time.now-time}" + feature_dataset + end end end |