summary | refs | log | tree | commit | diff
path: root/fminer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'fminer.rb')
-rw-r--r-- fminer.rb 172
1 files changed, 0 insertions, 172 deletions
diff --git a/fminer.rb b/fminer.rb
index b429250..bfc6879 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -107,178 +107,6 @@ module OpenTox
end
- # Run bbrc algorithm on dataset
- #
- # @param [String] dataset_uri URI of the training dataset
- # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
- # @param [optional] parameters BBRC parameters, accepted parameters are
- # - min_frequency Minimum frequency (default 5)
- # - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
- # - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
- # - min_chisq_significance Significance threshold (between 0 and 1)
- # - nr_hits Set to "true" to get hit count instead of presence
- # - get_target Set to "true" to obtain target variable as feature
- # @return [text/uri-list] Task URI
- post '/fminer/bbrc/?' do
-
- @@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/bbrc',:full))
- @@fminer.check_params(params,5)
-
- task = OpenTox::Task.run("Mining BBRC features", uri('/fminer/bbrc')) do |task|
-
- time = Time.now
-
- @@bbrc.Reset
- if @@fminer.prediction_feature.feature_type == "regression"
- @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
- else
- bad_request_error "No accept values for "\
- "dataset '#{@@fminer.training_dataset.uri}' and "\
- "feature '#{@@fminer.prediction_feature.uri}'" unless
- @@fminer.prediction_feature.accept_values
- value_map=@@fminer.prediction_feature.value_map
- end
- @@bbrc.SetMinfreq(@@fminer.minfreq)
- @@bbrc.SetType(1) if params[:feature_type] == "paths"
- @@bbrc.SetBackbone(false) if params[:backbone] == "false"
- @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
- @@bbrc.SetConsoleOut(false)
-
- feature_dataset = OpenTox::Dataset.new
- feature_dataset.metadata = {
- RDF::DC.title => "BBRC representatives",
- RDF::DC.creator => to('/fminer/bbrc',:full),
- RDF::OT.hasSource => to('/fminer/bbrc', :full),
- }
- feature_dataset.parameters = [
- { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
- { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
- { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
- { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") },
- { RDF::DC.title => "backbone", RDF::OT.paramValue => (params[:backbone] == "false" ? "false" : "true") }
- ]
-
- @@fminer.compounds = []
- @@fminer.db_class_sizes = Array.new # AM: effect
- @@fminer.all_activities = Hash.new # DV: for effect calculation in regression part
- @@fminer.smi = [] # AM LAST: needed for matching the patterns back
-
- # Add data to fminer
- @@fminer.add_fminer_data(@@bbrc, value_map)
- g_median=@@fminer.all_activities.values.to_scale.median
-
- #task.progress 10
- step_width = 80 / @@bbrc.GetNoRootNodes().to_f
- features_smarts = Set.new
- features = Array.new
-
- puts "Setup: #{Time.now-time}"
- time = Time.now
- ftime = 0
-
- # run @@bbrc
-
- # prepare to receive results as hash { c => [ [f,v], ... ] }
- fminer_results = {}
-
- (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
- results = @@bbrc.MineRoot(j)
- #task.progress 10+step_width*(j+1)
- results.each do |result|
- f = YAML.load(result)[0]
- smarts = f[0]
- p_value = f[1]
-
- if (!@@bbrc.GetRegression)
- id_arrs = f[2..-1].flatten
- max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @@fminer.db_class_sizes) # f needs reversal for bbrc
- effect = max+1
- else #regression part
- id_arrs = f[2]
- # DV: effect calculation
- f_arr=Array.new
- f[2].each do |id|
- id=id.keys[0] # extract id from hit count hash
- f_arr.push(@@fminer.all_activities[id])
- end
- f_median=f_arr.to_scale.median
- if g_median >= f_median
- effect = 'activating'
- else
- effect = 'deactivating'
- end
- end
-
- ft = Time.now
- unless features_smarts.include? smarts
- features_smarts << smarts
- feature = OpenTox::Feature.find_or_create({
- RDF::DC.title => smarts.dup,
- RDF::OT.hasSource => to('/fminer/bbrc', :full),
- RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature],
- RDF::OT.smarts => smarts.dup,
- RDF::OT.pValue => p_value.to_f.abs.round(5),
- RDF::OT.effect => effect
- })
- features << feature
- end
- ftime += Time.now - ft
-
- id_arrs.each { |id_count_hash|
- id=id_count_hash.keys[0].to_i
- count=id_count_hash.values[0].to_i
- fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {}
- if params[:nr_hits] == "true"
- fminer_results[@@fminer.compounds[id]][feature.uri] = count
- else
- fminer_results[@@fminer.compounds[id]][feature.uri] = 1
- end
- }
-
- end # end of
- end # feature parsing
-
-
- puts "Fminer: #{Time.now-time} (find/create Features: #{ftime})"
- time = Time.now
-
- fminer_compounds = @@fminer.training_dataset.compounds
- prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri
- prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
- @@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
- }
- fminer_noact_compounds = fminer_compounds - @@fminer.compounds
-
- feature_dataset.features = features
- feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features if params[:get_target] == "true"
- feature_dataset.compounds = fminer_compounds
- fminer_compounds.each_with_index { |c,idx|
- # TODO: reenable option
- #if (params[:get_target] == "true")
- #row = row + [ prediction_feature_all_acts[idx] ]
- #end
- features.each { |f|
- v = fminer_results[c][f.uri] if fminer_results[c]
- unless fminer_noact_compounds.include? c
- v = 0 if v.nil?
- end
- feature_dataset.add_data_entry c, f, v.to_i
- }
- }
-
- puts "Prepare save: #{Time.now-time}"
- time = Time.now
- feature_dataset.put
-
- puts "Save: #{Time.now-time}"
- feature_dataset.uri
-
-
- end
- response['Content-Type'] = 'text/uri-list'
- halt 202,task.uri
- end
-