summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Maunz <andreas@maunz.de>2012-05-07 15:31:22 +0200
committerAndreas Maunz <andreas@maunz.de>2012-05-07 15:31:22 +0200
commit615b105230453e04bf6ec0354f2c391170e76974 (patch)
treebd4b15b22542f73b5d2fd4fd2a85737a00f980c3
parentdcd8379fcd2448b83f1b390fc74bb0b4025f8dd8 (diff)
Better comments
-rw-r--r--fminer.rb27
1 files changed, 15 insertions, 12 deletions
diff --git a/fminer.rb b/fminer.rb
index cec902e..ee7f8e9 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -307,7 +307,7 @@ post '/fminer/bbrc/sample/?' do
# min_sampling_support
unless params[:min_sampling_support]
min_sampling_support = (num_boots * 0.3).ceil
- LOGGER.debug "Set num_boots to default value #{min_sampling_support}"
+ LOGGER.debug "Set min_sampling_support to default value #{min_sampling_support}"
else
raise OpenTox::BadRequestError.new "min_sampling_support is not numeric" unless OpenTox::Algorithm.numeric? params[:min_sampling_support]
min_sampling_support= params[:min_sampling_support].to_i.ceil
@@ -324,7 +324,7 @@ post '/fminer/bbrc/sample/?' do
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.add_metadata({
- DC.title => "BBRC sampled representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
+ DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s + "(bootstrapped)",
DC.creator => url_for('/fminer/bbrc/sample',:full),
OT.hasSource => url_for('/fminer/bbrc/sample', :full),
OT.parameters => [
@@ -335,13 +335,12 @@ post '/fminer/bbrc/sample/?' do
})
feature_dataset.save(@subjectid)
- fminer.compounds = []
- fminer.db_class_sizes = Array.new # AM: effect
- fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
- fminer.smi = [] # AM LAST: needed for matching the patterns back
-
- # Add data to fminer
- fminer.add_fminer_data(nil, @value_map) # AM: 'nil' as instance to only fill in administrative data
+ # filled by add_fminer_data:
+ fminer.compounds = [] # indexed by id, starting from 1 (not 0)
+ fminer.db_class_sizes = Array.new # for effect calculation
+ fminer.all_activities = Hash.new # for effect calculation, indexed by id, starting from 1 (not 0)
+ fminer.smi = [] # needed for matching the patterns back, indexed by id, starting from 1 (not 0)
+ fminer.add_fminer_data(nil, @value_map) # To only fill in administrative data (no fminer priming) pass 'nil' as instance
raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
@@ -362,24 +361,28 @@ post '/fminer/bbrc/sample/?' do
@r.eval "source(\"bbrc-sample/bbrc-sample.R\")"
@r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, F)"
- smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes
+ smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts
r_p_values = @r.pull "ans.p.values"
# matching
task.progress 90
lu = LU.new # AM LAST: uses last-utils here
params[:nr_hits] == "true" ? hit_count=true: hit_count=false
+
+
+ LOGGER.debug fminer.smi.to_yaml
+ LOGGER.debug smarts.to_yaml
matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations
matches.each do |smarts, ids|
feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f
- g=Array.new
+ g = Array.new
@value_map.each { |y,act| g[y-1]=Array.new }
feat_hash.each { |x,y| g[y-1].push(x) }
max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes)
effect = g.size-max
- feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s
+ feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
unless features.include? smarts
features << smarts
metadata = {