diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-06-24 15:53:33 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-06-24 15:53:33 +0200 |
commit | d73c1aa193f0ffb14123ab3438b10d253c4b5ddc (patch) | |
tree | ea1f6424544b817786b0a0161b74ca0168ab0070 | |
parent | 8d8880685ddaeeb968e1f1f0addf6cbf66eef67d (diff) | |
parent | cc24e7d94bf8759a39600cd5a748365f729102b8 (diff) |
Merge branch 'multi' into development
-rw-r--r-- | fminer.rb | 113 | ||||
-rw-r--r-- | lazar.rb | 35 |
2 files changed, 75 insertions, 73 deletions
@@ -117,6 +117,9 @@ post '/fminer/bbrc/?' do raise "no accept values for dataset '"+training_dataset.uri.to_s+"' and feature '"+prediction_feature.uri.to_s+ "'" unless training_dataset.accept_values(prediction_feature.uri) @training_classes = training_dataset.accept_values(prediction_feature.uri).sort + puts @training_classes.to_yaml + @value_map=Hash.new + @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end @@bbrc.SetMinfreq(minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" @@ -138,8 +141,8 @@ post '/fminer/bbrc/?' do id = 1 # fminer start id is not 0 compounds = [] - nr_active=0 - nr_inactive=0 + nr_classes={} + nr_total=0 all_activities = Hash.new# DV: for effect calculation in regression part training_dataset.data_entries.each do |compound,entry| @@ -166,30 +169,19 @@ post '/fminer/bbrc/?' do end end end + + @value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| if feature == prediction_feature.uri values.each do |value| if value.nil? - LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - when /#{@training_classes.last}/ - nr_active += 1 - activity = 1 - when /#{@training_classes.first}/ - nr_inactive += 1 - activity = 0 - else - LOGGER.warn "Unknown class \"#{value.to_s}\"." - end - elsif prediction_feature.feature_type == "regression" + activity= @value_map.invert[value].to_f + nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + nr_total+=1 + elsif prediction_feature.feature_type == "regression" activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@ -223,14 +215,22 @@ post '/fminer/bbrc/?' do p_value = f[1] if (!@@bbrc.GetRegression) - ids = f[2] + f[3] - if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) - effect = 'activating' - else - effect = 'deactivating' - end + id_arrs = f[2..-1].flatten + max=nil + max_value=0 + f[2..-1].reverse.each_with_index { |id,i| # fminer outputs occurrences sorted reverse by activity. + actual = id.size.to_f/id_arrs.size + expected = nr_classes[i].to_f/nr_total + if actual > expected + if ((actual - expected) / actual) > max_value + max_value = (actual - expected) / actual # 'Schleppzeiger' + max = i + end + end + } + effect = @value_map[f[2..-1].size-max].to_s else #regression part - ids = f[2] + id_arrs = f[2] # DV: effect calculation f_arr=Array.new f[2].each do |id| @@ -261,7 +261,7 @@ post '/fminer/bbrc/?' do feature_dataset.add_feature feature_uri, metadata #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters end - ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + id_arrs.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end end feature_dataset.save(@subjectid) @@ -306,6 +306,8 @@ post '/fminer/last/?' do @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else @training_classes = training_dataset.accept_values(prediction_feature.uri) + @value_map=Hash.new + @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end @@last.SetMinfreq(minfreq) @@last.SetType(1) if params[:feature_type] == "paths" @@ -328,8 +330,8 @@ post '/fminer/last/?' do id = 1 # fminer start id is not 0 compounds = [] smi = [] # AM LAST: needed for matching the patterns back - nr_active=0 - nr_inactive=0 + nr_classes = [] + nr_total=0 all_activities = Hash.new #DV: for effect calculation (class and regr) training_dataset.data_entries.each do |compound,entry| @@ -343,38 +345,40 @@ post '/fminer/last/?' do LOGGER.warn "Cannot find smiles for #{compound.to_s}." next end + + # AM: take log if appropriate + #take_logs=true + #entry.each do |feature,values| + # values.each do |value| + # if prediction_feature.feature_type == "regression" + # if (! value.nil?) && (value.to_f <= 0) + # take_logs=false + # end + # end + # end + #end + + @value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| if feature == prediction_feature.uri values.each do |value| if value.nil? - LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - when /#{@training_classes.last}/ - nr_active += 1 - activity = 1 - when /#{@training_classes.first}/ - nr_inactive += 1 - activity = 0 - else - LOGGER.warn "Unknown class \"#{value.to_s}." - end + activity= @value_map.invert[value].to_f + nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + nr_total+=1 elsif prediction_feature.feature_type == "regression" - activity = value.to_f + #activity= take_logs ? Math.log10(value.to_f) : value.to_f + activity = value.to_f end begin @@last.AddCompound(smiles,id) @@last.AddActivity(activity, id) all_activities[id]=activity # DV: insert global information compounds[id] = compound - smi[id] = smiles # AM LAST: changed this to store SMILES. + smi[id] = smiles # AM LAST: changed this to store SMILES. id += 1 rescue LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" @@ -404,10 +408,13 @@ post '/fminer/last/?' do instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations instances.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - @@last.GetRegression() ? p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test - - - effect = (p_value > 0) ? "activating" : "deactivating" + if @@last.GetRegression() + p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test + effect = (p_value > 0) ? "activating" : "deactivating" + else + p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f + effect = "unknown" + end feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s unless features.include? smarts features << smarts @@ -53,7 +53,7 @@ post '/lazar/?' do prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) params[:prediction_feature] = prediction_feature.uri # pass to feature mining service end - + feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ @@ -62,6 +62,18 @@ post '/lazar/?' do lazar = OpenTox::Model::Lazar.new lazar.min_sim = params[:min_sim] if params[:min_sim] + + if prediction_feature.feature_type == "classification" + @training_classes = training_activities.accept_values(prediction_feature.uri).sort + @training_classes.each_with_index { |c,i| + lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later. + params[:value_map] = lazar.value_map + } + elsif prediction_feature.feature_type == "regression" + lazar.prediction_algorithm = "Neighbors.local_svm_regression" + end + + if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) @@ -129,34 +141,17 @@ post '/lazar/?' do end end - if prediction_feature.feature_type == "classification" - @training_classes = training_activities.accept_values(prediction_feature.uri).sort - lazar.value_map = { true => @training_classes.last, false => @training_classes.first } - elsif prediction_feature.feature_type == "regression" - lazar.prediction_algorithm = "Neighbors.local_svm_regression" - end - # AM: allow prediction_algorithm override by user for classification AND regression lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? lazar.prop_kernel = true if params[:local_svm_kernel] == "propositionalized" + lazar.balanced = true if params[:balanced] == "true" training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] unless entry[prediction_feature.uri].empty? entry[prediction_feature.uri].each do |value| if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - when /#{@training_classes.last}/ - lazar.activities[compound] << true - when /#{@training_classes.first}/ - lazar.activities[compound] << false - else - LOGGER.warn "Unknown class \"#{value.to_s}\"." - end + lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals elsif prediction_feature.feature_type == "regression" #never use halt in tasks, do not raise exception when, print warning instead if value.to_f==0 |