diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-06-21 13:43:05 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-06-21 13:43:05 +0200 |
commit | c1ee06638871ee7a88b07ebea05f4ecf3fab6392 (patch) | |
tree | 87ef4cc11b2c181df4e0cfb7c49c36ab16f786da | |
parent | 8d8880685ddaeeb968e1f1f0addf6cbf66eef67d (diff) |
Multinomial capability for Fminer and Lazar
-rw-r--r-- | fminer.rb | 106 |
-rw-r--r-- | lazar.rb | 17 |
2 files changed, 56 insertions, 67 deletions
@@ -138,8 +138,8 @@ post '/fminer/bbrc/?' do id = 1 # fminer start id is not 0 compounds = [] - nr_active=0 - nr_inactive=0 + nr_classes={} + nr_total=0 all_activities = Hash.new# DV: for effect calculation in regression part training_dataset.data_entries.each do |compound,entry| @@ -160,7 +160,7 @@ post '/fminer/bbrc/?' do entry.each do |feature,values| values.each do |value| if prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f < 1) + if (! value.nil?) && (value.to_f <= 0) take_logs=false end end @@ -170,26 +170,13 @@ post '/fminer/bbrc/?' do if feature == prediction_feature.uri values.each do |value| if value.nil? - LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - when /#{@training_classes.last}/ - nr_active += 1 - activity = 1 - when /#{@training_classes.first}/ - nr_inactive += 1 - activity = 0 - else - LOGGER.warn "Unknown class \"#{value.to_s}\"." - end - elsif prediction_feature.feature_type == "regression" + activity= value.to_f + nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + nr_total+=1 + elsif prediction_feature.feature_type == "regression" activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@ -223,14 +210,22 @@ post '/fminer/bbrc/?' do p_value = f[1] if (!@@bbrc.GetRegression) - ids = f[2] + f[3] - if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) - effect = 'activating' - else - effect = 'deactivating' - end + id_arrs = f[2..-1].flatten + max=nil + max_value=0 + f[2..-1].reverse.each_with_index { |id,i| # fminer outputs occurrences sorted reverse by activity. 
+ actual = id.size.to_f/id_arrs.size + expected = nr_classes[i].to_f/nr_total + if actual > expected + if ((actual - expected) / actual) > max_value + max_value = (actual - expected) / actual # 'Schleppzeiger' + max = i + end + end + } + effect = max.to_s else #regression part - ids = f[2] + id_arrs = f[2] # DV: effect calculation f_arr=Array.new f[2].each do |id| @@ -261,7 +256,7 @@ post '/fminer/bbrc/?' do feature_dataset.add_feature feature_uri, metadata #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters end - ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + id_arrs.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end end feature_dataset.save(@subjectid) @@ -328,8 +323,8 @@ post '/fminer/last/?' do id = 1 # fminer start id is not 0 compounds = [] smi = [] # AM LAST: needed for matching the patterns back - nr_active=0 - nr_inactive=0 + nr_classes = [] + nr_total=0 all_activities = Hash.new #DV: for effect calculation (class and regr) training_dataset.data_entries.each do |compound,entry| @@ -343,38 +338,38 @@ post '/fminer/last/?' do LOGGER.warn "Cannot find smiles for #{compound.to_s}." next end + + # AM: take log if appropriate + take_logs=true + entry.each do |feature,values| + values.each do |value| + if prediction_feature.feature_type == "regression" + if (! value.nil?) && (value.to_f <= 0) + take_logs=false + end + end + end + end + entry.each do |feature,values| if feature == prediction_feature.uri values.each do |value| if value.nil? - LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + LOGGER.warn "No #{feature} activity for #{compound.to_s}." 
else if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - when /#{@training_classes.last}/ - nr_active += 1 - activity = 1 - when /#{@training_classes.first}/ - nr_inactive += 1 - activity = 0 - else - LOGGER.warn "Unknown class \"#{value.to_s}." - end + activity= value.to_f + nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + nr_total+=1 elsif prediction_feature.feature_type == "regression" - activity = value.to_f + activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@last.AddCompound(smiles,id) @@last.AddActivity(activity, id) all_activities[id]=activity # DV: insert global information compounds[id] = compound - smi[id] = smiles # AM LAST: changed this to store SMILES. + smi[id] = smiles # AM LAST: changed this to store SMILES. id += 1 rescue LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" @@ -404,10 +399,13 @@ post '/fminer/last/?' do instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations instances.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - @@last.GetRegression() ? p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test - - - effect = (p_value > 0) ? "activating" : "deactivating" + if @@last.GetRegression() + p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test + effect = (p_value > 0) ? 
"activating" : "deactivating" + else + p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f + effect = "unknown" + end feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s unless features.include? smarts features << smarts @@ -131,7 +131,9 @@ post '/lazar/?' do if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort - lazar.value_map = { true => @training_classes.last, false => @training_classes.first } + @training_classes.each_with_index { |c,i| + lazar.value_map[i] = c + } elsif prediction_feature.feature_type == "regression" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end @@ -145,18 +147,7 @@ post '/lazar/?' do unless entry[prediction_feature.uri].empty? entry[prediction_feature.uri].each do |value| if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - when /#{@training_classes.last}/ - lazar.activities[compound] << true - when /#{@training_classes.first}/ - lazar.activities[compound] << false - else - LOGGER.warn "Unknown class \"#{value.to_s}\"." - end + lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals elsif prediction_feature.feature_type == "regression" #never use halt in tasks, do not raise exception when, print warning instead if value.to_f==0 |