summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-06-21 13:43:05 +0200
committer Andreas Maunz <andreas@maunz.de> 2011-06-21 13:43:05 +0200
commit c1ee06638871ee7a88b07ebea05f4ecf3fab6392 (patch)
tree 87ef4cc11b2c181df4e0cfb7c49c36ab16f786da
parent 8d8880685ddaeeb968e1f1f0addf6cbf66eef67d (diff)
Multinomial capability for Fminer and Lazar
-rw-r--r-- fminer.rb 106
-rw-r--r-- lazar.rb 17
2 files changed, 56 insertions, 67 deletions
diff --git a/fminer.rb b/fminer.rb
index ee031ff..8c6451c 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -138,8 +138,8 @@ post '/fminer/bbrc/?' do
id = 1 # fminer start id is not 0
compounds = []
- nr_active=0
- nr_inactive=0
+ nr_classes={}
+ nr_total=0
all_activities = Hash.new# DV: for effect calculation in regression part
training_dataset.data_entries.each do |compound,entry|
@@ -160,7 +160,7 @@ post '/fminer/bbrc/?' do
entry.each do |feature,values|
values.each do |value|
if prediction_feature.feature_type == "regression"
- if (! value.nil?) && (value.to_f < 1)
+ if (! value.nil?) && (value.to_f <= 0)
take_logs=false
end
end
@@ -170,26 +170,13 @@ post '/fminer/bbrc/?' do
if feature == prediction_feature.uri
values.each do |value|
if value.nil?
- LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
+ LOGGER.warn "No #{feature} activity for #{compound.to_s}."
else
if prediction_feature.feature_type == "classification"
- case value.to_s
- when "true"
- nr_active += 1
- activity = 1
- when "false"
- nr_inactive += 1
- activity = 0
- when /#{@training_classes.last}/
- nr_active += 1
- activity = 1
- when /#{@training_classes.first}/
- nr_inactive += 1
- activity = 0
- else
- LOGGER.warn "Unknown class \"#{value.to_s}\"."
- end
- elsif prediction_feature.feature_type == "regression"
+ activity= value.to_f
+ nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1
+ nr_total+=1
+ elsif prediction_feature.feature_type == "regression"
activity= take_logs ? Math.log10(value.to_f) : value.to_f
end
begin
@@ -223,14 +210,22 @@ post '/fminer/bbrc/?' do
p_value = f[1]
if (!@@bbrc.GetRegression)
- ids = f[2] + f[3]
- if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive)
- effect = 'activating'
- else
- effect = 'deactivating'
- end
+ id_arrs = f[2..-1].flatten
+ max=nil
+ max_value=0
+ f[2..-1].reverse.each_with_index { |id,i| # fminer outputs occurrences sorted reverse by activity.
+ actual = id.size.to_f/id_arrs.size
+ expected = nr_classes[i].to_f/nr_total
+ if actual > expected
+ if ((actual - expected) / actual) > max_value
+ max_value = (actual - expected) / actual # 'Schleppzeiger'
+ max = i
+ end
+ end
+ }
+ effect = max.to_s
else #regression part
- ids = f[2]
+ id_arrs = f[2]
# DV: effect calculation
f_arr=Array.new
f[2].each do |id|
@@ -261,7 +256,7 @@ post '/fminer/bbrc/?' do
feature_dataset.add_feature feature_uri, metadata
#feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
end
- ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
+ id_arrs.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
end
end
feature_dataset.save(@subjectid)
@@ -328,8 +323,8 @@ post '/fminer/last/?' do
id = 1 # fminer start id is not 0
compounds = []
smi = [] # AM LAST: needed for matching the patterns back
- nr_active=0
- nr_inactive=0
+ nr_classes = []
+ nr_total=0
all_activities = Hash.new #DV: for effect calculation (class and regr)
training_dataset.data_entries.each do |compound,entry|
@@ -343,38 +338,38 @@ post '/fminer/last/?' do
LOGGER.warn "Cannot find smiles for #{compound.to_s}."
next
end
+
+ # AM: take log if appropriate
+ take_logs=true
+ entry.each do |feature,values|
+ values.each do |value|
+ if prediction_feature.feature_type == "regression"
+ if (! value.nil?) && (value.to_f <= 0)
+ take_logs=false
+ end
+ end
+ end
+ end
+
entry.each do |feature,values|
if feature == prediction_feature.uri
values.each do |value|
if value.nil?
- LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
+ LOGGER.warn "No #{feature} activity for #{compound.to_s}."
else
if prediction_feature.feature_type == "classification"
- case value.to_s
- when "true"
- nr_active += 1
- activity = 1
- when "false"
- nr_inactive += 1
- activity = 0
- when /#{@training_classes.last}/
- nr_active += 1
- activity = 1
- when /#{@training_classes.first}/
- nr_inactive += 1
- activity = 0
- else
- LOGGER.warn "Unknown class \"#{value.to_s}."
- end
+ activity= value.to_f
+ nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1
+ nr_total+=1
elsif prediction_feature.feature_type == "regression"
- activity = value.to_f
+ activity= take_logs ? Math.log10(value.to_f) : value.to_f
end
begin
@@last.AddCompound(smiles,id)
@@last.AddActivity(activity, id)
all_activities[id]=activity # DV: insert global information
compounds[id] = compound
- smi[id] = smiles # AM LAST: changed this to store SMILES.
+ smi[id] = smiles # AM LAST: changed this to store SMILES.
id += 1
rescue
LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
@@ -404,10 +399,13 @@ post '/fminer/last/?' do
instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations
instances.each do |smarts, ids|
feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
- @@last.GetRegression() ? p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
-
-
- effect = (p_value > 0) ? "activating" : "deactivating"
+ if @@last.GetRegression()
+ p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
+ effect = (p_value > 0) ? "activating" : "deactivating"
+ else
+ p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f
+ effect = "unknown"
+ end
feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s
unless features.include? smarts
features << smarts
diff --git a/lazar.rb b/lazar.rb
index 4441726..31cdd4d 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -131,7 +131,9 @@ post '/lazar/?' do
if prediction_feature.feature_type == "classification"
@training_classes = training_activities.accept_values(prediction_feature.uri).sort
- lazar.value_map = { true => @training_classes.last, false => @training_classes.first }
+ @training_classes.each_with_index { |c,i|
+ lazar.value_map[i] = c
+ }
elsif prediction_feature.feature_type == "regression"
lazar.prediction_algorithm = "Neighbors.local_svm_regression"
end
@@ -145,18 +147,7 @@ post '/lazar/?' do
unless entry[prediction_feature.uri].empty?
entry[prediction_feature.uri].each do |value|
if prediction_feature.feature_type == "classification"
- case value.to_s
- when "true"
- lazar.activities[compound] << true
- when "false"
- lazar.activities[compound] << false
- when /#{@training_classes.last}/
- lazar.activities[compound] << true
- when /#{@training_classes.first}/
- lazar.activities[compound] << false
- else
- LOGGER.warn "Unknown class \"#{value.to_s}\"."
- end
+ lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals
elsif prediction_feature.feature_type == "regression"
#never use halt in tasks, do not raise exception when, print warning instead
if value.to_f==0