summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2012-12-04 11:44:02 +0100
committer Andreas Maunz <andreas@maunz.de> 2012-12-04 11:44:02 +0100
commit 4399c5903a7d6e111704c84881cb89010f72b96d (patch)
tree 261f91a2ffca1cc6ddab067c0738041e17f3cea8
parent a67af6bf5875bb96e8253d8c954f8033919eaec2 (diff)
Fixed fminer data addition
-rw-r--r-- lib/algorithm.rb 56
-rw-r--r-- lib/utils.rb 12
2 files changed, 41 insertions, 27 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index c3a5c80..f3f58ad 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -52,7 +52,7 @@ module OpenTox
# Fminer algorithms (https://github.com/amaunz/fminer2)
class Fminer
include Algorithm
- attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi
+ attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi, :weight_feature
def check_params(params,per_mil,subjectid=nil)
raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
@@ -67,6 +67,11 @@ module OpenTox
raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature])
+ unless params[:weight_feature].nil?
+ @weight_feature = OpenTox::Feature.find params[:weight_feature], subjectid
+ raise OpenTox::NotFoundError.new "No feature #{params[:weight_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:weight_feature])
+ end
+
unless params[:min_frequency].nil?
# check for percentage
if params[:min_frequency].include? "pc"
@@ -104,6 +109,8 @@ module OpenTox
def add_fminer_data(fminer_instance, value_map)
id = 1 # fminer start id is not 0
+ which_row=@training_dataset.compounds.inject({}) {|h,c| h[c]=0; h}
+
@training_dataset.compounds.each do |compound|
entry=@training_dataset.data_entries[compound]
begin
@@ -119,30 +126,35 @@ module OpenTox
entry && entry.each do |feature,values|
if feature == @prediction_feature.uri
- values.each do |value|
- if value.nil?
- LOGGER.warn "No #{feature} activity for #{compound.to_s}."
- else
- if @prediction_feature.feature_type == "classification"
- activity= value_map.invert[value].to_i # activities are mapped to 1..n
- raise "activity should be mapped to 1..n is 0, for value '#{value}', value_map: #{value_map.inspect}" if activity==0
- @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
- elsif @prediction_feature.feature_type == "regression"
- activity= value.to_f
- end
- begin
- fminer_instance.AddCompound(smiles,id) if fminer_instance
- fminer_instance.AddActivity(activity, id) if fminer_instance
- @all_activities[id]=activity # DV: insert global information
- @compounds[id] = compound
- @smi[id] = smiles
- id += 1
- rescue Exception => e
- LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
- LOGGER.warn e.backtrace
+ value=values[which_row[compound]]
+ if value.nil?
+ LOGGER.warn "No #{feature} activity for #{compound.to_s}."
+ else
+ if @prediction_feature.feature_type == "classification"
+ activity= value_map.invert[value].to_i # activities are mapped to 1..n
+ raise "activity should be mapped to 1..n for id '#{id}' with value '#{value}', value_map: #{value_map.inspect}" if activity==0
+ @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
+ elsif @prediction_feature.feature_type == "regression"
+ activity= value.to_f
+ end
+ begin
+ fminer_instance.AddCompound(smiles,id) if fminer_instance
+ fminer_instance.AddActivity(activity, id) if fminer_instance
+ @all_activities[id]=activity # DV: insert global information
+ @compounds[id] = compound
+ @smi[id] = smiles
+ if ((not fminer_instance.nil?) and (not @weight_feature.nil?) and (@prediction_feature.feature_type == "classification"))
+ weight=entry[@weight_feature.uri][which_row[compound]].to_f # nil.to_f = 0
+ raise "weights should be positive for id '#{id}' with weight '#{weight}'" unless weight>0.0
+ fminer_instance.AddWeight(weight, id)
end
+ id += 1
+ rescue Exception => e
+ LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
+ LOGGER.warn e.backtrace
end
end
+ which_row[compound] += 1
end
end
end
diff --git a/lib/utils.rb b/lib/utils.rb
index 324fc58..bcc1f1b 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -429,21 +429,23 @@ module OpenTox
# Effect calculation for classification. It is assumed that the elements of the arrays match each other pairwise
- # @param [Array] Array of occurrences per class (in the form of Enumerables).
- # @param [Array] Array of database instance counts per class.
+ # @param [Array] Array of occurrences per class (in the form of Enumerables)
+ # @param [Array] Array of database instance counts per class, starting from the lowest with index 0
def self.effect(occurrences, db_instances)
max=0
max_value=0
nr_o = self.sum_size(occurrences)
nr_db = db_instances.to_scale.sum
- occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity.
+ # Crawl occurrences per class i
+ # starting from the lowest with index 0
+ occurrences.each_with_index { |o,i|
actual = o.size.to_f/nr_o
- expected = db_instances[i].to_f/nr_db
+ expected = db_instances[i].to_f/nr_db
if actual > expected
if ((actual - expected) / actual) > max_value
max_value = (actual - expected) / actual # 'Schleppzeiger'
- max = i
+ max = i
end
end
}