summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Maunz <andreas@maunz.de>2012-05-03 16:35:18 +0200
committerAndreas Maunz <andreas@maunz.de>2012-05-03 16:35:18 +0200
commit1b02cef5f5af5930a2c0a449357618c9266c29ed (patch)
tree8341cc230de1656e23d349a5533f2fd349fc9cbb
parent1747f27dc73bf39c900e07d43efbf7a1f7267e0c (diff)
Ordered addition of entries to fminer
-rw-r--r--lib/algorithm.rb36
1 files changed, 26 insertions, 10 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index b921b9c..64fa508 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -75,10 +75,26 @@ module OpenTox
end
end
- def add_fminer_data(fminer_instance, value_map)
+ def add_fminer_data(fminer_instance, value_map, prepare_backend=true)
+
+
+ # detect nr duplicates per compound
+ compound_sizes = {}
+ @training_dataset.compounds.each do |compound|
+ entries=@training_dataset.data_entries[compound]
+ entries.each do |feature, values|
+ compound_sizes[compound] || compound_sizes[compound] = []
+ compound_sizes[compound] << values.size
+ end
+ compound_sizes[compound].uniq!
+ raise "Inappropriate data for fminer" if compound_sizes[compound].size > 1
+ compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
+ end
id = 1 # fminer start id is not 0
- @training_dataset.data_entries.each do |compound,entry| #order of compounds does not influence result
+
+ @training_dataset.compounds.each do |compound|
+ entry=@training_dataset.data_entries[compound]
begin
smiles = OpenTox::Compound.smiles(compound.to_s)
rescue
@@ -92,29 +108,29 @@ module OpenTox
entry.each do |feature,values|
if feature == @prediction_feature.uri
- values.each do |value|
- if value.nil?
+ (0...compound_sizes[compound]).each { |i|
+ if values[i].nil?
LOGGER.warn "No #{feature} activity for #{compound.to_s}."
else
if @prediction_feature.feature_type == "classification"
- activity= value_map.invert[value].to_i # activities are mapped to 1..n
+ activity= value_map.invert[values[i]].to_i # activities are mapped to 1..n
@db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
elsif @prediction_feature.feature_type == "regression"
- activity= value.to_f
+ activity= values[i].to_f
end
begin
- fminer_instance.AddCompound(smiles,id)
- fminer_instance.AddActivity(activity, id)
+ fminer_instance.AddCompound(smiles,id) if prepare_backend
+ fminer_instance.AddActivity(activity, id) if prepare_backend
@all_activities[id]=activity # DV: insert global information
@compounds[id] = compound
@smi[id] = smiles
id += 1
rescue Exception => e
- LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
+ LOGGER.warn "Could not add " + smiles + "\t" + values[i].to_s + " to fminer"
LOGGER.warn e.backtrace
end
end
- end
+ }
end
end
end