diff options
author | Andreas Maunz <andreas@maunz.de> | 2012-05-03 16:35:18 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2012-05-03 16:35:18 +0200 |
commit | 1b02cef5f5af5930a2c0a449357618c9266c29ed (patch) | |
tree | 8341cc230de1656e23d349a5533f2fd349fc9cbb | |
parent | 1747f27dc73bf39c900e07d43efbf7a1f7267e0c (diff) |
Ordered addition of entries to fminer
-rw-r--r-- | lib/algorithm.rb | 36 |
1 file changed, 26 insertions, 10 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index b921b9c..64fa508 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -75,10 +75,26 @@ module OpenTox
         end
       end
 
-      def add_fminer_data(fminer_instance, value_map)
+      def add_fminer_data(fminer_instance, value_map, prepare_backend=true)
+
+
+        # detect nr duplicates per compound
+        compound_sizes = {}
+        @training_dataset.compounds.each do |compound|
+          entries=@training_dataset.data_entries[compound]
+          entries.each do |feature, values|
+            compound_sizes[compound] || compound_sizes[compound] = []
+            compound_sizes[compound] << values.size
+          end
+          compound_sizes[compound].uniq!
+          raise "Inappropriate data for fminer" if compound_sizes[compound].size > 1
+          compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
+        end
 
         id = 1 # fminer start id is not 0
-        @training_dataset.data_entries.each do |compound,entry| #order of compounds does not influence result
+
+        @training_dataset.compounds.each do |compound|
+          entry=@training_dataset.data_entries[compound]
           begin
             smiles = OpenTox::Compound.smiles(compound.to_s)
           rescue
@@ -92,29 +108,29 @@ module OpenTox
 
           entry.each do |feature,values|
             if feature == @prediction_feature.uri
-              values.each do |value|
-                if value.nil?
+              (0...compound_sizes[compound]).each { |i|
+                if values[i].nil?
                   LOGGER.warn "No #{feature} activity for #{compound.to_s}."
                 else
                   if @prediction_feature.feature_type == "classification"
-                    activity= value_map.invert[value].to_i # activities are mapped to 1..n
+                    activity= value_map.invert[values[i]].to_i # activities are mapped to 1..n
                     @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
                   elsif @prediction_feature.feature_type == "regression"
-                    activity= value.to_f
+                    activity= values[i].to_f
                   end
                   begin
-                    fminer_instance.AddCompound(smiles,id)
-                    fminer_instance.AddActivity(activity, id)
+                    fminer_instance.AddCompound(smiles,id) if prepare_backend
+                    fminer_instance.AddActivity(activity, id) if prepare_backend
                     @all_activities[id]=activity # DV: insert global information
                     @compounds[id] = compound
                     @smi[id] = smiles
                     id += 1
                   rescue Exception => e
-                    LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
+                    LOGGER.warn "Could not add " + smiles + "\t" + values[i].to_s + " to fminer"
                     LOGGER.warn e.backtrace
                   end
                 end
-              end
+              }
             end
           end
         end