From 4427ba258e0bfdac2056960bf84f13477e13e87a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 18 Jul 2015 16:44:56 +0200 Subject: trying to speed up bbrc parsing --- Gemfile | 2 +- algorithm.gemspec | 5 +- fminer.rb | 172 ------------------------------------- java/CdkDescriptorInfo.class | Bin 1702 -> 1702 bytes java/CdkDescriptors.class | Bin 3781 -> 6106 bytes java/JoelibDescriptorInfo.class | Bin 1039 -> 1035 bytes java/JoelibDescriptors.class | Bin 2578 -> 2578 bytes lib/bbrc.rb | 183 ++++++++++++++++++++++++++++++++++++++++ lib/descriptor.rb | 63 +++++++++----- lib/fminer.rb | 22 +++-- lib/opentox-algorithm.rb | 10 +++ 11 files changed, 253 insertions(+), 204 deletions(-) create mode 100644 lib/bbrc.rb create mode 100644 lib/opentox-algorithm.rb diff --git a/Gemfile b/Gemfile index 4e3c41b..e3002f8 100644 --- a/Gemfile +++ b/Gemfile @@ -1,4 +1,4 @@ source "http://rubygems.org" gemspec -gem 'opentox-server', :path => "../opentox-server" +#gem 'opentox-server', :path => "../opentox-server" gem "opentox-client", :path => "../opentox-client" diff --git a/algorithm.gemspec b/algorithm.gemspec index 3bda834..c3119e6 100644 --- a/algorithm.gemspec +++ b/algorithm.gemspec @@ -13,11 +13,12 @@ Gem::Specification.new do |s| s.rubyforge_project = "algorithm" - s.files = `git ls-files`.split("\n") + s.files = ["lib/algorithm.rb"] + #s.files = `git ls-files`.split("\n") s.required_ruby_version = '>= 1.9.2' # specify any dependencies here; for example: - s.add_runtime_dependency "opentox-server" + #s.add_runtime_dependency "opentox-server" s.add_runtime_dependency "opentox-client" s.add_runtime_dependency 'rinruby'#, "~>2.0.2" s.add_runtime_dependency 'nokogiri'#, "~>1.4.4" diff --git a/fminer.rb b/fminer.rb index b429250..bfc6879 100644 --- a/fminer.rb +++ b/fminer.rb @@ -107,178 +107,6 @@ module OpenTox end - # Run bbrc algorithm on dataset - # - # @param [String] dataset_uri URI of the training dataset - # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) - # @param [optional] parameters BBRC parameters, accepted parameters are - # - min_frequency Minimum frequency (default 5) - # - feature_type Feature type, can be 'paths' or 'trees' (default "trees") - # - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") - # - min_chisq_significance Significance threshold (between 0 and 1) - # - nr_hits Set to "true" to get hit count instead of presence - # - get_target Set to "true" to obtain target variable as feature - # @return [text/uri-list] Task URI - post '/fminer/bbrc/?' do - - @@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/bbrc',:full)) - @@fminer.check_params(params,5) - - task = OpenTox::Task.run("Mining BBRC features", uri('/fminer/bbrc')) do |task| - - time = Time.now - - @@bbrc.Reset - if @@fminer.prediction_feature.feature_type == "regression" - @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! - else - bad_request_error "No accept values for "\ - "dataset '#{@@fminer.training_dataset.uri}' and "\ - "feature '#{@@fminer.prediction_feature.uri}'" unless - @@fminer.prediction_feature.accept_values - value_map=@@fminer.prediction_feature.value_map - end - @@bbrc.SetMinfreq(@@fminer.minfreq) - @@bbrc.SetType(1) if params[:feature_type] == "paths" - @@bbrc.SetBackbone(false) if params[:backbone] == "false" - @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] - @@bbrc.SetConsoleOut(false) - - feature_dataset = OpenTox::Dataset.new - feature_dataset.metadata = { - RDF::DC.title => "BBRC representatives", - RDF::DC.creator => to('/fminer/bbrc',:full), - RDF::OT.hasSource => to('/fminer/bbrc', :full), - } - feature_dataset.parameters = [ - { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }, - { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] }, - { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq }, - { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }, - { RDF::DC.title => "backbone", RDF::OT.paramValue => (params[:backbone] == "false" ? "false" : "true") } - ] - - @@fminer.compounds = [] - @@fminer.db_class_sizes = Array.new # AM: effect - @@fminer.all_activities = Hash.new # DV: for effect calculation in regression part - @@fminer.smi = [] # AM LAST: needed for matching the patterns back - - # Add data to fminer - @@fminer.add_fminer_data(@@bbrc, value_map) - g_median=@@fminer.all_activities.values.to_scale.median - - #task.progress 10 - step_width = 80 / @@bbrc.GetNoRootNodes().to_f - features_smarts = Set.new - features = Array.new - - puts "Setup: #{Time.now-time}" - time = Time.now - ftime = 0 - - # run @@bbrc - - # prepare to receive results as hash { c => [ [f,v], ... ] } - fminer_results = {} - - (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| - results = @@bbrc.MineRoot(j) - #task.progress 10+step_width*(j+1) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - - if (!@@bbrc.GetRegression) - id_arrs = f[2..-1].flatten - max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @@fminer.db_class_sizes) # f needs reversal for bbrc - effect = max+1 - else #regression part - id_arrs = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - id=id.keys[0] # extract id from hit count hash - f_arr.push(@@fminer.all_activities[id]) - end - f_median=f_arr.to_scale.median - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end - end - - ft = Time.now - unless features_smarts.include? smarts - features_smarts << smarts - feature = OpenTox::Feature.find_or_create({ - RDF::DC.title => smarts.dup, - RDF::OT.hasSource => to('/fminer/bbrc', :full), - RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature], - RDF::OT.smarts => smarts.dup, - RDF::OT.pValue => p_value.to_f.abs.round(5), - RDF::OT.effect => effect - }) - features << feature - end - ftime += Time.now - ft - - id_arrs.each { |id_count_hash| - id=id_count_hash.keys[0].to_i - count=id_count_hash.values[0].to_i - fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {} - if params[:nr_hits] == "true" - fminer_results[@@fminer.compounds[id]][feature.uri] = count - else - fminer_results[@@fminer.compounds[id]][feature.uri] = 1 - end - } - - end # end of - end # feature parsing - - - puts "Fminer: #{Time.now-time} (find/create Features: #{ftime})" - time = Time.now - - fminer_compounds = @@fminer.training_dataset.compounds - prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri - prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| - @@fminer.training_dataset.data_entries[idx][prediction_feature_idx] - } - fminer_noact_compounds = fminer_compounds - @@fminer.compounds - - feature_dataset.features = features - feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features if params[:get_target] == "true" - feature_dataset.compounds = fminer_compounds - fminer_compounds.each_with_index { |c,idx| - # TODO: reenable option - #if (params[:get_target] == "true") - #row = row + [ prediction_feature_all_acts[idx] ] - #end - features.each { |f| - v = fminer_results[c][f.uri] if fminer_results[c] - unless fminer_noact_compounds.include? c - v = 0 if v.nil? - end - feature_dataset.add_data_entry c, f, v.to_i - } - } - - puts "Prepare save: #{Time.now-time}" - time = Time.now - feature_dataset.put - - puts "Save: #{Time.now-time}" - feature_dataset.uri - - - end - response['Content-Type'] = 'text/uri-list' - halt 202,task.uri - end - diff --git a/java/CdkDescriptorInfo.class b/java/CdkDescriptorInfo.class index 922c779..687d68c 100644 Binary files a/java/CdkDescriptorInfo.class and b/java/CdkDescriptorInfo.class differ diff --git a/java/CdkDescriptors.class b/java/CdkDescriptors.class index 34b973f..9785405 100644 Binary files a/java/CdkDescriptors.class and b/java/CdkDescriptors.class differ diff --git a/java/JoelibDescriptorInfo.class b/java/JoelibDescriptorInfo.class index 0ee67bf..c091e54 100644 Binary files a/java/JoelibDescriptorInfo.class and b/java/JoelibDescriptorInfo.class differ diff --git a/java/JoelibDescriptors.class b/java/JoelibDescriptors.class index d88ac63..a49018d 100644 Binary files a/java/JoelibDescriptors.class and b/java/JoelibDescriptors.class differ diff --git a/lib/bbrc.rb b/lib/bbrc.rb new file mode 100644 index 0000000..740a763 --- /dev/null +++ b/lib/bbrc.rb @@ -0,0 +1,183 @@ +ENV['FMINER_SMARTS'] = 'true' +ENV['FMINER_NO_AROMATIC'] = 'true' +ENV['FMINER_PVALUES'] = 'true' +ENV['FMINER_SILENT'] = 'true' +ENV['FMINER_NR_HITS'] = 'true' + +module OpenTox + module Algorithm + class Fminer + # Run bbrc algorithm on dataset + # + # @param [String] dataset_uri URI of the training dataset + # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) + # @param [optional] parameters BBRC parameters, accepted parameters are + # - min_frequency Minimum frequency (default 5) + # - feature_type Feature type, can be 'paths' or 'trees' (default "trees") + # - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") + # - min_chisq_significance Significance threshold (between 0 and 1) + # - nr_hits Set to "true" to get hit count instead of presence + # - get_target Set to "true" to obtain target variable as feature + # @return [text/uri-list] Task URI + def self.bbrc params + + @fminer=OpenTox::Algorithm::Fminer.new + @fminer.check_params(params,5) + + #task = OpenTox::Task.run("Mining BBRC features", __FILE__ ) do |task| + + time = Time.now + + @bbrc = Bbrc::Bbrc.new + @bbrc.Reset + if @fminer.prediction_feature.feature_type == "regression" + @bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! + else + bad_request_error "No accept values for "\ + "dataset '#{@fminer.training_dataset.uri}' and "\ + "feature '#{@fminer.prediction_feature.uri}'" unless + @fminer.prediction_feature.accept_values + value_map=@fminer.prediction_feature.value_map + end + @bbrc.SetMinfreq(@fminer.minfreq) + @bbrc.SetType(1) if params[:feature_type] == "paths" + @bbrc.SetBackbone(false) if params[:backbone] == "false" + @bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] + @bbrc.SetConsoleOut(false) + + feature_dataset = OpenTox::Dataset.new + feature_dataset.title = "BBRC representatives" + feature_dataset.creator = __FILE__ + feature_dataset.parameters = [ + { "title" => "dataset_id", "paramValue" => params[:dataset].id }, + { "title" => "prediction_feature", "paramValue" => params[:prediction_feature].id }, + { "title" => "min_frequency", "paramValue" => @fminer.minfreq }, + { "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") }, + { "title" => "backbone", "paramValue" => (params[:backbone] == "false" ? "false" : "true") } + ] + + @fminer.compounds = [] + @fminer.db_class_sizes = Array.new # AM: effect + @fminer.all_activities = Hash.new # DV: for effect calculation in regression part + @fminer.smi = [] # AM LAST: needed for matching the patterns back + + # Add data to fminer + @fminer.add_fminer_data(@bbrc, value_map) + g_median=@fminer.all_activities.values.to_scale.median + + #task.progress 10 + step_width = 80 / @bbrc.GetNoRootNodes().to_f + features_smarts = Set.new + features = Array.new + + puts "Setup: #{Time.now-time}" + time = Time.now + ftime = 0 + + # run @bbrc + + # prepare to receive results as hash { c => [ [f,v], ... ] } + fminer_results = {} + + (0 .. @bbrc.GetNoRootNodes()-1).each do |j| + results = @bbrc.MineRoot(j) + #task.progress 10+step_width*(j+1) + results.each do |result| + f = YAML.load(result)[0] + smarts = f[0] + p_value = f[1] + + if (!@bbrc.GetRegression) + id_arrs = f[2..-1].flatten + max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @fminer.db_class_sizes) # f needs reversal for bbrc + effect = max+1 + else #regression part + id_arrs = f[2] + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + id=id.keys[0] # extract id from hit count hash + f_arr.push(@fminer.all_activities[id]) + end + f_median=f_arr.to_scale.median + if g_median >= f_median + effect = 'activating' + else + effect = 'deactivating' + end + end + + ft = Time.now + unless features_smarts.include? smarts + features_smarts << smarts + feature = OpenTox::Feature.find_or_create_by({ + "title" => smarts.dup, + "numeric" => true, + "substructure" => true, + "smarts" => smarts.dup, + "pValue" => p_value.to_f.abs.round(5), + "effect" => effect + }) + features << feature + end + ftime += Time.now - ft + + id_arrs.each { |id_count_hash| + id=id_count_hash.keys[0].to_i + count=id_count_hash.values[0].to_i + fminer_results[@fminer.compounds[id]] || fminer_results[@fminer.compounds[id]] = {} + if params[:nr_hits] == "true" + fminer_results[@fminer.compounds[id]][feature] = count + else + fminer_results[@fminer.compounds[id]][feature] = 1 + end + } + + end # end of + end # feature parsing + + + puts "Fminer: #{Time.now-time} (find/create Features: #{ftime})" + time = Time.now + puts JSON.pretty_generate(fminer_results) + + fminer_compounds = @fminer.training_dataset.compounds + prediction_feature_idx = @fminer.training_dataset.features.index @fminer.prediction_feature + prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| + @fminer.training_dataset.data_entries[idx][prediction_feature_idx] + } + fminer_noact_compounds = fminer_compounds - @fminer.compounds + + feature_dataset.features = features + feature_dataset.features = [ @fminer.prediction_feature ] + feature_dataset.features if params[:get_target] == "true" + feature_dataset.compounds = fminer_compounds + fminer_compounds.each_with_index { |c,idx| + # TODO: reenable option + #if (params[:get_target] == "true") + #row = row + [ prediction_feature_all_acts[idx] ] + #end + features.each { |f| + v = fminer_results[c][f.uri] if fminer_results[c] + unless fminer_noact_compounds.include? c + v = 0 if v.nil? + end + feature_dataset.add_data_entry c, f, v.to_i + } + } + + puts "Prepare save: #{Time.now-time}" + time = Time.now + feature_dataset.save + + puts "Save: #{Time.now-time}" + feature_dataset + + + end + #end + end + end +end + + + diff --git a/lib/descriptor.rb b/lib/descriptor.rb index a8f5123..9a93b32 100644 --- a/lib/descriptor.rb +++ b/lib/descriptor.rb @@ -1,6 +1,8 @@ require 'digest/md5' ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk" -BABEL_3D_CACHE_DIR = File.join(Dir.pwd,'/babel_3d_cache') +BABEL_3D_CACHE_DIR = File.join(File.dirname(__FILE__),"..",'/babel_3d_cache') +# TODO store 3D structures in mongodb +# TODO store descriptors in mongodb module OpenTox @@ -35,6 +37,7 @@ module OpenTox DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS)) DESCRIPTOR_VALUES = OBDESCRIPTORS.keys + CDKDESCRIPTOR_VALUES + JOELIBDESCRIPTORS.keys + require_relative "unique_descriptors.rb" def self.description descriptor @@ -53,6 +56,7 @@ module OpenTox end def self.smarts_match compounds, smarts, count=false + compounds = parse compounds obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new obconversion.set_in_format('inchi') @@ -80,7 +84,8 @@ module OpenTox smarts_match compounds,smarts,true end - def self.physchem compounds, descriptors + def self.physchem compounds, descriptors=UNIQUEDESCRIPTORS + compounds = parse compounds des = {} descriptors.each do |d| lib, descriptor = d.split(".",2) @@ -99,6 +104,7 @@ module OpenTox end def self.openbabel compounds, descriptors + compounds = parse compounds $logger.debug "compute #{descriptors.size} openbabel descriptors for #{compounds.size} compounds" obdescriptors = descriptors.collect{|d| OpenBabel::OBDescriptor.find_type d} obmol = OpenBabel::OBMol.new @@ -115,19 +121,8 @@ module OpenTox fingerprint end - def self.run_cmd cmd - cmd = "#{cmd} 2>&1" - $logger.debug "running external cmd: '#{cmd}'" - p = IO.popen(cmd) do |io| - while line = io.gets - $logger.debug "> #{line.chomp}" - end - io.close - raise "external cmd failed '#{cmd}' (see log file for error msg)" unless $?.to_i == 0 - end - end - def self.cdk compounds, descriptors + compounds = parse compounds $logger.debug "compute #{descriptors.size} cdk descriptors for #{compounds.size} compounds" sdf = sdf_3d compounds # use java system call (rjb blocks within tasks) @@ -135,7 +130,7 @@ module OpenTox run_cmd "java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf} #{descriptors.join(" ")}" fingerprint = {} YAML.load_file(sdf+"cdk.yaml").each_with_index do |calculation,i| - $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty? + $logger.error "Descriptor calculation failed for compound #{compounds[i].inchi}." if calculation.empty? descriptors.each do |descriptor| fingerprint[compounds[i]] = calculation end @@ -145,6 +140,7 @@ module OpenTox end def self.joelib compounds, descriptors + compounds = parse compounds $logger.debug "compute #{descriptors.size} joelib descriptors for #{compounds.size} compounds" # use java system call (rjb blocks within tasks) # use Tempfiles to avoid "Argument list too long" error @@ -152,7 +148,7 @@ module OpenTox run_cmd "java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf} #{descriptors.join(' ')}" fingerprint = {} YAML.load_file(sdf+"joelib.yaml").each_with_index do |calculation,i| - $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty? + $logger.error "Descriptor calculation failed for compound #{compounds[i].inchi}." if calculation.empty? descriptors.each do |descriptor| fingerprint[compounds[i]] = calculation end @@ -162,6 +158,7 @@ module OpenTox end def self.lookup compounds, features, dataset + compounds = parse compounds fingerprint = [] compounds.each do |compound| fingerprint << [] @@ -170,13 +167,26 @@ module OpenTox end end + def self.run_cmd cmd + cmd = "#{cmd} 2>&1" + $logger.debug "running external cmd: '#{cmd}'" + p = IO.popen(cmd) do |io| + while line = io.gets + $logger.debug "> #{line.chomp}" + end + io.close + raise "external cmd failed '#{cmd}' (see log file for error msg)" unless $?.to_i == 0 + end + end + def self.sdf_3d compounds + compounds = parse compounds obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new obconversion.set_in_format 'inchi' obconversion.set_out_format 'sdf' - digest = Digest::MD5.hexdigest compounds.collect{|c| c.uri}.inspect + digest = Digest::MD5.hexdigest compounds.collect{|c| c.inchi}.inspect sdf_file = "/tmp/#{digest}.sdf" if File.exists? sdf_file # do not recreate existing 3d sdfs $logger.debug "re-using cached 3d structures from #{sdf_file}" @@ -200,11 +210,11 @@ module OpenTox sdf_2d = obconversion.write_string(obmol) error = nil if compound.inchi.include?(";") # component includes multiple compounds (; in inchi, . in smiles) - error = "OpenBabel 3D generation failes for multi-compound #{compound.uri}, trying to calculate descriptors from 2D structure." + error = "OpenBabel 3D generation failes for multi-compound #{compound.inchi}, trying to calculate descriptors from 2D structure." else OpenBabel::OBOp.find_type("Gen3D").do(obmol) sdf_3d = obconversion.write_string(obmol) - error = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure." if sdf_3d.match(/.nan/) + error = "3D generation failed for compound #{compound.inchi}, trying to calculate descriptors from 2D structure." if sdf_3d.match(/.nan/) end if error $logger.warn error @@ -226,6 +236,19 @@ module OpenTox sdf_file end + def self.parse compounds + case compounds.class.to_s + when "OpenTox::Compound" + compounds = [compounds] + when "Array" + compounds + when "OpenTox::Dataset" + compounds = compounds.compounds + else + bad_request_error "Cannot calculate descriptors for #{compounds.class} objects." + end + end + def self.fix_value val val = val.first if val.is_a? Array and val.size == 1 if val.numeric? @@ -236,7 +259,7 @@ module OpenTox end val end - private_class_method :sdf_3d, :fix_value + private_class_method :sdf_3d, :fix_value, :parse, :run_cmd end end end diff --git a/lib/fminer.rb b/lib/fminer.rb index 8023bdd..3333517 100644 --- a/lib/fminer.rb +++ b/lib/fminer.rb @@ -1,3 +1,4 @@ +require_relative 'bbrc' =begin * Name: fminer.rb * Description: Fminer library @@ -10,6 +11,7 @@ module OpenTox # Fminer algorithms (https://github.com/amaunz/fminer2) class Fminer + include OpenTox attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi @@ -19,15 +21,16 @@ module OpenTox # @param[Integer] per-mil value for min frequency def check_params(params,per_mil) - bad_request_error "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - @training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}" + bad_request_error "Please submit a dataset." unless params[:dataset] and !params[:dataset].nil? + @training_dataset = OpenTox::Dataset.new + @training_dataset = params[:dataset] unless params[:prediction_feature] # try to read prediction_feature from dataset resource_not_found_error "Please provide a prediction_feature parameter" unless @training_dataset.features.size == 1 - params[:prediction_feature] = @training_dataset.features.first.uri + params[:prediction_feature] = @training_dataset.features.first end - @prediction_feature = OpenTox::Feature.find params[:prediction_feature] - resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" unless - @training_dataset.find_feature_uri( params[:prediction_feature] ) + @prediction_feature = params[:prediction_feature] + resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset]}'" unless + @training_dataset.features.include?( params[:prediction_feature] ) unless params[:min_frequency].nil? # check for percentage if params[:min_frequency].include? "pc" @@ -58,7 +61,8 @@ module OpenTox end if @minfreq.nil? @minfreq=min_frequency(@training_dataset,@prediction_feature,per_mil) - $logger.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)" + p "min_frequency #{@minfreq} (input was #{per_mil} per-mil)" + #$logger.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)" end end @@ -109,7 +113,7 @@ module OpenTox end if compound_activities.nil? - $logger.warn "No activity for '#{compound.uri}' and feature '#{@prediction_feature.uri}'" + $logger.warn "No activity for '#{compound.inchi}' and feature '#{@prediction_feature.title}'" else if @prediction_feature.feature_type == "classification" activity= value_map.invert[compound_activities].to_i # activities are mapped to 1..n @@ -178,7 +182,7 @@ module OpenTox # return [Integer] min-frequency def min_frequency(training_dataset,prediction_feature,per_mil) nr_labeled_cmpds=0 - f_idx=training_dataset.features.collect{|f| f.uri}.index prediction_feature.uri + f_idx=training_dataset.features.index prediction_feature training_dataset.compounds.each_with_index { |cmpd, c_idx| if ( training_dataset.data_entries[c_idx] ) unless training_dataset.data_entries[c_idx][f_idx].nil? diff --git a/lib/opentox-algorithm.rb b/lib/opentox-algorithm.rb new file mode 100644 index 0000000..4aaad9c --- /dev/null +++ b/lib/opentox-algorithm.rb @@ -0,0 +1,10 @@ +require 'statsample' + +# Require sub-Repositories +require_relative '../libfminer/libbbrc/bbrc' # include before openbabel +require_relative '../libfminer/liblast/last' # +require_relative '../last-utils/lu.rb' + +#Dir[File.join(File.dirname(__FILE__),"*.rb")].each{ |f| require_relative f} +require_relative "descriptor.rb" +require_relative "fminer.rb" -- cgit v1.2.3