summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-07-18 16:44:56 +0200
committerChristoph Helma <helma@in-silico.ch>2015-07-18 16:44:56 +0200
commit4427ba258e0bfdac2056960bf84f13477e13e87a (patch)
tree6aca4ec156a5dee1bab998525cc410b05721090a
parent7033f642303601d007fe15115d9025466dffa190 (diff)
trying to speed up bbrc parsing
-rw-r--r--Gemfile2
-rw-r--r--algorithm.gemspec5
-rw-r--r--fminer.rb172
-rw-r--r--java/CdkDescriptorInfo.classbin1702 -> 1702 bytes
-rw-r--r--java/CdkDescriptors.classbin3781 -> 6106 bytes
-rw-r--r--java/JoelibDescriptorInfo.classbin1039 -> 1035 bytes
-rw-r--r--java/JoelibDescriptors.classbin2578 -> 2578 bytes
-rw-r--r--lib/bbrc.rb183
-rw-r--r--lib/descriptor.rb63
-rw-r--r--lib/fminer.rb22
-rw-r--r--lib/opentox-algorithm.rb10
11 files changed, 253 insertions, 204 deletions
diff --git a/Gemfile b/Gemfile
index 4e3c41b..e3002f8 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,4 +1,4 @@
source "http://rubygems.org"
gemspec
-gem 'opentox-server', :path => "../opentox-server"
+#gem 'opentox-server', :path => "../opentox-server"
gem "opentox-client", :path => "../opentox-client"
diff --git a/algorithm.gemspec b/algorithm.gemspec
index 3bda834..c3119e6 100644
--- a/algorithm.gemspec
+++ b/algorithm.gemspec
@@ -13,11 +13,12 @@ Gem::Specification.new do |s|
s.rubyforge_project = "algorithm"
- s.files = `git ls-files`.split("\n")
+ s.files = ["lib/algorithm.rb"]
+ #s.files = `git ls-files`.split("\n")
s.required_ruby_version = '>= 1.9.2'
# specify any dependencies here; for example:
- s.add_runtime_dependency "opentox-server"
+ #s.add_runtime_dependency "opentox-server"
s.add_runtime_dependency "opentox-client"
s.add_runtime_dependency 'rinruby'#, "~>2.0.2"
s.add_runtime_dependency 'nokogiri'#, "~>1.4.4"
diff --git a/fminer.rb b/fminer.rb
index b429250..bfc6879 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -107,178 +107,6 @@ module OpenTox
end
- # Run bbrc algorithm on dataset
- #
- # @param [String] dataset_uri URI of the training dataset
- # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
- # @param [optional] parameters BBRC parameters, accepted parameters are
- # - min_frequency Minimum frequency (default 5)
- # - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
- # - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
- # - min_chisq_significance Significance threshold (between 0 and 1)
- # - nr_hits Set to "true" to get hit count instead of presence
- # - get_target Set to "true" to obtain target variable as feature
- # @return [text/uri-list] Task URI
- post '/fminer/bbrc/?' do
-
- @@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/bbrc',:full))
- @@fminer.check_params(params,5)
-
- task = OpenTox::Task.run("Mining BBRC features", uri('/fminer/bbrc')) do |task|
-
- time = Time.now
-
- @@bbrc.Reset
- if @@fminer.prediction_feature.feature_type == "regression"
- @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
- else
- bad_request_error "No accept values for "\
- "dataset '#{@@fminer.training_dataset.uri}' and "\
- "feature '#{@@fminer.prediction_feature.uri}'" unless
- @@fminer.prediction_feature.accept_values
- value_map=@@fminer.prediction_feature.value_map
- end
- @@bbrc.SetMinfreq(@@fminer.minfreq)
- @@bbrc.SetType(1) if params[:feature_type] == "paths"
- @@bbrc.SetBackbone(false) if params[:backbone] == "false"
- @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
- @@bbrc.SetConsoleOut(false)
-
- feature_dataset = OpenTox::Dataset.new
- feature_dataset.metadata = {
- RDF::DC.title => "BBRC representatives",
- RDF::DC.creator => to('/fminer/bbrc',:full),
- RDF::OT.hasSource => to('/fminer/bbrc', :full),
- }
- feature_dataset.parameters = [
- { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
- { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
- { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
- { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") },
- { RDF::DC.title => "backbone", RDF::OT.paramValue => (params[:backbone] == "false" ? "false" : "true") }
- ]
-
- @@fminer.compounds = []
- @@fminer.db_class_sizes = Array.new # AM: effect
- @@fminer.all_activities = Hash.new # DV: for effect calculation in regression part
- @@fminer.smi = [] # AM LAST: needed for matching the patterns back
-
- # Add data to fminer
- @@fminer.add_fminer_data(@@bbrc, value_map)
- g_median=@@fminer.all_activities.values.to_scale.median
-
- #task.progress 10
- step_width = 80 / @@bbrc.GetNoRootNodes().to_f
- features_smarts = Set.new
- features = Array.new
-
- puts "Setup: #{Time.now-time}"
- time = Time.now
- ftime = 0
-
- # run @@bbrc
-
- # prepare to receive results as hash { c => [ [f,v], ... ] }
- fminer_results = {}
-
- (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
- results = @@bbrc.MineRoot(j)
- #task.progress 10+step_width*(j+1)
- results.each do |result|
- f = YAML.load(result)[0]
- smarts = f[0]
- p_value = f[1]
-
- if (!@@bbrc.GetRegression)
- id_arrs = f[2..-1].flatten
- max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @@fminer.db_class_sizes) # f needs reversal for bbrc
- effect = max+1
- else #regression part
- id_arrs = f[2]
- # DV: effect calculation
- f_arr=Array.new
- f[2].each do |id|
- id=id.keys[0] # extract id from hit count hash
- f_arr.push(@@fminer.all_activities[id])
- end
- f_median=f_arr.to_scale.median
- if g_median >= f_median
- effect = 'activating'
- else
- effect = 'deactivating'
- end
- end
-
- ft = Time.now
- unless features_smarts.include? smarts
- features_smarts << smarts
- feature = OpenTox::Feature.find_or_create({
- RDF::DC.title => smarts.dup,
- RDF::OT.hasSource => to('/fminer/bbrc', :full),
- RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature],
- RDF::OT.smarts => smarts.dup,
- RDF::OT.pValue => p_value.to_f.abs.round(5),
- RDF::OT.effect => effect
- })
- features << feature
- end
- ftime += Time.now - ft
-
- id_arrs.each { |id_count_hash|
- id=id_count_hash.keys[0].to_i
- count=id_count_hash.values[0].to_i
- fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {}
- if params[:nr_hits] == "true"
- fminer_results[@@fminer.compounds[id]][feature.uri] = count
- else
- fminer_results[@@fminer.compounds[id]][feature.uri] = 1
- end
- }
-
- end # end of
- end # feature parsing
-
-
- puts "Fminer: #{Time.now-time} (find/create Features: #{ftime})"
- time = Time.now
-
- fminer_compounds = @@fminer.training_dataset.compounds
- prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri
- prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
- @@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
- }
- fminer_noact_compounds = fminer_compounds - @@fminer.compounds
-
- feature_dataset.features = features
- feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features if params[:get_target] == "true"
- feature_dataset.compounds = fminer_compounds
- fminer_compounds.each_with_index { |c,idx|
- # TODO: reenable option
- #if (params[:get_target] == "true")
- #row = row + [ prediction_feature_all_acts[idx] ]
- #end
- features.each { |f|
- v = fminer_results[c][f.uri] if fminer_results[c]
- unless fminer_noact_compounds.include? c
- v = 0 if v.nil?
- end
- feature_dataset.add_data_entry c, f, v.to_i
- }
- }
-
- puts "Prepare save: #{Time.now-time}"
- time = Time.now
- feature_dataset.put
-
- puts "Save: #{Time.now-time}"
- feature_dataset.uri
-
-
- end
- response['Content-Type'] = 'text/uri-list'
- halt 202,task.uri
- end
-
diff --git a/java/CdkDescriptorInfo.class b/java/CdkDescriptorInfo.class
index 922c779..687d68c 100644
--- a/java/CdkDescriptorInfo.class
+++ b/java/CdkDescriptorInfo.class
Binary files differ
diff --git a/java/CdkDescriptors.class b/java/CdkDescriptors.class
index 34b973f..9785405 100644
--- a/java/CdkDescriptors.class
+++ b/java/CdkDescriptors.class
Binary files differ
diff --git a/java/JoelibDescriptorInfo.class b/java/JoelibDescriptorInfo.class
index 0ee67bf..c091e54 100644
--- a/java/JoelibDescriptorInfo.class
+++ b/java/JoelibDescriptorInfo.class
Binary files differ
diff --git a/java/JoelibDescriptors.class b/java/JoelibDescriptors.class
index d88ac63..a49018d 100644
--- a/java/JoelibDescriptors.class
+++ b/java/JoelibDescriptors.class
Binary files differ
diff --git a/lib/bbrc.rb b/lib/bbrc.rb
new file mode 100644
index 0000000..740a763
--- /dev/null
+++ b/lib/bbrc.rb
@@ -0,0 +1,183 @@
+ENV['FMINER_SMARTS'] = 'true'
+ENV['FMINER_NO_AROMATIC'] = 'true'
+ENV['FMINER_PVALUES'] = 'true'
+ENV['FMINER_SILENT'] = 'true'
+ENV['FMINER_NR_HITS'] = 'true'
+
+module OpenTox
+ module Algorithm
+ class Fminer
+ # Run bbrc algorithm on dataset
+ #
+ # @param [String] dataset_uri URI of the training dataset
+ # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
+ # @param [optional] parameters BBRC parameters, accepted parameters are
+ # - min_frequency Minimum frequency (default 5)
+ # - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
+ # - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
+ # - min_chisq_significance Significance threshold (between 0 and 1)
+ # - nr_hits Set to "true" to get hit count instead of presence
+ # - get_target Set to "true" to obtain target variable as feature
+ # @return [text/uri-list] Task URI
+ def self.bbrc params
+
+ @fminer=OpenTox::Algorithm::Fminer.new
+ @fminer.check_params(params,5)
+
+ #task = OpenTox::Task.run("Mining BBRC features", __FILE__ ) do |task|
+
+ time = Time.now
+
+ @bbrc = Bbrc::Bbrc.new
+ @bbrc.Reset
+ if @fminer.prediction_feature.feature_type == "regression"
+ @bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
+ else
+ bad_request_error "No accept values for "\
+ "dataset '#{@fminer.training_dataset.uri}' and "\
+ "feature '#{@fminer.prediction_feature.uri}'" unless
+ @fminer.prediction_feature.accept_values
+ value_map=@fminer.prediction_feature.value_map
+ end
+ @bbrc.SetMinfreq(@fminer.minfreq)
+ @bbrc.SetType(1) if params[:feature_type] == "paths"
+ @bbrc.SetBackbone(false) if params[:backbone] == "false"
+ @bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
+ @bbrc.SetConsoleOut(false)
+
+ feature_dataset = OpenTox::Dataset.new
+ feature_dataset.title = "BBRC representatives"
+ feature_dataset.creator = __FILE__
+ feature_dataset.parameters = [
+ { "title" => "dataset_id", "paramValue" => params[:dataset].id },
+ { "title" => "prediction_feature", "paramValue" => params[:prediction_feature].id },
+ { "title" => "min_frequency", "paramValue" => @fminer.minfreq },
+ { "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") },
+ { "title" => "backbone", "paramValue" => (params[:backbone] == "false" ? "false" : "true") }
+ ]
+
+ @fminer.compounds = []
+ @fminer.db_class_sizes = Array.new # AM: effect
+ @fminer.all_activities = Hash.new # DV: for effect calculation in regression part
+ @fminer.smi = [] # AM LAST: needed for matching the patterns back
+
+ # Add data to fminer
+ @fminer.add_fminer_data(@bbrc, value_map)
+ g_median=@fminer.all_activities.values.to_scale.median
+
+ #task.progress 10
+ step_width = 80 / @bbrc.GetNoRootNodes().to_f
+ features_smarts = Set.new
+ features = Array.new
+
+ puts "Setup: #{Time.now-time}"
+ time = Time.now
+ ftime = 0
+
+ # run @bbrc
+
+ # prepare to receive results as hash { c => [ [f,v], ... ] }
+ fminer_results = {}
+
+ (0 .. @bbrc.GetNoRootNodes()-1).each do |j|
+ results = @bbrc.MineRoot(j)
+ #task.progress 10+step_width*(j+1)
+ results.each do |result|
+ f = YAML.load(result)[0]
+ smarts = f[0]
+ p_value = f[1]
+
+ if (!@bbrc.GetRegression)
+ id_arrs = f[2..-1].flatten
+ max = OpenTox::Algorithm::Fminer.effect(f[2..-1].reverse, @fminer.db_class_sizes) # f needs reversal for bbrc
+ effect = max+1
+ else #regression part
+ id_arrs = f[2]
+ # DV: effect calculation
+ f_arr=Array.new
+ f[2].each do |id|
+ id=id.keys[0] # extract id from hit count hash
+ f_arr.push(@fminer.all_activities[id])
+ end
+ f_median=f_arr.to_scale.median
+ if g_median >= f_median
+ effect = 'activating'
+ else
+ effect = 'deactivating'
+ end
+ end
+
+ ft = Time.now
+ unless features_smarts.include? smarts
+ features_smarts << smarts
+ feature = OpenTox::Feature.find_or_create_by({
+ "title" => smarts.dup,
+ "numeric" => true,
+ "substructure" => true,
+ "smarts" => smarts.dup,
+ "pValue" => p_value.to_f.abs.round(5),
+ "effect" => effect
+ })
+ features << feature
+ end
+ ftime += Time.now - ft
+
+ id_arrs.each { |id_count_hash|
+ id=id_count_hash.keys[0].to_i
+ count=id_count_hash.values[0].to_i
+ fminer_results[@fminer.compounds[id]] || fminer_results[@fminer.compounds[id]] = {}
+ if params[:nr_hits] == "true"
+ fminer_results[@fminer.compounds[id]][feature] = count
+ else
+ fminer_results[@fminer.compounds[id]][feature] = 1
+ end
+ }
+
+ end # end of
+ end # feature parsing
+
+
+ puts "Fminer: #{Time.now-time} (find/create Features: #{ftime})"
+ time = Time.now
+ puts JSON.pretty_generate(fminer_results)
+
+ fminer_compounds = @fminer.training_dataset.compounds
+ prediction_feature_idx = @fminer.training_dataset.features.index @fminer.prediction_feature
+ prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
+ @fminer.training_dataset.data_entries[idx][prediction_feature_idx]
+ }
+ fminer_noact_compounds = fminer_compounds - @fminer.compounds
+
+ feature_dataset.features = features
+ feature_dataset.features = [ @fminer.prediction_feature ] + feature_dataset.features if params[:get_target] == "true"
+ feature_dataset.compounds = fminer_compounds
+ fminer_compounds.each_with_index { |c,idx|
+ # TODO: reenable option
+ #if (params[:get_target] == "true")
+ #row = row + [ prediction_feature_all_acts[idx] ]
+ #end
+ features.each { |f|
+ v = fminer_results[c][f.uri] if fminer_results[c]
+ unless fminer_noact_compounds.include? c
+ v = 0 if v.nil?
+ end
+ feature_dataset.add_data_entry c, f, v.to_i
+ }
+ }
+
+ puts "Prepare save: #{Time.now-time}"
+ time = Time.now
+ feature_dataset.save
+
+ puts "Save: #{Time.now-time}"
+ feature_dataset
+
+
+ end
+ #end
+ end
+ end
+end
+
+
+
diff --git a/lib/descriptor.rb b/lib/descriptor.rb
index a8f5123..9a93b32 100644
--- a/lib/descriptor.rb
+++ b/lib/descriptor.rb
@@ -1,6 +1,8 @@
require 'digest/md5'
ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
-BABEL_3D_CACHE_DIR = File.join(Dir.pwd,'/babel_3d_cache')
+BABEL_3D_CACHE_DIR = File.join(File.dirname(__FILE__),"..",'/babel_3d_cache')
+# TODO store 3D structures in mongodb
+# TODO store descriptors in mongodb
module OpenTox
@@ -35,6 +37,7 @@ module OpenTox
DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS))
DESCRIPTOR_VALUES = OBDESCRIPTORS.keys + CDKDESCRIPTOR_VALUES + JOELIBDESCRIPTORS.keys
+
require_relative "unique_descriptors.rb"
def self.description descriptor
@@ -53,6 +56,7 @@ module OpenTox
end
def self.smarts_match compounds, smarts, count=false
+ compounds = parse compounds
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
obconversion.set_in_format('inchi')
@@ -80,7 +84,8 @@ module OpenTox
smarts_match compounds,smarts,true
end
- def self.physchem compounds, descriptors
+ def self.physchem compounds, descriptors=UNIQUEDESCRIPTORS
+ compounds = parse compounds
des = {}
descriptors.each do |d|
lib, descriptor = d.split(".",2)
@@ -99,6 +104,7 @@ module OpenTox
end
def self.openbabel compounds, descriptors
+ compounds = parse compounds
$logger.debug "compute #{descriptors.size} openbabel descriptors for #{compounds.size} compounds"
obdescriptors = descriptors.collect{|d| OpenBabel::OBDescriptor.find_type d}
obmol = OpenBabel::OBMol.new
@@ -115,19 +121,8 @@ module OpenTox
fingerprint
end
- def self.run_cmd cmd
- cmd = "#{cmd} 2>&1"
- $logger.debug "running external cmd: '#{cmd}'"
- p = IO.popen(cmd) do |io|
- while line = io.gets
- $logger.debug "> #{line.chomp}"
- end
- io.close
- raise "external cmd failed '#{cmd}' (see log file for error msg)" unless $?.to_i == 0
- end
- end
-
def self.cdk compounds, descriptors
+ compounds = parse compounds
$logger.debug "compute #{descriptors.size} cdk descriptors for #{compounds.size} compounds"
sdf = sdf_3d compounds
# use java system call (rjb blocks within tasks)
@@ -135,7 +130,7 @@ module OpenTox
run_cmd "java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf} #{descriptors.join(" ")}"
fingerprint = {}
YAML.load_file(sdf+"cdk.yaml").each_with_index do |calculation,i|
- $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
+ $logger.error "Descriptor calculation failed for compound #{compounds[i].inchi}." if calculation.empty?
descriptors.each do |descriptor|
fingerprint[compounds[i]] = calculation
end
@@ -145,6 +140,7 @@ module OpenTox
end
def self.joelib compounds, descriptors
+ compounds = parse compounds
$logger.debug "compute #{descriptors.size} joelib descriptors for #{compounds.size} compounds"
# use java system call (rjb blocks within tasks)
# use Tempfiles to avoid "Argument list too long" error
@@ -152,7 +148,7 @@ module OpenTox
run_cmd "java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf} #{descriptors.join(' ')}"
fingerprint = {}
YAML.load_file(sdf+"joelib.yaml").each_with_index do |calculation,i|
- $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
+ $logger.error "Descriptor calculation failed for compound #{compounds[i].inchi}." if calculation.empty?
descriptors.each do |descriptor|
fingerprint[compounds[i]] = calculation
end
@@ -162,6 +158,7 @@ module OpenTox
end
def self.lookup compounds, features, dataset
+ compounds = parse compounds
fingerprint = []
compounds.each do |compound|
fingerprint << []
@@ -170,13 +167,26 @@ module OpenTox
end
end
+ def self.run_cmd cmd
+ cmd = "#{cmd} 2>&1"
+ $logger.debug "running external cmd: '#{cmd}'"
+ p = IO.popen(cmd) do |io|
+ while line = io.gets
+ $logger.debug "> #{line.chomp}"
+ end
+ io.close
+ raise "external cmd failed '#{cmd}' (see log file for error msg)" unless $?.to_i == 0
+ end
+ end
+
def self.sdf_3d compounds
+ compounds = parse compounds
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
obconversion.set_in_format 'inchi'
obconversion.set_out_format 'sdf'
- digest = Digest::MD5.hexdigest compounds.collect{|c| c.uri}.inspect
+ digest = Digest::MD5.hexdigest compounds.collect{|c| c.inchi}.inspect
sdf_file = "/tmp/#{digest}.sdf"
if File.exists? sdf_file # do not recreate existing 3d sdfs
$logger.debug "re-using cached 3d structures from #{sdf_file}"
@@ -200,11 +210,11 @@ module OpenTox
sdf_2d = obconversion.write_string(obmol)
error = nil
if compound.inchi.include?(";") # component includes multiple compounds (; in inchi, . in smiles)
- error = "OpenBabel 3D generation failes for multi-compound #{compound.uri}, trying to calculate descriptors from 2D structure."
+ error = "OpenBabel 3D generation failes for multi-compound #{compound.inchi}, trying to calculate descriptors from 2D structure."
else
OpenBabel::OBOp.find_type("Gen3D").do(obmol)
sdf_3d = obconversion.write_string(obmol)
- error = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure." if sdf_3d.match(/.nan/)
+ error = "3D generation failed for compound #{compound.inchi}, trying to calculate descriptors from 2D structure." if sdf_3d.match(/.nan/)
end
if error
$logger.warn error
@@ -226,6 +236,19 @@ module OpenTox
sdf_file
end
+ def self.parse compounds
+ case compounds.class.to_s
+ when "OpenTox::Compound"
+ compounds = [compounds]
+ when "Array"
+ compounds
+ when "OpenTox::Dataset"
+ compounds = compounds.compounds
+ else
+ bad_request_error "Cannot calculate descriptors for #{compounds.class} objects."
+ end
+ end
+
def self.fix_value val
val = val.first if val.is_a? Array and val.size == 1
if val.numeric?
@@ -236,7 +259,7 @@ module OpenTox
end
val
end
- private_class_method :sdf_3d, :fix_value
+ private_class_method :sdf_3d, :fix_value, :parse, :run_cmd
end
end
end
diff --git a/lib/fminer.rb b/lib/fminer.rb
index 8023bdd..3333517 100644
--- a/lib/fminer.rb
+++ b/lib/fminer.rb
@@ -1,3 +1,4 @@
+require_relative 'bbrc'
=begin
* Name: fminer.rb
* Description: Fminer library
@@ -10,6 +11,7 @@ module OpenTox
# Fminer algorithms (https://github.com/amaunz/fminer2)
class Fminer
+ include OpenTox
attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi
@@ -19,15 +21,16 @@ module OpenTox
# @param[Integer] per-mil value for min frequency
def check_params(params,per_mil)
- bad_request_error "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
- @training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}"
+ bad_request_error "Please submit a dataset." unless params[:dataset] and !params[:dataset].nil?
+ @training_dataset = OpenTox::Dataset.new
+ @training_dataset = params[:dataset]
unless params[:prediction_feature] # try to read prediction_feature from dataset
resource_not_found_error "Please provide a prediction_feature parameter" unless @training_dataset.features.size == 1
- params[:prediction_feature] = @training_dataset.features.first.uri
+ params[:prediction_feature] = @training_dataset.features.first
end
- @prediction_feature = OpenTox::Feature.find params[:prediction_feature]
- resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" unless
- @training_dataset.find_feature_uri( params[:prediction_feature] )
+ @prediction_feature = params[:prediction_feature]
+ resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset]}'" unless
+ @training_dataset.features.include?( params[:prediction_feature] )
unless params[:min_frequency].nil?
# check for percentage
if params[:min_frequency].include? "pc"
@@ -58,7 +61,8 @@ module OpenTox
end
if @minfreq.nil?
@minfreq=min_frequency(@training_dataset,@prediction_feature,per_mil)
- $logger.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
+ p "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
+ #$logger.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
end
end
@@ -109,7 +113,7 @@ module OpenTox
end
if compound_activities.nil?
- $logger.warn "No activity for '#{compound.uri}' and feature '#{@prediction_feature.uri}'"
+ $logger.warn "No activity for '#{compound.inchi}' and feature '#{@prediction_feature.title}'"
else
if @prediction_feature.feature_type == "classification"
activity= value_map.invert[compound_activities].to_i # activities are mapped to 1..n
@@ -178,7 +182,7 @@ module OpenTox
# return [Integer] min-frequency
def min_frequency(training_dataset,prediction_feature,per_mil)
nr_labeled_cmpds=0
- f_idx=training_dataset.features.collect{|f| f.uri}.index prediction_feature.uri
+ f_idx=training_dataset.features.index prediction_feature
training_dataset.compounds.each_with_index { |cmpd, c_idx|
if ( training_dataset.data_entries[c_idx] )
unless training_dataset.data_entries[c_idx][f_idx].nil?
diff --git a/lib/opentox-algorithm.rb b/lib/opentox-algorithm.rb
new file mode 100644
index 0000000..4aaad9c
--- /dev/null
+++ b/lib/opentox-algorithm.rb
@@ -0,0 +1,10 @@
+require 'statsample'
+
+# Require sub-Repositories
+require_relative '../libfminer/libbbrc/bbrc' # include before openbabel
+require_relative '../libfminer/liblast/last' #
+require_relative '../last-utils/lu.rb'
+
+#Dir[File.join(File.dirname(__FILE__),"*.rb")].each{ |f| require_relative f}
+require_relative "descriptor.rb"
+require_relative "fminer.rb"