From 8adc2faf1cea832e91e08577e25831fd656120b4 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 13 Jun 2012 11:25:20 +0200 Subject: make fminer matching less memory consuming --- algorithm_test.rb | 31 ++++++++++++++++++++----------- fminer.rb | 31 +++++++++++++++++++------------ 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/algorithm_test.rb b/algorithm_test.rb index 4b6da4e..428ed2b 100644 --- a/algorithm_test.rb +++ b/algorithm_test.rb @@ -56,18 +56,22 @@ class AlgorithmTest < Test::Unit::TestCase #kazius 250 no features dataset_uri = "http://local-ot/dataset/9264" prediction_feature = dataset_uri+"/feature/endpoint" + feature_dataset_uri = "http://local-ot/dataset/91409" params = {:dataset_uri=>dataset_uri, :prediction_feature=>prediction_feature, :min_frequency=>7, :max_num_features=>300} #multi: 10=>4, 5=>>3000 - post "/fminer/bbrc",params + # params = {:dataset_uri=>dataset_uri, # :prediction_feature=>prediction_feature, :feature_dataset_uri=>feature_dataset_uri} # post "/lazar",params - uri = wait_for_task(last_response.body) - puts uri + #post "/fminer/bbrc",params + #uri = wait_for_task(last_response.body) + #puts uri + + #puts OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"),params) # model = uri # puts "model #{model}" @@ -76,13 +80,18 @@ class AlgorithmTest < Test::Unit::TestCase #puts "features: "+OpenTox::Dataset.find(uri).features.size.to_s - -# params = {:dataset_uri=>"http://local-ot/dataset/1724", -# :feature_dataset_uri=>"http://local-ot/dataset/1725"} -# #post "/fminer/match",params -# #uri = wait_for_task(last_response.body) -# #puts uri -# + feature_dataset_uri="http://opentox.informatik.uni-freiburg.de/dataset/3277" + dataset_uri="http://opentox.informatik.uni-freiburg.de/dataset/1333" + + params = {:dataset_uri=>dataset_uri, + :feature_dataset_uri=>feature_dataset_uri} + #post "/fminer/bbrc/match",params + #uri = wait_for_task(last_response.body) + + puts 
OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc/match"),params) + +# puts uri + # fminer = File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc") # OpenTox::RestClientWrapper.post(fminer,params) @@ -92,7 +101,7 @@ class AlgorithmTest < Test::Unit::TestCase # post "/lazar",params # uri = wait_for_task(last_response.body) # puts uri - puts "features: "+OpenTox::Dataset.find(uri).features.size.to_s + #puts "features: "+OpenTox::Dataset.find(uri).features.size.to_s # fminer = File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc") # OpenTox::RestClientWrapper.post(fminer,params) diff --git a/fminer.rb b/fminer.rb index 5c0fdf2..762b3bd 100644 --- a/fminer.rb +++ b/fminer.rb @@ -102,10 +102,9 @@ post '/fminer/:method/match?' do res_dataset.add_feature(f,m) end - step_width = 100 / c_dataset.compounds.size.to_f - count = 0 - if params[:nr_hits] == "true" + step_width = 100 / c_dataset.compounds.size.to_f + count = 0 c_dataset.compounds.each do |c| res_dataset.add_compound(c) comp = OpenTox::Compound.new(c) @@ -118,21 +117,29 @@ post '/fminer/:method/match?' 
do end else LOGGER.debug "match #{c_dataset.compounds.size} compounds with #{f_dataset.features.keys.size} features" - + step_width = 100 / f_dataset.features.size.to_f + count = 0 + obconversion = OpenBabel::OBConversion.new - obmol = OpenBabel::OBMol.new - obconversion.set_in_format('inchi') + obconversion.set_in_format('inchi') smarts_pattern = OpenBabel::OBSmartsPattern.new + + obmols = {} c_dataset.compounds.each do |c| res_dataset.add_compound(c) - comp = OpenTox::Compound.new(c) - obconversion.read_string(obmol,comp.inchi) - f_dataset.features.each do |f,m| - smarts_pattern.init(m[OT.smarts]) - res_dataset.add(c,f,1) if smarts_pattern.match(obmol) + inchi = OpenTox::Compound.new(c).inchi + obmol = OpenBabel::OBMol.new + obconversion.read_string(obmol,inchi) + obmols[c] = obmol + end + + f_dataset.features.each do |f,m| + smarts_pattern.init(m[OT.smarts]) + c_dataset.compounds.each do |c| + res_dataset.add(c,f,1) if smarts_pattern.match(obmols[c]) end count += 1 - task.progress step_width*count if count%100==0 + task.progress step_width*count if count%10==0 end end res_dataset.save @subjectid -- cgit v1.2.3