diff options
author | ist <ist@ist.opentox.org> | 2010-10-20 16:21:05 +0200 |
---|---|---|
committer | ist <ist@ist.opentox.org> | 2010-10-20 16:21:05 +0200 |
commit | 79e426f4f3723a28a820b85a42924fe4322ab49a (patch) | |
tree | 4ba470e9c2a433c3bb75e566536b88166572e3d6 | |
parent | cf13d2614a70af7e1b466166b12e823d58a18bbf (diff) |
Finished basic integration of LAST-PM webservice
TODOs:
- implement (de)activating facility in Ruby (ch still has the bug for BBRCs in his code)
- implement p-value facility in ruby
- solve library 'require' issue: need either completely separate namespaces, or the very same namespace and then try to re-require after deleting the library from $" (i.e. $LOADED_FEATURES).
- Merge last-utils 'experimental' to 'master'
-rw-r--r-- | application.rb | 2 | ||||
-rw-r--r-- | fminer.rb | 242 | ||||
m--------- | last-utils | 0 | ||||
m--------- | libfminer | 0 |
4 files changed, 113 insertions, 131 deletions
diff --git a/application.rb b/application.rb index 3d511d1..5aecfbb 100644 --- a/application.rb +++ b/application.rb @@ -1,6 +1,4 @@ require 'rubygems' -require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems -require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST gem "opentox-ruby-api-wrapper", "= 1.6.5" require 'opentox-ruby-api-wrapper' @@ -12,13 +12,14 @@ get '/fminer/?' do } rdf = owl.rdf File.open('public/fminer.owl', 'w') {|f| f.print rdf} - response['Content-Type'] = 'application/rdf+xml' - rdf + response['Content-Type'] = 'application/rdf+xml' + rdf end ['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default post path do + require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems @@fminer = Bbrc::Bbrc.new halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? @@ -50,7 +51,7 @@ end @@fminer.Reset #@@fminer.SetChisqSig(0.99) LOGGER.debug "Fminer: initialising ..." 
- training_dataset.data.each do |c,features| + training_dataset.data.each do |c,features| begin smiles = OpenTox::Compound.new(:uri => c.to_s).smiles rescue @@ -119,17 +120,17 @@ end end else #regression part ids = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - f_arr.push(g_hash[id]) - end - f_median=OpenTox::Utils.median(f_arr) - if g_median >= f_median + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + f_arr.push(g_hash[id]) + end + f_median=OpenTox::Utils.median(f_arr) + if g_median >= f_median effect = 'activating' else effect = 'deactivating' - end + end end tuple = { @@ -157,137 +158,120 @@ end post '/fminer/last/?' do + require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems @@fminer = Last::Last.new - halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? - LOGGER.debug "Dataset: " + params[:dataset_uri] - LOGGER.debug "Endpoint: " + params[:feature_uri] - feature_uri = params[:feature_uri] - begin - LOGGER.debug "Retrieving #{params[:dataset_uri]}" - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" - rescue - LOGGER.error "Dataset #{params[:dataset_uri]} not found" - halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil? - end - halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) + halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? 
+ LOGGER.debug "Dataset: " + params[:dataset_uri] + LOGGER.debug "Endpoint: " + params[:feature_uri] + feature_uri = params[:feature_uri] + begin + LOGGER.debug "Retrieving #{params[:dataset_uri]}" + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" + rescue + LOGGER.error "Dataset #{params[:dataset_uri]} not found" + halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil? + end + halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer/last',:full)) do - feature_dataset = OpenTox::Dataset.new - title = "LAST-PM descriptors for " + training_dataset.title - feature_dataset.title = title - feature_dataset.creator = url_for('/fminer/last',:full) - last_uri = url_for("/fminer#LAST-PM_descriptors",:full) - feature_dataset.features << last_uri + feature_dataset = OpenTox::Dataset.new + title = "LAST-PM descriptors for " + training_dataset.title + feature_dataset.title = title + feature_dataset.creator = url_for('/fminer/last',:full) + last_uri = url_for("/fminer#LAST-PM_descriptors",:full) + feature_dataset.features << last_uri - id = 1 # fminer start id is not 0 - compounds = [] + id = 1 # fminer start id is not 0 + compounds = [] + smi = [] # AM LAST: needed for matching the patterns back g_hash = Hash.new# DV: for effect calculation in regression part - @@fminer.Reset + @@fminer.Reset #@@fminer.SetChisqSig(0.99) - LOGGER.debug "Fminer: initialising ..." - training_dataset.data.each do |c,features| - begin - smiles = OpenTox::Compound.new(:uri => c.to_s).smiles - rescue - LOGGER.warn "No resource for #{c.to_s}" - next - end - if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{c.to_s}." - else - feature_dataset.compounds << c.to_s - features.each do |feature| - act = feature[feature_uri] - if act.nil? 
- LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}." - else - case act.to_s - when "true" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s - activity = 1 - when "false" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s - activity = 0 - else - # AM: add quantitative activity - activity = act.to_f - @@fminer.SetRegression(true) - end - compounds[id] = c.to_s - begin - @@fminer.AddCompound(smiles,id) - @@fminer.AddActivity(activity, id) + LOGGER.debug "Fminer: initialising ..." + training_dataset.data.each do |c,features| + begin + smiles = OpenTox::Compound.new(:uri => c.to_s).smiles + rescue + LOGGER.warn "No resource for #{c.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{c.to_s}." + else + feature_dataset.compounds << c.to_s + features.each do |feature| + act = feature[feature_uri] + if act.nil? + LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}." + else + case act.to_s + when "true" + #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s + activity = 1 + when "false" + #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s + activity = 0 + else + # AM: add quantitative activity + activity = act.to_f + @@fminer.SetRegression(true) + end + compounds[id] = c.to_s + smi[id] = smiles # AM LAST: changed this to store SMILES. 
+ begin + @@fminer.AddCompound(smiles,id) + @@fminer.AddActivity(activity, id) g_hash[id]=activity # DV: insert global information - rescue - LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" - end - end - end - id += 1 - end - end + rescue + LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" + end + end + end + id += 1 + end + end g_array=g_hash.values # DV: calculation of global median for effect calculation g_median=OpenTox::Utils.median(g_array) - minfreq = (0.02*id).round - #minfreq = 5 - @@fminer.SetMinfreq(minfreq) - LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" - + minfreq = (0.02*id).round + #minfreq = 5 + @@fminer.SetMinfreq(minfreq) + LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" raise "no compounds" if compounds.size==0 + # run @@fminer + LOGGER.debug "Fminer: mining ..." + @@fminer.SetConsoleOut(false) + xml = "" + (0 .. @@fminer.GetNoRootNodes()-1).each do |j| + results = @@fminer.MineRoot(j) + results.each do |result| + xml << result + end + end + lu = LU.new + dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!) + smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) + instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations - values = {} - # run @@fminer - LOGGER.debug "Fminer: mining ..." - (0 .. 
@@fminer.GetNoRootNodes()-1).each do |j| - results = @@fminer.MineRoot(j) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - # AM: f[3] missing on regression - if (!@@fminer.GetRegression) - ids = f[2] + f[3] - if f[2].size > f[3].size - effect = 'activating' - else - effect = 'deactivating' - end - else #regression part - ids = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - f_arr.push(g_hash[id]) - end - f_median=OpenTox::Utils.median(f_arr) - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end - end - - tuple = { - url_for('/fminer#smarts',:full) => smarts, - url_for('/fminer#p_value',:full) => p_value.to_f, - url_for('/fminer#effect',:full) => effect - } - #LOGGER.debug "#{f[0]}\t#{f[1]}\t#{effect}" - ids.each do |id| - feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] - feature_dataset.data[compounds[id]] << {last_uri => tuple} - end - end - end + instances.each do |smarts, ids| + tuple = { + url_for('/fminer#smarts',:full) => smarts, + url_for('/fminer#p_value',:full) => nil, # AM LAST: TODO + url_for('/fminer#effect',:full) => nil # AM LAST: TODO + } + ids.each do |id| + feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] + feature_dataset.data[compounds[id]] << {last_uri => tuple} + end + end - uri = feature_dataset.save - LOGGER.debug "Fminer finished, dataset #{uri} created." + uri = feature_dataset.save + LOGGER.debug "Fminer finished, dataset #{uri} created." 
uri - end - LOGGER.debug "Fimer task started: "+task_uri.to_s - response['Content-Type'] = 'text/uri-list' - halt 202,task_uri.to_s+"\n" + end + LOGGER.debug "Fimer task started: "+task_uri.to_s + response['Content-Type'] = 'text/uri-list' + halt 202,task_uri.to_s+"\n" end diff --git a/last-utils b/last-utils -Subproject a678d63a5ccc7a1b5375406a3da66f8bd325141 +Subproject d2ad4f2bb82fdb5433d3f739400244ba89f0786 diff --git a/libfminer b/libfminer -Subproject e955cc6b24d577d7187e5660716ee69d12174a8 +Subproject 00ce7e88f4f1ca16030a9b7bdabf216b5402f9c |