From 590a4208ffe73c41e444928de7d7e1ba4c3b044e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 21 Mar 2011 15:18:05 +0100 Subject: nls argument for last-utils, minfreq: 8% for LAST, 5 Promille for BBRC, minimum: 2 (both) --- fminer.rb | 282 +++++++++++++++++++++++++++++++------------------------------ last-utils | 2 +- libfminer | 2 +- 3 files changed, 145 insertions(+), 141 deletions(-) diff --git a/fminer.rb b/fminer.rb index 5cc83ed..e01881a 100644 --- a/fminer.rb +++ b/fminer.rb @@ -67,148 +67,150 @@ end # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do - # TODO: is this thread safe?? - #@@bbrc = Bbrc::Bbrc.new - minfreq = 5 unless minfreq = params[:min_frequency] - @@bbrc.SetMinfreq(minfreq) - @@bbrc.SetType(1) if params[:feature_type] == "paths" - @@bbrc.SetBackbone(params[:backbone]) if params[:backbone] - @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] - @@bbrc.SetConsoleOut(false) - - halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? 
- prediction_feature = params[:prediction_feature] - - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) - - task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do - - feature_dataset = OpenTox::Dataset.new(nil, @subjectid) - feature_dataset.add_metadata({ - DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s, - DC.creator => url_for('/fminer/bbrc',:full), - OT.hasSource => url_for('/fminer/bbrc', :full), - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] - }) - feature_dataset.save(@subjectid) - - id = 1 # fminer start id is not 0 - compounds = [] - nr_active=0 - nr_inactive=0 - all_activities = Hash.new# DV: for effect calculation in regression part - - @@bbrc.Reset - training_dataset.data_entries.each do |compound,entry| - begin - smiles = OpenTox::Compound.new(compound.to_s).to_smiles - rescue - LOGGER.warn "No resource for #{compound.to_s}" - next - end - if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{compound.to_s}." - next - end - entry.each do |feature,values| - values.each do |value| - if value.nil? - LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? 
+ prediction_feature = params[:prediction_feature] + + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid + halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) + + unless minfreq = params[:min_frequency] + minfreq = 5*training_dataset.compounds.size/1000 # 5 promille according to Andreas' suggestions + minfreq = 2 unless minfreq > 2 + end + + @@bbrc.SetMinfreq(minfreq) + @@bbrc.SetType(1) if params[:feature_type] == "paths" + @@bbrc.SetBackbone(params[:backbone]) if params[:backbone] + @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] + @@bbrc.SetConsoleOut(false) + + task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do + + feature_dataset = OpenTox::Dataset.new(nil, @subjectid) + feature_dataset.add_metadata({ + DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s, + DC.creator => url_for('/fminer/bbrc',:full), + OT.hasSource => url_for('/fminer/bbrc', :full), + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + }) + feature_dataset.save(@subjectid) + + id = 1 # fminer start id is not 0 + compounds = [] + nr_active=0 + nr_inactive=0 + all_activities = Hash.new# DV: for effect calculation in regression part + + @@bbrc.Reset + training_dataset.data_entries.each do |compound,entry| + begin + smiles = OpenTox::Compound.new(compound.to_s).to_smiles + rescue + LOGGER.warn "No resource for #{compound.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{compound.to_s}." + next + end + entry.each do |feature,values| + values.each do |value| + if value.nil? + LOGGER.warn "No #{feature} activiity for #{compound.to_s}." 
+ else + case value.to_s + when "true" + nr_active += 1 + activity = 1 + when "false" + nr_inactive += 1 + activity = 0 else - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - else - activity = value.to_f - @@bbrc.SetRegression(true) - end - begin - @@bbrc.AddCompound(smiles,id) - @@bbrc.AddActivity(activity, id) - all_activities[id]=activity # DV: insert global information - compounds[id] = compound - id += 1 - rescue - LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" - end + activity = value.to_f + @@bbrc.SetRegression(true) + end + begin + @@bbrc.AddCompound(smiles,id) + @@bbrc.AddActivity(activity, id) + all_activities[id]=activity # DV: insert global information + compounds[id] = compound + id += 1 + rescue + LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" end end end end + end - g_array=all_activities.values # DV: calculation of global median for effect calculation - g_median=OpenTox::Algorithm.median(g_array) - - raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 - - features = Set.new - # run @@bbrc - (0 .. 
@@bbrc.GetNoRootNodes()-1).each do |j| - - results = @@bbrc.MineRoot(j) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - - if (!@@bbrc.GetRegression) - ids = f[2] + f[3] - if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) - effect = 'activating' - else - effect = 'deactivating' - end - else #regression part - ids = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - f_arr.push(all_activities[id]) - end - f_median=OpenTox::Algorithm.median(f_arr) - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end - end + g_array=all_activities.values # DV: calculation of global median for effect calculation + g_median=OpenTox::Algorithm.median(g_array) + + raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 + + features = Set.new + # run @@bbrc + (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| + + results = @@bbrc.MineRoot(j) + results.each do |result| + f = YAML.load(result)[0] + smarts = f[0] + p_value = f[1] - feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s - unless features.include? 
smarts - features << smarts - metadata = { - OT.hasSource => url_for('/fminer/bbrc', :full), - OT.isA => OT.Substructure, - OT.smarts => smarts, - OT.pValue => p_value.to_f, - OT.effect => effect, - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] - } - feature_dataset.add_feature feature_uri, metadata - #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters + if (!@@bbrc.GetRegression) + ids = f[2] + f[3] + if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) + effect = 'activating' + else + effect = 'deactivating' + end + else #regression part + ids = f[2] + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + f_arr.push(all_activities[id]) + end + f_median=OpenTox::Algorithm.median(f_arr) + if g_median >= f_median + effect = 'activating' + else + effect = 'deactivating' end - ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end + + feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s + unless features.include? 
smarts + features << smarts + metadata = { + OT.hasSource => url_for('/fminer/bbrc', :full), + OT.isA => OT.Substructure, + OT.smarts => smarts, + OT.pValue => p_value.to_f, + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + } + feature_dataset.add_feature feature_uri, metadata + #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters + end + ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end - feature_dataset.save(@subjectid) - feature_dataset.uri end - response['Content-Type'] = 'text/uri-list' - halt 503,task.uri+"\n" if task.status == "Cancelled" - halt 202,task.uri.to_s+"\n" + feature_dataset.save(@subjectid) + feature_dataset.uri end + response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" +end #end # Run last algorithm on a dataset @@ -221,22 +223,24 @@ post '/fminer/bbrc/?' do # - hops Maximum number of hops # @return [text/uri-list] Task URI post '/fminer/last/?' do - #@@last = Last::Last.new - minfreq = 5 unless minfreq = params[:min_frequency] - @@last.SetMinfreq(minfreq) - @@last.SetType(1) if params[:feature_type] == "paths" - @@last.SetMaxHops(params[:hops]) if params[:hops] - @@last.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? 
prediction_feature = params[:prediction_feature] - training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid - training_dataset.load_all(@subjectid) halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) + unless minfreq = params[:min_frequency] + minfreq = 8*training_dataset.compounds.size/100 # 8% according to Andreas suggestions + minfreq = 2 unless minfreq > 2 + end + + @@last.SetMinfreq(minfreq) + @@last.SetType(1) if params[:feature_type] == "paths" + @@last.SetMaxHops(params[:hops]) if params[:hops] + @@last.SetConsoleOut(false) + task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new @@ -319,7 +323,7 @@ post '/fminer/last/?' do lu = LU.new # AM LAST: uses last-utils here dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!) - smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) + smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using nls variant (see last-pm.maunz.de) instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations instances.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax diff --git a/last-utils b/last-utils index f8a4631..daafa32 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit f8a4631495309d86cb4bec5493af4110d25e4ad7 +Subproject commit daafa32e330b27111df6dc7193a6ed72fae2be45 diff --git a/libfminer b/libfminer index 03a3588..01b8e50 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 03a3588cbea1c175d8573c2a8aad867f7a27e3e2 +Subproject commit 01b8e50e8e6fb3ce29fc8bf0a65a8c6f6af94b3f -- 
cgit v1.2.3 From aa2ef7d05c4016785012af20408352e0f1e944c1 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 21 Mar 2011 15:41:23 +0100 Subject: README updated, set backone fixed --- README.md | 53 +++++++++++++++++++++++++++++++++-------------------- fminer.rb | 2 +- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index cf1f519..0eb641c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ OpenTox Algorithm ================= -- An [OpenTox](http://www.opentox.org) REST Webservice +- An [OpenTox](http://www.opentox.org) REST Webservice - Implements the OpenTox algorithm API for - fminer - lazar @@ -9,18 +9,26 @@ OpenTox Algorithm REST operations --------------- - Get a list of all algorithms GET / - URIs of algorithms 200 - Get a representation of the GET /fminer - fminer representation 200,404 - fminer algorithm - Get a representation of the GET /lazar - lazar representation 200,404 - lazar algorithm - Create fminer features POST /fminer dataset_uri, URI for feature dataset 200,400,404,500 - feature_uri - Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 - prediction_feature, - feature_generation_uri - -Supported MIME formats + Get a list of all algorithms GET / - URIs of algorithms 200 + Get a representation of the GET /fminer/ - fminer representation 200,404 + fminer algorithms + Get a representation of the GET /fminer/bbrc - bbrc representation 200,404 + bbrc algorithm + Get a representation of the GET /fminer/last - last representation 200,404 + last algorithm + Get a representation of the GET /lazar - lazar representation 200,404 + lazar algorithm + Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500 + feature_uri, + min_frequency + Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500 + feature_uri, + min_frequency + Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 + 
prediction_feature, + feature_generation_uri + +Supported MIME formats ---------------------- - application/rdf+xml (default): read/write OWL-DL @@ -29,19 +37,25 @@ Supported MIME formats Examples -------- -### Get the OWL-DL representation of fminer +### Get the OWL-DL representation of fminer curl http://webservices.in-silico.ch/algorithm/fminer -### Get the OWL-DL representation of lazar +### Get the OWL-DL representation of lazar curl http://webservices.in-silico.ch/algorithm/lazar -### Create fminer features +### Create [BBRC](http://bbrc.maunz.de) features - curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} http://webservices.in-silico.ch/algorithm/fminer + curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/bbrc -(feature_uri specifies the dependent variable, e.g. http://www.epa.gov/NCCT/dsstox/CentralFieldDef.html#ActivityOutcome_CPDBAS_Hamster) +feature_uri specifies the dependent variable from the dataset. + +### Create [LAST-PM](http://last-pm.maunz.de) features + + curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/last + +feature_uri specifies the dependent variable from the dataset. Creates a dataset with fminer features (backbone refinement class representatives from supervised graph mining, see http://www.maunz.de/libfminer-doc/). These features can be used e.g. as structural alerts, as descriptors (fingerprints) for prediction models or for similarity calculations. 
@@ -49,10 +63,9 @@ Creates a dataset with fminer features (backbone refinement class representative curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer http://webservices.in-silico.ch/test/algorithm/lazar -(feaure_uri specifies the dependent variable, e.g. http://www.epa.gov/NCCT/dsstox/CentralFieldDef.html#ActivityOutcome_CPDBAS_Hamster) +feature_uri specifies the dependent variable from the dataset [API documentation](http://rdoc.info/github/opentox/algorithm) -------------------------------------------------------------- Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details. - diff --git a/fminer.rb b/fminer.rb index e01881a..91e73cd 100644 --- a/fminer.rb +++ b/fminer.rb @@ -81,7 +81,7 @@ post '/fminer/bbrc/?' do @@bbrc.SetMinfreq(minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" - @@bbrc.SetBackbone(params[:backbone]) if params[:backbone] + @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] @@bbrc.SetConsoleOut(false) -- cgit v1.2.3