From 9f905d4c4246cd240a5496e77e7e3be1fa6a75a8 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 3 May 2012 16:33:32 +0200 Subject: Initial commit --- application.rb | 2 +- fminer.rb | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 187 insertions(+), 25 deletions(-) diff --git a/application.rb b/application.rb index ef123da..b00ba9f 100644 --- a/application.rb +++ b/application.rb @@ -38,7 +38,7 @@ end # # @return [text/uri-list] algorithm URIs get '/?' do - list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n" + list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" diff --git a/fminer.rb b/fminer.rb index da06fc8..fd32043 100644 --- a/fminer.rb +++ b/fminer.rb @@ -4,14 +4,14 @@ ENV['FMINER_PVALUES'] = 'true' ENV['FMINER_SILENT'] = 'true' ENV['FMINER_NR_HITS'] = 'true' -@@bbrc = Bbrc::Bbrc.new -@@last = Last::Last.new +@@bbrc = Bbrc::Bbrc.new +@@last = Last::Last.new # Get list of fminer algorithms # # @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do - list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" @@ -50,7 +50,38 @@ get "/fminer/bbrc/?" 
do content_type "application/x-yaml" algorithm.to_yaml else - response['Content-Type'] = 'application/rdf+xml' + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end +end + +# Get RDF/XML representation of fminer bbrc algorithm +# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm +get "/fminer/bbrc/sample?" do + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc/sample',:full)) + algorithm.metadata = { + DC.title => 'fminer backbone refinement class representatives, obtained from samples of a dataset', + DC.creator => "andreas@maunz.de", +# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc", + RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Number of bootstrap samples", OT.paramScope => "optional", DC.title => "num_boots" }, + { DC.description => "Minimum sampling support", OT.paramScope => "optional", DC.title => "min_sampling_support" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, + { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, + ] + } + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' algorithm.to_rdfxml end end @@ -81,17 +112,17 @@ get "/fminer/last/?" 
do content_type "application/x-yaml" algorithm.to_yaml else - response['Content-Type'] = 'application/rdf+xml' + response['Content-Type'] = 'application/rdf+xml' algorithm.to_rdfxml end end # Creates same features for dataset that have been created # with fminer in dataset -# accept params[:nr_hits] as used in other fminer methods -post '/fminer/:method/match?' do +# accept params[:nr_hits] as used in other fminer methods +post '/fminer/:method/match?' do raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri] - raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] + raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task| f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid @@ -105,7 +136,7 @@ post '/fminer/:method/match?' do f_dataset.features.each do |f,m| if params[:nr_hits] == "true" hits = comp.match_hits([m[OT.smarts]]) - res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]] + res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]] else res_dataset.add(c,f,1) if comp.match?(m[OT.smarts]) end @@ -115,7 +146,7 @@ post '/fminer/:method/match?' do res_dataset.uri end return_task(task) -end +end # Run bbrc algorithm on dataset # @@ -128,7 +159,7 @@ end # - min_chisq_significance Significance threshold (between 0 and 1) # - nr_hits Set to "true" to get hit count instead of presence # @return [text/uri-list] Task URI -post '/fminer/bbrc/?' do +post '/fminer/bbrc/?' do fminer=OpenTox::Algorithm::Fminer.new fminer.check_params(params,5,@subjectid) @@ -185,7 +216,7 @@ post '/fminer/bbrc/?' 
do smarts = f[0] p_value = f[1] - if (!@@bbrc.GetRegression) + if (!@@bbrc.GetRegression) id_arrs = f[2..-1].flatten max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes) effect = f[2..-1].size-max @@ -195,10 +226,10 @@ post '/fminer/bbrc/?' do f_arr=Array.new f[2].each do |id| id=id.keys[0] # extract id from hit count hash - f_arr.push(fminer.all_activities[id]) - end + f_arr.push(fminer.all_activities[id]) + end f_median=f_arr.to_scale.median - if g_median >= f_median + if g_median >= f_median effect = 'activating' else effect = 'deactivating' @@ -232,13 +263,13 @@ post '/fminer/bbrc/?' do end } - end # end of + end # end of end # feature parsing # AM: add feature values for non-present features - # feature_dataset.complete_data_entries + # feature_dataset.complete_data_entries - feature_dataset.save(@subjectid) + feature_dataset.save(@subjectid) feature_dataset.uri end response['Content-Type'] = 'text/uri-list' @@ -247,6 +278,137 @@ post '/fminer/bbrc/?' do end #end + +# Run bbrc/sample algorithm on a dataset +# +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional] BBRC sample parameters, accepted are +# - num_boots Number of bootstrap samples (default 150) +# - min_sampling_support Minimum sampling support (default 30% of num_boots) +# - min_frequency Minimum frequency (default 10% of dataset size) +# - nr_hits Whether subgraphs should be weighted with their occurrence counts in the instances (frequency) +# +# @return [text/uri-list] Task URI +post '/fminer/bbrc/sample/?' do + + fminer=OpenTox::Algorithm::Fminer.new + fminer.check_params(params,80,@subjectid) + + # num_boots + unless params[:num_boots] + num_boots = 150 + LOGGER.debug "Set num_boots to default value #{num_boots}" + else + raise OpenTox::BadRequestError.new "num_boots is not numeric" unless OpenTox::Algorithm.numeric? 
params[:num_boots] + num_boots = params[:num_boots].to_i.ceil + end + + # min_sampling_support + unless params[:min_sampling_support] + min_sampling_support = (num_boots * 0.3).ceil + LOGGER.debug "Set num_boots to default value #{min_sampling_support}" + else + raise OpenTox::BadRequestError.new "min_sampling_support is not numeric" unless OpenTox::Algorithm.numeric? params[:min_sampling_support] + min_sampling_support= params[:min_sampling_support].to_i.ceil + end + + # re-set min_frequency + unless params[:min_frequency] + min_frequency = (fminer.training_dataset.compounds.size * 0.1).ceil + LOGGER.debug "Set min_frequency to default value #{fminer.minfreq}" + else + raise OpenTox::BadRequestError.new "min_frequency is not numeric" unless OpenTox::Algorithm.numeric? params[:min_frequency] + min_frequency= params[:min_frequency].to_i.ceil + end + + fminer.training_dataset.compounds.size + + task = OpenTox::Task.create("Mining BBRC sample features", url_for('/fminer',:full)) do |task| + if fminer.prediction_feature.feature_type == "regression" + raise OpenTox::BadRequestError.new "BBRC sampling is only for classification" + else + raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+ + "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri) + @value_map=fminer.training_dataset.value_map(fminer.prediction_feature.uri) + end + + feature_dataset = OpenTox::Dataset.new(nil, @subjectid) + feature_dataset.add_metadata({ + DC.title => "BBRC sampled representatives for " + fminer.training_dataset.metadata[DC.title].to_s, + DC.creator => url_for('/fminer/bbrc/sample',:full), + OT.hasSource => url_for('/fminer/bbrc/sample', :full), + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + # TODO: add more params + ] + }) + feature_dataset.save(@subjectid) + + 
fminer.compounds = [] + fminer.db_class_sizes = Array.new # AM: effect + fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) + fminer.smi = [] # AM LAST: needed for matching the patterns back + + # Add data to fminer + fminer.add_fminer_data(@@last, @value_map) + + raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 + + # run bbrc-sample + features = Set.new + task.progress 10 + + + # matching + task.progress 90 + lu = LU.new # AM LAST: uses last-utils here + params[:nr_hits] == "true" ? hit_count=true: hit_count=false + matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations + + matches.each do |smarts, ids| + feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax + p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f + g=Array.new + @value_map.each { |y,act| g[y-1]=Array.new } + feat_hash.each { |x,y| g[y-1].push(x) } + max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) + effect = g.size-max + feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s + unless features.include? 
smarts + features << smarts + metadata = { + RDF.type => [OT.Feature, OT.Substructure], + OT.hasSource => feature_dataset.uri, + OT.smarts => smarts, + OT.pValue => p_value.abs, + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + } + feature_dataset.add_feature feature_uri, metadata + end + if !hit_count + ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)} + else + ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} + end + end + + # AM: add feature values for non-present features + # feature_dataset.complete_data_entries + + feature_dataset.save(@subjectid) + feature_dataset.uri + end + response['Content-Type'] = 'text/uri-list' + raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" +end + # Run last algorithm on a dataset # # @param [String] dataset_uri URI of the training dataset @@ -312,14 +474,14 @@ post '/fminer/last/?' do end lu = LU.new # AM LAST: uses last-utils here - dom=lu.read(xml) # AM LAST: parse GraphML + dom=lu.read(xml) # AM LAST: parse GraphML smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) params[:nr_hits] == "true" ? hit_count=true: hit_count=false matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations matches.each do |smarts, ids| feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - if @@last.GetRegression() + if @@last.GetRegression() p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test effect = (p_value > 0) ? 
"activating" : "deactivating" else @@ -343,20 +505,20 @@ post '/fminer/last/?' do { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } ] - } + } feature_dataset.add_feature feature_uri, metadata end if !hit_count ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)} else - ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} + ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} end end # AM: add feature values for non-present features - # feature_dataset.complete_data_entries + # feature_dataset.complete_data_entries - feature_dataset.save(@subjectid) + feature_dataset.save(@subjectid) feature_dataset.uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3 From d37c44bcdb76f000c5c9a175a698f5d92b51b3a0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 4 May 2012 13:57:44 +0200 Subject: Added bbrc-sample as submodule --- .gitmodules | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 61a4b92..6bd2fe3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,7 @@ url = git://github.com/amaunz/fminer2.git [submodule "last-utils"] path = last-utils - url = git://github.com/amaunz/last-utils.git + url = git://github.com/amaunz/last-utils.git +[submodule "bbrc-sample"] + path = bbrc-sample + url = git://github.com/amaunz/bbrc-sample.git -- cgit v1.2.3 From 6f203f40ba5e500d695eb4146eab11e60e03d01c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 4 May 2012 13:58:53 +0200 Subject: Cleaner parameter handling for sampling --- fminer.rb | 19 ++++--------------- last-utils | 2 +- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/fminer.rb b/fminer.rb index fd32043..53985ab 100644 --- a/fminer.rb +++ b/fminer.rb @@ -57,7 +57,7 @@ end # Get RDF/XML representation of fminer bbrc 
algorithm # @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm -get "/fminer/bbrc/sample?" do +get "/fminer/bbrc/sample/?" do algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc/sample',:full)) algorithm.metadata = { DC.title => 'fminer backbone refinement class representatives, obtained from samples of a dataset', @@ -293,7 +293,7 @@ end post '/fminer/bbrc/sample/?' do fminer=OpenTox::Algorithm::Fminer.new - fminer.check_params(params,80,@subjectid) + fminer.check_params(params,100,@subjectid) # AM: 100 per-mil (10%) as default minfreq # num_boots unless params[:num_boots] @@ -313,17 +313,6 @@ post '/fminer/bbrc/sample/?' do min_sampling_support= params[:min_sampling_support].to_i.ceil end - # re-set min_frequency - unless params[:min_frequency] - min_frequency = (fminer.training_dataset.compounds.size * 0.1).ceil - LOGGER.debug "Set min_frequency to default value #{fminer.minfreq}" - else - raise OpenTox::BadRequestError.new "min_frequency is not numeric" unless OpenTox::Algorithm.numeric? params[:min_frequency] - min_frequency= params[:min_frequency].to_i.ceil - end - - fminer.training_dataset.compounds.size - task = OpenTox::Task.create("Mining BBRC sample features", url_for('/fminer',:full)) do |task| if fminer.prediction_feature.feature_type == "regression" raise OpenTox::BadRequestError.new "BBRC sampling is only for classification" @@ -352,11 +341,11 @@ post '/fminer/bbrc/sample/?' 
do fminer.smi = [] # AM LAST: needed for matching the patterns back # Add data to fminer - fminer.add_fminer_data(@@last, @value_map) + fminer.add_fminer_data(nil, @value_map) # AM: 'nil' as instance to only fill in administrative data raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 - # run bbrc-sample + # run bbrc-sample, obtain smarts and p-values features = Set.new task.progress 10 diff --git a/last-utils b/last-utils index cf02384..efcc3f4 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit cf0238477127e54509b6ab8b5c38f50dd6ffce08 +Subproject commit efcc3f41dd9e2f590a1520dfee3bf709120b2e41 -- cgit v1.2.3 From f32650ef78be87d618265dddfc71c63b725371f1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 4 May 2012 14:10:21 +0200 Subject: Reverted gitmodules --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 6bd2fe3..47881a4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,6 +4,3 @@ [submodule "last-utils"] path = last-utils url = git://github.com/amaunz/last-utils.git -[submodule "bbrc-sample"] - path = bbrc-sample - url = git://github.com/amaunz/bbrc-sample.git -- cgit v1.2.3 From 8b024e1b1b950e7a66c3bcab7c78f119c731aa3a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 4 May 2012 14:12:37 +0200 Subject: Added bbrc-sample as a submodule --- .gitmodules | 3 +++ bbrc-sample | 1 + 2 files changed, 4 insertions(+) create mode 160000 bbrc-sample diff --git a/.gitmodules b/.gitmodules index 47881a4..af7a8f4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "last-utils"] path = last-utils url = git://github.com/amaunz/last-utils.git +[submodule "bbrc-sample"] + path = bbrc-sample + url = git://github.com/amaunz/bbrc-sample diff --git a/bbrc-sample b/bbrc-sample new file mode 160000 index 0000000..0043519 --- /dev/null +++ b/bbrc-sample @@ -0,0 +1 @@ +Subproject commit 00435190a8f49e43fc5194a024156300472294e3 -- cgit v1.2.3 From 
dcd8379fcd2448b83f1b390fc74bb0b4025f8dd8 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 7 May 2012 08:36:18 +0200 Subject: Embedded R code --- application.rb | 1 + bbrc-sample | 2 +- fminer.rb | 15 +++++++++++++++ last-utils | 2 +- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/application.rb b/application.rb index b00ba9f..ee1bf65 100644 --- a/application.rb +++ b/application.rb @@ -20,6 +20,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') gem "opentox-ruby", "~> 3" require 'opentox-ruby' require 'rjb' +require 'rinruby' # main diff --git a/bbrc-sample b/bbrc-sample index 0043519..fc4ae7d 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit 00435190a8f49e43fc5194a024156300472294e3 +Subproject commit fc4ae7d22a60838a62747069660542c304ce2f12 diff --git a/fminer.rb b/fminer.rb index 53985ab..cec902e 100644 --- a/fminer.rb +++ b/fminer.rb @@ -345,10 +345,25 @@ post '/fminer/bbrc/sample/?' do raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 + # run bbrc-sample, obtain smarts and p-values features = Set.new task.progress 10 + @r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests + @r.assign "dataset.uri", params[:dataset_uri] + @r.assign "prediction.feature.uri", fminer.prediction_feature.uri + @r.assign "num.boots", num_boots + @r.assign "min.frequency.per.sample", fminer.minfreq + @r.assign "min.sampling.support", min_sampling_support + @r.assign "bbrc.service", File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") + @r.assign "dataset.service", CONFIG[:services]["opentox-dataset"] + + @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" + @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, F)" + + smarts = (@r.pull "ans.patterns").collect! 
{ |id| id.gsub(/\'/,"") } # remove extra quotes + r_p_values = @r.pull "ans.p.values" # matching task.progress 90 diff --git a/last-utils b/last-utils index efcc3f4..8a3dd9b 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit efcc3f41dd9e2f590a1520dfee3bf709120b2e41 +Subproject commit 8a3dd9bb550f0ea264c2f4e844f664f0a42ca16f -- cgit v1.2.3 From 615b105230453e04bf6ec0354f2c391170e76974 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 7 May 2012 15:31:22 +0200 Subject: Better comments --- fminer.rb | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fminer.rb b/fminer.rb index cec902e..ee7f8e9 100644 --- a/fminer.rb +++ b/fminer.rb @@ -307,7 +307,7 @@ post '/fminer/bbrc/sample/?' do # min_sampling_support unless params[:min_sampling_support] min_sampling_support = (num_boots * 0.3).ceil - LOGGER.debug "Set num_boots to default value #{min_sampling_support}" + LOGGER.debug "Set min_sampling_support to default value #{min_sampling_support}" else raise OpenTox::BadRequestError.new "min_sampling_support is not numeric" unless OpenTox::Algorithm.numeric? params[:min_sampling_support] min_sampling_support= params[:min_sampling_support].to_i.ceil @@ -324,7 +324,7 @@ post '/fminer/bbrc/sample/?' do feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.add_metadata({ - DC.title => "BBRC sampled representatives for " + fminer.training_dataset.metadata[DC.title].to_s, + DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s + "(bootstrapped)", DC.creator => url_for('/fminer/bbrc/sample',:full), OT.hasSource => url_for('/fminer/bbrc/sample', :full), OT.parameters => [ @@ -335,13 +335,12 @@ post '/fminer/bbrc/sample/?' 
do }) feature_dataset.save(@subjectid) - fminer.compounds = [] - fminer.db_class_sizes = Array.new # AM: effect - fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) - fminer.smi = [] # AM LAST: needed for matching the patterns back - - # Add data to fminer - fminer.add_fminer_data(nil, @value_map) # AM: 'nil' as instance to only fill in administrative data + # filled by add_fminer_data: + fminer.compounds = [] # indexed by id, starting from 1 (not 0) + fminer.db_class_sizes = Array.new # for effect calculation + fminer.all_activities = Hash.new # for effect calculation, indexed by id, starting from 1 (not 0) + fminer.smi = [] # needed for matching the patterns back, indexed by id, starting from 1 (not 0) + fminer.add_fminer_data(nil, @value_map) # To only fill in administrative data (no fminer priming) pass 'nil' as instance raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 @@ -362,24 +361,28 @@ post '/fminer/bbrc/sample/?' do @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, F)" - smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes + smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" # matching task.progress 90 lu = LU.new # AM LAST: uses last-utils here params[:nr_hits] == "true" ? 
hit_count=true: hit_count=false + + + LOGGER.debug fminer.smi.to_yaml + LOGGER.debug smarts.to_yaml matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations matches.each do |smarts, ids| feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f - g=Array.new + g = Array.new @value_map.each { |y,act| g[y-1]=Array.new } feat_hash.each { |x,y| g[y-1].push(x) } max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) effect = g.size-max - feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s + feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s unless features.include? smarts features << smarts metadata = { -- cgit v1.2.3 From b0f3f05d6a8df53956547d04a6a82316ee78b102 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 8 May 2012 08:18:04 +0200 Subject: Removed debug --- bbrc-sample | 2 +- fminer.rb | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/bbrc-sample b/bbrc-sample index fc4ae7d..813595b 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit fc4ae7d22a60838a62747069660542c304ce2f12 +Subproject commit 813595bc811e1fdb71dee0ea9e2f0dc9b6a44cdc diff --git a/fminer.rb b/fminer.rb index ee7f8e9..abedc36 100644 --- a/fminer.rb +++ b/fminer.rb @@ -359,7 +359,7 @@ post '/fminer/bbrc/sample/?' 
do @r.assign "dataset.service", CONFIG[:services]["opentox-dataset"] @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" - @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, F)" + @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T)" smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" @@ -369,9 +369,6 @@ post '/fminer/bbrc/sample/?' do lu = LU.new # AM LAST: uses last-utils here params[:nr_hits] == "true" ? hit_count=true: hit_count=false - - LOGGER.debug fminer.smi.to_yaml - LOGGER.debug smarts.to_yaml matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations matches.each do |smarts, ids| -- cgit v1.2.3 From a00bf34d6dbd079f2437bac19299def05dc6321c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 8 May 2012 08:28:28 +0200 Subject: Fixed r quit --- fminer.rb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fminer.rb b/fminer.rb index abedc36..d38703a 100644 --- a/fminer.rb +++ b/fminer.rb @@ -348,7 +348,6 @@ post '/fminer/bbrc/sample/?' do # run bbrc-sample, obtain smarts and p-values features = Set.new task.progress 10 - @r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests @r.assign "dataset.uri", params[:dataset_uri] @r.assign "prediction.feature.uri", fminer.prediction_feature.uri @@ -357,20 +356,22 @@ post '/fminer/bbrc/sample/?' 
do @r.assign "min.sampling.support", min_sampling_support @r.assign "bbrc.service", File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @r.assign "dataset.service", CONFIG[:services]["opentox-dataset"] - @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" - @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T)" - - smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts - r_p_values = @r.pull "ans.p.values" + begin + @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T)" + smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts + r_p_values = @r.pull "ans.p.values" + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + @r.quit # free R # matching task.progress 90 lu = LU.new # AM LAST: uses last-utils here params[:nr_hits] == "true" ? hit_count=true: hit_count=false - matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations - matches.each do |smarts, ids| feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f -- cgit v1.2.3 From aa91484566a1c7a6d7d0ad3edb8d9664b4b4c883 Mon Sep 17 00:00:00 2001 From: David Vorgrimmler Date: Tue, 8 May 2012 11:24:40 +0200 Subject: Added parameters to dataset metadata. 
--- fminer.rb | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fminer.rb b/fminer.rb index cec902e..17cfc49 100644 --- a/fminer.rb +++ b/fminer.rb @@ -326,12 +326,7 @@ post '/fminer/bbrc/sample/?' do feature_dataset.add_metadata({ DC.title => "BBRC sampled representatives for " + fminer.training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/bbrc/sample',:full), - OT.hasSource => url_for('/fminer/bbrc/sample', :full), - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - # TODO: add more params - ] + OT.hasSource => url_for('/fminer/bbrc/sample', :full) }) feature_dataset.save(@subjectid) @@ -370,6 +365,16 @@ post '/fminer/bbrc/sample/?' do lu = LU.new # AM LAST: uses last-utils here params[:nr_hits] == "true" ? hit_count=true: hit_count=false matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations + + feature_dataset.add_metadata({ + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, + { DC.title => "min_sampling_support", OT.paramValue => min_sampling_support }, + { DC.title => "num_boots", OT.paramValue => num_boots }, + { DC.title => "min_frequency_per_sample", OT.paramValue => fminer.minfreq }, + { DC.title => "nr_hits", OT.paramValue => hit_count.to_s }] + }) matches.each do |smarts, ids| feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax -- cgit v1.2.3 From 3fe8a116f1255eedd4628d68d9a6ae16064b0d7c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 8 May 2012 14:48:20 +0200 Subject: Added metadata: minfreq and nr_hits --- fminer.rb | 11 ++++++++--- last-utils | 2 +- 2 
files changed, 9 insertions(+), 4 deletions(-) diff --git a/fminer.rb b/fminer.rb index 8559628..3c282c8 100644 --- a/fminer.rb +++ b/fminer.rb @@ -186,7 +186,10 @@ post '/fminer/bbrc/?' do OT.hasSource => url_for('/fminer/bbrc', :full), OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, + { DC.title => "min_frequency", OT.paramValue => fminer.minfreq }, + { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") } + ] }) feature_dataset.save(@subjectid) @@ -455,8 +458,10 @@ post '/fminer/last/?' do OT.hasSource => url_for('/fminer/last', :full), OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, + { DC.title => "min_frequency", OT.paramValue => fminer.minfreq }, + { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? 
"true" : "false") } + ] }) feature_dataset.save(@subjectid) diff --git a/last-utils b/last-utils index 8a3dd9b..efcc3f4 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit 8a3dd9bb550f0ea264c2f4e844f664f0a42ca16f +Subproject commit efcc3f41dd9e2f590a1520dfee3bf709120b2e41 -- cgit v1.2.3 From e9676d060e4214e219e36cbb0444506e22dc8e64 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 8 May 2012 16:42:35 +0200 Subject: Supporting merge time in metadata --- bbrc-sample | 2 +- fminer.rb | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/bbrc-sample b/bbrc-sample index 813595b..bbd8b46 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit 813595bc811e1fdb71dee0ea9e2f0dc9b6a44cdc +Subproject commit bbd8b461eca2d7dcc3cfc247e55846e327be41c3 diff --git a/fminer.rb b/fminer.rb index 3c282c8..e86da65 100644 --- a/fminer.rb +++ b/fminer.rb @@ -359,6 +359,7 @@ post '/fminer/bbrc/sample/?' do @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T)" smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" + merge_time = @r.pull "merge.time" rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" @@ -378,7 +379,9 @@ post '/fminer/bbrc/sample/?' 
do { DC.title => "min_sampling_support", OT.paramValue => min_sampling_support }, { DC.title => "num_boots", OT.paramValue => num_boots }, { DC.title => "min_frequency_per_sample", OT.paramValue => fminer.minfreq }, - { DC.title => "nr_hits", OT.paramValue => hit_count.to_s }] + { DC.title => "nr_hits", OT.paramValue => hit_count.to_s }, + { DC.title => "merge_time", OT.paramValue => merge_time.to_s } + ] }) matches.each do |smarts, ids| -- cgit v1.2.3 From 426a2e0e3a3c23f13b99785fdd46543353cc6266 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 9 May 2012 13:18:17 +0200 Subject: Emitting stripped patterns --- bbrc-sample | 2 +- fminer.rb | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bbrc-sample b/bbrc-sample index bbd8b46..ad0ffc4 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit bbd8b461eca2d7dcc3cfc247e55846e327be41c3 +Subproject commit ad0ffc43072ed9b0d0b90ea5e435241cd5d4fa35 diff --git a/fminer.rb b/fminer.rb index e86da65..644d5ad 100644 --- a/fminer.rb +++ b/fminer.rb @@ -360,6 +360,8 @@ post '/fminer/bbrc/sample/?' do smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" merge_time = @r.pull "merge.time" + n_stripped_mss = @r.pull "n.stripped.mss" + n_stripped_cst = @r.pull "n.stripped.cst" rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" @@ -380,7 +382,9 @@ post '/fminer/bbrc/sample/?' 
do { DC.title => "num_boots", OT.paramValue => num_boots }, { DC.title => "min_frequency_per_sample", OT.paramValue => fminer.minfreq }, { DC.title => "nr_hits", OT.paramValue => hit_count.to_s }, - { DC.title => "merge_time", OT.paramValue => merge_time.to_s } + { DC.title => "merge_time", OT.paramValue => merge_time.to_s }, + { DC.title => "n_stripped_mss", OT.paramValue => n_stripped_mss.to_s }, + { DC.title => "n_stripped_cst", OT.paramValue => n_stripped_cst.to_s } ] }) -- cgit v1.2.3 From d0ddfd6e685bc8050b12644b628beb7f232fa266 Mon Sep 17 00:00:00 2001 From: David Vorgrimmler Date: Wed, 9 May 2012 17:08:18 +0200 Subject: Added random seed to bbrc sample. --- fminer.rb | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index 644d5ad..ae84d75 100644 --- a/fminer.rb +++ b/fminer.rb @@ -291,6 +291,7 @@ end # - min_sampling_support Minimum sampling support (default 30% of num_boots) # - min_frequency Minimum frequency (default 10% of dataset size) # - nr_hits Whether subgraphs should be weighted with their occurrence counts in the instances (frequency) +# - random_seed Random seed ensures same datasets in bootBbrc # # @return [text/uri-list] Task URI post '/fminer/bbrc/sample/?' do @@ -316,6 +317,15 @@ post '/fminer/bbrc/sample/?' do min_sampling_support= params[:min_sampling_support].to_i.ceil end + # random_seed + unless params[:random_seed] + random_seed = 1 + LOGGER.debug "Set random seed to default value #{random_seed}" + else + raise OpenTox::BadRequestError.new "random_seed is not numeric" unless OpenTox::Algorithm.numeric? params[:random_seed] + random_seed= params[:random_seed].to_i.ceil + end + task = OpenTox::Task.create("Mining BBRC sample features", url_for('/fminer',:full)) do |task| if fminer.prediction_feature.feature_type == "regression" raise OpenTox::BadRequestError.new "BBRC sampling is only for classification" @@ -352,11 +362,12 @@ post '/fminer/bbrc/sample/?' 
do @r.assign "num.boots", num_boots @r.assign "min.frequency.per.sample", fminer.minfreq @r.assign "min.sampling.support", min_sampling_support + @r.assign "random.seed", random_seed @r.assign "bbrc.service", File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @r.assign "dataset.service", CONFIG[:services]["opentox-dataset"] @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" begin - @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T)" + @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed)" smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" merge_time = @r.pull "merge.time" @@ -384,7 +395,8 @@ post '/fminer/bbrc/sample/?' do { DC.title => "nr_hits", OT.paramValue => hit_count.to_s }, { DC.title => "merge_time", OT.paramValue => merge_time.to_s }, { DC.title => "n_stripped_mss", OT.paramValue => n_stripped_mss.to_s }, - { DC.title => "n_stripped_cst", OT.paramValue => n_stripped_cst.to_s } + { DC.title => "n_stripped_cst", OT.paramValue => n_stripped_cst.to_s }, + { DC.title => "random_seed", OT.paramValue => random_seed.to_s } ] }) -- cgit v1.2.3 From 6b43dc4ecb085c67ebaee5f9a64dd88188b56754 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 14 May 2012 13:46:24 +0200 Subject: matching service uses last-utils --- bbrc-sample | 2 +- fminer.rb | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/bbrc-sample b/bbrc-sample index ad0ffc4..6110fc9 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit ad0ffc43072ed9b0d0b90ea5e435241cd5d4fa35 +Subproject commit 6110fc9cf34766444e94482fba08878c19c87d39 diff --git a/fminer.rb b/fminer.rb index ae84d75..ed595f8 100644 --- a/fminer.rb +++ b/fminer.rb @@ 
-132,14 +132,16 @@ post '/fminer/:method/match?' do end c_dataset.compounds.each do |c| res_dataset.add_compound(c) - comp = OpenTox::Compound.new(c) - f_dataset.features.each do |f,m| - if params[:nr_hits] == "true" - hits = comp.match_hits([m[OT.smarts]]) - res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]] - else - res_dataset.add(c,f,1) if comp.match?(m[OT.smarts]) - end + end + smi = [nil]; smi += c_dataset.compounds.collect { |c| OpenTox::Compound.new(c).to_smiles } + smarts = f_dataset.features.collect { |f,m| m[OT.smarts] } + params[:nr_hits] == "true" ? hit_count=true: hit_count=false + matches, counts = LU.new.match_rb(smi, smarts, hit_count) + f_dataset.features.each do |f,m| + if (matches[m[OT.smarts]] && matches[m[OT.smarts]].size>0) + matches[m[OT.smarts]].each_with_index {|id,idx| + res_dataset.add(c_dataset.compounds[id-1],f,counts[m[OT.smarts]][idx]) + } end end res_dataset.save @subjectid -- cgit v1.2.3 From 1655fb7ba0ff8536de4cc45efe4f8923f35df5f9 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 16 May 2012 14:32:58 +0200 Subject: Added support for backbone-false --- bbrc-sample | 2 +- fminer.rb | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/bbrc-sample b/bbrc-sample index 6110fc9..bbcfdfc 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit 6110fc9cf34766444e94482fba08878c19c87d39 +Subproject commit bbcfdfc85108774275a98b33ba133a0a9cab9c81 diff --git a/fminer.rb b/fminer.rb index ed595f8..085fb04 100644 --- a/fminer.rb +++ b/fminer.rb @@ -71,6 +71,7 @@ get "/fminer/bbrc/sample/?" 
do { DC.description => "Minimum sampling support", OT.paramScope => "optional", DC.title => "min_sampling_support" }, { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, + { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" } ] } case request.env['HTTP_ACCEPT'] @@ -177,7 +178,7 @@ post '/fminer/bbrc/?' do end @@bbrc.SetMinfreq(fminer.minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" - @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean + @@bbrc.SetBackbone(false) if params[:backbone] == "false" @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] @@bbrc.SetConsoleOut(false) @@ -190,7 +191,8 @@ post '/fminer/bbrc/?' do { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, { DC.title => "min_frequency", OT.paramValue => fminer.minfreq }, - { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") } + { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }, + { DC.title => "backbone", OT.paramValue => (params[:backbone] == "false" ? "false" : "true") } ] }) @@ -294,6 +296,7 @@ end # - min_frequency Minimum frequency (default 10% of dataset size) # - nr_hits Whether subgraphs should be weighted with their occurrence counts in the instances (frequency) # - random_seed Random seed ensures same datasets in bootBbrc +# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. 
(default "true") # # @return [text/uri-list] Task URI post '/fminer/bbrc/sample/?' do @@ -328,6 +331,15 @@ post '/fminer/bbrc/sample/?' do random_seed= params[:random_seed].to_i.ceil end + # backbone + unless params[:backbone] + backbone = "true" + LOGGER.debug "Set backbone to default value #{backbone}" + else + raise OpenTox::BadRequestError.new "backbone is neither 'true' nor 'false'" unless (params[:backbone] == "true" or params[:backbone] == "false") + backbone = params[:backbone] + end + task = OpenTox::Task.create("Mining BBRC sample features", url_for('/fminer',:full)) do |task| if fminer.prediction_feature.feature_type == "regression" raise OpenTox::BadRequestError.new "BBRC sampling is only for classification" @@ -365,11 +377,12 @@ post '/fminer/bbrc/sample/?' do @r.assign "min.frequency.per.sample", fminer.minfreq @r.assign "min.sampling.support", min_sampling_support @r.assign "random.seed", random_seed + @r.assign "do.backbone", backbone @r.assign "bbrc.service", File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @r.assign "dataset.service", CONFIG[:services]["opentox-dataset"] @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" begin - @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed)" + @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed, as.logical(do.backbone))" smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" merge_time = @r.pull "merge.time" @@ -398,7 +411,8 @@ post '/fminer/bbrc/sample/?' 
do { DC.title => "merge_time", OT.paramValue => merge_time.to_s }, { DC.title => "n_stripped_mss", OT.paramValue => n_stripped_mss.to_s }, { DC.title => "n_stripped_cst", OT.paramValue => n_stripped_cst.to_s }, - { DC.title => "random_seed", OT.paramValue => random_seed.to_s } + { DC.title => "random_seed", OT.paramValue => random_seed.to_s }, + { DC.title => "backbone", OT.paramValue => backbone.to_s } ] }) -- cgit v1.2.3 From 4f73de5ec7a567505224a195b7dc4bf9d563cbab Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 16 May 2012 16:39:00 +0200 Subject: renaming --- bbrc-sample | 2 +- fminer.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bbrc-sample b/bbrc-sample index bbcfdfc..6ddfc2d 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit bbcfdfc85108774275a98b33ba133a0a9cab9c81 +Subproject commit 6ddfc2dc414f1e64ac16286c0cee5a4b0022d2e2 diff --git a/fminer.rb b/fminer.rb index 085fb04..fc27f18 100644 --- a/fminer.rb +++ b/fminer.rb @@ -377,12 +377,12 @@ post '/fminer/bbrc/sample/?' do @r.assign "min.frequency.per.sample", fminer.minfreq @r.assign "min.sampling.support", min_sampling_support @r.assign "random.seed", random_seed - @r.assign "do.backbone", backbone + @r.assign "backbone", backbone @r.assign "bbrc.service", File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @r.assign "dataset.service", CONFIG[:services]["opentox-dataset"] @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" begin - @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed, as.logical(do.backbone))" + @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed, as.logical(backbone))" smarts = (@r.pull "ans.patterns").collect! 
{ |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" merge_time = @r.pull "merge.time" -- cgit v1.2.3 From 4564c7bf92a53a97d787a09647efa69829eadfbc Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 21 May 2012 08:56:14 +0200 Subject: Support for Chisq estimation method --- fminer.rb | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index fc27f18..d8da725 100644 --- a/fminer.rb +++ b/fminer.rb @@ -71,7 +71,8 @@ get "/fminer/bbrc/sample/?" do { DC.description => "Minimum sampling support", OT.paramScope => "optional", DC.title => "min_sampling_support" }, { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, - { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" } + { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, + { DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", OT.paramScope => "optional", DC.title => "method" } ] } case request.env['HTTP_ACCEPT'] @@ -297,6 +298,7 @@ end # - nr_hits Whether subgraphs should be weighted with their occurrence counts in the instances (frequency) # - random_seed Random seed ensures same datasets in bootBbrc # - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") +# - method Chisq estimation method, pass 'mean' to use simple mean estimate (default 'mle'). # # @return [text/uri-list] Task URI post '/fminer/bbrc/sample/?' do @@ -340,6 +342,15 @@ post '/fminer/bbrc/sample/?' 
do backbone = params[:backbone] end + # method + unless params[:method] + method="mean" + LOGGER.debug "Set method to default value #{method}" + else + raise OpenTox::BadRequestError.new "method is neither 'mle' nor 'mean'" unless (params[:method] == "mle" or params[:method] == "mean") + method = params[:method] + end + task = OpenTox::Task.create("Mining BBRC sample features", url_for('/fminer',:full)) do |task| if fminer.prediction_feature.feature_type == "regression" raise OpenTox::BadRequestError.new "BBRC sampling is only for classification" @@ -380,9 +391,10 @@ post '/fminer/bbrc/sample/?' do @r.assign "backbone", backbone @r.assign "bbrc.service", File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @r.assign "dataset.service", CONFIG[:services]["opentox-dataset"] + @r.assign "method", method @r.eval "source(\"bbrc-sample/bbrc-sample.R\")" begin - @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed, as.logical(backbone))" + @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed, as.logical(backbone), method)" smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" merge_time = @r.pull "merge.time" @@ -412,7 +424,8 @@ post '/fminer/bbrc/sample/?' 
do { DC.title => "n_stripped_mss", OT.paramValue => n_stripped_mss.to_s }, { DC.title => "n_stripped_cst", OT.paramValue => n_stripped_cst.to_s }, { DC.title => "random_seed", OT.paramValue => random_seed.to_s }, - { DC.title => "backbone", OT.paramValue => backbone.to_s } + { DC.title => "backbone", OT.paramValue => backbone.to_s }, + { DC.title => "method", OT.paramValue => method.to_s } ] }) -- cgit v1.2.3 From 8bc699c0914b5a779ccfd2a00f30c7c107c6b78c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 21 May 2012 13:58:32 +0200 Subject: Chisq estimation for /match --- bbrc-sample | 2 +- fminer.rb | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 93 insertions(+), 14 deletions(-) diff --git a/bbrc-sample b/bbrc-sample index 6ddfc2d..0d1d349 160000 --- a/bbrc-sample +++ b/bbrc-sample @@ -1 +1 @@ -Subproject commit 6ddfc2dc414f1e64ac16286c0cee5a4b0022d2e2 +Subproject commit 0d1d349ac33ae2fcc1bbdf31617ed9132c7527ca diff --git a/fminer.rb b/fminer.rb index d8da725..9942cfa 100644 --- a/fminer.rb +++ b/fminer.rb @@ -119,30 +119,109 @@ get "/fminer/last/?" do end end -# Creates same features for dataset that have been created -# with fminer in dataset -# accept params[:nr_hits] as used in other fminer methods +# Matches features of a a feature dataset onto instances of another dataset. +# The latter is referred to as 'training dataset', since p-values are computed, +# if user passes a prediction feature, or if the training dataset has only one feature. +# The result does not contain the prediction feature. +# @param [String] dataset_uri URI of the dataset +# @param [String] feature_dataset_uri URI of the feature dataset (i.e. dependent variable) +# @param [optional] parameters Accepted parameters are +# - prediction_feature URI of prediction feature to calculate p-values for +# @return [text/uri-list] Task URI post '/fminer/:method/match?' 
do raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri] raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] + + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" + unless params[:prediction_feature] # try to read prediction_feature from dataset + prediction_feature = OpenTox::Feature.find(training_dataset.features.keys.first) if training_dataset.features.size == 1 + end + prediction_feature = OpenTox::Feature.find(params[:prediction_feature]) if params[:prediction_feature] + task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task| + + # get endpoint statistics + if prediction_feature + db_class_sizes = Array.new # for effect calculation + all_activities = Hash.new # for effect calculation, indexed by id, starting from 1 (not 0) + id = 1 + training_dataset.compounds.each do |compound| + entry=training_dataset.data_entries[compound] + entry.each do |feature,values| + if feature == prediction_feature.uri + values.each { |val| + if val.nil? + LOGGER.warn "No #{feature} activity for #{compound.to_s}." + else + if prediction_feature.feature_type == "classification" + activity= training_dataset.value_map(prediction_feature.uri).invert[val].to_i # activities are mapped to 1..n + db_class_sizes[activity-1].nil? ? 
db_class_sizes[activity-1]=1 : db_class_sizes[activity-1]+=1 # AM effect + elsif prediction_feature.feature_type == "regression" + activity= val.to_f + end + begin + all_activities[id]=activity # DV: insert global information + id += 1 + rescue Exception => e + LOGGER.warn "Could not add " + smiles + "\t" + val.to_s + " to fminer" + LOGGER.warn e.backtrace + end + end + } + end + end + end + end + + # Intialize result by adding compounds f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid - f_dataset.features.each do |f,m| - res_dataset.add_feature(f,m) - end c_dataset.compounds.each do |c| res_dataset.add_compound(c) end + + # Run matching, put data entries in result. Features are recreated. smi = [nil]; smi += c_dataset.compounds.collect { |c| OpenTox::Compound.new(c).to_smiles } smarts = f_dataset.features.collect { |f,m| m[OT.smarts] } params[:nr_hits] == "true" ? hit_count=true: hit_count=false - matches, counts = LU.new.match_rb(smi, smarts, hit_count) + matches, counts = LU.new.match_rb(smi, smarts, hit_count) if smarts.size>0 + f_dataset.features.each do |f,m| if (matches[m[OT.smarts]] && matches[m[OT.smarts]].size>0) + + feature_uri = File.join res_dataset.uri,"feature","match", res_dataset.features.size.to_s + metadata = { + RDF.type => [OT.Feature, OT.Substructure], + OT.hasSource => f_dataset.uri, + OT.smarts => m[OT.smarts], + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] } + ] + } + + if (prediction_feature) + feat_hash = Hash[*(all_activities.select { |k,v| matches[m[OT.smarts]].include?(k) }.flatten)] + if prediction_feature.feature_type == "regression" + p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test + effect = (p_value > 0) ? 
"activating" : "deactivating" + else + p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f + g=Array.new # g is filled in *a*scending activity + training_dataset.value_map(prediction_feature.uri).each { |y,act| g[y-1]=Array.new } + feat_hash.each { |x,y| g[y-1].push(x) } + max = OpenTox::Algorithm.effect(g, db_class_sizes) # db_class_sizes is filled in *a*scending activity + effect = max+1 + end + metadata[OT.effect] = effect + metadata[OT.pValue] = p_value.abs + metadata[OT.parameters] << { DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri } + end + + res_dataset.add_feature feature_uri, metadata + matches[m[OT.smarts]].each_with_index {|id,idx| - res_dataset.add(c_dataset.compounds[id-1],f,counts[m[OT.smarts]][idx]) + res_dataset.add(c_dataset.compounds[id-1],feature_uri,counts[m[OT.smarts]][idx]) } end end @@ -225,9 +304,9 @@ post '/fminer/bbrc/?' do p_value = f[1] if (!@@bbrc.GetRegression) - id_arrs = f[2..-1].flatten - max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes) - effect = f[2..-1].size-max + id_arrs = f[2..-1].flatten # f[2..-1] is filled in *de*scending order, + max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes) # db_class_size is filled in *a*scending order, + effect = f[2..-1].size-max # thus need to turn around effect else #regression part id_arrs = f[2] # DV: effect calculation @@ -344,7 +423,7 @@ post '/fminer/bbrc/sample/?' do # method unless params[:method] - method="mean" + method="mle" LOGGER.debug "Set method to default value #{method}" else raise OpenTox::BadRequestError.new "method is neither 'mle' nor 'mean'" unless (params[:method] == "mle" or params[:method] == "mean") @@ -436,7 +515,7 @@ post '/fminer/bbrc/sample/?' 
do @value_map.each { |y,act| g[y-1]=Array.new } feat_hash.each { |x,y| g[y-1].push(x) } max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) - effect = g.size-max + effect = max + 1 feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s unless features.include? smarts features << smarts -- cgit v1.2.3 From 768580ea96d1e6bc355d4874adf341783a2953d7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 21 May 2012 15:10:08 +0200 Subject: BBRC service: simplified effect calculation --- fminer.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index 9942cfa..2248ae4 100644 --- a/fminer.rb +++ b/fminer.rb @@ -304,9 +304,9 @@ post '/fminer/bbrc/?' do p_value = f[1] if (!@@bbrc.GetRegression) - id_arrs = f[2..-1].flatten # f[2..-1] is filled in *de*scending order, - max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes) # db_class_size is filled in *a*scending order, - effect = f[2..-1].size-max # thus need to turn around effect + id_arrs = f[2..-1].flatten + max = OpenTox::Algorithm.effect(f[2..-1].reverse, fminer.db_class_sizes) # f needs reversal for bbrc + effect = max+1 else #regression part id_arrs = f[2] # DV: effect calculation -- cgit v1.2.3 From 65b2b8fc258abb64f6189c831d95b815db7621c9 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 21 May 2012 15:21:46 +0200 Subject: Corrected LAST effect calculation --- fminer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 2248ae4..0665372 100644 --- a/fminer.rb +++ b/fminer.rb @@ -633,7 +633,7 @@ post '/fminer/last/?' do @value_map.each { |y,act| g[y-1]=Array.new } feat_hash.each { |x,y| g[y-1].push(x) } max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) - effect = g.size-max + effect = max+1 end feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s unless features.include? 
smarts -- cgit v1.2.3 From 0940fc24efd92413cfe0f1556518bc73be1028b5 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 23 May 2012 11:00:23 +0200 Subject: Using OOB estimated p-Values for feature metadata --- fminer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index 0665372..d324dcc 100644 --- a/fminer.rb +++ b/fminer.rb @@ -476,6 +476,7 @@ post '/fminer/bbrc/sample/?' do @r.eval "bootBbrc(dataset.uri, prediction.feature.uri, num.boots, min.frequency.per.sample, min.sampling.support, NULL, bbrc.service, dataset.service, T, random.seed, as.logical(backbone), method)" smarts = (@r.pull "ans.patterns").collect! { |id| id.gsub(/\'/,"") } # remove extra quotes around smarts r_p_values = @r.pull "ans.p.values" + smarts_p_values = {}; smarts.size.times { |i| smarts_p_values[ smarts[i] ] = r_p_values[i] } merge_time = @r.pull "merge.time" n_stripped_mss = @r.pull "n.stripped.mss" n_stripped_cst = @r.pull "n.stripped.cst" @@ -510,7 +511,6 @@ post '/fminer/bbrc/sample/?' do matches.each do |smarts, ids| feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f g = Array.new @value_map.each { |y,act| g[y-1]=Array.new } feat_hash.each { |x,y| g[y-1].push(x) } @@ -523,7 +523,7 @@ post '/fminer/bbrc/sample/?' 
do RDF.type => [OT.Feature, OT.Substructure], OT.hasSource => feature_dataset.uri, OT.smarts => smarts, - OT.pValue => p_value.abs, + OT.pValue => smarts_p_values[smarts], OT.effect => effect, OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, -- cgit v1.2.3 From 1ea44d57e59b3bd80b475961d029b673e76dea31 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 24 May 2012 08:36:45 +0200 Subject: Added GET for match --- application.rb | 2 +- fminer.rb | 247 +++++++++++++++++++++++++++++++++------------------------ 2 files changed, 144 insertions(+), 105 deletions(-) diff --git a/application.rb b/application.rb index ee1bf65..75d1e21 100644 --- a/application.rb +++ b/application.rb @@ -39,7 +39,7 @@ end # # @return [text/uri-list] algorithm URIs get '/?' do - list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n" + list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full), url_for('/fminer/bbrc/match', :full), url_for('/fminer/last/match', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" diff --git a/fminer.rb b/fminer.rb index d324dcc..052d20d 100644 --- a/fminer.rb +++ b/fminer.rb @@ -7,11 +7,13 @@ ENV['FMINER_NR_HITS'] = 'true' @@bbrc = Bbrc::Bbrc.new @@last = Last::Last.new + + # Get list of fminer algorithms # # @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' 
do - list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full), url_for('/fminer/bbrc/match', :full), url_for('/fminer/last/match', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" @@ -22,6 +24,8 @@ get '/fminer/?' do end end + + # Get RDF/XML representation of fminer bbrc algorithm # @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm get "/fminer/bbrc/?" do @@ -119,117 +123,39 @@ get "/fminer/last/?" do end end -# Matches features of a a feature dataset onto instances of another dataset. -# The latter is referred to as 'training dataset', since p-values are computed, -# if user passes a prediction feature, or if the training dataset has only one feature. -# The result does not contain the prediction feature. + +# Get RDF/XML representation of fminer matching algorithm # @param [String] dataset_uri URI of the dataset # @param [String] feature_dataset_uri URI of the feature dataset (i.e. dependent variable) # @param [optional] parameters Accepted parameters are # - prediction_feature URI of prediction feature to calculate p-values for -# @return [text/uri-list] Task URI -post '/fminer/:method/match?' do - raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri] - raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] - - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" - unless params[:prediction_feature] # try to read prediction_feature from dataset - prediction_feature = OpenTox::Feature.find(training_dataset.features.keys.first) if training_dataset.features.size == 1 +get "/fminer/:method/match?" 
do + algorithm = OpenTox::Algorithm::Generic.new(url_for("/fminer/#{params[:method]}/match",:full)) + algorithm.metadata = { + DC.title => 'fminer feature matching', + DC.creator => "mguetlein@gmail.com, andreas@maunz.de", + RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" } + ] + } + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /application\/x-yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml end - prediction_feature = OpenTox::Feature.find(params[:prediction_feature]) if params[:prediction_feature] - - task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task| - - # get endpoint statistics - if prediction_feature - db_class_sizes = Array.new # for effect calculation - all_activities = Hash.new # for effect calculation, indexed by id, starting from 1 (not 0) - id = 1 - training_dataset.compounds.each do |compound| - entry=training_dataset.data_entries[compound] - entry.each do |feature,values| - if feature == prediction_feature.uri - values.each { |val| - if val.nil? - LOGGER.warn "No #{feature} activity for #{compound.to_s}." - else - if prediction_feature.feature_type == "classification" - activity= training_dataset.value_map(prediction_feature.uri).invert[val].to_i # activities are mapped to 1..n - db_class_sizes[activity-1].nil? ? 
db_class_sizes[activity-1]=1 : db_class_sizes[activity-1]+=1 # AM effect - elsif prediction_feature.feature_type == "regression" - activity= val.to_f - end - begin - all_activities[id]=activity # DV: insert global information - id += 1 - rescue Exception => e - LOGGER.warn "Could not add " + smiles + "\t" + val.to_s + " to fminer" - LOGGER.warn e.backtrace - end - end - } - end - end - end - end - - # Intialize result by adding compounds - f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid - c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid - res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid - c_dataset.compounds.each do |c| - res_dataset.add_compound(c) - end +end - # Run matching, put data entries in result. Features are recreated. - smi = [nil]; smi += c_dataset.compounds.collect { |c| OpenTox::Compound.new(c).to_smiles } - smarts = f_dataset.features.collect { |f,m| m[OT.smarts] } - params[:nr_hits] == "true" ? hit_count=true: hit_count=false - matches, counts = LU.new.match_rb(smi, smarts, hit_count) if smarts.size>0 - f_dataset.features.each do |f,m| - if (matches[m[OT.smarts]] && matches[m[OT.smarts]].size>0) - feature_uri = File.join res_dataset.uri,"feature","match", res_dataset.features.size.to_s - metadata = { - RDF.type => [OT.Feature, OT.Substructure], - OT.hasSource => f_dataset.uri, - OT.smarts => m[OT.smarts], - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] } - ] - } - - if (prediction_feature) - feat_hash = Hash[*(all_activities.select { |k,v| matches[m[OT.smarts]].include?(k) }.flatten)] - if prediction_feature.feature_type == "regression" - p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test - effect = (p_value > 0) ? 
# Matches features of a feature dataset onto instances of another dataset.
# The latter is referred to as 'training dataset', since p-values are computed
# if the user passes a prediction feature, or if the training dataset has only one feature.
# The result does not contain the prediction feature.
# @param [String] dataset_uri URI of the (training) dataset whose compounds are matched
# @param [String] feature_dataset_uri URI of the feature dataset providing the SMARTS patterns
# @param [optional] parameters Accepted parameters are
#   - prediction_feature URI of the prediction feature (i.e. dependent variable) to calculate p-values for
#   - nr_hits "true" to have the matcher report occurrence counts (passed on as its hit_count flag)
# @raise [OpenTox::BadRequestError] if dataset_uri or feature_dataset_uri is missing
# @return [text/uri-list] Task URI
post '/fminer/:method/match?' do
  raise OpenTox::BadRequestError.new("feature_dataset_uri not given") unless params[:feature_dataset_uri]
  raise OpenTox::BadRequestError.new("dataset_uri not given") unless params[:dataset_uri]

  # Pass the subject id, exactly like the other dataset lookups of this handler.
  training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid

  # An explicitly given prediction feature wins; otherwise fall back to the
  # only feature of the training dataset, if there is exactly one.
  prediction_feature = nil
  if params[:prediction_feature]
    prediction_feature = OpenTox::Feature.find(params[:prediction_feature])
  elsif training_dataset.features.size == 1
    prediction_feature = OpenTox::Feature.find(training_dataset.features.keys.first)
  end

  # The task is registered under the URI of the route that was actually called.
  task = OpenTox::Task.create("Matching features", url_for("/fminer/#{params[:method]}/match",:full)) do |task|

    feature_type = nil # "classification" or "regression", looked up once
    value_map = nil    # class index (1..n) => class value, looked up lazily
    class_index = nil  # inverted value_map: class value => class index

    # get endpoint statistics
    if prediction_feature
      feature_type = prediction_feature.feature_type
      db_class_sizes = Array.new # for effect calculation
      all_activities = Hash.new # for effect calculation, indexed by id, starting from 1 (not 0)
      # NOTE(review): ids are assigned per non-nil activity value, whereas the
      # matcher below reports 1-based compound positions. Both only coincide if
      # every compound carries exactly one activity value -- confirm with callers.
      id = 1
      training_dataset.compounds.each do |compound|
        entry = training_dataset.data_entries[compound]
        entry.each do |feature,values|
          next unless feature == prediction_feature.uri
          values.each do |val|
            if val.nil?
              LOGGER.warn "No #{feature} activity for #{compound.to_s}."
              next
            end
            activity = nil
            if feature_type == "classification"
              value_map ||= training_dataset.value_map(prediction_feature.uri)
              class_index ||= value_map.invert
              activity = class_index[val].to_i # activities are mapped to 1..n
              db_class_sizes[activity-1] = (db_class_sizes[activity-1] || 0) + 1 # AM effect
            elsif feature_type == "regression"
              activity = val.to_f
            end
            all_activities[id] = activity # DV: insert global information
            id += 1
          end
        end
      end
    end

    # Initialize result by adding compounds
    f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri], @subjectid
    c_dataset = training_dataset # same URI (dataset_uri), no need to fetch it again
    res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"], @subjectid
    c_dataset.compounds.each { |c| res_dataset.add_compound(c) }

    # Run matching, put data entries in result. Features are recreated.
    # smi gets a leading nil so that compound positions start from 1.
    smi = [nil] + c_dataset.compounds.collect { |c| OpenTox::Compound.new(c).to_smiles }
    smarts = f_dataset.features.collect { |f,m| m[OT.smarts] }
    hit_count = (params[:nr_hits] == "true")
    matches, counts = LU.new.match_rb(smi, smarts, hit_count) if smarts.size>0

    f_dataset.features.each do |f,m|
      pattern = m[OT.smarts]
      hits = matches[pattern]
      next unless hits && hits.size>0 # only matching patterns become result features

      feature_uri = File.join res_dataset.uri,"feature","match", res_dataset.features.size.to_s
      metadata = {
        RDF.type => [OT.Feature, OT.Substructure],
        OT.hasSource => f_dataset.uri,
        OT.smarts => pattern,
        OT.parameters => [
          { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }
        ]
      }

      if prediction_feature
        # activities of exactly those ids the pattern matched
        feat_hash = Hash.new
        all_activities.each { |act_id,activity| feat_hash[act_id] = activity if hits.include?(act_id) }
        if feature_type == "regression"
          p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
          effect = (p_value > 0) ? "activating" : "deactivating"
        else
          p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f
          value_map ||= training_dataset.value_map(prediction_feature.uri)
          g = Array.new # g is filled in *a*scending activity
          value_map.each { |y,act| g[y-1] = Array.new }
          feat_hash.each { |x,y| g[y-1].push(x) }
          max = OpenTox::Algorithm.effect(g, db_class_sizes) # db_class_sizes is filled in *a*scending activity
          effect = max+1
        end
        metadata[OT.effect] = effect
        metadata[OT.pValue] = p_value.abs
        metadata[OT.parameters] << { DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri }
      end

      res_dataset.add_feature feature_uri, metadata

      hits.each_with_index do |hit_id,idx|
        res_dataset.add(c_dataset.compounds[hit_id-1],feature_uri,counts[pattern][idx])
      end
    end
    res_dataset.save @subjectid
    res_dataset.uri
  end
  return_task(task)
end