From 9f905d4c4246cd240a5496e77e7e3be1fa6a75a8 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 3 May 2012 16:33:32 +0200 Subject: Initial commit --- application.rb | 2 +- fminer.rb | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 187 insertions(+), 25 deletions(-) diff --git a/application.rb b/application.rb index ef123da..b00ba9f 100644 --- a/application.rb +++ b/application.rb @@ -38,7 +38,7 @@ end # # @return [text/uri-list] algorithm URIs get '/?' do - list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n" + list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" diff --git a/fminer.rb b/fminer.rb index da06fc8..fd32043 100644 --- a/fminer.rb +++ b/fminer.rb @@ -4,14 +4,14 @@ ENV['FMINER_PVALUES'] = 'true' ENV['FMINER_SILENT'] = 'true' ENV['FMINER_NR_HITS'] = 'true' -@@bbrc = Bbrc::Bbrc.new -@@last = Last::Last.new +@@bbrc = Bbrc::Bbrc.new +@@last = Last::Last.new # Get list of fminer algorithms # # @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do - list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/bbrc/sample', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" @@ -50,7 +50,38 @@ get "/fminer/bbrc/?" 
do content_type "application/x-yaml" algorithm.to_yaml else - response['Content-Type'] = 'application/rdf+xml' + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end +end + +# Get RDF/XML representation of fminer bbrc sample algorithm +# @return [application/rdf+xml] OWL-DL representation of fminer bbrc sample algorithm +get "/fminer/bbrc/sample/?" do + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc/sample',:full)) + algorithm.metadata = { + DC.title => 'fminer backbone refinement class representatives, obtained from samples of a dataset', + DC.creator => "andreas@maunz.de", +# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc", + RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Number of bootstrap samples", OT.paramScope => "optional", DC.title => "num_boots" }, + { DC.description => "Minimum sampling support", OT.paramScope => "optional", DC.title => "min_sampling_support" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, + { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, + ] + } + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' algorithm.to_rdfxml end end @@ -81,17 +112,17 @@ get "/fminer/last/?" 
do content_type "application/x-yaml" algorithm.to_yaml else - response['Content-Type'] = 'application/rdf+xml' + response['Content-Type'] = 'application/rdf+xml' algorithm.to_rdfxml end end # Creates same features for dataset that have been created # with fminer in dataset -# accept params[:nr_hits] as used in other fminer methods -post '/fminer/:method/match?' do +# accept params[:nr_hits] as used in other fminer methods +post '/fminer/:method/match?' do raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri] - raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] + raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task| f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid @@ -105,7 +136,7 @@ post '/fminer/:method/match?' do f_dataset.features.each do |f,m| if params[:nr_hits] == "true" hits = comp.match_hits([m[OT.smarts]]) - res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]] + res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]] else res_dataset.add(c,f,1) if comp.match?(m[OT.smarts]) end @@ -115,7 +146,7 @@ post '/fminer/:method/match?' do res_dataset.uri end return_task(task) -end +end # Run bbrc algorithm on dataset # @@ -128,7 +159,7 @@ end # - min_chisq_significance Significance threshold (between 0 and 1) # - nr_hits Set to "true" to get hit count instead of presence # @return [text/uri-list] Task URI -post '/fminer/bbrc/?' do +post '/fminer/bbrc/?' do fminer=OpenTox::Algorithm::Fminer.new fminer.check_params(params,5,@subjectid) @@ -185,7 +216,7 @@ post '/fminer/bbrc/?' 
do smarts = f[0] p_value = f[1] - if (!@@bbrc.GetRegression) + if (!@@bbrc.GetRegression) id_arrs = f[2..-1].flatten max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes) effect = f[2..-1].size-max @@ -195,10 +226,10 @@ post '/fminer/bbrc/?' do f_arr=Array.new f[2].each do |id| id=id.keys[0] # extract id from hit count hash - f_arr.push(fminer.all_activities[id]) - end + f_arr.push(fminer.all_activities[id]) + end f_median=f_arr.to_scale.median - if g_median >= f_median + if g_median >= f_median effect = 'activating' else effect = 'deactivating' @@ -232,13 +263,13 @@ post '/fminer/bbrc/?' do end } - end # end of + end # end of end # feature parsing # AM: add feature values for non-present features - # feature_dataset.complete_data_entries + # feature_dataset.complete_data_entries - feature_dataset.save(@subjectid) + feature_dataset.save(@subjectid) feature_dataset.uri end response['Content-Type'] = 'text/uri-list' @@ -247,6 +278,137 @@ post '/fminer/bbrc/?' do end #end + +# Run bbrc/sample algorithm on a dataset +# +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional] BBRC sample parameters, accepted are +# - num_boots Number of bootstrap samples (default 150) +# - min_sampling_support Minimum sampling support (default 30% of num_boots) +# - min_frequency Minimum frequency (default 10% of dataset size) +# - nr_hits Whether subgraphs should be weighted with their occurrence counts in the instances (frequency) +# +# @return [text/uri-list] Task URI +post '/fminer/bbrc/sample/?' do + + fminer=OpenTox::Algorithm::Fminer.new + fminer.check_params(params,80,@subjectid) + + # num_boots + unless params[:num_boots] + num_boots = 150 + LOGGER.debug "Set num_boots to default value #{num_boots}" + else + raise OpenTox::BadRequestError.new "num_boots is not numeric" unless OpenTox::Algorithm.numeric? 
params[:num_boots] + num_boots = params[:num_boots].to_i.ceil + end + + # min_sampling_support + unless params[:min_sampling_support] + min_sampling_support = (num_boots * 0.3).ceil + LOGGER.debug "Set min_sampling_support to default value #{min_sampling_support}" + else + raise OpenTox::BadRequestError.new "min_sampling_support is not numeric" unless OpenTox::Algorithm.numeric? params[:min_sampling_support] + min_sampling_support= params[:min_sampling_support].to_i.ceil + end + + # re-set min_frequency + unless params[:min_frequency] + min_frequency = (fminer.training_dataset.compounds.size * 0.1).ceil + LOGGER.debug "Set min_frequency to default value #{min_frequency}" + else + raise OpenTox::BadRequestError.new "min_frequency is not numeric" unless OpenTox::Algorithm.numeric? params[:min_frequency] + min_frequency= params[:min_frequency].to_i.ceil + end + + fminer.training_dataset.compounds.size + + task = OpenTox::Task.create("Mining BBRC sample features", url_for('/fminer',:full)) do |task| + if fminer.prediction_feature.feature_type == "regression" + raise OpenTox::BadRequestError.new "BBRC sampling is only for classification" + else + raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+ + "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri) + @value_map=fminer.training_dataset.value_map(fminer.prediction_feature.uri) + end + + feature_dataset = OpenTox::Dataset.new(nil, @subjectid) + feature_dataset.add_metadata({ + DC.title => "BBRC sampled representatives for " + fminer.training_dataset.metadata[DC.title].to_s, + DC.creator => url_for('/fminer/bbrc/sample',:full), + OT.hasSource => url_for('/fminer/bbrc/sample', :full), + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + # TODO: add more params + ] + }) + feature_dataset.save(@subjectid) + + 
fminer.compounds = [] + fminer.db_class_sizes = Array.new # AM: effect + fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) + fminer.smi = [] # AM LAST: needed for matching the patterns back + + # Add data to fminer + fminer.add_fminer_data(@@last, @value_map) + + raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 + + # run bbrc-sample + features = Set.new + task.progress 10 + # FIXME: the sampling/mining step is missing here; no assignment to smarts is visible in this handler before it is passed to lu.match_rb below + + # matching + task.progress 90 + lu = LU.new # AM LAST: uses last-utils here + params[:nr_hits] == "true" ? hit_count=true: hit_count=false + matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations + + matches.each do |smarts, ids| + feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax + p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f + g=Array.new + @value_map.each { |y,act| g[y-1]=Array.new } + feat_hash.each { |x,y| g[y-1].push(x) } + max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) + effect = g.size-max + feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s + unless features.include? 
smarts + features << smarts + metadata = { + RDF.type => [OT.Feature, OT.Substructure], + OT.hasSource => feature_dataset.uri, + OT.smarts => smarts, + OT.pValue => p_value.abs, + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + } + feature_dataset.add_feature feature_uri, metadata + end + if !hit_count + ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)} + else + ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} + end + end + + # AM: add feature values for non-present features + # feature_dataset.complete_data_entries + + feature_dataset.save(@subjectid) + feature_dataset.uri + end + response['Content-Type'] = 'text/uri-list' + raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" +end + # Run last algorithm on a dataset # # @param [String] dataset_uri URI of the training dataset @@ -312,14 +474,14 @@ post '/fminer/last/?' do end lu = LU.new # AM LAST: uses last-utils here - dom=lu.read(xml) # AM LAST: parse GraphML + dom=lu.read(xml) # AM LAST: parse GraphML smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) params[:nr_hits] == "true" ? hit_count=true: hit_count=false matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations matches.each do |smarts, ids| feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - if @@last.GetRegression() + if @@last.GetRegression() p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test effect = (p_value > 0) ? 
"activating" : "deactivating" else @@ -343,20 +505,20 @@ post '/fminer/last/?' do { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } ] - } + } feature_dataset.add_feature feature_uri, metadata end if !hit_count ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)} else - ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} + ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} end end # AM: add feature values for non-present features - # feature_dataset.complete_data_entries + # feature_dataset.complete_data_entries - feature_dataset.save(@subjectid) + feature_dataset.save(@subjectid) feature_dataset.uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3