diff options
Diffstat (limited to 'fminer.rb')
-rw-r--r-- | fminer.rb | 370 |
1 files changed, 0 insertions, 370 deletions
diff --git a/fminer.rb b/fminer.rb deleted file mode 100644 index 95ce469..0000000 --- a/fminer.rb +++ /dev/null @@ -1,370 +0,0 @@ -ENV['FMINER_SMARTS'] = 'true' -ENV['FMINER_NO_AROMATIC'] = 'true' -ENV['FMINER_PVALUES'] = 'true' -ENV['FMINER_SILENT'] = 'true' -ENV['FMINER_NR_HITS'] = 'true' - -@@bbrc = Bbrc::Bbrc.new -@@last = Last::Last.new - -# Get list of fminer algorithms -# -# @return [text/uri-list] URIs of fminer algorithms -get '/fminer/?' do - list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" - case request.env['HTTP_ACCEPT'] - when /text\/html/ - content_type "text/html" - OpenTox.text_to_html list - else - content_type 'text/uri-list' - list - end -end - -# Get RDF/XML representation of fminer bbrc algorithm -# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm -get "/fminer/bbrc/?" do - algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full)) - algorithm.metadata = { - DC.title => 'fminer backbone refinement class representatives', - DC.creator => "andreas@maunz.de, helma@in-silico.ch", - DC.contributor => "vorgrimmlerdavid@gmx.de", -# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], - OT.parameters => [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, - { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, - { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, - { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, - ] - } - case request.env['HTTP_ACCEPT'] - when /text\/html/ - content_type "text/html" - OpenTox.text_to_html algorithm.to_yaml - when /application\/x-yaml/ - content_type "application/x-yaml" - algorithm.to_yaml - else - response['Content-Type'] = 'application/rdf+xml' - algorithm.to_rdfxml - end -end - -# Get RDF/XML representation of fminer last algorithm -# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm -get "/fminer/last/?" do - algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full)) - algorithm.metadata = { - DC.title => 'fminer latent structure class representatives', - DC.creator => "andreas@maunz.de, helma@in-silico.ch", - DC.contributor => "vorgrimmlerdavid@gmx.de", -# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], - OT.parameters => [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, - { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, - { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, - ] - } - case request.env['HTTP_ACCEPT'] - when /text\/html/ - content_type "text/html" - OpenTox.text_to_html algorithm.to_yaml - when /application\/x-yaml/ - content_type "application/x-yaml" - algorithm.to_yaml - else - response['Content-Type'] = 'application/rdf+xml' - algorithm.to_rdfxml - end -end - -# Creates same features for dataset <dataset_uri> that have been created -# with fminer in dataset <feature_dataset_uri> -# accept params[:nr_hits] as used in other fminer methods -post '/fminer/:method/match?' do - raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri] - raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] - task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task| - f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid - c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid - res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid - f_dataset.features.each do |f,m| - res_dataset.add_feature(f,m) - end - c_dataset.compounds.each do |c| - res_dataset.add_compound(c) - comp = OpenTox::Compound.new(c) - f_dataset.features.each do |f,m| - if params[:nr_hits] == "true" - hits = comp.match_hits([m[OT.smarts]]) - res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]] - else - res_dataset.add(c,f,1) if comp.match?(m[OT.smarts]) - end - end - end - res_dataset.save @subjectid - res_dataset.uri - end - return_task(task) -end - -# Run bbrc algorithm on dataset -# -# @param [String] dataset_uri URI of the training dataset -# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) -# @param [optional] parameters BBRC parameters, accepted parameters are -# - min_frequency Minimum frequency (default 5) -# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") -# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") -# - min_chisq_significance Significance threshold (between 0 and 1) -# - nr_hits Set to "true" to get hit count instead of presence -# @return [text/uri-list] Task URI -post '/fminer/bbrc/?' do - - fminer=OpenTox::Algorithm::Fminer.new - fminer.check_params(params,5,@subjectid) - - task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do |task| - @@bbrc.Reset - if fminer.prediction_feature.feature_type == "regression" - @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! - else - raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+ - "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri) - @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort - @value_map=Hash.new - @training_classes.each_with_index { |c,i| @value_map[i+1] = c } - end - @@bbrc.SetMinfreq(fminer.minfreq) - @@bbrc.SetType(1) if params[:feature_type] == "paths" - @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean - @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] - @@bbrc.SetConsoleOut(false) - - feature_dataset = OpenTox::Dataset.new(nil, @subjectid) - feature_dataset.add_metadata({ - DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s, - DC.creator => url_for('/fminer/bbrc',:full), - OT.hasSource => url_for('/fminer/bbrc', :full), - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] - }) - feature_dataset.save(@subjectid) - - fminer.compounds = [] - fminer.db_class_sizes = Array.new # AM: effect - fminer.all_activities = Hash.new # DV: for effect calculation in regression part - fminer.smi = [] # AM LAST: needed for matching the patterns back - - # Add data to fminer - fminer.add_fminer_data(@@bbrc, params, @value_map) - - g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation - g_median=g_array.to_scale.median - - raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 - task.progress 10 - step_width = 80 / @@bbrc.GetNoRootNodes().to_f - features = Set.new - - # run @@bbrc - (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| - results = @@bbrc.MineRoot(j) - task.progress 10+step_width*(j+1) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - - if (!@@bbrc.GetRegression) - id_arrs = f[2..-1].flatten - max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes) - effect = f[2..-1].size-max - else #regression part - id_arrs = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - id=id.keys[0] # extract id from hit count hash - f_arr.push(fminer.all_activities[id]) - end - f_median=f_arr.to_scale.median - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end - end - - feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s - unless features.include? smarts - features << smarts - metadata = { - OT.hasSource => url_for('/fminer/bbrc', :full), - RDF.type => [OT.Feature, OT.Substructure], - OT.smarts => smarts, - OT.pValue => p_value.to_f, - OT.effect => effect, - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] - } - feature_dataset.add_feature feature_uri, metadata - #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters - end - id_arrs.each { |id_count_hash| - id=id_count_hash.keys[0].to_i - count=id_count_hash.values[0].to_i - if params[:nr_hits] == "true" - feature_dataset.add(fminer.compounds[id], feature_uri, count) - else - feature_dataset.add(fminer.compounds[id], feature_uri, 1) - end - } - - end # end of - end # feature parsing - - # AM: add feature values for non-present features - # feature_dataset.complete_data_entries - - feature_dataset.save(@subjectid) - feature_dataset.uri - end - response['Content-Type'] = 'text/uri-list' - raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" - halt 202,task.uri.to_s+"\n" -end -#end - -# Run last algorithm on a dataset -# -# @param [String] dataset_uri URI of the training dataset -# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) -# @param [optional] parameters LAST parameters, accepted parameters are -# - min_frequency freq Minimum frequency (default 5) -# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") -# - hops Maximum number of hops -# - nr_hits Set to "true" to get hit count instead of presence -# @return [text/uri-list] Task URI -post '/fminer/last/?' do - - fminer=OpenTox::Algorithm::Fminer.new - fminer.check_params(params,80,@subjectid) - - task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do |task| - @@last.Reset - if fminer.prediction_feature.feature_type == "regression" - @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! - else - raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+ - "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri) - @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort - @value_map=Hash.new - @training_classes.each_with_index { |c,i| @value_map[i+1] = c } - end - @@last.SetMinfreq(fminer.minfreq) - @@last.SetType(1) if params[:feature_type] == "paths" - @@last.SetMaxHops(params[:hops]) if params[:hops] - @@last.SetConsoleOut(false) - - - feature_dataset = OpenTox::Dataset.new(nil, @subjectid) - feature_dataset.add_metadata({ - DC.title => "LAST representatives for " + fminer.training_dataset.metadata[DC.title].to_s, - DC.creator => url_for('/fminer/last',:full), - OT.hasSource => url_for('/fminer/last', :full), - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] - }) - feature_dataset.save(@subjectid) - - fminer.compounds = [] - fminer.db_class_sizes = Array.new # AM: effect - fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) - fminer.smi = [] # AM LAST: needed for matching the patterns back - - # Add data to fminer - fminer.add_fminer_data(@@last, params, @value_map) - - raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 - - # run @@last - features = Set.new - xml = "" - task.progress 10 - step_width = 80 / @@last.GetNoRootNodes().to_f - - (0 .. @@last.GetNoRootNodes()-1).each do |j| - results = @@last.MineRoot(j) - task.progress 10+step_width*(j+1) - results.each do |result| - xml << result - end - end - - lu = LU.new # AM LAST: uses last-utils here - dom=lu.read(xml) # AM LAST: parse GraphML - smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) - params[:nr_hits] == "true" ? hit_count=true: hit_count=false - matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations - - matches.each do |smarts, ids| - feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - if @@last.GetRegression() - p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test - effect = (p_value > 0) ? "activating" : "deactivating" - else - p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f - g=Array.new - @value_map.each { |y,act| g[y-1]=Array.new } - feat_hash.each { |x,y| g[y-1].push(x) } - max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) - effect = g.size-max - end - feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s - unless features.include? smarts - features << smarts - metadata = { - RDF.type => [OT.Feature, OT.Substructure], - OT.hasSource => feature_dataset.uri, - OT.smarts => smarts, - OT.pValue => p_value.abs, - OT.effect => effect, - OT.parameters => [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] - } - feature_dataset.add_feature feature_uri, metadata - end - if !hit_count - ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)} - else - ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} - end - end - - # AM: add feature values for non-present features - # feature_dataset.complete_data_entries - - feature_dataset.save(@subjectid) - feature_dataset.uri - end - response['Content-Type'] = 'text/uri-list' - raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" - halt 202,task.uri.to_s+"\n" -end |