=begin
* Name: fminer.rb
* Description: Subgraph descriptor calculation
* Author: Andreas Maunz
* Date: 10/2012
=end

# Environment switches set before the miners are instantiated.
# NOTE(review): presumably read by the native fminer libraries at construction time -- confirm.
ENV['FMINER_SMARTS'] = 'true'
ENV['FMINER_NO_AROMATIC'] = 'true'
ENV['FMINER_PVALUES'] = 'true'
ENV['FMINER_SILENT'] = 'true'
ENV['FMINER_NR_HITS'] = 'true'

# Shared miner instances used by the routes below.
# NOTE(review): class variables assigned at top level are rejected by newer Ruby
# versions; left untouched here because other files may rely on @@bbrc / @@last.
@@bbrc = Bbrc::Bbrc.new
@@last = Last::Last.new

module OpenTox

  class Application < Service

    # Get list of fminer algorithms
    # @return [text/uri-list] URIs
    get '/fminer/?' do
      render [ uri('/fminer/bbrc'), uri('/fminer/last') ]
    end

    # Get representation of BBRC algorithm
    # @return [String] Representation
    get "/fminer/bbrc/?" do
      algorithm = OpenTox::Algorithm::Generic.new(to('/fminer/bbrc',:full))
      algorithm.metadata = {
        RDF::DC.title => 'Backbone Refinement Class Representatives',
        RDF::DC.creator => "andreas@maunz.de",
        RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
      }
      algorithm.parameters = [
        { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
        { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
        { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
        { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" },
        { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" },
        { RDF::DC.description => "Significance threshold (between 0 and 1)", RDF::OT.paramScope => "optional", RDF::DC.title => "min_chisq_significance" },
        { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
        { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" }
      ]
      render(algorithm)
    end

    # Get representation of BBRC-sample algorithm
    # @return [String] Representation
    get "/fminer/bbrc/sample/?" do
      algorithm = OpenTox::Algorithm::Generic.new(to('/fminer/bbrc/sample',:full))
      algorithm.metadata = {
        RDF::DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset',
        RDF::DC.creator => "andreas@maunz.de",
        RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
      }
      algorithm.parameters = [
        { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
        { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
        { RDF::DC.description => "Number of bootstrap samples", RDF::OT.paramScope => "optional", RDF::DC.title => "num_boots" },
        { RDF::DC.description => "Minimum sampling support", RDF::OT.paramScope => "optional", RDF::DC.title => "min_sampling_support" },
        { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
        { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
        { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" },
        { RDF::DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", RDF::OT.paramScope => "optional", RDF::DC.title => "method" }
      ]
      render(algorithm)
    end

    # Get representation of fminer LAST-PM algorithm
    # @return [String] Representation
    get "/fminer/last/?" do
      algorithm = OpenTox::Algorithm::Generic.new(to('/fminer/last',:full))
      algorithm.metadata = {
        RDF::DC.title => 'Latent Structure Pattern Mining descriptors',
        RDF::DC.creator => "andreas@maunz.de",
        RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
      }
      algorithm.parameters = [
        { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
        { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
        { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
        { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" },
        { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
        { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" }
      ]
      render(algorithm)
    end

    # Get representation of matching algorithm
    # The pattern ends in '/?' (optional trailing slash) like every other route in
    # this file; the previous 'match?' made the final 'h' optional instead.
    # @return [String] Representation
    get "/fminer/:method/match/?" do
      algorithm = OpenTox::Algorithm::Generic.new(to("/fminer/#{params[:method]}/match",:full))
      algorithm.metadata = {
        RDF::DC.title => 'fminer feature matching',
        RDF::DC.creator => "mguetlein@gmail.com, andreas@maunz.de",
        RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
      }
      algorithm.parameters = [
        { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
        { RDF::DC.description => "Feature Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "feature_dataset_uri" },
        { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }
      ]
      render(algorithm)
    end

    # Run last algorithm on a dataset
    #
    # @param [String] dataset_uri URI of the training dataset
    # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
    # @param [optional] parameters LAST parameters, accepted parameters are
    #   - min_frequency freq  Minimum frequency (default 5)
    #   - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
    #   - nr_hits Set to "true" to get hit count instead of presence
    #   - get_target Set to "true" to obtain target variable as feature
    # @return [text/uri-list] Task URI
    post '/fminer/last/?' do

      # NOTE(review): @@fminer is a class variable, so it is shared by every request
      # handled by this process -- confirm that concurrent mining tasks cannot overlap.
      @@fminer = OpenTox::Algorithm::Fminer.new(to('/fminer/last',:full))
      @@fminer.check_params(params,80)

      task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last')) do |task|

        @@last.Reset
        if @@fminer.prediction_feature.feature_type == "regression"
          @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
        else
          # The message must reference @@fminer: a bare `fminer` is undefined here and
          # would raise NameError instead of reporting the bad request.
          unless @@fminer.prediction_feature.accept_values
            bad_request_error "No accept values for "\
              "dataset '#{@@fminer.training_dataset.uri}' and "\
              "feature '#{@@fminer.prediction_feature.uri}'"
          end
          value_map = @@fminer.prediction_feature.value_map
        end
        # value_map stays nil in the regression branch.
        @@last.SetMinfreq(@@fminer.minfreq)
        @@last.SetType(1) if params[:feature_type] == "paths"
        @@last.SetConsoleOut(false)

        feature_dataset = OpenTox::Dataset.new
        feature_dataset.metadata = {
          RDF::DC.title => "LAST representatives for #{@@fminer.training_dataset.title}",
          RDF::DC.creator => to('/fminer/last'),
          RDF::OT.hasSource => to('/fminer/last')
        }
        feature_dataset.parameters = [
          { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
          { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
          { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
          { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }
        ]

        @@fminer.compounds = []
        @@fminer.db_class_sizes = Array.new # AM: effect
        @@fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
        @@fminer.smi = [] # needed for matching the patterns back

        # Add data to fminer
        @@fminer.add_fminer_data(@@last, value_map)
        #task.progress 10
        # Progress reporting is disabled. If it is re-enabled, derive the step width
        # from the LAST miner (the old code used the BBRC miner by mistake):
        #step_width = 80 / @@last.GetNoRootNodes().to_f

        # run @@last: mine every root node and concatenate the returned fragments
        xml = ""
        (0 .. @@last.GetNoRootNodes()-1).each do |j|
          results = @@last.MineRoot(j)
          #task.progress 10+step_width*(j+1)
          results.each do |result|
            xml << result
          end
        end

        lu = LU.new                         # uses last-utils here
        dom = lu.read(xml)                  # parse GraphML
        smarts = lu.smarts_rb(dom,'nls')    # converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
        hit_count = (params[:nr_hits] == "true")
        matches, counts = lu.match_rb(@@fminer.smi,smarts,hit_count,true) # creates instantiations

        features = []
        # results are collected as hash { compound => { feature_uri => value } }
        fminer_results = {}
        matches.each do |pattern, ids|
          metadata, _parameters = @@fminer.calc_metadata(pattern, ids, counts[pattern], @@last, nil, value_map, params)
          metadata[RDF::DC.title] = pattern.dup
          feature = OpenTox::Feature.find_or_create(metadata)
          features << feature
          ids.each_with_index { |id,idx|
            compound = @@fminer.compounds[id]
            fminer_results[compound] ||= {}
            fminer_results[compound][feature.uri] = counts[pattern][idx]
          }
        end

        fminer_compounds = @@fminer.training_dataset.compounds
        prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri
        prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |_c,idx|
          @@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
        }
        # training compounds that were not handed to the miner
        fminer_noact_compounds = fminer_compounds - @@fminer.compounds

        feature_dataset.features = features
        if (params[:get_target] == "true")
          feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features
        end

        fminer_compounds.each_with_index { |c,idx|
          # TODO: fix value insertion
          row = [ c ]
          if (params[:get_target] == "true")
            row = row + [ prediction_feature_all_acts[idx] ]
          end
          features.each { |f|
            row << (fminer_results[c] ? fminer_results[c][f.uri] : nil)
          }
          # missing values become 0 for mined compounds; rows of compounds that
          # were not mined keep their nils
          row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
          feature_dataset << row
        }
        feature_dataset.put
        # the block's value is the URI of the stored feature dataset
        feature_dataset.uri

      end
      response['Content-Type'] = 'text/uri-list'
      halt 202,task.uri
    end

  end

end