From ef102b11ecdf9258f1942c9a25fa6db2772cfb89 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Mon, 26 Mar 2012 17:17:54 +0200
Subject: Deleted obsolete files

---
 balancer.rb          |  98 --------------
 feature_selection.rb |  93 -------------
 fminer.rb            | 370 ---------------------------------------------------
 lazar.rb             | 259 ------------------------------------
 openbabel.rb         | 148 ---------------------
 similarity.rb        |  26 ----
 smarts.rb            |   3 -
 7 files changed, 997 deletions(-)
 delete mode 100644 balancer.rb
 delete mode 100644 feature_selection.rb
 delete mode 100644 fminer.rb
 delete mode 100644 lazar.rb
 delete mode 100644 openbabel.rb
 delete mode 100644 similarity.rb
 delete mode 100644 smarts.rb

diff --git a/balancer.rb b/balancer.rb
deleted file mode 100644
index 4ed2fd7..0000000
--- a/balancer.rb
+++ /dev/null
@@ -1,98 +0,0 @@
-# cuts a classification dataset into balanced pieces
-# let inact_act_ratio := majority_class.size/minority_class.size
-# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5
-# each piece contains the complete minority class and an equal share of the majority class compounds.
-
-class Balancer
-
-  attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets
-
-  # Supply an OpenTox::Dataset here
-  # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given
-  def initialize(dataset, feature_uri, creator_url)
-    @act_arr = []
-    @inact_arr = []
-    @inact_act_ratio = 1.0/0 # trick to define +infinity
-    @nr_majority_splits = 1  # +/-1 means: no split
-    @split = []              # split arrays with ids
-    @datasets = []           # result datasets
-    @errors = []
-
-    classification = true
-    if dataset.features.include?(feature_uri)
-      dataset.data.each do |i,a|
-        inchi = i
-        acts = a
-        acts.each do |act|
-          value = act[feature_uri]
-          if OpenTox::Utils.is_true?(value)
-            @act_arr << inchi
-          elsif OpenTox::Utils.classification?(value)
-            @inact_arr << inchi
-          else
-            classification = false
-            break;
-          end
-        end
-      end
-      @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression
-      set_nr_majority_splits
-      # perform majority split
-      @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1
-      @split.each do |s|
-        new_c = @nr_majority_splits > 0 ? s.concat(@act_arr) : s.concat(@inact_arr)
-        @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url)
-      end
-
-    else
-      errors << "Feature not present in dataset."
-    end
-    errors << "Can not split regression dataset." unless classification
-  end
-
-  # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values.
-  def set_nr_majority_splits
-    @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression
-  end
-
-  # does the actual shuffle and split
-  def shuffle_split (arr)
-    arr = arr.shuffle
-    arr.chunk(@nr_majority_splits.abs)
-  end
-
-  # turns a hash into a 2 col csv
-  def hsh2csv (hsh)
-    res=""
-    hsh.each do |k,v|
-      arr = [v,(@nr_majority_splits > 0 ? 0 : 1)]
-      res += arr.join(", ") + "\n"
-    end
-    res
-  end
-
-end
-
-class Array
-
-  # cuts an array into chunks - returns a two-dimensional array
-  def chunk(pieces)
-    q, r = length.divmod(pieces)
-    (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \
-      .map { |a, b| slice(a...b) }
-  end
-
-  # shuffles the elements of an array
-  def shuffle( seed=nil )
-    srand seed.to_i if seed
-    sort_by { Kernel.rand }
-  end
-
-  # shuffles self
-  def shuffle!( seed=nil )
-    self.replace shuffle( seed )
-  end
-
-end
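The Balancer above derives the number of splits from the class ratio and then hands each majority-class chunk the complete minority class. A minimal standalone sketch of the same arithmetic in plain Ruby (the class sizes are made-up example values, not taken from the deleted file):

# Hypothetical example: 70 inactives (majority) vs. 20 actives (minority)
majority = (1..70).to_a
minority = (71..90).to_a

ratio  = majority.size.to_f / minority.size.to_f  # 3.5
pieces = ratio >= 1.5 ? ratio.ceil : 1            # 4 splits, as in set_nr_majority_splits

# Split the shuffled majority class into `pieces` roughly equal chunks
# and append the full minority class to each chunk.
chunk_size = (majority.size / pieces.to_f).ceil
splits = majority.shuffle.each_slice(chunk_size).map { |chunk| chunk + minority }

splits.each_with_index { |s,i| puts "split #{i}: #{s.size} compounds" }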
diff --git a/feature_selection.rb b/feature_selection.rb
deleted file mode 100644
index 556ccf8..0000000
--- a/feature_selection.rb
+++ /dev/null
@@ -1,93 +0,0 @@
-# Get list of feature_selection algorithms
-#
-# @return [text/uri-list] URIs of feature_selection algorithms
-get '/feature_selection/?' do
-  list = [ url_for('/feature_selection/rfe', :full) ].join("\n") + "\n"
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html list
-  else
-    content_type 'text/uri-list'
-    list
-  end
-end
-
-# Get RDF/XML representation of feature_selection rfe algorithm
-# @return [application/rdf+xml] OWL-DL representation of feature_selection rfe algorithm
-get "/feature_selection/rfe/?" do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/feature_selection/rfe',:full))
-  algorithm.metadata = {
-    DC.title => 'recursive feature elimination',
-    DC.creator => "andreas@maunz.de, helma@in-silico.ch",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#feature_selection_rfe",
-    RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
-    OT.parameters => [
-      { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature_uri" },
-      { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" },
-      { DC.description => "Delete instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" }
-    ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'
-    algorithm.to_rdfxml
-  end
-end
-
-# Run rfe algorithm on dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] feature_dataset_uri URI of the feature dataset
-# @return [text/uri-list] Task URI
-post '/feature_selection/rfe/?' do
-
-  raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri]
-  raise OpenTox::NotFoundError.new "Please submit a prediction_feature_uri." unless params[:prediction_feature_uri]
-  raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri]
-
-  ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} )
-  tf_ds=Tempfile.open(['rfe_', '.csv'])
-  tf_ds.puts(ds_csv)
-  tf_ds.flush()
-
-  prediction_feature = params[:prediction_feature_uri].split('/').last # get col name
-
-  fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"})
-  tf_fds=Tempfile.open(['rfe_', '.csv'])
-  tf_fds.puts(fds_csv)
-  tf_fds.flush()
-
-  del_missing = params[:del_missing] == "true" ? true : false
-
-  task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task|
-    r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } )
-
-    parser = OpenTox::Parser::Spreadsheets.new
-    ds = OpenTox::Dataset.new
-    ds.save
-    parser.dataset = ds
-    ds = parser.load_csv(File.open(r_result_file).read,false,true)
-    ds.save
-    r_result_uri = ds.uri
-    #r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri
-    begin
-      tf_ds.close!; tf_fds.close!
-      File.unlink(r_result_file)
-    rescue
-    end
-    r_result_uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
-
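The POST route above expects three dataset URIs and answers 202 with a task URI. A sketch of a client call using the rest-client gem (the service and dataset URLs are hypothetical placeholders, not values from this repository):

require 'rest-client'

# All three URI parameters are mandatory according to the route above.
task_uri = RestClient.post("http://localhost:4567/feature_selection/rfe",
  :dataset_uri            => "http://example.org/dataset/1",
  :prediction_feature_uri => "http://example.org/dataset/1/feature/activity",
  :feature_dataset_uri    => "http://example.org/dataset/2",
  :del_missing            => "true")   # optional

puts task_uri   # poll this task URI until the result dataset is ready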
diff --git a/fminer.rb b/fminer.rb
deleted file mode 100644
index 95ce469..0000000
--- a/fminer.rb
+++ /dev/null
@@ -1,370 +0,0 @@
-ENV['FMINER_SMARTS'] = 'true'
-ENV['FMINER_NO_AROMATIC'] = 'true'
-ENV['FMINER_PVALUES'] = 'true'
-ENV['FMINER_SILENT'] = 'true'
-ENV['FMINER_NR_HITS'] = 'true'
-
-@@bbrc = Bbrc::Bbrc.new
-@@last = Last::Last.new
-
-# Get list of fminer algorithms
-#
-# @return [text/uri-list] URIs of fminer algorithms
-get '/fminer/?' do
-  list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html list
-  else
-    content_type 'text/uri-list'
-    list
-  end
-end
-
-# Get RDF/XML representation of fminer bbrc algorithm
-# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm
-get "/fminer/bbrc/?" do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full))
-  algorithm.metadata = {
-    DC.title => 'fminer backbone refinement class representatives',
-    DC.creator => "andreas@maunz.de, helma@in-silico.ch",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-#    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc",
-    RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
-    OT.parameters => [
-      { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
-      { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
-      { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
-      { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
-      { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
-    ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'
-    algorithm.to_rdfxml
-  end
-end
-
-# Get RDF/XML representation of fminer last algorithm
-# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm
-get "/fminer/last/?" do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full))
-  algorithm.metadata = {
-    DC.title => 'fminer latent structure class representatives',
-    DC.creator => "andreas@maunz.de, helma@in-silico.ch",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-#    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last",
-    RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
-    OT.parameters => [
-      { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
-      { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
-      { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
-      { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
-    ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'
-    algorithm.to_rdfxml
-  end
-end
-
-# Creates the same features for a dataset that have been created
-# with fminer on a feature dataset
-# accepts params[:nr_hits] as used in other fminer methods
-post '/fminer/:method/match?' do
-  raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri]
-  raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri]
-  task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task|
-    f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid
-    c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid
-    res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid
-    f_dataset.features.each do |f,m|
-      res_dataset.add_feature(f,m)
-    end
-    c_dataset.compounds.each do |c|
-      res_dataset.add_compound(c)
-      comp = OpenTox::Compound.new(c)
-      f_dataset.features.each do |f,m|
-        if params[:nr_hits] == "true"
-          hits = comp.match_hits([m[OT.smarts]])
-          res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]]
-        else
-          res_dataset.add(c,f,1) if comp.match?(m[OT.smarts])
-        end
-      end
-    end
-    res_dataset.save @subjectid
-    res_dataset.uri
-  end
-  return_task(task)
-end
-
-# Run bbrc algorithm on dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
-# @param [optional] parameters BBRC parameters, accepted parameters are
-#   - min_frequency Minimum frequency (default 5)
-#   - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
-#   - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
-#   - min_chisq_significance Significance threshold (between 0 and 1)
-#   - nr_hits Set to "true" to get hit count instead of presence
-# @return [text/uri-list] Task URI
-post '/fminer/bbrc/?' do
-
-  fminer=OpenTox::Algorithm::Fminer.new
-  fminer.check_params(params,5,@subjectid)
-
-  task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do |task|
-    @@bbrc.Reset
-    if fminer.prediction_feature.feature_type == "regression"
-      @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
-    else
-      raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
-        "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
-      @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
-      @value_map=Hash.new
-      @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
-    end
-    @@bbrc.SetMinfreq(fminer.minfreq)
-    @@bbrc.SetType(1) if params[:feature_type] == "paths"
-    @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean
-    @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
-    @@bbrc.SetConsoleOut(false)
-
-    feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
-    feature_dataset.add_metadata({
-      DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
-      DC.creator => url_for('/fminer/bbrc',:full),
-      OT.hasSource => url_for('/fminer/bbrc', :full),
-      OT.parameters => [
-        { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-        { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-      ]
-    })
-    feature_dataset.save(@subjectid)
-
-    fminer.compounds = []
-    fminer.db_class_sizes = Array.new # AM: effect
-    fminer.all_activities = Hash.new # DV: for effect calculation in regression part
-    fminer.smi = [] # AM LAST: needed for matching the patterns back
-
-    # Add data to fminer
-    fminer.add_fminer_data(@@bbrc, params, @value_map)
-
-    g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation
-    g_median=g_array.to_scale.median
-
-    raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
-    task.progress 10
-    step_width = 80 / @@bbrc.GetNoRootNodes().to_f
-    features = Set.new
-
-    # run @@bbrc
-    (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
-      results = @@bbrc.MineRoot(j)
-      task.progress 10+step_width*(j+1)
-      results.each do |result|
-        f = YAML.load(result)[0]
-        smarts = f[0]
-        p_value = f[1]
-
-        if (!@@bbrc.GetRegression)
-          id_arrs = f[2..-1].flatten
-          max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes)
-          effect = f[2..-1].size-max
-        else # regression part
-          id_arrs = f[2]
-          # DV: effect calculation
-          f_arr=Array.new
-          f[2].each do |id|
-            id=id.keys[0] # extract id from hit count hash
-            f_arr.push(fminer.all_activities[id])
-          end
-          f_median=f_arr.to_scale.median
-          if g_median >= f_median
-            effect = 'activating'
-          else
-            effect = 'deactivating'
-          end
-        end
-
-        feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
-        unless features.include? smarts
-          features << smarts
-          metadata = {
-            OT.hasSource => url_for('/fminer/bbrc', :full),
-            RDF.type => [OT.Feature, OT.Substructure],
-            OT.smarts => smarts,
-            OT.pValue => p_value.to_f,
-            OT.effect => effect,
-            OT.parameters => [
-              { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-              { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-            ]
-          }
-          feature_dataset.add_feature feature_uri, metadata
-          #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
-        end
-        id_arrs.each { |id_count_hash|
-          id=id_count_hash.keys[0].to_i
-          count=id_count_hash.values[0].to_i
-          if params[:nr_hits] == "true"
-            feature_dataset.add(fminer.compounds[id], feature_uri, count)
-          else
-            feature_dataset.add(fminer.compounds[id], feature_uri, 1)
-          end
-        }
-
-      end # end of results
-    end # feature parsing
-
-    # AM: add feature values for non-present features
-    # feature_dataset.complete_data_entries
-
-    feature_dataset.save(@subjectid)
-    feature_dataset.uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
-#end
-
-# Run last algorithm on a dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
-# @param [optional] parameters LAST parameters, accepted parameters are
-#   - min_frequency Minimum frequency (default 5)
-#   - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
-#   - hops Maximum number of hops
-#   - nr_hits Set to "true" to get hit count instead of presence
-# @return [text/uri-list] Task URI
-post '/fminer/last/?' do
-
-  fminer=OpenTox::Algorithm::Fminer.new
-  fminer.check_params(params,80,@subjectid)
-
-  task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do |task|
-    @@last.Reset
-    if fminer.prediction_feature.feature_type == "regression"
-      @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
-    else
-      raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
-        "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
-      @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
-      @value_map=Hash.new
-      @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
-    end
-    @@last.SetMinfreq(fminer.minfreq)
-    @@last.SetType(1) if params[:feature_type] == "paths"
-    @@last.SetMaxHops(params[:hops]) if params[:hops]
-    @@last.SetConsoleOut(false)
-
-    feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
-    feature_dataset.add_metadata({
-      DC.title => "LAST representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
-      DC.creator => url_for('/fminer/last',:full),
-      OT.hasSource => url_for('/fminer/last', :full),
-      OT.parameters => [
-        { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-        { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-      ]
-    })
-    feature_dataset.save(@subjectid)
-
-    fminer.compounds = []
-    fminer.db_class_sizes = Array.new # AM: effect
-    fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
-    fminer.smi = [] # AM LAST: needed for matching the patterns back
-
-    # Add data to fminer
-    fminer.add_fminer_data(@@last, params, @value_map)
-
-    raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
-
-    # run @@last
-    features = Set.new
-    xml = ""
-    task.progress 10
-    step_width = 80 / @@last.GetNoRootNodes().to_f
-
-    (0 .. @@last.GetNoRootNodes()-1).each do |j|
-      results = @@last.MineRoot(j)
-      task.progress 10+step_width*(j+1)
-      results.each do |result|
-        xml << result
-      end
-    end
-
-    lu = LU.new # AM LAST: uses last-utils here
-    dom=lu.read(xml) # AM LAST: parse GraphML
-    smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
-    params[:nr_hits] == "true" ? hit_count=true : hit_count=false
-    matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations
-
-    matches.each do |smarts, ids|
-      feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
-      if @@last.GetRegression()
-        p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
-        effect = (p_value > 0) ? "activating" : "deactivating"
-      else
-        p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f
-        g=Array.new
-        @value_map.each { |y,act| g[y-1]=Array.new }
-        feat_hash.each { |x,y| g[y-1].push(x) }
-        max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes)
-        effect = g.size-max
-      end
-      feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s
-      unless features.include? smarts
-        features << smarts
-        metadata = {
-          RDF.type => [OT.Feature, OT.Substructure],
-          OT.hasSource => feature_dataset.uri,
-          OT.smarts => smarts,
-          OT.pValue => p_value.abs,
-          OT.effect => effect,
-          OT.parameters => [
-            { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-            { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-          ]
-        }
-        feature_dataset.add_feature feature_uri, metadata
-      end
-      if !hit_count
-        ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)}
-      else
-        ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])}
-      end
-    end
-
-    # AM: add feature values for non-present features
-    # feature_dataset.complete_data_entries
-
-    feature_dataset.save(@subjectid)
-    feature_dataset.uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
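Both mining routes above map the sorted accept values of the prediction feature to integer codes starting at 1 (code 0 is avoided so that weighted means remain meaningful, see lazar.rb below). A plain-Ruby sketch of that mapping and its inverse, with made-up class labels:

training_classes = ["active", "inactive"].sort

value_map = {}
training_classes.each_with_index { |c,i| value_map[i+1] = c }
# => {1=>"active", 2=>"inactive"}

# Encoding an activity before mining, and decoding a mined result again:
code  = value_map.invert["inactive"]   # => 2
label = value_map[code]                # => "inactive"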
diff --git a/lazar.rb b/lazar.rb
deleted file mode 100644
index 1fe28f8..0000000
--- a/lazar.rb
+++ /dev/null
@@ -1,259 +0,0 @@
-@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc")
-
-# Get RDF/XML representation of the lazar algorithm
-# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
-get '/lazar/?' do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full))
-  algorithm.metadata = {
-    DC.title => 'lazar',
-    DC.creator => "helma@in-silico.ch, andreas@maunz.de",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-#    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar",
-    OT.parameters => [
-      { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
-      { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
-      { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
-      { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" }
-    ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'
-    algorithm.to_rdfxml
-  end
-end
-
-# Create a lazar prediction model
-# @param [String] dataset_uri Training dataset URI
-# @param [optional,String] prediction_feature URI of the feature to be predicted
-# @param [optional,String] feature_generation_uri URI of the feature generation algorithm
-# @param [optional,String] - further parameters for the feature generation service
-# @return [text/uri-list] Task URI
-post '/lazar/?' do
-
-  LOGGER.debug "building lazar model with params: "+params.inspect
-  params[:subjectid] = @subjectid
-  raise OpenTox::NotFoundError.new "No dataset_uri parameter." unless params[:dataset_uri]
-  dataset_uri = params[:dataset_uri]
-
-  task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
-
-    # # # Dataset present, prediction feature present?
-    raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
-    training_activities.load_all(@subjectid)
-
-    # Prediction Feature
-    prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
-    unless params[:prediction_feature] # try to read prediction_feature from dataset
-      raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
-      prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
-      params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
-    end
-    raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
-
-    # Feature Generation URI
-    feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) )
-
-    # Create instance
-    lazar = OpenTox::Model::Lazar.new
-
-    # # # ENDPOINT RELATED
-
-    # Default Values
-    # Classification: Weighted Majority, Substructure.match
-    if prediction_feature.feature_type == "classification"
-      @training_classes = training_activities.accept_values(prediction_feature.uri).sort
-      @training_classes.each_with_index { |c,i|
-        lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later.
-        params[:value_map] = lazar.value_map
-      }
-    # Regression: SVM, Substructure.match_hits
-    elsif prediction_feature.feature_type == "regression"
-      lazar.feature_calculation_algorithm = "Substructure.match_hits"
-      lazar.prediction_algorithm = "Neighbors.local_svm_regression"
-    end
-
-    # # # USER VALUES
-
-    # Min Sim
-    min_sim = params[:min_sim].to_f if params[:min_sim]
-    min_sim = 0.3 unless params[:min_sim]
-
-    # Algorithm
-    lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm]
-
-    # Nr Hits
-    nr_hits = false
-    if params[:nr_hits] == "true" || lazar.prediction_algorithm.include?("local_svm")
-      lazar.feature_calculation_algorithm = "Substructure.match_hits"
-      nr_hits = true
-    end
-    params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" # not sure if this line is needed
-
-    # Propositionalization
-    propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true)
-
-    # PC type
-    pc_type = params[:pc_type] unless params[:pc_type].nil?
-
-    # Min train performance
-    min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance]
-    min_train_performance = 0.1 unless params[:min_train_performance]
-
-    task.progress 10
-
-    # # # Features
-
-    # Read Features
-    if params[:feature_dataset_uri]
-      lazar.feature_calculation_algorithm = "Substructure.lookup"
-      feature_dataset_uri = params[:feature_dataset_uri]
-      training_features = OpenTox::Dataset.new(feature_dataset_uri)
-      if training_features.feature_type(@subjectid) == "regression"
-        lazar.similarity_algorithm = "Similarity.cosine"
-        min_sim = 0.4 unless params[:min_sim]
-        raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type]
-      end
-
-    # Create Features
-    else
-      params[:feature_generation_uri] = feature_generation_uri
-      params[:subjectid] = @subjectid
-      prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
-      if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
-        params[:feature_type] = "paths" unless params[:feature_type]
-      end
-      feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params, OpenTox::SubTask.new(task,10,70)).to_s
-      training_features = OpenTox::Dataset.new(feature_dataset_uri)
-    end
-
-    # # # Write fingerprints
-    training_features.load_all(@subjectid)
-    raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil?
-
-    training_features.data_entries.each do |compound,entry|
-      if training_activities.data_entries.has_key? compound
-
-        lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
-        entry.keys.each do |feature|
-
-          # CASE 1: Substructure
-          if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits")
-            if training_features.features[feature]
-              smarts = training_features.features[feature][OT.smarts]
-              #lazar.fingerprints[compound] << smarts
-              if lazar.feature_calculation_algorithm == "Substructure.match_hits"
-                lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue]
-              else
-                lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue]
-              end
-              unless lazar.features.include? smarts
-                lazar.features << smarts
-                lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
-                lazar.effects[smarts] = training_features.features[feature][OT.effect]
-              end
-            end
-
-          # CASE 2: Others
-          elsif entry[feature].flatten.size == 1
-            lazar.fingerprints[compound][feature] = entry[feature].flatten.first
-            lazar.features << feature unless lazar.features.include? feature
-          else
-            LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
-          end
-        end
-
-      end
-    end
-    task.progress 80
-
-    # # # Activities
-
-    if prediction_feature.feature_type == "regression"
-      training_activities.data_entries.each do |compound,entry|
-        lazar.activities[compound] = [] unless lazar.activities[compound]
-        unless entry[prediction_feature.uri].empty?
-          entry[prediction_feature.uri].each do |value|
-            lazar.activities[compound] << value
-          end
-        end
-      end
-    elsif prediction_feature.feature_type == "classification"
-      training_activities.data_entries.each do |compound,entry|
-        lazar.activities[compound] = [] unless lazar.activities[compound]
-        unless entry[prediction_feature.uri].empty?
-          entry[prediction_feature.uri].each do |value|
-            lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals
-          end
-        end
-      end
-    end
-    task.progress 90
-
-    # Metadata
-
-    lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
-    lazar.metadata[OT.dependentVariables] = prediction_feature.uri
-    lazar.metadata[OT.trainingDataset] = dataset_uri
-    lazar.metadata[OT.featureDataset] = feature_dataset_uri
-    case training_activities.feature_type(@subjectid)
-    when "classification"
-      lazar.metadata[RDF.type] = [OT.Model, OTA.ClassificationLazySingleTarget]
-    when "regression"
-      lazar.metadata[RDF.type] = [OT.Model, OTA.RegressionLazySingleTarget]
-    end
-
-    lazar.metadata[OT.parameters] = [
-      {DC.title => "dataset_uri", OT.paramValue => dataset_uri},
-      {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri},
-      {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri},
-      {DC.title => "propositionalized", OT.paramValue => propositionalized},
-      {DC.title => "pc_type", OT.paramValue => pc_type},
-      {DC.title => "nr_hits", OT.paramValue => nr_hits},
-      {DC.title => "min_sim", OT.paramValue => min_sim},
-      {DC.title => "min_train_performance", OT.paramValue => min_train_performance},
-    ]
-
-    model_uri = lazar.save(@subjectid)
-    LOGGER.info model_uri + " created #{Time.now}"
-    model_uri
-
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri
-end
-
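lazar keeps a substructure fingerprint per compound and only accepts neighbors above min_sim (0.3 by default, 0.4 for cosine similarity on property vectors). A sketch of the standard Tanimoto index on two such fingerprint key sets (the SMARTS keys are illustrative, not taken from a real model):

require 'set'

# Tanimoto index: |A ∩ B| / |A ∪ B|
def tanimoto(a, b)
  a, b = a.to_set, b.to_set
  return 0.0 if (a | b).empty?
  (a & b).size.to_f / (a | b).size
end

fp_query    = ["c1ccccc1", "C=O", "N"]
fp_neighbor = ["c1ccccc1", "C=O", "Cl"]

min_sim = 0.3
sim = tanimoto(fp_query, fp_neighbor)   # => 0.5
puts "neighbor accepted" if sim >= min_sim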
diff --git a/openbabel.rb b/openbabel.rb
deleted file mode 100644
index 463663e..0000000
--- a/openbabel.rb
+++ /dev/null
@@ -1,148 +0,0 @@
-OBMOL_METHODS = {
-  "NumAtoms" => "Number of atoms",
-  "NumBonds" => "Number of bonds",
-  "NumHvyAtoms" => "Number of heavy atoms",
-  "NumResidues" => "Number of residues",
-  "NumRotors" => "Number of rotatable bonds",
-  "GetEnergy" => "Heat of formation for this molecule (in kcal/mol)",
-  "GetMolWt" => "Standard molar mass given by IUPAC atomic masses (amu)",
-  "GetExactMass" => "Mass given by isotopes (or most abundant isotope, if not specified)",
-  "GetTotalCharge" => "Total charge",
-}
-
-OBDESCRIPTOR_METHODS = {
-  "HBA1" => "Number of hydrogen bond acceptors 1 (JoelLib)",
-  "HBA2" => "Number of hydrogen bond acceptors 2 (JoelLib)",
-  "HBD" => "Number of hydrogen bond donors (JoelLib)",
-  "L5" => "Lipinski rule of five",
-  "logP" => "Octanol/water partition coefficient",
-  "MR" => "Molar refractivity",
-  "MW" => "Molecular weight",
-  "nF" => "Number of fluorine atoms",
-  "nHal" => "Number of halogen atoms",
-  "spinMult" => "Total spin multiplicity",
-  "TPSA" => "Topological polar surface area",
-}
-
-# Get a list of OpenBabel algorithms
-# @return [text/uri-list] URIs of OpenBabel algorithms
-get '/openbabel' do
-  algorithms = OBMOL_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
-  algorithms << OBDESCRIPTOR_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
-  response['Content-Type'] = 'text/uri-list'
-  algorithms.join("\n")
-end
-
-# Get RDF/XML representation of OpenBabel algorithm
-# @return [application/rdf+xml] OWL-DL representation of OpenBabel algorithm
-get '/openbabel/:property' do
-  description = OBMOL_METHODS[params[:property]] if OBMOL_METHODS.include? params[:property]
-  description = OBDESCRIPTOR_METHODS[params[:property]] if OBDESCRIPTOR_METHODS.include? params[:property]
-  if description
-    algorithm = OpenTox::Algorithm::Generic.new(url_for("/openbabel/#{params[:property]}",:full))
-    algorithm.metadata = {
-      DC.title => params[:property],
-      DC.creator => "helma@in-silico.ch",
-      DC.description => description,
-      RDF.type => [OTA.DescriptorCalculation],
-    }
-    response['Content-Type'] = 'application/rdf+xml'
-    algorithm.to_rdfxml
-  else
-    raise OpenTox::NotFoundError.new "Unknown OpenBabel descriptor #{params[:property]}."
-  end
-end
-
-# Calculate OpenBabel descriptors
-# Supports the following OpenBabel methods (see OpenBabel API http://openbabel.org/api/2.2.0/)
-# - NumAtoms Number of atoms
-# - NumBonds Number of bonds
-# - NumHvyAtoms Number of heavy atoms
-# - NumResidues Number of residues
-# - NumRotors Number of rotatable bonds
-# - GetEnergy Heat of formation for this molecule (in kcal/mol)
-# - GetMolWt Standard molar mass given by IUPAC atomic masses (amu)
-# - GetExactMass Mass given by isotopes (or most abundant isotope, if not specified)
-# - GetTotalCharge Total charge
-# - HBA1 Number of hydrogen bond acceptors 1 (JoelLib)
-# - HBA2 Number of hydrogen bond acceptors 2 (JoelLib)
-# - HBD Number of hydrogen bond donors (JoelLib)
-# - L5 Lipinski rule of five
-# - logP Octanol/water partition coefficient
-# - MR Molar refractivity
-# - MW Molecular weight
-# - nF Number of fluorine atoms
-# - nHal Number of halogen atoms
-# - spinMult Total spin multiplicity
-# - TPSA Topological polar surface area
-# @param [String] compound_uri Compound URI
-# @return [String] descriptor value
-post '/openbabel/:property' do
-  obconversion = OpenBabel::OBConversion.new
-  obmol = OpenBabel::OBMol.new
-  compound = OpenTox::Compound.new params[:compound_uri]
-  obconversion.set_in_and_out_formats 'inchi', 'can'
-  obconversion.read_string obmol, compound.to_inchi
-  if OBMOL_METHODS.keys.include? params[:property]
-    eval("obmol.#{params[:property].underscore}").to_s
-  elsif OBDESCRIPTOR_METHODS.keys.include? params[:property]
-    descriptor = OpenBabel::OBDescriptor.find_type(params[:property])
-    descriptor.predict(obmol).to_s
-  else
-    raise OpenTox::NotFoundError.new "Cannot calculate property #{params[:property]} with OpenBabel"
-  end
-end
-
-# Calculate all OpenBabel descriptors for a dataset
-# @param [String] dataset_uri Dataset URI
-# @return [text/uri-list] Task URI
-post '/openbabel' do
-  task = OpenTox::Task.create("Calculating OpenBabel descriptors for #{params[:dataset_uri]}", url_for('/openbabel',:full)) do
-
-    dataset = OpenTox::Dataset.find(params[:dataset_uri])
-    result_dataset = OpenTox::Dataset.create
-    result_dataset.add_metadata({
-      DC.title => "OpenBabel descriptors for " + dataset.metadata[DC.title].to_s,
-      DC.creator => url_for('/openbabel',:full),
-      OT.hasSource => url_for('/openbabel', :full),
-      OT.parameters => [
-        { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-      ]
-    })
-
-    obconversion = OpenBabel::OBConversion.new
-    obmol = OpenBabel::OBMol.new
-    obconversion.set_in_and_out_formats 'inchi', 'can'
-
-    OBMOL_METHODS.merge(OBDESCRIPTOR_METHODS).each do |name,description|
-      feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
-      metadata = {
-        OT.hasSource => url_for("/openbabel/#{name}", :full),
-        DC.description => description,
-        DC.title => name,
-      }
-      result_dataset.add_feature feature_uri, metadata
-    end
-
-    dataset.compounds.each do |compound_uri|
-      compound = OpenTox::Compound.new(compound_uri)
-      obconversion.read_string obmol, compound.to_inchi
-      #result_dataset.add_compound compound_uri
-      OBMOL_METHODS.keys.each do |name|
-        feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
-        value = eval("obmol.#{name.underscore}").to_f
-        result_dataset.add compound_uri, feature_uri, value
-      end
-      OBDESCRIPTOR_METHODS.keys.each do |name|
-        feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
-        value = OpenBabel::OBDescriptor.find_type(name).predict(obmol).to_f
-        result_dataset.add compound_uri, feature_uri, value
-      end
-    end
-    result_dataset.save
-    result_dataset.uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
diff --git a/similarity.rb b/similarity.rb
deleted file mode 100644
index faf43f9..0000000
--- a/similarity.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-require File.join(File.dirname(__FILE__),'dataset.rb')
-
-helpers do
-  def find
-    # + charges are dropped
-    uri = uri(params[:splat].first.gsub(/(InChI.*) (.*)/,'\1+\2')) # reinsert dropped '+' signs in InChIs
-    raise OpenTox::NotFoundError.new "Dataset \"#{uri}\" not found." unless @set = Dataset.find(uri)
-  end
-
-  def uri(name)
-    name = URI.encode(name)
-    uri = File.join Dataset.base_uri, name
-  end
-end
-
-get '/tanimoto/dataset/*/dataset/*/?' do
-  find
-  @set.tanimoto(uri(params[:splat][1]))
-end
-
-get '/weighted_tanimoto/dataset/*/dataset/*/?' do
-  find
-  @set.weighted_tanimoto(uri(params[:splat][1]))
-end
-
diff --git a/smarts.rb b/smarts.rb
deleted file mode 100644
index 4ae6949..0000000
--- a/smarts.rb
+++ /dev/null
@@ -1,3 +0,0 @@
-get '/match/compound/*/smarts/*/?' do
-  "#{OpenTox::Compound.from_inchi(params[:splat][0]).match?(params[:splat][1])}"
-end
--
cgit v1.2.3
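For reference, the per-compound calculation that the deleted /openbabel routes wrapped can be reproduced directly with the openbabel Ruby bindings, assuming the openbabel gem is installed (the InChI below is an example input, not taken from the repository):

require 'openbabel'

inchi = "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"   # benzene, example input

obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
obconversion.set_in_and_out_formats 'inchi', 'can'
obconversion.read_string obmol, inchi

puts obmol.num_atoms                                             # OBMol method, as in OBMOL_METHODS
puts obmol.get_mol_wt                                            # standard molar mass (amu)
puts OpenBabel::OBDescriptor.find_type("logP").predict(obmol)    # descriptor, as in OBDESCRIPTOR_METHODS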