Deleted obsolete files

author: Andreas Maunz <andreas@maunz.de> 2012-03-26 17:17:54 +0200
committer: Andreas Maunz <andreas@maunz.de> 2012-03-26 17:17:54 +0200
commit: ef102b11ecdf9258f1942c9a25fa6db2772cfb89 (patch)
tree: 44e3e1029695c709f86ec4e82f3a345a9793520d
parent: 9cf07c58e6b5c51ff78ea56574ddf90308861ce2 (diff)
7 files changed, 0 insertions, 997 deletions
diff --git a/balancer.rb b/balancer.rb
deleted file mode 100644
index 4ed2fd7..0000000
--- a/balancer.rb
+++ /dev/null
@@ -1,98 +0,0 @@
-# cuts a classification dataset into balanced pieces
-# let inact_act_ratio := majority_class.size/minority_class.size 
-# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5
-# each piece contains the complete minority class and ceil(inact_act_ratio) majority class compounds.
-
-class Balancer
-
-  attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets
-
-  # Supply a OpenTox::Dataset here
-  # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given
-  def initialize(dataset, feature_uri, creator_url)
-    @act_arr = []
-    @inact_arr = []
-    @inact_act_ratio = 1.0/0  # trick to define +infinity
-    @nr_majority_splits = 1   # +/-1 means: no split
-    @split = []               # splitted arrays with ids
-    @datasets = []            # result datasets
-    @errors = []
-
-    classification = true
-    if dataset.features.include?(feature_uri)
-      dataset.data.each do |i,a|
-        inchi = i
-        acts = a
-        acts.each do |act|
-          value = act[feature_uri]
-          if OpenTox::Utils.is_true?(value)
-            @act_arr << inchi
-          elsif OpenTox::Utils.classification?(value)
-            @inact_arr << inchi
-          else
-            classification = false
-            break;
-          end
-        end
-      end
-      @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression
-      set_nr_majority_splits
-      # perform majority split
-      @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1
-      @split.each do |s|
-        new_c = @nr_majority_splits > 0 ? s.concat(@act_arr) : s.concat(@inac_arr)
-        @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url)
-      end
-
-    else
-      errors << "Feature not present in dataset."
-    end
-    errors << "Can not split regression dataset." unless classification
-  end
-
-
-
-  # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values.
-  def set_nr_majority_splits
-    @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression
-  end
-
-  # does the actual shuffle and split
-  def shuffle_split (arr)
-    arr = arr.shuffle
-    arr.chunk(@nr_majority_splits.abs)
-  end
-
-  # turns a hash into a 2 col csv
-  def hsh2csv (hsh)
-    res=""
-    hsh.each do |k,v|
-      arr = [v,(@nr_majority_splits > 0 ? 0 : 1)]
-      res += arr.join(", ") + "\n"
-    end
-    res
-  end
-
-end
-
-class Array
-
-  # cuts an array into <num-pieces> chunks - returns a two-dimensional array
-  def chunk(pieces)
-    q, r = length.divmod(pieces)
-    (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \
-      .map { |a, b| slice(a...b) }
-  end
-
-  # shuffles the elements of an array
-  def shuffle( seed=nil )
-    srand seed.to_i if seed
-    sort_by { Kernel.rand }
-  end
-
-  # shuffels self
-  def shuffle!( seed=nil )
-    self.replace shuffle( seed )
-  end
-
-end
diff --git a/feature_selection.rb b/feature_selection.rb
deleted file mode 100644
index 556ccf8..0000000
--- a/feature_selection.rb
+++ /dev/null
@@ -1,93 +0,0 @@
-# Get list of feature_selection algorithms
-#
-# @return [text/uri-list] URIs of feature_selection algorithms
-get '/feature_selection/?' do
-  list = [ url_for('/feature_selection/rfe', :full) ].join("\n") + "\n"
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html list
-  else
-    content_type 'text/uri-list'
-    list
-  end
-end
-
-# Get RDF/XML representation of feature_selection rfe algorithm
-# @return [application/rdf+xml] OWL-DL representation of feature_selection rfe algorithm
-get "/feature_selection/rfe/?" do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/feature_selection/rfe',:full))
-  algorithm.metadata = {
-    DC.title => 'recursive feature elimination',
-    DC.creator => "andreas@maunz.de, helma@in-silico.ch",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#feature_selection_rfe",
-    RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
-    OT.parameters => [
-      { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature_uri" },
-      { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" },
-      { DC.description => "Delete Instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" }
-  ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'  
-    algorithm.to_rdfxml
-  end
-end
-
-# Run rfe algorithm on dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] feature_dataset_uri URI of the feature dataset
-# @return [text/uri-list] Task URI
-post '/feature_selection/rfe/?' do 
-
-  raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri]
-  raise OpenTox::NotFoundError.new "Please submit a prediction_feature_uri." unless params[:prediction_feature_uri]
-  raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri]
-
-  ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} )
-  tf_ds=Tempfile.open(['rfe_', '.csv'])
-  tf_ds.puts(ds_csv)
-  tf_ds.flush()
-
-  prediction_feature = params[:prediction_feature_uri].split('/').last # get col name
-  
-  fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"})
-  tf_fds=Tempfile.open(['rfe_', '.csv'])
-  tf_fds.puts(fds_csv)
-  tf_fds.flush()
-
-  del_missing = params[:del_missing] == "true" ? true : false
-
-  task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task|
-    r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } )
-    
-    parser = OpenTox::Parser::Spreadsheets.new
-    ds = OpenTox::Dataset.new
-    ds.save
-    parser.dataset = ds
-    ds = parser.load_csv(File.open(r_result_file).read,false,true)
-    ds.save    
-    r_result_uri = ds.uri
-    #r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri
-    begin
-      tf_ds.close!; tf_fds.close! 
-      File.unlink(r_result_file)
-    rescue
-    end
-    r_result_uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
-
diff --git a/fminer.rb b/fminer.rb
deleted file mode 100644
index 95ce469..0000000
--- a/fminer.rb
+++ /dev/null
@@ -1,370 +0,0 @@
-ENV['FMINER_SMARTS'] = 'true'
-ENV['FMINER_NO_AROMATIC'] = 'true'
-ENV['FMINER_PVALUES'] = 'true'
-ENV['FMINER_SILENT'] = 'true'
-ENV['FMINER_NR_HITS'] = 'true'
-
-@@bbrc = Bbrc::Bbrc.new 
-@@last = Last::Last.new 
-
-# Get list of fminer algorithms
-#
-# @return [text/uri-list] URIs of fminer algorithms
-get '/fminer/?' do
-  list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html list
-  else
-    content_type 'text/uri-list'
-    list
-  end
-end
-
-# Get RDF/XML representation of fminer bbrc algorithm
-# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm
-get "/fminer/bbrc/?" do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full))
-  algorithm.metadata = {
-    DC.title => 'fminer backbone refinement class representatives',
-    DC.creator => "andreas@maunz.de, helma@in-silico.ch",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-#    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc",
-    RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
-    OT.parameters => [
-      { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
-      { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
-      { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
-      { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
-      { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
-  ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'  
-    algorithm.to_rdfxml
-  end
-end
-
-# Get RDF/XML representation of fminer last algorithm
-# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm
-get "/fminer/last/?" do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full))
-  algorithm.metadata = {
-    DC.title => 'fminer latent structure class representatives',
-    DC.creator => "andreas@maunz.de, helma@in-silico.ch",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-#    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last",
-    RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
-    OT.parameters => [
-      { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
-      { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
-      { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
-      { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
-  ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'  
-    algorithm.to_rdfxml
-  end
-end
-
-# Creates same features for dataset <dataset_uri> that have been created
-# with fminer in dataset <feature_dataset_uri>
-# accept params[:nr_hits] as used in other fminer methods 
-post '/fminer/:method/match?' do 
-  raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri]
-  raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] 
-  task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task|
-    f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid
-    c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid
-    res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid
-    f_dataset.features.each do |f,m|
-      res_dataset.add_feature(f,m)
-    end
-    c_dataset.compounds.each do |c|
-      res_dataset.add_compound(c)
-      comp = OpenTox::Compound.new(c)
-      f_dataset.features.each do |f,m|
-        if params[:nr_hits] == "true"
-          hits = comp.match_hits([m[OT.smarts]])
-          res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]]          
-        else
-          res_dataset.add(c,f,1) if comp.match?(m[OT.smarts])
-        end
-      end
-    end
-    res_dataset.save @subjectid
-    res_dataset.uri
-  end
-  return_task(task)
-end 
-
-# Run bbrc algorithm on dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
-# @param [optional] parameters BBRC parameters, accepted parameters are
-#   - min_frequency  Minimum frequency (default 5)
-#   - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
-#   - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
-#   - min_chisq_significance Significance threshold (between 0 and 1)
-#   - nr_hits Set to "true" to get hit count instead of presence
-# @return [text/uri-list] Task URI
-post '/fminer/bbrc/?' do 
-
-  fminer=OpenTox::Algorithm::Fminer.new
-  fminer.check_params(params,5,@subjectid)
-
-  task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do |task|
-    @@bbrc.Reset
-    if fminer.prediction_feature.feature_type == "regression"
-      @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
-    else
-      raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
-        "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
-      @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
-      @value_map=Hash.new
-      @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
-    end
-    @@bbrc.SetMinfreq(fminer.minfreq)
-    @@bbrc.SetType(1) if params[:feature_type] == "paths"
-    @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean
-    @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
-    @@bbrc.SetConsoleOut(false)
-
-    feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
-    feature_dataset.add_metadata({
-      DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
-      DC.creator => url_for('/fminer/bbrc',:full),
-      OT.hasSource => url_for('/fminer/bbrc', :full),
-      OT.parameters => [
-        { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-        { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-    ]
-    })
-    feature_dataset.save(@subjectid)
-
-    fminer.compounds = []
-    fminer.db_class_sizes = Array.new # AM: effect
-    fminer.all_activities = Hash.new # DV: for effect calculation in regression part
-    fminer.smi = [] # AM LAST: needed for matching the patterns back
-
-    # Add data to fminer
-    fminer.add_fminer_data(@@bbrc, params, @value_map)
-
-    g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation
-    g_median=g_array.to_scale.median
-
-    raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
-    task.progress 10
-    step_width = 80 / @@bbrc.GetNoRootNodes().to_f
-    features = Set.new
-
-    # run @@bbrc
-    (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
-      results = @@bbrc.MineRoot(j)
-      task.progress 10+step_width*(j+1)
-      results.each do |result|
-        f = YAML.load(result)[0]
-        smarts = f[0]
-        p_value = f[1]
-
-        if (!@@bbrc.GetRegression) 
-          id_arrs = f[2..-1].flatten
-          max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes)
-          effect = f[2..-1].size-max
-        else #regression part
-          id_arrs = f[2]
-          # DV: effect calculation
-          f_arr=Array.new
-          f[2].each do |id|
-            id=id.keys[0] # extract id from hit count hash
-            f_arr.push(fminer.all_activities[id]) 
-          end 
-          f_median=f_arr.to_scale.median
-          if g_median >= f_median 
-            effect = 'activating'
-          else
-            effect = 'deactivating'
-          end
-        end
-
-        feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
-        unless features.include? smarts
-          features << smarts
-          metadata = {
-            OT.hasSource => url_for('/fminer/bbrc', :full),
-            RDF.type => [OT.Feature, OT.Substructure],
-            OT.smarts => smarts,
-            OT.pValue => p_value.to_f,
-            OT.effect => effect,
-            OT.parameters => [
-              { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-              { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-          ]
-          }
-          feature_dataset.add_feature feature_uri, metadata
-          #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
-        end
-        id_arrs.each { |id_count_hash|
-          id=id_count_hash.keys[0].to_i
-          count=id_count_hash.values[0].to_i
-          if params[:nr_hits] == "true"
-            feature_dataset.add(fminer.compounds[id], feature_uri, count)
-          else
-            feature_dataset.add(fminer.compounds[id], feature_uri, 1)
-          end
-        }
-
-      end # end of 
-    end   # feature parsing
-
-    # AM: add feature values for non-present features
-    # feature_dataset.complete_data_entries 
-
-    feature_dataset.save(@subjectid) 
-    feature_dataset.uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
-#end
-
-# Run last algorithm on a dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
-# @param [optional] parameters LAST parameters, accepted parameters are
-#   - min_frequency freq  Minimum frequency (default 5)
-#   - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
-#   - hops Maximum number of hops
-#   - nr_hits Set to "true" to get hit count instead of presence
-# @return [text/uri-list] Task URI
-post '/fminer/last/?' do
-
-  fminer=OpenTox::Algorithm::Fminer.new
-  fminer.check_params(params,80,@subjectid)
-
-  task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do |task|
-    @@last.Reset
-    if fminer.prediction_feature.feature_type == "regression"
-      @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
-    else
-      raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
-        "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
-      @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
-      @value_map=Hash.new
-      @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
-    end
-    @@last.SetMinfreq(fminer.minfreq)
-    @@last.SetType(1) if params[:feature_type] == "paths"
-    @@last.SetMaxHops(params[:hops]) if params[:hops]
-    @@last.SetConsoleOut(false)
-
-
-    feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
-    feature_dataset.add_metadata({
-      DC.title => "LAST representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
-      DC.creator => url_for('/fminer/last',:full),
-      OT.hasSource => url_for('/fminer/last', :full),
-      OT.parameters => [
-        { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-        { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-    ]
-    })
-    feature_dataset.save(@subjectid)
-
-    fminer.compounds = []
-    fminer.db_class_sizes = Array.new # AM: effect
-    fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
-    fminer.smi = [] # AM LAST: needed for matching the patterns back
-
-    # Add data to fminer
-    fminer.add_fminer_data(@@last, params, @value_map)
-
-    raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
-
-    # run @@last
-    features = Set.new
-    xml = ""
-    task.progress 10
-    step_width = 80 / @@last.GetNoRootNodes().to_f
-
-    (0 .. @@last.GetNoRootNodes()-1).each do |j|
-      results = @@last.MineRoot(j)
-      task.progress 10+step_width*(j+1)
-      results.each do |result|
-        xml << result
-      end
-    end
-
-    lu = LU.new                             # AM LAST: uses last-utils here
-    dom=lu.read(xml)                        # AM LAST: parse GraphML 
-    smarts=lu.smarts_rb(dom,'nls')          # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
-    params[:nr_hits] == "true" ? hit_count=true: hit_count=false
-    matches, counts = lu.match_rb(fminer.smi,smarts,hit_count)       # AM LAST: creates instantiations
-
-    matches.each do |smarts, ids|
-      feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
-      if @@last.GetRegression() 
-        p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
-        effect = (p_value > 0) ? "activating" : "deactivating"
-      else
-        p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f
-        g=Array.new
-        @value_map.each { |y,act| g[y-1]=Array.new }
-        feat_hash.each  { |x,y|   g[y-1].push(x)   }
-        max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes)
-        effect = g.size-max
-      end
-      feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s
-      unless features.include? smarts
-        features << smarts
-        metadata = {
-          RDF.type => [OT.Feature, OT.Substructure],
-          OT.hasSource => feature_dataset.uri,
-          OT.smarts => smarts,
-          OT.pValue => p_value.abs,
-          OT.effect => effect,
-          OT.parameters => [
-            { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-            { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
-        ]
-        } 
-        feature_dataset.add_feature feature_uri, metadata
-      end
-      if !hit_count
-        ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)}
-      else
-        ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} 
-      end
-    end
-
-    # AM: add feature values for non-present features
-    # feature_dataset.complete_data_entries 
-
-    feature_dataset.save(@subjectid) 
-    feature_dataset.uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
diff --git a/lazar.rb b/lazar.rb
deleted file mode 100644
index 1fe28f8..0000000
--- a/lazar.rb
+++ /dev/null
@@ -1,259 +0,0 @@
-@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc")
-
-# Get RDF/XML representation of the lazar algorithm
-# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
-get '/lazar/?' do
-  algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full))
-  algorithm.metadata = {
-    DC.title => 'lazar',
-    DC.creator => "helma@in-silico.ch, andreas@maunz.de",
-    DC.contributor => "vorgrimmlerdavid@gmx.de",
-#    BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar",
-    OT.parameters => [
-      { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
-      { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
-      { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
-      { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
-      { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" }
-    ]
-  }
-  case request.env['HTTP_ACCEPT']
-  when /text\/html/
-    content_type "text/html"
-    OpenTox.text_to_html algorithm.to_yaml
-  when /application\/x-yaml/
-    content_type "application/x-yaml"
-    algorithm.to_yaml
-  else
-    response['Content-Type'] = 'application/rdf+xml'  
-    algorithm.to_rdfxml
-  end
-end
-
-# Create a lazar prediction model
-# @param [String] dataset_uri Training dataset URI
-# @param [optional,String] prediction_feature URI of the feature to be predicted
-# @param [optional,String] feature_generation_uri URI of the feature generation algorithm 
-# @param [optional,String] - further parameters for the feature generation service 
-# @return [text/uri-list] Task URI 
-post '/lazar/?' do 
-
-  LOGGER.debug "building lazar model with params: "+params.inspect
-  params[:subjectid] = @subjectid
-  raise OpenTox::NotFoundError.new "No dataset_uri parameter." unless params[:dataset_uri]
-	dataset_uri = params[:dataset_uri]
-
-  task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
-
-
-    # # # Dataset present, prediction feature present?
-    raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
-    training_activities.load_all(@subjectid)
-
-    # Prediction Feature
-    prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
-    unless params[:prediction_feature] # try to read prediction_feature from dataset
-    raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a  prediction_feature parameter." unless training_activities.features.size == 1
-      prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
-      params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
-    end
-    raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
-    
-    # Feature Generation URI
-    feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) )
-
-    # Create instance
-		lazar = OpenTox::Model::Lazar.new
-    
-
-
-
-    # # # ENDPOINT RELATED
-    
-    # Default Values
-    # Classification: Weighted Majority, Substructure.match
-    if prediction_feature.feature_type == "classification"
-      @training_classes = training_activities.accept_values(prediction_feature.uri).sort
-      @training_classes.each_with_index { |c,i|
-        lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later.
-        params[:value_map] = lazar.value_map
-      }
-    # Regression: SVM, Substructure.match_hits
-    elsif  prediction_feature.feature_type == "regression"
-      lazar.feature_calculation_algorithm = "Substructure.match_hits" 
-      lazar.prediction_algorithm = "Neighbors.local_svm_regression" 
-    end
-
-
-
-
-    # # # USER VALUES
-    
-    # Min Sim
-    min_sim = params[:min_sim].to_f if params[:min_sim]
-    min_sim = 0.3 unless params[:min_sim]
-
-    # Algorithm
-    lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm]
-
-    # Nr Hits
-    nr_hits = false
-    if params[:nr_hits] == "true" || lazar.prediction_algorithm.include?("local_svm")
-      lazar.feature_calculation_algorithm = "Substructure.match_hits"
-      nr_hits = true
-    end
-    params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed 
-
-    # Propositionalization
-    propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true)
-   
-    # PC type
-    pc_type = params[:pc_type] unless params[:pc_type].nil?
-
-    # Min train performance
-    min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance]
-    min_train_performance = 0.1 unless params[:min_train_performance]
-
-
-
-
-
-
-    task.progress 10
-
-
-
-
-
-    # # # Features
-
-    # Read Features
-    if params[:feature_dataset_uri]
-      lazar.feature_calculation_algorithm = "Substructure.lookup"
-      feature_dataset_uri = params[:feature_dataset_uri]
-      training_features = OpenTox::Dataset.new(feature_dataset_uri)
-      if training_features.feature_type(@subjectid) == "regression"
-        lazar.similarity_algorithm = "Similarity.cosine"
-        min_sim = 0.4 unless params[:min_sim]
-        raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type]
-      end
-
-    # Create Features
-    else 
-      params[:feature_generation_uri] = feature_generation_uri
-      params[:subjectid] = @subjectid
-      prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
-      if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) 
-        params[:feature_type] = "paths" unless params[:feature_type]
-      end
-      feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params, OpenTox::SubTask.new(task,10,70)).to_s
-      training_features = OpenTox::Dataset.new(feature_dataset_uri)
-    end
-
-
-
-    # # # Write fingerprints
-    training_features.load_all(@subjectid)
-		raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil?
-
-    training_features.data_entries.each do |compound,entry|
-      if training_activities.data_entries.has_key? compound
-
-        lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
-        entry.keys.each do |feature|
-
-          # CASE 1: Substructure
-          if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits")
-            if training_features.features[feature]
-              smarts = training_features.features[feature][OT.smarts]
-              #lazar.fingerprints[compound] << smarts
-              if lazar.feature_calculation_algorithm == "Substructure.match_hits"
-                lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue]
-              else
-                lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue]
-              end
-              unless lazar.features.include? smarts
-                lazar.features << smarts
-                lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
-                lazar.effects[smarts] = training_features.features[feature][OT.effect]
-              end
-            end
-
-          # CASE 2: Others
-          elsif entry[feature].flatten.size == 1
-            lazar.fingerprints[compound][feature] = entry[feature].flatten.first
-            lazar.features << feature unless lazar.features.include? feature
-          else
-            LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
-          end
-        end
-
-      end
-    end
-    task.progress 80
-
-
-
-
-    
-    # # # Activities
-  
-    if prediction_feature.feature_type == "regression"
-      training_activities.data_entries.each do |compound,entry| 
-        lazar.activities[compound] = [] unless lazar.activities[compound]
-        unless entry[prediction_feature.uri].empty?
-          entry[prediction_feature.uri].each do |value|
-            lazar.activities[compound] << value
-          end
-        end
-      end
-    elsif prediction_feature.feature_type == "classification"
-      training_activities.data_entries.each do |compound,entry| 
-        lazar.activities[compound] = [] unless lazar.activities[compound]
-        unless entry[prediction_feature.uri].empty?
-          entry[prediction_feature.uri].each do |value|
-            lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals
-          end
-        end
-      end
-    end
-    task.progress 90
-
-
-
-
-    # Metadata
-
-    lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
-    lazar.metadata[OT.dependentVariables] = prediction_feature.uri
-    lazar.metadata[OT.trainingDataset] = dataset_uri
-		lazar.metadata[OT.featureDataset] = feature_dataset_uri
-    case training_activities.feature_type(@subjectid)
-    when "classification"
-      lazar.metadata[RDF.type] = [OT.Model, OTA.ClassificationLazySingleTarget]
-    when "regression"
-      lazar.metadata[RDF.type] = [OT.Model, OTA.RegressionLazySingleTarget]
-    end
-
-    lazar.metadata[OT.parameters] = [
-      {DC.title => "dataset_uri", OT.paramValue => dataset_uri},
-      {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri},
-      {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri},
-      {DC.title => "propositionalized", OT.paramValue => propositionalized},
-      {DC.title => "pc_type", OT.paramValue => pc_type},
-      {DC.title => "nr_hits", OT.paramValue => nr_hits},
-      {DC.title => "min_sim", OT.paramValue => min_sim},
-      {DC.title => "min_train_performance", OT.paramValue => min_train_performance},
-
-    ]
-		
-		model_uri = lazar.save(@subjectid)
-		LOGGER.info model_uri + " created #{Time.now}"
-    model_uri
-
-	end
-  response['Content-Type'] = 'text/uri-list' 
-  raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri
-end
-
diff --git a/openbabel.rb b/openbabel.rb
deleted file mode 100644
index 463663e..0000000
--- a/openbabel.rb
+++ /dev/null
@@ -1,148 +0,0 @@
-OBMOL_METHODS = {
-  "NumAtoms" =>       "Number of atoms",
-  "NumBonds" =>       "Number of bonds",
-  "NumHvyAtoms" =>    "Number of heavy atoms",
-  "NumResidues" =>    "Number of residues",
-  "NumRotors" =>      "Number of rotatable bonds",
-  "GetEnergy" =>      "Heat of formation for this molecule (in kcal/mol)",
-  "GetMolWt" =>       "Standard molar mass given by IUPAC atomic masses (amu)",
-  "GetExactMass" =>   "Mass given by isotopes (or most abundant isotope, if not specified)",
-  "GetTotalCharge" => "Total charge",
-}
-
-OBDESCRIPTOR_METHODS = { 
-  "HBA1" =>           "Number of hydrogen bond acceptors 1 (JoelLib)",
-  "HBA2" =>           "Number of hydrogen bond acceptors 2 (JoelLib)",
-  "HBD" =>            "Number of hydrogen bond donors (JoelLib)",
-  "L5" =>             "Lipinski rule of five",
-  "logP" =>           "Octanol/water partition coefficient",
-  "MR" =>             "Molar refractivity",
-  "MW" =>             "Molecular weight",
-  "nF" =>             "Number of fluorine atoms",
-  "nHal" =>           "Number of halogen atoms",
-  "spinMult" =>       "Total spin multiplicity",
-  "TPSA" =>           "Topological polar surface area",
-}
-
-# Get a list of OpenBabel algorithms
-# @return [text/uri-list] URIs of OpenBabel algorithms
-get '/openbabel' do
-  algorithms = OBMOL_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
-  algorithms << OBDESCRIPTOR_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
-  response['Content-Type'] = 'text/uri-list'
-  algorithms.join("\n")
-end
-
-# Get RDF/XML representation of OpenBabel algorithm
-# @return [application/rdf+xml] OWL-DL representation of OpenBabel algorithm
-get '/openbabel/:property' do
-  description = OBMOL_METHODS[params[:property]] if OBMOL_METHODS.include? params[:property]
-  description = OBDESCRIPTOR_METHODS[params[:property]] if OBDESCRIPTOR_METHODS.include? params[:property]
-  if description
-    algorithm = OpenTox::Algorithm::Generic.new(url_for("/openbabel/#{params[:property]}",:full))
-    algorithm.metadata = {
-      DC.title => params[:property],
-      DC.creator => "helma@in-silico.ch",
-      DC.description => description,
-      RDF.type => [OTA.DescriptorCalculation],
-    }
-    response['Content-Type'] = 'application/rdf+xml'
-    algorithm.to_rdfxml
-  else
-    raise OpenTox::NotFoundError.new "Unknown OpenBabel descriptor #{params[:property]}."
-  end
-end
-
-# Calculate OpenBabel descriptors
-# Supports the following OpenBabel methods (see OpenBabel API http://openbabel.org/api/2.2.0/)
-#   - NumAtoms       Number of atoms
-#   - NumBonds       Number of bonds
-#   - NumHvyAtoms    Number of heavy atoms
-#   - NumResidues    Number of residues
-#   - NumRotors      Number of rotatable bonds
-#   - GetEnergy      Heat of formation for this molecule (in kcal/mol)
-#   - GetMolWt       Standard molar mass given by IUPAC atomic masses (amu)
-#   - GetExactMass   Mass given by isotopes (or most abundant isotope, if not specified)
-#   - GetTotalCharge Total charge
-#   - HBA1           Number of hydrogen bond acceptors 1 (JoelLib)
-#   - HBA2           Number of hydrogen bond acceptors 2 (JoelLib)
-#   - HBD            Number of hydrogen bond donors (JoelLib)
-#   - L5             Lipinski rule of five
-#   - logP           Octanol/water partition coefficient
-#   - MR             Molar refractivity
-#   - MW             Molecular weight
-#   - nF             Number of fluorine atoms
-#   - nHal           Number of halogen atoms
-#   - spinMult       Total spin multiplicity
-#   - TPSA           Topological polar surface area
-# @param [String] compound_uri Compound URI
-# @return [String] descriptor value
-post '/openbabel/:property' do
-	obconversion = OpenBabel::OBConversion.new
-	obmol = OpenBabel::OBMol.new
-  compound = OpenTox::Compound.new params[:compound_uri]
-	obconversion.set_in_and_out_formats 'inchi', 'can'
-  obconversion.read_string obmol, compound.to_inchi
-  if OBMOL_METHODS.keys.include? params[:property]
-    eval("obmol.#{params[:property].underscore}").to_s
-  elsif OBDESCRIPTOR_METHODS.keys.include? params[:property]
-    descriptor = OpenBabel::OBDescriptor.find_type(params[:property])
-    descriptor.predict(obmol).to_s
-  else
-    raise OpenTox::NotFoundError.new "Cannot calculate property #{params[:property]} with OpenBabel"
-  end
-end
-
-# Calculate all OpenBabel descriptors for a dataset
-# @param [String] dataset_uri Dataset URI
-# @return [text/uri-list] Task URI
-post '/openbabel' do
-  task = OpenTox::Task.create("Calculating OpenBabel descriptors for #{params[:dataset_uri]}", url_for('/openbabel',:full)) do 
-
-    dataset = OpenTox::Dataset.find(params[:dataset_uri])
-    result_dataset = OpenTox::Dataset.create
-    result_dataset.add_metadata({
-      DC.title => "OpenBabel descriptors for " + dataset.metadata[DC.title].to_s,
-      DC.creator => url_for('/openbabel',:full),
-      OT.hasSource => url_for('/openbabel', :full),
-      OT.parameters => [
-        { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
-      ]
-    })
-
-    obconversion = OpenBabel::OBConversion.new
-    obmol = OpenBabel::OBMol.new
-    obconversion.set_in_and_out_formats 'inchi', 'can'
-
-    OBMOL_METHODS.merge(OBDESCRIPTOR_METHODS).each do |name,description|
-      feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
-      metadata = {
-        OT.hasSource => url_for("/openbabel/#{name}", :full),
-        DC.description => description,
-        DC.title => name,
-      }
-      result_dataset.add_feature feature_uri, metadata
-    end
-
-    dataset.compounds.each do |compound_uri|
-      compound = OpenTox::Compound.new(compound_uri)
-      obconversion.read_string obmol, compound.to_inchi
-      #result_dataset.add_compound compound_uri
-      OBMOL_METHODS.keys.each do |name|
-        feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
-        value = eval("obmol.#{name.underscore}").to_f
-        result_dataset.add compound_uri, feature_uri, value
-      end
-      OBDESCRIPTOR_METHODS.keys.each do |name|
-        feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
-        value = OpenBabel::OBDescriptor.find_type(params[:property]).predict(obmol).to_f
-        result_dataset.add compound_uri, feature_uri, value
-      end
-    end
-    result_dataset.save
-    result_dataset.uri
-  end
-  response['Content-Type'] = 'text/uri-list'
-  raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
-  halt 202,task.uri.to_s+"\n"
-end
diff --git a/similarity.rb b/similarity.rb
deleted file mode 100644
index faf43f9..0000000
--- a/similarity.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-require File.join(File.dirname(__FILE__),'dataset.rb')
-
-helpers do
-def find
-  # + charges are dropped
-  uri = uri(params[:splat].first.gsub(/(InChI.*) (.*)/,'\1+\2')) # reinsert dropped '+' signs in InChIs
-  raise OpenTox::NotFoundError.new "Dataset \"#{uri}\" not found." unless @set = Dataset.find(uri)
-end
-
-def uri(name)
-  name = URI.encode(name)
-  uri = File.join Dataset.base_uri, name
-  end
-end
-
-get '/tanimoto/dataset/*/dataset/*/?' do
-  find
-  @set.tanimoto(uri(params[:splat][1]))
-end
-
-get '/weighted_tanimoto/dataset/*/dataset/*/?' do
-  find
-  @set.weighted_tanimoto(uri(params[:splat][1]))
-end
-
-
diff --git a/smarts.rb b/smarts.rb
deleted file mode 100644
index 4ae6949..0000000
--- a/smarts.rb
+++ /dev/null
@@ -1,3 +0,0 @@
-get '/match/compound/*/smarts/*/?' do
-	"#{OpenTox::Compound.from_inchi(params[:splat][0]).match?(params[:splat][1])}"
-end
author	Andreas Maunz <andreas@maunz.de>	2012-03-26 17:17:54 +0200
committer	Andreas Maunz <andreas@maunz.de>	2012-03-26 17:17:54 +0200
commit	ef102b11ecdf9258f1942c9a25fa6db2772cfb89 (patch)
tree	44e3e1029695c709f86ec4e82f3a345a9793520d
parent	9cf07c58e6b5c51ff78ea56574ddf90308861ce2 (diff)