summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Maunz <andreas@maunz.de> 2012-03-26 17:17:54 +0200
committer: Andreas Maunz <andreas@maunz.de> 2012-03-26 17:17:54 +0200
commit: ef102b11ecdf9258f1942c9a25fa6db2772cfb89 (patch)
tree: 44e3e1029695c709f86ec4e82f3a345a9793520d
parent: 9cf07c58e6b5c51ff78ea56574ddf90308861ce2 (diff)
Deleted obsolete files
-rw-r--r-- balancer.rb 98
-rw-r--r-- feature_selection.rb 93
-rw-r--r-- fminer.rb 370
-rw-r--r-- lazar.rb 259
-rw-r--r-- openbabel.rb 148
-rw-r--r-- similarity.rb 26
-rw-r--r-- smarts.rb 3
7 files changed, 0 insertions, 997 deletions
diff --git a/balancer.rb b/balancer.rb
deleted file mode 100644
index 4ed2fd7..0000000
--- a/balancer.rb
+++ /dev/null
@@ -1,98 +0,0 @@
-# cuts a classification dataset into balanced pieces
-# let inact_act_ratio := majority_class.size/minority_class.size
-# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5
-# each piece contains the complete minority class and ceil(inact_act_ratio) majority class compounds.
-
-class Balancer
-
- attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets
-
- # Supply a OpenTox::Dataset here
- # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given
- def initialize(dataset, feature_uri, creator_url)
- @act_arr = []
- @inact_arr = []
- @inact_act_ratio = 1.0/0 # trick to define +infinity
- @nr_majority_splits = 1 # +/-1 means: no split
- @split = [] # splitted arrays with ids
- @datasets = [] # result datasets
- @errors = []
-
- classification = true
- if dataset.features.include?(feature_uri)
- dataset.data.each do |i,a|
- inchi = i
- acts = a
- acts.each do |act|
- value = act[feature_uri]
- if OpenTox::Utils.is_true?(value)
- @act_arr << inchi
- elsif OpenTox::Utils.classification?(value)
- @inact_arr << inchi
- else
- classification = false
- break;
- end
- end
- end
- @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression
- set_nr_majority_splits
- # perform majority split
- @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1
- @split.each do |s|
- new_c = @nr_majority_splits > 0 ? s.concat(@act_arr) : s.concat(@inac_arr)
- @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url)
- end
-
- else
- errors << "Feature not present in dataset."
- end
- errors << "Can not split regression dataset." unless classification
- end
-
-
-
- # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values.
- def set_nr_majority_splits
- @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression
- end
-
- # does the actual shuffle and split
- def shuffle_split (arr)
- arr = arr.shuffle
- arr.chunk(@nr_majority_splits.abs)
- end
-
- # turns a hash into a 2 col csv
- def hsh2csv (hsh)
- res=""
- hsh.each do |k,v|
- arr = [v,(@nr_majority_splits > 0 ? 0 : 1)]
- res += arr.join(", ") + "\n"
- end
- res
- end
-
-end
-
-class Array
-
- # cuts an array into <num-pieces> chunks - returns a two-dimensional array
- def chunk(pieces)
- q, r = length.divmod(pieces)
- (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \
- .map { |a, b| slice(a...b) }
- end
-
- # shuffles the elements of an array
- def shuffle( seed=nil )
- srand seed.to_i if seed
- sort_by { Kernel.rand }
- end
-
- # shuffels self
- def shuffle!( seed=nil )
- self.replace shuffle( seed )
- end
-
-end
diff --git a/feature_selection.rb b/feature_selection.rb
deleted file mode 100644
index 556ccf8..0000000
--- a/feature_selection.rb
+++ /dev/null
@@ -1,93 +0,0 @@
-# Get list of feature_selection algorithms
-#
-# @return [text/uri-list] URIs of feature_selection algorithms
-get '/feature_selection/?' do
- list = [ url_for('/feature_selection/rfe', :full) ].join("\n") + "\n"
- case request.env['HTTP_ACCEPT']
- when /text\/html/
- content_type "text/html"
- OpenTox.text_to_html list
- else
- content_type 'text/uri-list'
- list
- end
-end
-
-# Get RDF/XML representation of feature_selection rfe algorithm
-# @return [application/rdf+xml] OWL-DL representation of feature_selection rfe algorithm
-get "/feature_selection/rfe/?" do
- algorithm = OpenTox::Algorithm::Generic.new(url_for('/feature_selection/rfe',:full))
- algorithm.metadata = {
- DC.title => 'recursive feature elimination',
- DC.creator => "andreas@maunz.de, helma@in-silico.ch",
- DC.contributor => "vorgrimmlerdavid@gmx.de",
- BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#feature_selection_rfe",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
- OT.parameters => [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature_uri" },
- { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" },
- { DC.description => "Delete Instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" }
- ]
- }
- case request.env['HTTP_ACCEPT']
- when /text\/html/
- content_type "text/html"
- OpenTox.text_to_html algorithm.to_yaml
- when /application\/x-yaml/
- content_type "application/x-yaml"
- algorithm.to_yaml
- else
- response['Content-Type'] = 'application/rdf+xml'
- algorithm.to_rdfxml
- end
-end
-
-# Run rfe algorithm on dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] feature_dataset_uri URI of the feature dataset
-# @return [text/uri-list] Task URI
-post '/feature_selection/rfe/?' do
-
- raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri]
- raise OpenTox::NotFoundError.new "Please submit a prediction_feature_uri." unless params[:prediction_feature_uri]
- raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri]
-
- ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} )
- tf_ds=Tempfile.open(['rfe_', '.csv'])
- tf_ds.puts(ds_csv)
- tf_ds.flush()
-
- prediction_feature = params[:prediction_feature_uri].split('/').last # get col name
-
- fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"})
- tf_fds=Tempfile.open(['rfe_', '.csv'])
- tf_fds.puts(fds_csv)
- tf_fds.flush()
-
- del_missing = params[:del_missing] == "true" ? true : false
-
- task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task|
- r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } )
-
- parser = OpenTox::Parser::Spreadsheets.new
- ds = OpenTox::Dataset.new
- ds.save
- parser.dataset = ds
- ds = parser.load_csv(File.open(r_result_file).read,false,true)
- ds.save
- r_result_uri = ds.uri
- #r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri
- begin
- tf_ds.close!; tf_fds.close!
- File.unlink(r_result_file)
- rescue
- end
- r_result_uri
- end
- response['Content-Type'] = 'text/uri-list'
- raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
- halt 202,task.uri.to_s+"\n"
-end
-
diff --git a/fminer.rb b/fminer.rb
deleted file mode 100644
index 95ce469..0000000
--- a/fminer.rb
+++ /dev/null
@@ -1,370 +0,0 @@
-ENV['FMINER_SMARTS'] = 'true'
-ENV['FMINER_NO_AROMATIC'] = 'true'
-ENV['FMINER_PVALUES'] = 'true'
-ENV['FMINER_SILENT'] = 'true'
-ENV['FMINER_NR_HITS'] = 'true'
-
-@@bbrc = Bbrc::Bbrc.new
-@@last = Last::Last.new
-
-# Get list of fminer algorithms
-#
-# @return [text/uri-list] URIs of fminer algorithms
-get '/fminer/?' do
- list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
- case request.env['HTTP_ACCEPT']
- when /text\/html/
- content_type "text/html"
- OpenTox.text_to_html list
- else
- content_type 'text/uri-list'
- list
- end
-end
-
-# Get RDF/XML representation of fminer bbrc algorithm
-# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm
-get "/fminer/bbrc/?" do
- algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full))
- algorithm.metadata = {
- DC.title => 'fminer backbone refinement class representatives',
- DC.creator => "andreas@maunz.de, helma@in-silico.ch",
- DC.contributor => "vorgrimmlerdavid@gmx.de",
-# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
- OT.parameters => [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
- { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
- ]
- }
- case request.env['HTTP_ACCEPT']
- when /text\/html/
- content_type "text/html"
- OpenTox.text_to_html algorithm.to_yaml
- when /application\/x-yaml/
- content_type "application/x-yaml"
- algorithm.to_yaml
- else
- response['Content-Type'] = 'application/rdf+xml'
- algorithm.to_rdfxml
- end
-end
-
-# Get RDF/XML representation of fminer last algorithm
-# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm
-get "/fminer/last/?" do
- algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full))
- algorithm.metadata = {
- DC.title => 'fminer latent structure class representatives',
- DC.creator => "andreas@maunz.de, helma@in-silico.ch",
- DC.contributor => "vorgrimmlerdavid@gmx.de",
-# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
- OT.parameters => [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
- ]
- }
- case request.env['HTTP_ACCEPT']
- when /text\/html/
- content_type "text/html"
- OpenTox.text_to_html algorithm.to_yaml
- when /application\/x-yaml/
- content_type "application/x-yaml"
- algorithm.to_yaml
- else
- response['Content-Type'] = 'application/rdf+xml'
- algorithm.to_rdfxml
- end
-end
-
-# Creates same features for dataset <dataset_uri> that have been created
-# with fminer in dataset <feature_dataset_uri>
-# accept params[:nr_hits] as used in other fminer methods
-post '/fminer/:method/match?' do
- raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri]
- raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri]
- task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task|
- f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid
- c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid
- res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid
- f_dataset.features.each do |f,m|
- res_dataset.add_feature(f,m)
- end
- c_dataset.compounds.each do |c|
- res_dataset.add_compound(c)
- comp = OpenTox::Compound.new(c)
- f_dataset.features.each do |f,m|
- if params[:nr_hits] == "true"
- hits = comp.match_hits([m[OT.smarts]])
- res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]]
- else
- res_dataset.add(c,f,1) if comp.match?(m[OT.smarts])
- end
- end
- end
- res_dataset.save @subjectid
- res_dataset.uri
- end
- return_task(task)
-end
-
-# Run bbrc algorithm on dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
-# @param [optional] parameters BBRC parameters, accepted parameters are
-# - min_frequency Minimum frequency (default 5)
-# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
-# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
-# - min_chisq_significance Significance threshold (between 0 and 1)
-# - nr_hits Set to "true" to get hit count instead of presence
-# @return [text/uri-list] Task URI
-post '/fminer/bbrc/?' do
-
- fminer=OpenTox::Algorithm::Fminer.new
- fminer.check_params(params,5,@subjectid)
-
- task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do |task|
- @@bbrc.Reset
- if fminer.prediction_feature.feature_type == "regression"
- @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
- else
- raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
- "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
- @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
- @value_map=Hash.new
- @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
- end
- @@bbrc.SetMinfreq(fminer.minfreq)
- @@bbrc.SetType(1) if params[:feature_type] == "paths"
- @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean
- @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
- @@bbrc.SetConsoleOut(false)
-
- feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
- feature_dataset.add_metadata({
- DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
- DC.creator => url_for('/fminer/bbrc',:full),
- OT.hasSource => url_for('/fminer/bbrc', :full),
- OT.parameters => [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
- })
- feature_dataset.save(@subjectid)
-
- fminer.compounds = []
- fminer.db_class_sizes = Array.new # AM: effect
- fminer.all_activities = Hash.new # DV: for effect calculation in regression part
- fminer.smi = [] # AM LAST: needed for matching the patterns back
-
- # Add data to fminer
- fminer.add_fminer_data(@@bbrc, params, @value_map)
-
- g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation
- g_median=g_array.to_scale.median
-
- raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
- task.progress 10
- step_width = 80 / @@bbrc.GetNoRootNodes().to_f
- features = Set.new
-
- # run @@bbrc
- (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
- results = @@bbrc.MineRoot(j)
- task.progress 10+step_width*(j+1)
- results.each do |result|
- f = YAML.load(result)[0]
- smarts = f[0]
- p_value = f[1]
-
- if (!@@bbrc.GetRegression)
- id_arrs = f[2..-1].flatten
- max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes)
- effect = f[2..-1].size-max
- else #regression part
- id_arrs = f[2]
- # DV: effect calculation
- f_arr=Array.new
- f[2].each do |id|
- id=id.keys[0] # extract id from hit count hash
- f_arr.push(fminer.all_activities[id])
- end
- f_median=f_arr.to_scale.median
- if g_median >= f_median
- effect = 'activating'
- else
- effect = 'deactivating'
- end
- end
-
- feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
- unless features.include? smarts
- features << smarts
- metadata = {
- OT.hasSource => url_for('/fminer/bbrc', :full),
- RDF.type => [OT.Feature, OT.Substructure],
- OT.smarts => smarts,
- OT.pValue => p_value.to_f,
- OT.effect => effect,
- OT.parameters => [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
- }
- feature_dataset.add_feature feature_uri, metadata
- #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
- end
- id_arrs.each { |id_count_hash|
- id=id_count_hash.keys[0].to_i
- count=id_count_hash.values[0].to_i
- if params[:nr_hits] == "true"
- feature_dataset.add(fminer.compounds[id], feature_uri, count)
- else
- feature_dataset.add(fminer.compounds[id], feature_uri, 1)
- end
- }
-
- end # end of
- end # feature parsing
-
- # AM: add feature values for non-present features
- # feature_dataset.complete_data_entries
-
- feature_dataset.save(@subjectid)
- feature_dataset.uri
- end
- response['Content-Type'] = 'text/uri-list'
- raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
- halt 202,task.uri.to_s+"\n"
-end
-#end
-
-# Run last algorithm on a dataset
-#
-# @param [String] dataset_uri URI of the training dataset
-# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
-# @param [optional] parameters LAST parameters, accepted parameters are
-# - min_frequency freq Minimum frequency (default 5)
-# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
-# - hops Maximum number of hops
-# - nr_hits Set to "true" to get hit count instead of presence
-# @return [text/uri-list] Task URI
-post '/fminer/last/?' do
-
- fminer=OpenTox::Algorithm::Fminer.new
- fminer.check_params(params,80,@subjectid)
-
- task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do |task|
- @@last.Reset
- if fminer.prediction_feature.feature_type == "regression"
- @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
- else
- raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
- "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
- @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
- @value_map=Hash.new
- @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
- end
- @@last.SetMinfreq(fminer.minfreq)
- @@last.SetType(1) if params[:feature_type] == "paths"
- @@last.SetMaxHops(params[:hops]) if params[:hops]
- @@last.SetConsoleOut(false)
-
-
- feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
- feature_dataset.add_metadata({
- DC.title => "LAST representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
- DC.creator => url_for('/fminer/last',:full),
- OT.hasSource => url_for('/fminer/last', :full),
- OT.parameters => [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
- })
- feature_dataset.save(@subjectid)
-
- fminer.compounds = []
- fminer.db_class_sizes = Array.new # AM: effect
- fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
- fminer.smi = [] # AM LAST: needed for matching the patterns back
-
- # Add data to fminer
- fminer.add_fminer_data(@@last, params, @value_map)
-
- raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
-
- # run @@last
- features = Set.new
- xml = ""
- task.progress 10
- step_width = 80 / @@last.GetNoRootNodes().to_f
-
- (0 .. @@last.GetNoRootNodes()-1).each do |j|
- results = @@last.MineRoot(j)
- task.progress 10+step_width*(j+1)
- results.each do |result|
- xml << result
- end
- end
-
- lu = LU.new # AM LAST: uses last-utils here
- dom=lu.read(xml) # AM LAST: parse GraphML
- smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
- params[:nr_hits] == "true" ? hit_count=true: hit_count=false
- matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations
-
- matches.each do |smarts, ids|
- feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
- if @@last.GetRegression()
- p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
- effect = (p_value > 0) ? "activating" : "deactivating"
- else
- p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f
- g=Array.new
- @value_map.each { |y,act| g[y-1]=Array.new }
- feat_hash.each { |x,y| g[y-1].push(x) }
- max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes)
- effect = g.size-max
- end
- feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s
- unless features.include? smarts
- features << smarts
- metadata = {
- RDF.type => [OT.Feature, OT.Substructure],
- OT.hasSource => feature_dataset.uri,
- OT.smarts => smarts,
- OT.pValue => p_value.abs,
- OT.effect => effect,
- OT.parameters => [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
- }
- feature_dataset.add_feature feature_uri, metadata
- end
- if !hit_count
- ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, 1)}
- else
- ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])}
- end
- end
-
- # AM: add feature values for non-present features
- # feature_dataset.complete_data_entries
-
- feature_dataset.save(@subjectid)
- feature_dataset.uri
- end
- response['Content-Type'] = 'text/uri-list'
- raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
- halt 202,task.uri.to_s+"\n"
-end
diff --git a/lazar.rb b/lazar.rb
deleted file mode 100644
index 1fe28f8..0000000
--- a/lazar.rb
+++ /dev/null
@@ -1,259 +0,0 @@
-@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc")
-
-# Get RDF/XML representation of the lazar algorithm
-# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
-get '/lazar/?' do
- algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full))
- algorithm.metadata = {
- DC.title => 'lazar',
- DC.creator => "helma@in-silico.ch, andreas@maunz.de",
- DC.contributor => "vorgrimmlerdavid@gmx.de",
-# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar",
- OT.parameters => [
- { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
- { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
- { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" }
- ]
- }
- case request.env['HTTP_ACCEPT']
- when /text\/html/
- content_type "text/html"
- OpenTox.text_to_html algorithm.to_yaml
- when /application\/x-yaml/
- content_type "application/x-yaml"
- algorithm.to_yaml
- else
- response['Content-Type'] = 'application/rdf+xml'
- algorithm.to_rdfxml
- end
-end
-
-# Create a lazar prediction model
-# @param [String] dataset_uri Training dataset URI
-# @param [optional,String] prediction_feature URI of the feature to be predicted
-# @param [optional,String] feature_generation_uri URI of the feature generation algorithm
-# @param [optional,String] - further parameters for the feature generation service
-# @return [text/uri-list] Task URI
-post '/lazar/?' do
-
- LOGGER.debug "building lazar model with params: "+params.inspect
- params[:subjectid] = @subjectid
- raise OpenTox::NotFoundError.new "No dataset_uri parameter." unless params[:dataset_uri]
- dataset_uri = params[:dataset_uri]
-
- task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
-
-
- # # # Dataset present, prediction feature present?
- raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all(@subjectid)
-
- # Prediction Feature
- prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
- unless params[:prediction_feature] # try to read prediction_feature from dataset
- raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
- params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
- end
- raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
-
- # Feature Generation URI
- feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) )
-
- # Create instance
- lazar = OpenTox::Model::Lazar.new
-
-
-
-
- # # # ENDPOINT RELATED
-
- # Default Values
- # Classification: Weighted Majority, Substructure.match
- if prediction_feature.feature_type == "classification"
- @training_classes = training_activities.accept_values(prediction_feature.uri).sort
- @training_classes.each_with_index { |c,i|
- lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later.
- params[:value_map] = lazar.value_map
- }
- # Regression: SVM, Substructure.match_hits
- elsif prediction_feature.feature_type == "regression"
- lazar.feature_calculation_algorithm = "Substructure.match_hits"
- lazar.prediction_algorithm = "Neighbors.local_svm_regression"
- end
-
-
-
-
- # # # USER VALUES
-
- # Min Sim
- min_sim = params[:min_sim].to_f if params[:min_sim]
- min_sim = 0.3 unless params[:min_sim]
-
- # Algorithm
- lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm]
-
- # Nr Hits
- nr_hits = false
- if params[:nr_hits] == "true" || lazar.prediction_algorithm.include?("local_svm")
- lazar.feature_calculation_algorithm = "Substructure.match_hits"
- nr_hits = true
- end
- params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed
-
- # Propositionalization
- propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true)
-
- # PC type
- pc_type = params[:pc_type] unless params[:pc_type].nil?
-
- # Min train performance
- min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance]
- min_train_performance = 0.1 unless params[:min_train_performance]
-
-
-
-
-
-
- task.progress 10
-
-
-
-
-
- # # # Features
-
- # Read Features
- if params[:feature_dataset_uri]
- lazar.feature_calculation_algorithm = "Substructure.lookup"
- feature_dataset_uri = params[:feature_dataset_uri]
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
- if training_features.feature_type(@subjectid) == "regression"
- lazar.similarity_algorithm = "Similarity.cosine"
- min_sim = 0.4 unless params[:min_sim]
- raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type]
- end
-
- # Create Features
- else
- params[:feature_generation_uri] = feature_generation_uri
- params[:subjectid] = @subjectid
- prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
- if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
- params[:feature_type] = "paths" unless params[:feature_type]
- end
- feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params, OpenTox::SubTask.new(task,10,70)).to_s
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
- end
-
-
-
- # # # Write fingerprints
- training_features.load_all(@subjectid)
- raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil?
-
- training_features.data_entries.each do |compound,entry|
- if training_activities.data_entries.has_key? compound
-
- lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
- entry.keys.each do |feature|
-
- # CASE 1: Substructure
- if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits")
- if training_features.features[feature]
- smarts = training_features.features[feature][OT.smarts]
- #lazar.fingerprints[compound] << smarts
- if lazar.feature_calculation_algorithm == "Substructure.match_hits"
- lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue]
- else
- lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue]
- end
- unless lazar.features.include? smarts
- lazar.features << smarts
- lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
- lazar.effects[smarts] = training_features.features[feature][OT.effect]
- end
- end
-
- # CASE 2: Others
- elsif entry[feature].flatten.size == 1
- lazar.fingerprints[compound][feature] = entry[feature].flatten.first
- lazar.features << feature unless lazar.features.include? feature
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- end
-
- end
- end
- task.progress 80
-
-
-
-
-
- # # # Activities
-
- if prediction_feature.feature_type == "regression"
- training_activities.data_entries.each do |compound,entry|
- lazar.activities[compound] = [] unless lazar.activities[compound]
- unless entry[prediction_feature.uri].empty?
- entry[prediction_feature.uri].each do |value|
- lazar.activities[compound] << value
- end
- end
- end
- elsif prediction_feature.feature_type == "classification"
- training_activities.data_entries.each do |compound,entry|
- lazar.activities[compound] = [] unless lazar.activities[compound]
- unless entry[prediction_feature.uri].empty?
- entry[prediction_feature.uri].each do |value|
- lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals
- end
- end
- end
- end
- task.progress 90
-
-
-
-
- # Metadata
-
- lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
- lazar.metadata[OT.dependentVariables] = prediction_feature.uri
- lazar.metadata[OT.trainingDataset] = dataset_uri
- lazar.metadata[OT.featureDataset] = feature_dataset_uri
- case training_activities.feature_type(@subjectid)
- when "classification"
- lazar.metadata[RDF.type] = [OT.Model, OTA.ClassificationLazySingleTarget]
- when "regression"
- lazar.metadata[RDF.type] = [OT.Model, OTA.RegressionLazySingleTarget]
- end
-
- lazar.metadata[OT.parameters] = [
- {DC.title => "dataset_uri", OT.paramValue => dataset_uri},
- {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri},
- {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri},
- {DC.title => "propositionalized", OT.paramValue => propositionalized},
- {DC.title => "pc_type", OT.paramValue => pc_type},
- {DC.title => "nr_hits", OT.paramValue => nr_hits},
- {DC.title => "min_sim", OT.paramValue => min_sim},
- {DC.title => "min_train_performance", OT.paramValue => min_train_performance},
-
- ]
-
- model_uri = lazar.save(@subjectid)
- LOGGER.info model_uri + " created #{Time.now}"
- model_uri
-
- end
- response['Content-Type'] = 'text/uri-list'
- raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
- halt 202,task.uri
-end
-
diff --git a/openbabel.rb b/openbabel.rb
deleted file mode 100644
index 463663e..0000000
--- a/openbabel.rb
+++ /dev/null
@@ -1,148 +0,0 @@
-OBMOL_METHODS = {
- "NumAtoms" => "Number of atoms",
- "NumBonds" => "Number of bonds",
- "NumHvyAtoms" => "Number of heavy atoms",
- "NumResidues" => "Number of residues",
- "NumRotors" => "Number of rotatable bonds",
- "GetEnergy" => "Heat of formation for this molecule (in kcal/mol)",
- "GetMolWt" => "Standard molar mass given by IUPAC atomic masses (amu)",
- "GetExactMass" => "Mass given by isotopes (or most abundant isotope, if not specified)",
- "GetTotalCharge" => "Total charge",
-}
-
-OBDESCRIPTOR_METHODS = {
- "HBA1" => "Number of hydrogen bond acceptors 1 (JoelLib)",
- "HBA2" => "Number of hydrogen bond acceptors 2 (JoelLib)",
- "HBD" => "Number of hydrogen bond donors (JoelLib)",
- "L5" => "Lipinski rule of five",
- "logP" => "Octanol/water partition coefficient",
- "MR" => "Molar refractivity",
- "MW" => "Molecular weight",
- "nF" => "Number of fluorine atoms",
- "nHal" => "Number of halogen atoms",
- "spinMult" => "Total spin multiplicity",
- "TPSA" => "Topological polar surface area",
-}
-
-# Get a list of OpenBabel algorithms
-# @return [text/uri-list] URIs of OpenBabel algorithms
-get '/openbabel' do
- algorithms = OBMOL_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
- algorithms << OBDESCRIPTOR_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
- response['Content-Type'] = 'text/uri-list'
- algorithms.join("\n")
-end
-
-# Get RDF/XML representation of OpenBabel algorithm
-# @return [application/rdf+xml] OWL-DL representation of OpenBabel algorithm
-get '/openbabel/:property' do
- description = OBMOL_METHODS[params[:property]] if OBMOL_METHODS.include? params[:property]
- description = OBDESCRIPTOR_METHODS[params[:property]] if OBDESCRIPTOR_METHODS.include? params[:property]
- if description
- algorithm = OpenTox::Algorithm::Generic.new(url_for("/openbabel/#{params[:property]}",:full))
- algorithm.metadata = {
- DC.title => params[:property],
- DC.creator => "helma@in-silico.ch",
- DC.description => description,
- RDF.type => [OTA.DescriptorCalculation],
- }
- response['Content-Type'] = 'application/rdf+xml'
- algorithm.to_rdfxml
- else
- raise OpenTox::NotFoundError.new "Unknown OpenBabel descriptor #{params[:property]}."
- end
-end
-
-# Calculate OpenBabel descriptors
-# Supports the following OpenBabel methods (see OpenBabel API http://openbabel.org/api/2.2.0/)
-# - NumAtoms Number of atoms
-# - NumBonds Number of bonds
-# - NumHvyAtoms Number of heavy atoms
-# - NumResidues Number of residues
-# - NumRotors Number of rotatable bonds
-# - GetEnergy Heat of formation for this molecule (in kcal/mol)
-# - GetMolWt Standard molar mass given by IUPAC atomic masses (amu)
-# - GetExactMass Mass given by isotopes (or most abundant isotope, if not specified)
-# - GetTotalCharge Total charge
-# - HBA1 Number of hydrogen bond acceptors 1 (JoelLib)
-# - HBA2 Number of hydrogen bond acceptors 2 (JoelLib)
-# - HBD Number of hydrogen bond donors (JoelLib)
-# - L5 Lipinski rule of five
-# - logP Octanol/water partition coefficient
-# - MR Molar refractivity
-# - MW Molecular weight
-# - nF Number of fluorine atoms
-# - nHal Number of halogen atoms
-# - spinMult Total spin multiplicity
-# - TPSA Topological polar surface area
-# @param [String] compound_uri Compound URI
-# @return [String] descriptor value
-post '/openbabel/:property' do
- obconversion = OpenBabel::OBConversion.new
- obmol = OpenBabel::OBMol.new
- compound = OpenTox::Compound.new params[:compound_uri]
- obconversion.set_in_and_out_formats 'inchi', 'can'
- obconversion.read_string obmol, compound.to_inchi
- if OBMOL_METHODS.keys.include? params[:property]
- eval("obmol.#{params[:property].underscore}").to_s
- elsif OBDESCRIPTOR_METHODS.keys.include? params[:property]
- descriptor = OpenBabel::OBDescriptor.find_type(params[:property])
- descriptor.predict(obmol).to_s
- else
- raise OpenTox::NotFoundError.new "Cannot calculate property #{params[:property]} with OpenBabel"
- end
-end
-
-# Calculate all OpenBabel descriptors for a dataset
-# @param [String] dataset_uri Dataset URI
-# @return [text/uri-list] Task URI
-post '/openbabel' do
- task = OpenTox::Task.create("Calculating OpenBabel descriptors for #{params[:dataset_uri]}", url_for('/openbabel',:full)) do
-
- dataset = OpenTox::Dataset.find(params[:dataset_uri])
- result_dataset = OpenTox::Dataset.create
- result_dataset.add_metadata({
- DC.title => "OpenBabel descriptors for " + dataset.metadata[DC.title].to_s,
- DC.creator => url_for('/openbabel',:full),
- OT.hasSource => url_for('/openbabel', :full),
- OT.parameters => [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- ]
- })
-
- obconversion = OpenBabel::OBConversion.new
- obmol = OpenBabel::OBMol.new
- obconversion.set_in_and_out_formats 'inchi', 'can'
-
- OBMOL_METHODS.merge(OBDESCRIPTOR_METHODS).each do |name,description|
- feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
- metadata = {
- OT.hasSource => url_for("/openbabel/#{name}", :full),
- DC.description => description,
- DC.title => name,
- }
- result_dataset.add_feature feature_uri, metadata
- end
-
- dataset.compounds.each do |compound_uri|
- compound = OpenTox::Compound.new(compound_uri)
- obconversion.read_string obmol, compound.to_inchi
- #result_dataset.add_compound compound_uri
- OBMOL_METHODS.keys.each do |name|
- feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
- value = eval("obmol.#{name.underscore}").to_f
- result_dataset.add compound_uri, feature_uri, value
- end
- OBDESCRIPTOR_METHODS.keys.each do |name|
- feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
- value = OpenBabel::OBDescriptor.find_type(params[:property]).predict(obmol).to_f
- result_dataset.add compound_uri, feature_uri, value
- end
- end
- result_dataset.save
- result_dataset.uri
- end
- response['Content-Type'] = 'text/uri-list'
- raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
- halt 202,task.uri.to_s+"\n"
-end
diff --git a/similarity.rb b/similarity.rb
deleted file mode 100644
index faf43f9..0000000
--- a/similarity.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-require File.join(File.dirname(__FILE__),'dataset.rb')
-
-helpers do
-def find
- # + charges are dropped
- uri = uri(params[:splat].first.gsub(/(InChI.*) (.*)/,'\1+\2')) # reinsert dropped '+' signs in InChIs
- raise OpenTox::NotFoundError.new "Dataset \"#{uri}\" not found." unless @set = Dataset.find(uri)
-end
-
-def uri(name)
- name = URI.encode(name)
- uri = File.join Dataset.base_uri, name
- end
-end
-
-get '/tanimoto/dataset/*/dataset/*/?' do
- find
- @set.tanimoto(uri(params[:splat][1]))
-end
-
-get '/weighted_tanimoto/dataset/*/dataset/*/?' do
- find
- @set.weighted_tanimoto(uri(params[:splat][1]))
-end
-
-
diff --git a/smarts.rb b/smarts.rb
deleted file mode 100644
index 4ae6949..0000000
--- a/smarts.rb
+++ /dev/null
@@ -1,3 +0,0 @@
-get '/match/compound/*/smarts/*/?' do
- "#{OpenTox::Compound.from_inchi(params[:splat][0]).match?(params[:splat][1])}"
-end