From 9c58dbf3b903f1b2b7343acdf7875fb7115e4bb3 Mon Sep 17 00:00:00 2001 From: mr Date: Fri, 30 Jul 2010 16:01:42 +0200 Subject: Authorization token_id added --- fminer.rb | 3 +++ lazar.rb | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index 9242f9f..6fd0dd1 100644 --- a/fminer.rb +++ b/fminer.rb @@ -42,6 +42,9 @@ post '/fminer/?' do title = "BBRC representatives for " + training_dataset.title feature_dataset.title = title feature_dataset.creator = url_for('/fminer',:full) + feature_dataset.token_id = params[:token_id] if params[:token_id] + feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if request.env["HTTP_TOKEN_ID"] and !feature_dataset.token_id + bbrc_uri = url_for("/fminer#BBRC_representative",:full) feature_dataset.features << bbrc_uri diff --git a/lazar.rb b/lazar.rb index 193cfd0..db29399 100644 --- a/lazar.rb +++ b/lazar.rb @@ -22,9 +22,10 @@ end post '/lazar/?' do # create a model - LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" - LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" - LOGGER.debug "Feature generation: '" + params[:feature_generation_uri].to_s + "'" + LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" + LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" + LOGGER.debug "Feature generation: '" + params[:feature_generation_uri].to_s + "'" + LOGGER.debug "Token ID: #{params[:token_id]}" dataset_uri = "#{params[:dataset_uri]}" begin @@ -54,6 +55,8 @@ post '/lazar/?' do # create a model training_features = OpenTox::Dataset.find(feature_dataset_uri) halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? lazar = OpenTox::Model::Lazar.new + lazar.token_id = params[:token_id] + lazar.token_id = request.env["HTTP_TOKEN_ID"] if request.env["HTTP_TOKEN_ID"] lazar.trainingDataset = dataset_uri lazar.feature_dataset_uri = feature_dataset_uri halt 404, "More than one descriptor type" unless training_features.features.size == 1 -- cgit v1.2.3 From 1523148dba95042aedcdb4f3aeeab52200f91325 Mon Sep 17 00:00:00 2001 From: ist Date: Tue, 24 Aug 2010 18:16:20 +0200 Subject: Added balanced model generation Need wrapper update for utils! --- balancer.rb | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lazar.rb | 53 +++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 balancer.rb diff --git a/balancer.rb b/balancer.rb new file mode 100644 index 0000000..4ed2fd7 --- /dev/null +++ b/balancer.rb @@ -0,0 +1,98 @@ +# cuts a classification dataset into balanced pieces +# let inact_act_ratio := majority_class.size/minority_class.size +# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5 +# each piece contains the complete minority class and ceil(inact_act_ratio) majority class compounds. 
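# A worked example with hypothetical counts: given 120 inactive (majority) and 40 active (minority)
# compounds, inact_act_ratio = 120/40 = 3.0 > 1.5, so the shuffled majority class is cut into
# ceil(3.0) = 3 chunks of roughly 40 compounds each; every resulting piece is one such chunk plus
# all 40 actives, yielding three approximately 1:1 balanced datasets in place of one 3:1 imbalanced one.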
+ +class Balancer + + attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets + + # Supply a OpenTox::Dataset here + # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given + def initialize(dataset, feature_uri, creator_url) + @act_arr = [] + @inact_arr = [] + @inact_act_ratio = 1.0/0 # trick to define +infinity + @nr_majority_splits = 1 # +/-1 means: no split + @split = [] # splitted arrays with ids + @datasets = [] # result datasets + @errors = [] + + classification = true + if dataset.features.include?(feature_uri) + dataset.data.each do |i,a| + inchi = i + acts = a + acts.each do |act| + value = act[feature_uri] + if OpenTox::Utils.is_true?(value) + @act_arr << inchi + elsif OpenTox::Utils.classification?(value) + @inact_arr << inchi + else + classification = false + break; + end + end + end + @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression + set_nr_majority_splits + # perform majority split + @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1 + @split.each do |s| + new_c = @nr_majority_splits > 0 ? s.concat(@act_arr) : s.concat(@inac_arr) + @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url) + end + + else + errors << "Feature not present in dataset." + end + errors << "Can not split regression dataset." unless classification + end + + + + # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values. + def set_nr_majority_splits + @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression + end + + # does the actual shuffle and split + def shuffle_split (arr) + arr = arr.shuffle + arr.chunk(@nr_majority_splits.abs) + end + + # turns a hash into a 2 col csv + def hsh2csv (hsh) + res="" + hsh.each do |k,v| + arr = [v,(@nr_majority_splits > 0 ? 0 : 1)] + res += arr.join(", ") + "\n" + end + res + end + +end + +class Array + + # cuts an array into chunks - returns a two-dimensional array + def chunk(pieces) + q, r = length.divmod(pieces) + (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \ + .map { |a, b| slice(a...b) } + end + + # shuffles the elements of an array + def shuffle( seed=nil ) + srand seed.to_i if seed + sort_by { Kernel.rand } + end + + # shuffels self + def shuffle!( seed=nil ) + self.replace shuffle( seed ) + end + +end diff --git a/lazar.rb b/lazar.rb index 2c67298..4920f51 100644 --- a/lazar.rb +++ b/lazar.rb @@ -112,3 +112,56 @@ post '/lazar/?' do # create a model end halt 202,task_uri end + + +# AM: Balancer wraps around /lazar +post '/lazar-balanced/?' do # create a balanced model + LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" + LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" + LOGGER.debug "Feature generation: '" + params[:feature_generation_uri].to_s + "'" + dataset_uri = "#{params[:dataset_uri]}" + + begin + training_activities = OpenTox::Dataset.find(dataset_uri) + rescue + halt 404, "Dataset #{dataset_uri} not found" + end + + halt 404, "No prediction_feature parameter." unless params[:prediction_feature] + halt 404, "No feature_generation_uri parameter." 
unless params[:feature_generation_uri] + halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+ + training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature]) + + response['Content-Type'] = 'text/uri-list' + task_uri = OpenTox::Task.as_task do |task| + + # Split the dataset + bal = Balancer.new(training_activities, params[:prediction_feature], training_activities.creator) + balanced_datasets = [] + if bal.datasets.size > 0 + balanced_datasets = bal.datasets + end + + model_uris = [] + if balanced_datasets.size == 0 + mtu = OpenTox::Algorithm::Lazar.create_model(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature]) + t = OpenTox::Task.find(mtu) + t.wait_for_completion + model_uris << t.resultURI + else + balanced_datasets.each do |bd| + mtu = OpenTox::Algorithm::Lazar.create_model(:dataset_uri => bd, :prediction_feature => params[:prediction_feature]) + t = OpenTox::Task.find(mtu) + t.wait_for_completion + model_uris << t.resultURI + end + end + lazar = OpenTox::Model::Lazar.new + lazar.models = model_uris + + model_uri = lazar.save + LOGGER.info model_uri + " created #{Time.now}" + model_uri + end + halt 202,task_uri +end -- cgit v1.2.3 From b209d9da0673f9e9e4af0c5c90e8649e2154f83c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 25 Aug 2010 14:49:33 +0200 Subject: opentox-api-wrapper bumped to 1.6.6 --- application.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application.rb b/application.rb index c0363d5..d2a21c6 100644 --- a/application.rb +++ b/application.rb @@ -1,6 +1,6 @@ require 'rubygems' require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems -gem "opentox-ruby-api-wrapper", "= 1.6.5" +gem "opentox-ruby-api-wrapper", "= 1.6.6" require 'opentox-ruby-api-wrapper' #require 'smarts.rb' -- cgit v1.2.3 From ed505de7971dd8d6fb97b234a943fd1ea1c04f32 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 13 Sep 2010 17:09:37 +0200 Subject: intermediary commit for new owl serializer --- fminer.rb | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fminer.rb b/fminer.rb index 15379fd..f6f3e17 100644 --- a/fminer.rb +++ b/fminer.rb @@ -7,12 +7,19 @@ get '/fminer/?' 
do owl = OpenTox::Owl.create 'Algorithm', url_for('/fminer',:full) owl.set 'title',"fminer" owl.set 'creator',"http://github.com/amaunz/fminer2" - owl.parameters = { - "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri" }, - "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" } - } + owl.set_data( { + "parameters" => [ + { "title" => "Dataset URI", "paramScope" => "mandatory", "paramValue" => "dataset_uri" }, + { "title" => "Feature URI for dependent variable", "paramScope" => "mandatory", "paramValue" => "feature_uri" } + ] + } ) + +# owl.parameters = { +# "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri" }, +# "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" } +# } rdf = owl.rdf - File.open('public/fminer.owl', 'w') {|f| f.print rdf} + #File.open('public/fminer.owl', 'w') {|f| f.print rdf} response['Content-Type'] = 'application/rdf+xml' rdf end -- cgit v1.2.3 From 19dd7247be22e637419d79406041a4548b169c2c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 14 Sep 2010 20:52:04 +0200 Subject: simplified version of OwlSerializer, RDF/XML for algorithm/lazar, RDF/XML for annotations,compounds, features in dataset --- fminer.rb | 18 +++++++++--------- lazar.rb | 51 +++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/fminer.rb b/fminer.rb index f6f3e17..30d0d9a 100644 --- a/fminer.rb +++ b/fminer.rb @@ -4,15 +4,15 @@ ENV['FMINER_PVALUES'] = 'true' @@fminer = Bbrc::Bbrc.new get '/fminer/?' do - owl = OpenTox::Owl.create 'Algorithm', url_for('/fminer',:full) - owl.set 'title',"fminer" - owl.set 'creator',"http://github.com/amaunz/fminer2" - owl.set_data( { - "parameters" => [ - { "title" => "Dataset URI", "paramScope" => "mandatory", "paramValue" => "dataset_uri" }, - { "title" => "Feature URI for dependent variable", "paramScope" => "mandatory", "paramValue" => "feature_uri" } - ] - } ) + owl = OpenTox::OwlSerializer.create 'Algorithm', url_for('/fminer',:full) + owl.annotate 'title',"fminer" + owl.annotate 'creator',"http://github.com/amaunz/fminer2" +# owl.set_data( { +# "parameters" => [ +# { "title" => "Dataset URI", "paramScope" => "mandatory", "paramValue" => "dataset_uri" }, +# { "title" => "Feature URI for dependent variable", "paramScope" => "mandatory", "paramValue" => "feature_uri" } +# ] +# } ) # owl.parameters = { # "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri" }, diff --git a/lazar.rb b/lazar.rb index d037fbc..b9163f4 100644 --- a/lazar.rb +++ b/lazar.rb @@ -1,19 +1,42 @@ get '/lazar/?' 
do - owl = OpenTox::Owl.create 'Algorithm', url_for('/lazar',:full) - owl.set 'title',"lazar" - owl.set 'creator',"http://github.com/helma/opentox-algorithm" - owl.parameters = { - "Dataset URI" => - { :scope => "mandatory", :value => "dataset_uri" }, - "Feature URI for dependent variable" => - { :scope => "mandatory", :value => "prediction_feature" }, - "Feature generation URI" => - { :scope => "mandatory", :value => "feature_generation_uri" } - } - rdf = owl.rdf - File.open('public/lazar.owl', 'w') {|f| f.print rdf} + uri = url_for('/lazar',:full) + owl = OpenTox::OwlSerializer.create 'Algorithm', uri + owl.annotation_property uri, DC.creator, "helma@in-silico.ch", XSD.string + owl.annotation_property uri, DC.contributor, "andreas@maunz.de", XSD.string + owl.annotation_property uri, DC.title, "lazar", XSD.string + owl.annotation_property uri, DC.source, "http://github.com/helma/opentox-algorithm", XSD.anyUri + owl.object_property uri, OT.parameters, File.join(uri,"dataset_uri"), XSD.anyUri + owl.object_property uri, OT.parameters, File.join(uri,"prediction_feature"), XSD.anyUri + owl.object_property uri, OT.parameters, File.join(uri,"feature_generation_uri"), XSD.anyUri response['Content-Type'] = 'application/rdf+xml' - rdf + owl.rdf +end + +get '/lazar/prediction_feature?' do + uri = url_for('/lazar/prediction_feature',:full) + owl = OpenTox::OwlSerializer.create 'Parameter', uri + owl.annotation_property uri, DC.description, "URI of the feature to be predicted", XSD.string + owl.annotation_property uri, OT.paramScope, "mandatory", XSD.string + response['Content-Type'] = 'application/rdf+xml' + owl.rdf +end + +get '/lazar/feature_generation_uri?' do + uri = url_for('/lazar/feature_generation_uri',:full) + owl = OpenTox::OwlSerializer.create 'Parameter', uri + owl.annotation_property uri, DC.description, "URI of the feature_generation_algorithm", XSD.string + owl.annotation_property uri, OT.paramScope, "mandatory", XSD.string + response['Content-Type'] = 'application/rdf+xml' + owl.rdf +end + +get '/lazar/dataset_uri?' do + uri = url_for('/lazar/dataset_uri',:full) + owl = OpenTox::OwlSerializer.create 'Parameter', uri + owl.annotation_property uri, DC.description, "URI of the training dataset", XSD.string + owl.annotation_property uri, OT.paramScope, "mandatory", XSD.string + response['Content-Type'] = 'application/rdf+xml' + owl.rdf end post '/lazar/?' do # create a model -- cgit v1.2.3 From dc4bdddee0b759865eee2e562304c18c1e9f1292 Mon Sep 17 00:00:00 2001 From: ch Date: Sun, 19 Sep 2010 08:25:34 +0200 Subject: javascript for qmrf editor (has to be fixed) --- fminer.rb | 1 + libfminer | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 15379fd..9300bac 100644 --- a/fminer.rb +++ b/fminer.rb @@ -93,6 +93,7 @@ post '/fminer/?' 
do g_array=g_hash.values # DV: calculation of global median for effect calculation g_median=OpenTox::Utils.median(g_array) minfreq = (0.02*id).round + #minfreq = 5 @@fminer.SetMinfreq(minfreq) LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" diff --git a/libfminer b/libfminer index 5a97d00..2c2b517 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 5a97d006e0ccfc48e53d5f24842a898ec9e912e2 +Subproject commit 2c2b5177a8764d155fab2e12ce63457776f45d5e -- cgit v1.2.3 From 2e5d4c08b1136f774e5e5de885d851540a20898e Mon Sep 17 00:00:00 2001 From: mr Date: Fri, 15 Oct 2010 17:29:23 +0200 Subject: code cleaning --- fminer.rb | 2 +- lazar.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index e894f0b..867ea0f 100644 --- a/fminer.rb +++ b/fminer.rb @@ -40,7 +40,7 @@ post '/fminer/?' do feature_dataset.title = title feature_dataset.creator = url_for('/fminer',:full) feature_dataset.token_id = params[:token_id] if params[:token_id] - feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if request.env["HTTP_TOKEN_ID"] and !feature_dataset.token_id + feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"] bbrc_uri = url_for("/fminer#BBRC_representative",:full) feature_dataset.features << bbrc_uri diff --git a/lazar.rb b/lazar.rb index 64175be..649c190 100644 --- a/lazar.rb +++ b/lazar.rb @@ -51,8 +51,8 @@ post '/lazar/?' do # create a model training_features = OpenTox::Dataset.find(feature_dataset_uri) halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? lazar = OpenTox::Model::Lazar.new - lazar.token_id = params[:token_id] - lazar.token_id = request.env["HTTP_TOKEN_ID"] if request.env["HTTP_TOKEN_ID"] + lazar.token_id = params[:token_id] if params[:token_id] + lazar.token_id = request.env["HTTP_TOKEN_ID"] if !lazar.token_id and request.env["HTTP_TOKEN_ID"] lazar.trainingDataset = dataset_uri lazar.feature_dataset_uri = feature_dataset_uri halt 404, "More than one descriptor type" unless training_features.features.size == 1 -- cgit v1.2.3 From 20887f5aca9a684169bae2c1893df77cc9783f77 Mon Sep 17 00:00:00 2001 From: ist Date: Mon, 18 Oct 2010 18:39:18 +0200 Subject: Rooted LastPM integration --- Rakefile | 15 +++++++++++++-- application.rb | 1 + libfminer | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Rakefile b/Rakefile index 70139f4..e92dbb2 100644 --- a/Rakefile +++ b/Rakefile @@ -13,11 +13,22 @@ namespace "fminer" do puts `git pull` puts `./configure` if $? == 0 - puts `echo "Fminer successfully configured."` + puts `echo "Fminer/LibBbrc successfully configured."` else - puts `echo "Fminer configuration failed!"` + puts `echo "Fminer/LibBbrc configuration failed!"` exit end + puts `make ruby` + Dir.chdir('../liblast') + puts `git checkout master` + puts `git pull` + puts `./configure` + if $? 
== 0 + puts `echo "Fminer/LibLast successfully configured."` + else + puts `echo "Fminer/LibLast configuration failed!"` + exit + end puts `make ruby` end diff --git a/application.rb b/application.rb index c0363d5..6eb65e5 100644 --- a/application.rb +++ b/application.rb @@ -1,5 +1,6 @@ require 'rubygems' require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems +require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems gem "opentox-ruby-api-wrapper", "= 1.6.5" require 'opentox-ruby-api-wrapper' diff --git a/libfminer b/libfminer index 2c2b517..e955cc6 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 2c2b5177a8764d155fab2e12ce63457776f45d5e +Subproject commit e955cc6b24d577d7187e5660716ee69d12174a8a -- cgit v1.2.3 From 06ecc8947efd544a9b008fdc861ee510f6f0cd74 Mon Sep 17 00:00:00 2001 From: ist Date: Tue, 19 Oct 2010 12:50:59 +0200 Subject: BBRC as default descriptors --- fminer.rb | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 142 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index 9300bac..d48b363 100644 --- a/fminer.rb +++ b/fminer.rb @@ -1,7 +1,6 @@ ENV['FMINER_SMARTS'] = 'true' ENV['FMINER_NO_AROMATIC'] = 'true' ENV['FMINER_PVALUES'] = 'true' -@@fminer = Bbrc::Bbrc.new get '/fminer/?' do owl = OpenTox::Owl.create 'Algorithm', url_for('/fminer',:full) @@ -17,8 +16,148 @@ get '/fminer/?' do rdf end -post '/fminer/?' do - + +['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default + post path do + @@fminer = Bbrc::Bbrc.new + halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? + LOGGER.debug "Dataset: " + params[:dataset_uri] + LOGGER.debug "Endpoint: " + params[:feature_uri] + feature_uri = params[:feature_uri] + begin + LOGGER.debug "Retrieving #{params[:dataset_uri]}" + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" + rescue + LOGGER.error "Dataset #{params[:dataset_uri]} not found" + halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil? + end + halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) + + task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer',:full)) do + + feature_dataset = OpenTox::Dataset.new + title = "BBRC representatives for " + training_dataset.title + feature_dataset.title = title + feature_dataset.creator = url_for('/fminer',:full) + bbrc_uri = url_for("/fminer#BBRC_representative",:full) + feature_dataset.features << bbrc_uri + + id = 1 # fminer start id is not 0 + compounds = [] + + g_hash = Hash.new# DV: for effect calculation in regression part + @@fminer.Reset + #@@fminer.SetChisqSig(0.99) + LOGGER.debug "Fminer: initialising ..." + training_dataset.data.each do |c,features| + begin + smiles = OpenTox::Compound.new(:uri => c.to_s).smiles + rescue + LOGGER.warn "No resource for #{c.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{c.to_s}." + else + feature_dataset.compounds << c.to_s + features.each do |feature| + act = feature[feature_uri] + if act.nil? 
+ LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}." + else + case act.to_s + when "true" + #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s + activity = 1 + when "false" + #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s + activity = 0 + else + # AM: add quantitative activity + activity = act.to_f + @@fminer.SetRegression(true) + end + compounds[id] = c.to_s + begin + @@fminer.AddCompound(smiles,id) + @@fminer.AddActivity(activity, id) + g_hash[id]=activity # DV: insert global information + rescue + LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" + end + end + end + id += 1 + end + end + g_array=g_hash.values # DV: calculation of global median for effect calculation + g_median=OpenTox::Utils.median(g_array) + minfreq = (0.02*id).round + #minfreq = 5 + @@fminer.SetMinfreq(minfreq) + LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" + + raise "no compounds" if compounds.size==0 + + values = {} + # run @@fminer + LOGGER.debug "Fminer: mining ..." + (0 .. @@fminer.GetNoRootNodes()-1).each do |j| + results = @@fminer.MineRoot(j) + results.each do |result| + f = YAML.load(result)[0] + smarts = f[0] + p_value = f[1] + # AM: f[3] missing on regression + if (!@@fminer.GetRegression) + ids = f[2] + f[3] + if f[2].size > f[3].size + effect = 'activating' + else + effect = 'deactivating' + end + else #regression part + ids = f[2] + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + f_arr.push(g_hash[id]) + end + f_median=OpenTox::Utils.median(f_arr) + if g_median >= f_median + effect = 'activating' + else + effect = 'deactivating' + end + end + + tuple = { + url_for('/fminer#smarts',:full) => smarts, + url_for('/fminer#p_value',:full) => p_value.to_f, + url_for('/fminer#effect',:full) => effect + } + #LOGGER.debug "#{f[0]}\t#{f[1]}\t#{effect}" + ids.each do |id| + feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] + feature_dataset.data[compounds[id]] << {bbrc_uri => tuple} + end + end + end + + uri = feature_dataset.save + LOGGER.debug "Fminer finished, dataset #{uri} created." + uri + end + LOGGER.debug "Fimer task started: "+task_uri.to_s + response['Content-Type'] = 'text/uri-list' + halt 202,task_uri.to_s+"\n" + end +end + + +post '/fminer/last/?' do + @@fminer = Last::Last.new halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? LOGGER.debug "Dataset: " + params[:dataset_uri] -- cgit v1.2.3 From 2830e18e3b6ac0f4073bc5fbd70880d7a93e4a2a Mon Sep 17 00:00:00 2001 From: ist Date: Tue, 19 Oct 2010 14:58:58 +0200 Subject: Added submodule --- .gitmodules | 3 +++ fminer.rb | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.gitmodules b/.gitmodules index 3330d61..90a96ff 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "libfminer"] path = libfminer url = http://github.com/amaunz/fminer2.git +[submodule "last-utils"] + path = last-utils + url = git://github.com/amaunz/last-utils diff --git a/fminer.rb b/fminer.rb index d48b363..6ba2834 100644 --- a/fminer.rb +++ b/fminer.rb @@ -172,14 +172,14 @@ post '/fminer/last/?' 
do end halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) - task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer',:full)) do + task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer/last',:full)) do feature_dataset = OpenTox::Dataset.new - title = "BBRC representatives for " + training_dataset.title + title = "LAST-PM descriptors for " + training_dataset.title feature_dataset.title = title - feature_dataset.creator = url_for('/fminer',:full) - bbrc_uri = url_for("/fminer#BBRC_representative",:full) - feature_dataset.features << bbrc_uri + feature_dataset.creator = url_for('/fminer/last',:full) + last_uri = url_for("/fminer#LAST-PM_descriptors",:full) + feature_dataset.features << last_uri id = 1 # fminer start id is not 0 compounds = [] @@ -278,7 +278,7 @@ post '/fminer/last/?' do #LOGGER.debug "#{f[0]}\t#{f[1]}\t#{effect}" ids.each do |id| feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] - feature_dataset.data[compounds[id]] << {bbrc_uri => tuple} + feature_dataset.data[compounds[id]] << {last_uri => tuple} end end end -- cgit v1.2.3 From 12e1e26055bcb99d0b17e23803b6a28c467ea37b Mon Sep 17 00:00:00 2001 From: ist Date: Tue, 19 Oct 2010 15:05:07 +0200 Subject: Restored gitmodules --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 90a96ff..75218e9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = http://github.com/amaunz/fminer2.git [submodule "last-utils"] path = last-utils - url = git://github.com/amaunz/last-utils + url = git://github.com/amaunz/last-utils.git -- cgit v1.2.3 From 5323cfea63f491af2bfcb87aef06b5386c90c67b Mon Sep 17 00:00:00 2001 From: ist Date: Tue, 19 Oct 2010 15:05:33 +0200 Subject: Updated gitmodules --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 75218e9..3330d61 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "libfminer"] path = libfminer url = http://github.com/amaunz/fminer2.git -[submodule "last-utils"] - path = last-utils - url = git://github.com/amaunz/last-utils.git -- cgit v1.2.3 From cf13d2614a70af7e1b466166b12e823d58a18bbf Mon Sep 17 00:00:00 2001 From: ist Date: Tue, 19 Oct 2010 15:48:32 +0200 Subject: Added submodule last-utils --- .gitmodules | 3 ++ Rakefile | 105 ++++++++++++++++++++++++++++++++++----------------------- application.rb | 1 + last-utils | 1 + 4 files changed, 67 insertions(+), 43 deletions(-) create mode 160000 last-utils diff --git a/.gitmodules b/.gitmodules index 3330d61..75218e9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "libfminer"] path = libfminer url = http://github.com/amaunz/fminer2.git +[submodule "last-utils"] + path = last-utils + url = git://github.com/amaunz/last-utils.git diff --git a/Rakefile b/Rakefile index e92dbb2..588236b 100644 --- a/Rakefile +++ b/Rakefile @@ -4,52 +4,71 @@ require 'opentox-ruby-api-wrapper' #require 'tasks/opentox' namespace "fminer" do - desc "Install required gems and fminer" - task :install do - puts `git submodule init` - puts `git submodule update` - Dir.chdir('libfminer/libbbrc') - puts `git checkout master` - puts `git pull` - puts `./configure` - if $? 
== 0 - puts `echo "Fminer/LibBbrc successfully configured."` - else - puts `echo "Fminer/LibBbrc configuration failed!"` - exit - end - puts `make ruby` - Dir.chdir('../liblast') - puts `git checkout master` - puts `git pull` - puts `./configure` - if $? == 0 - puts `echo "Fminer/LibLast successfully configured."` - else - puts `echo "Fminer/LibLast configuration failed!"` - exit - end - puts `make ruby` - end + desc "Install required gems and fminer" + task :install do + puts `git submodule init` + puts `git submodule update` + Dir.chdir('libfminer/libbbrc') + puts `git checkout master` + puts `git pull` + puts `./configure` + if $? == 0 + puts `echo "Fminer/LibBbrc successfully configured."` + else + puts `echo "Fminer/LibBbrc configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../liblast') + puts `git checkout master` + puts `git pull` + puts `./configure` + if $? == 0 + puts `echo "Fminer/LibLast successfully configured."` + else + puts `echo "Fminer/LibLast configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../../last-utils') + puts `git fetch` + puts `git checkout -b experimental origin/experimental` + puts `git pull` + end - desc "Update gems and fminer" - task :update do - puts `git submodule update` - Dir.chdir('libfminer/libbbrc') - puts `git checkout master` - puts `git pull` - puts `./configure` - if $? == 0 - puts `echo "Fminer successfully configured."` - else - puts `echo "Fminer configuration failed!"` - exit - end - puts `make ruby` - end + desc "Update gems and fminer" + task :update do + puts `git submodule update` + Dir.chdir('libfminer/libbbrc') + puts `git checkout master` + puts `git pull` + puts `./configure` + if $? == 0 + puts `echo "Fminer/LibBbrc successfully configured."` + else + puts `echo "Fminer/LibBbrc configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../liblast') + puts `git checkout master` + puts `git pull` + puts `./configure` + if $? 
== 0 + puts `echo "Fminer/LibLast successfully configured."` + else + puts `echo "Fminer/LibLast configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../../last-utils') + puts `git fetch` + puts `git checkout -b experimental origin/experimental` + puts `git pull` + end end desc "Run tests" task :test do - load 'test/test.rb' + load 'test/test.rb' end diff --git a/application.rb b/application.rb index 6eb65e5..3d511d1 100644 --- a/application.rb +++ b/application.rb @@ -1,6 +1,7 @@ require 'rubygems' require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems +require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST gem "opentox-ruby-api-wrapper", "= 1.6.5" require 'opentox-ruby-api-wrapper' diff --git a/last-utils b/last-utils new file mode 160000 index 0000000..a678d63 --- /dev/null +++ b/last-utils @@ -0,0 +1 @@ +Subproject commit a678d63a5ccc7a1b5375406a3da66f8bd325141d -- cgit v1.2.3 From 79e426f4f3723a28a820b85a42924fe4322ab49a Mon Sep 17 00:00:00 2001 From: ist Date: Wed, 20 Oct 2010 16:21:05 +0200 Subject: Finished basic integration of LAST-PM webservice TODOs: - implement (de)activating facility in ruby (ch has still the bug for BBRCs in his code) - implement p-value facility in ruby - solve library 'require' issue: need completely separate namespaces or completely same ns and try to re-require after deleting from $''. - Merge last-utils 'experimental' to 'master' --- application.rb | 2 - fminer.rb | 242 +++++++++++++++++++++++++++------------------------------ last-utils | 2 +- libfminer | 2 +- 4 files changed, 115 insertions(+), 133 deletions(-) diff --git a/application.rb b/application.rb index 3d511d1..5aecfbb 100644 --- a/application.rb +++ b/application.rb @@ -1,6 +1,4 @@ require 'rubygems' -require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems -require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST gem "opentox-ruby-api-wrapper", "= 1.6.5" require 'opentox-ruby-api-wrapper' diff --git a/fminer.rb b/fminer.rb index 6ba2834..8cbf490 100644 --- a/fminer.rb +++ b/fminer.rb @@ -12,13 +12,14 @@ get '/fminer/?' do } rdf = owl.rdf File.open('public/fminer.owl', 'w') {|f| f.print rdf} - response['Content-Type'] = 'application/rdf+xml' - rdf + response['Content-Type'] = 'application/rdf+xml' + rdf end ['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default post path do + require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems @@fminer = Bbrc::Bbrc.new halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? @@ -50,7 +51,7 @@ end @@fminer.Reset #@@fminer.SetChisqSig(0.99) LOGGER.debug "Fminer: initialising ..." 
- training_dataset.data.each do |c,features| + training_dataset.data.each do |c,features| begin smiles = OpenTox::Compound.new(:uri => c.to_s).smiles rescue @@ -119,17 +120,17 @@ end end else #regression part ids = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - f_arr.push(g_hash[id]) - end - f_median=OpenTox::Utils.median(f_arr) - if g_median >= f_median + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + f_arr.push(g_hash[id]) + end + f_median=OpenTox::Utils.median(f_arr) + if g_median >= f_median effect = 'activating' else effect = 'deactivating' - end + end end tuple = { @@ -157,137 +158,120 @@ end post '/fminer/last/?' do + require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems @@fminer = Last::Last.new - halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? - LOGGER.debug "Dataset: " + params[:dataset_uri] - LOGGER.debug "Endpoint: " + params[:feature_uri] - feature_uri = params[:feature_uri] - begin - LOGGER.debug "Retrieving #{params[:dataset_uri]}" - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" - rescue - LOGGER.error "Dataset #{params[:dataset_uri]} not found" - halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil? - end - halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) + halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? + LOGGER.debug "Dataset: " + params[:dataset_uri] + LOGGER.debug "Endpoint: " + params[:feature_uri] + feature_uri = params[:feature_uri] + begin + LOGGER.debug "Retrieving #{params[:dataset_uri]}" + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" + rescue + LOGGER.error "Dataset #{params[:dataset_uri]} not found" + halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil? + end + halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer/last',:full)) do - feature_dataset = OpenTox::Dataset.new - title = "LAST-PM descriptors for " + training_dataset.title - feature_dataset.title = title - feature_dataset.creator = url_for('/fminer/last',:full) - last_uri = url_for("/fminer#LAST-PM_descriptors",:full) - feature_dataset.features << last_uri + feature_dataset = OpenTox::Dataset.new + title = "LAST-PM descriptors for " + training_dataset.title + feature_dataset.title = title + feature_dataset.creator = url_for('/fminer/last',:full) + last_uri = url_for("/fminer#LAST-PM_descriptors",:full) + feature_dataset.features << last_uri - id = 1 # fminer start id is not 0 - compounds = [] + id = 1 # fminer start id is not 0 + compounds = [] + smi = [] # AM LAST: needed for matching the patterns back g_hash = Hash.new# DV: for effect calculation in regression part - @@fminer.Reset + @@fminer.Reset #@@fminer.SetChisqSig(0.99) - LOGGER.debug "Fminer: initialising ..." 
- training_dataset.data.each do |c,features| - begin - smiles = OpenTox::Compound.new(:uri => c.to_s).smiles - rescue - LOGGER.warn "No resource for #{c.to_s}" - next - end - if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{c.to_s}." - else - feature_dataset.compounds << c.to_s - features.each do |feature| - act = feature[feature_uri] - if act.nil? - LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}." - else - case act.to_s - when "true" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s - activity = 1 - when "false" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s - activity = 0 - else - # AM: add quantitative activity - activity = act.to_f - @@fminer.SetRegression(true) - end - compounds[id] = c.to_s - begin - @@fminer.AddCompound(smiles,id) - @@fminer.AddActivity(activity, id) + LOGGER.debug "Fminer: initialising ..." + training_dataset.data.each do |c,features| + begin + smiles = OpenTox::Compound.new(:uri => c.to_s).smiles + rescue + LOGGER.warn "No resource for #{c.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{c.to_s}." + else + feature_dataset.compounds << c.to_s + features.each do |feature| + act = feature[feature_uri] + if act.nil? + LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}." + else + case act.to_s + when "true" + #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s + activity = 1 + when "false" + #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s + activity = 0 + else + # AM: add quantitative activity + activity = act.to_f + @@fminer.SetRegression(true) + end + compounds[id] = c.to_s + smi[id] = smiles # AM LAST: changed this to store SMILES. + begin + @@fminer.AddCompound(smiles,id) + @@fminer.AddActivity(activity, id) g_hash[id]=activity # DV: insert global information - rescue - LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" - end - end - end - id += 1 - end - end + rescue + LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" + end + end + end + id += 1 + end + end g_array=g_hash.values # DV: calculation of global median for effect calculation g_median=OpenTox::Utils.median(g_array) - minfreq = (0.02*id).round - #minfreq = 5 - @@fminer.SetMinfreq(minfreq) - LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" - + minfreq = (0.02*id).round + #minfreq = 5 + @@fminer.SetMinfreq(minfreq) + LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" raise "no compounds" if compounds.size==0 + # run @@fminer + LOGGER.debug "Fminer: mining ..." + @@fminer.SetConsoleOut(false) + xml = "" + (0 .. @@fminer.GetNoRootNodes()-1).each do |j| + results = @@fminer.MineRoot(j) + results.each do |result| + xml << result + end + end + lu = LU.new + dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!) + smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) + instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations - values = {} - # run @@fminer - LOGGER.debug "Fminer: mining ..." - (0 .. 
@@fminer.GetNoRootNodes()-1).each do |j| - results = @@fminer.MineRoot(j) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - # AM: f[3] missing on regression - if (!@@fminer.GetRegression) - ids = f[2] + f[3] - if f[2].size > f[3].size - effect = 'activating' - else - effect = 'deactivating' - end - else #regression part - ids = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - f_arr.push(g_hash[id]) - end - f_median=OpenTox::Utils.median(f_arr) - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end - end - - tuple = { - url_for('/fminer#smarts',:full) => smarts, - url_for('/fminer#p_value',:full) => p_value.to_f, - url_for('/fminer#effect',:full) => effect - } - #LOGGER.debug "#{f[0]}\t#{f[1]}\t#{effect}" - ids.each do |id| - feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] - feature_dataset.data[compounds[id]] << {last_uri => tuple} - end - end - end + instances.each do |smarts, ids| + tuple = { + url_for('/fminer#smarts',:full) => smarts, + url_for('/fminer#p_value',:full) => nil, # AM LAST: TODO + url_for('/fminer#effect',:full) => nil # AM LAST: TODO + } + ids.each do |id| + feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] + feature_dataset.data[compounds[id]] << {last_uri => tuple} + end + end - uri = feature_dataset.save - LOGGER.debug "Fminer finished, dataset #{uri} created." + uri = feature_dataset.save + LOGGER.debug "Fminer finished, dataset #{uri} created." uri - end - LOGGER.debug "Fimer task started: "+task_uri.to_s - response['Content-Type'] = 'text/uri-list' - halt 202,task_uri.to_s+"\n" + end + LOGGER.debug "Fimer task started: "+task_uri.to_s + response['Content-Type'] = 'text/uri-list' + halt 202,task_uri.to_s+"\n" end diff --git a/last-utils b/last-utils index a678d63..d2ad4f2 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit a678d63a5ccc7a1b5375406a3da66f8bd325141d +Subproject commit d2ad4f2bb82fdb5433d3f739400244ba89f07860 diff --git a/libfminer b/libfminer index e955cc6..00ce7e8 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit e955cc6b24d577d7187e5660716ee69d12174a8a +Subproject commit 00ce7e88f4f1ca16030a9b7bdabf216b5402f9ce -- cgit v1.2.3 From ef97aadda700a51384b7a72a63e28fbce116e50c Mon Sep 17 00:00:00 2001 From: ist Date: Thu, 28 Oct 2010 15:42:54 +0200 Subject: Completed integration of LAST-PM --- Rakefile | 28 ++++++++++++++++++++++------ application.rb | 2 ++ fminer.rb | 19 ++++++++----------- libfminer | 2 +- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/Rakefile b/Rakefile index 588236b..0c72015 100644 --- a/Rakefile +++ b/Rakefile @@ -9,7 +9,11 @@ namespace "fminer" do puts `git submodule init` puts `git submodule update` Dir.chdir('libfminer/libbbrc') - puts `git checkout master` + puts `git checkout Makefile` + #puts `git checkout master` + puts `git fetch` + puts `git checkout -f -b refactor origin/refactor` + puts `git checkout refactor` puts `git pull` puts `./configure` if $? == 0 @@ -20,7 +24,11 @@ namespace "fminer" do end puts `make ruby` Dir.chdir('../liblast') - puts `git checkout master` + puts `git checkout Makefile` + #puts `git checkout master` + puts `git fetch` + puts `git checkout -f -b refactor origin/refactor` + puts `git checkout refactor` puts `git pull` puts `./configure` if $? 
== 0 @@ -32,7 +40,7 @@ namespace "fminer" do puts `make ruby` Dir.chdir('../../last-utils') puts `git fetch` - puts `git checkout -b experimental origin/experimental` + puts `git checkout -f -b experimental origin/experimental` puts `git pull` end @@ -40,7 +48,11 @@ namespace "fminer" do task :update do puts `git submodule update` Dir.chdir('libfminer/libbbrc') - puts `git checkout master` + puts `git checkout Makefile` + #puts `git checkout master` + puts `git fetch` + puts `git checkout -f -b refactor origin/refactor` + puts `git checkout refactor` puts `git pull` puts `./configure` if $? == 0 @@ -51,7 +63,11 @@ namespace "fminer" do end puts `make ruby` Dir.chdir('../liblast') - puts `git checkout master` + puts `git checkout Makefile` + #puts `git checkout master` + puts `git fetch` + puts `git checkout -f -b refactor origin/refactor` + puts `git checkout refactor` puts `git pull` puts `./configure` if $? == 0 @@ -63,7 +79,7 @@ namespace "fminer" do puts `make ruby` Dir.chdir('../../last-utils') puts `git fetch` - puts `git checkout -b experimental origin/experimental` + puts `git checkout -f -b experimental origin/experimental` puts `git pull` end end diff --git a/application.rb b/application.rb index 5aecfbb..3d511d1 100644 --- a/application.rb +++ b/application.rb @@ -1,4 +1,6 @@ require 'rubygems' +require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems +require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST gem "opentox-ruby-api-wrapper", "= 1.6.5" require 'opentox-ruby-api-wrapper' diff --git a/fminer.rb b/fminer.rb index 8cbf490..b706fb0 100644 --- a/fminer.rb +++ b/fminer.rb @@ -19,7 +19,6 @@ end ['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default post path do - require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems @@fminer = Bbrc::Bbrc.new halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? @@ -150,7 +149,7 @@ end LOGGER.debug "Fminer finished, dataset #{uri} created." uri end - LOGGER.debug "Fimer task started: "+task_uri.to_s + LOGGER.debug "Fminer task started: "+task_uri.to_s response['Content-Type'] = 'text/uri-list' halt 202,task_uri.to_s+"\n" end @@ -158,7 +157,6 @@ end post '/fminer/last/?' do - require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems @@fminer = Last::Last.new halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? @@ -187,7 +185,7 @@ post '/fminer/last/?' do compounds = [] smi = [] # AM LAST: needed for matching the patterns back - g_hash = Hash.new# DV: for effect calculation in regression part + all_hash = Hash.new# DV: for effect calculation in regression part @@fminer.Reset #@@fminer.SetChisqSig(0.99) LOGGER.debug "Fminer: initialising ..." 
@@ -224,7 +222,7 @@ post '/fminer/last/?' do begin @@fminer.AddCompound(smiles,id) @@fminer.AddActivity(activity, id) - g_hash[id]=activity # DV: insert global information + all_hash[id]=activity # DV: insert global information rescue LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" end @@ -233,8 +231,6 @@ post '/fminer/last/?' do id += 1 end end - g_array=g_hash.values # DV: calculation of global median for effect calculation - g_median=OpenTox::Utils.median(g_array) minfreq = (0.02*id).round #minfreq = 5 @@fminer.SetMinfreq(minfreq) @@ -254,12 +250,13 @@ post '/fminer/last/?' do dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!) smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations - instances.each do |smarts, ids| + feat_hash = Hash[*(all_hash.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax + @@fminer.GetRegression() ? p_value = @@fminer.KSTest(all_hash.values, feat_hash.values).to_f : p_value = @@fminer.ChisqTest(all_hash.values, feat_hash.values).to_f tuple = { url_for('/fminer#smarts',:full) => smarts, - url_for('/fminer#p_value',:full) => nil, # AM LAST: TODO - url_for('/fminer#effect',:full) => nil # AM LAST: TODO + url_for('/fminer#p_value',:full) => p_value.abs, + url_for('/fminer#effect',:full) => ((p_value>0)?'activating':'deactivating') } ids.each do |id| feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] @@ -271,7 +268,7 @@ post '/fminer/last/?' do LOGGER.debug "Fminer finished, dataset #{uri} created." 
uri end - LOGGER.debug "Fimer task started: "+task_uri.to_s + LOGGER.debug "Fminer task started: "+task_uri.to_s response['Content-Type'] = 'text/uri-list' halt 202,task_uri.to_s+"\n" end diff --git a/libfminer b/libfminer index 00ce7e8..e0eee43 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 00ce7e88f4f1ca16030a9b7bdabf216b5402f9ce +Subproject commit e0eee431ecb954328ff64e3cc48840c7003a2769 -- cgit v1.2.3 From 5a71ee53f9b2ab55eded49894269a60030645061 Mon Sep 17 00:00:00 2001 From: ist Date: Thu, 28 Oct 2010 15:51:20 +0200 Subject: Comments in code --- Rakefile | 14 +++++++++----- application.rb | 1 + fminer.rb | 6 +++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Rakefile b/Rakefile index 0c72015..9610315 100644 --- a/Rakefile +++ b/Rakefile @@ -10,7 +10,7 @@ namespace "fminer" do puts `git submodule update` Dir.chdir('libfminer/libbbrc') puts `git checkout Makefile` - #puts `git checkout master` + #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 puts `git fetch` puts `git checkout -f -b refactor origin/refactor` puts `git checkout refactor` @@ -25,7 +25,7 @@ namespace "fminer" do puts `make ruby` Dir.chdir('../liblast') puts `git checkout Makefile` - #puts `git checkout master` + #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 puts `git fetch` puts `git checkout -f -b refactor origin/refactor` puts `git checkout refactor` @@ -40,7 +40,9 @@ namespace "fminer" do puts `make ruby` Dir.chdir('../../last-utils') puts `git fetch` - puts `git checkout -f -b experimental origin/experimental` + # AM LAST: need branch 'experimental' until merged to master in last-utils + puts `git checkout -f -b experimental origin/experimental` + puts `git checkout experimental` puts `git pull` end @@ -49,7 +51,7 @@ namespace "fminer" do puts `git submodule update` Dir.chdir('libfminer/libbbrc') puts `git checkout Makefile` - #puts `git checkout master` + #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 puts `git fetch` puts `git checkout -f -b refactor origin/refactor` puts `git checkout refactor` @@ -64,7 +66,7 @@ namespace "fminer" do puts `make ruby` Dir.chdir('../liblast') puts `git checkout Makefile` - #puts `git checkout master` + #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 puts `git fetch` puts `git checkout -f -b refactor origin/refactor` puts `git checkout refactor` @@ -79,7 +81,9 @@ namespace "fminer" do puts `make ruby` Dir.chdir('../../last-utils') puts `git fetch` + # AM LAST: need branch 'experimental' until merged to master in last-utils puts `git checkout -f -b experimental origin/experimental` + puts `git checkout experimental` puts `git pull` end end diff --git a/application.rb b/application.rb index 3d511d1..f489a4c 100644 --- a/application.rb +++ b/application.rb @@ -1,4 +1,5 @@ require 'rubygems' +# AM LAST: can include both libs, no problems require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST diff --git a/fminer.rb b/fminer.rb index b706fb0..85b4a53 100644 --- a/fminer.rb +++ 
b/fminer.rb @@ -246,17 +246,17 @@ post '/fminer/last/?' do xml << result end end - lu = LU.new + lu = LU.new # AM LAST: uses last-utils here dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!) smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations instances.each do |smarts, ids| feat_hash = Hash[*(all_hash.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - @@fminer.GetRegression() ? p_value = @@fminer.KSTest(all_hash.values, feat_hash.values).to_f : p_value = @@fminer.ChisqTest(all_hash.values, feat_hash.values).to_f + @@fminer.GetRegression() ? p_value = @@fminer.KSTest(all_hash.values, feat_hash.values).to_f : p_value = @@fminer.ChisqTest(all_hash.values, feat_hash.values).to_f # AM LAST: use internal function for test tuple = { url_for('/fminer#smarts',:full) => smarts, url_for('/fminer#p_value',:full) => p_value.abs, - url_for('/fminer#effect',:full) => ((p_value>0)?'activating':'deactivating') + url_for('/fminer#effect',:full) => ((p_value>0)?'activating':'deactivating') # AM LAST: sign decides about effect } ids.each do |id| feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] -- cgit v1.2.3 From e6d45f897d23987d03cff75fe958be342b0311a2 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 3 Nov 2010 15:56:33 +0100 Subject: Using master of fminer2 --- Rakefile | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/Rakefile b/Rakefile index 9610315..efadf01 100644 --- a/Rakefile +++ b/Rakefile @@ -9,11 +9,7 @@ namespace "fminer" do puts `git submodule init` puts `git submodule update` Dir.chdir('libfminer/libbbrc') - puts `git checkout Makefile` - #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 - puts `git fetch` - puts `git checkout -f -b refactor origin/refactor` - puts `git checkout refactor` + puts `git checkout master` puts `git pull` puts `./configure` if $? == 0 @@ -24,11 +20,7 @@ namespace "fminer" do end puts `make ruby` Dir.chdir('../liblast') - puts `git checkout Makefile` - #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 - puts `git fetch` - puts `git checkout -f -b refactor origin/refactor` - puts `git checkout refactor` + puts `git checkout master` puts `git pull` puts `./configure` if $? == 0 @@ -51,10 +43,6 @@ namespace "fminer" do puts `git submodule update` Dir.chdir('libfminer/libbbrc') puts `git checkout Makefile` - #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 - puts `git fetch` - puts `git checkout -f -b refactor origin/refactor` - puts `git checkout refactor` puts `git pull` puts `./configure` if $? == 0 @@ -66,10 +54,6 @@ namespace "fminer" do puts `make ruby` Dir.chdir('../liblast') puts `git checkout Makefile` - #puts `git checkout master` AM LAST: need branch 'refactor' until merged to master in fminer2 - puts `git fetch` - puts `git checkout -f -b refactor origin/refactor` - puts `git checkout refactor` puts `git pull` puts `./configure` if $? 
== 0 -- cgit v1.2.3 From e72bba4cdaa6fd68d62b567e21be730a49963207 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 4 Nov 2010 11:15:59 +0100 Subject: fminer with annotations, commit before merging andreas new version --- fminer.rb | 194 +++++++++++++++++++++++++++++++------------------------------- lazar.rb | 143 ++++++++++++++++----------------------------- libfminer | 2 +- smarts.rb | 2 +- 4 files changed, 148 insertions(+), 193 deletions(-) diff --git a/fminer.rb b/fminer.rb index 30d0d9a..b5956c5 100644 --- a/fminer.rb +++ b/fminer.rb @@ -2,159 +2,157 @@ ENV['FMINER_SMARTS'] = 'true' ENV['FMINER_NO_AROMATIC'] = 'true' ENV['FMINER_PVALUES'] = 'true' @@fminer = Bbrc::Bbrc.new +@@fminer.SetMinfreq(5) get '/fminer/?' do - owl = OpenTox::OwlSerializer.create 'Algorithm', url_for('/fminer',:full) - owl.annotate 'title',"fminer" - owl.annotate 'creator',"http://github.com/amaunz/fminer2" -# owl.set_data( { -# "parameters" => [ -# { "title" => "Dataset URI", "paramScope" => "mandatory", "paramValue" => "dataset_uri" }, -# { "title" => "Feature URI for dependent variable", "paramScope" => "mandatory", "paramValue" => "feature_uri" } -# ] -# } ) - -# owl.parameters = { -# "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri" }, -# "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" } -# } - rdf = owl.rdf - #File.open('public/fminer.owl', 'w') {|f| f.print rdf} + + metadata = { + DC.title => 'fminer', + DC.identifier => url_for("",:full), + DC.creator => "andreas@maunz.de, helma@in-silico.ch", + DC.contributor => "vorgrimmlerdavid@gmx.de", + OT.isA => OTA.PatternMiningSupervised + } + + parameters = [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" } + ] + + s = OpenTox::Serializer::Owl.new + s.add_algorithm(url_for('/fminer',:full),metadata,parameters) response['Content-Type'] = 'application/rdf+xml' - rdf + s.to_rdfxml + end post '/fminer/?' do halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? - LOGGER.debug "Dataset: " + params[:dataset_uri] - LOGGER.debug "Endpoint: " + params[:feature_uri] - feature_uri = params[:feature_uri] - begin - LOGGER.debug "Retrieving #{params[:dataset_uri]}" - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" - rescue - LOGGER.error "Dataset #{params[:dataset_uri]} not found" - halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil? - end - halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) + halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? 
+ prediction_feature = params[:prediction_feature] - task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer',:full)) do + training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}" + training_dataset.load_all + halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) + + task_uri = OpenTox::Task.as_task("Mining BBRC features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new - title = "BBRC representatives for " + training_dataset.title - feature_dataset.title = title - feature_dataset.creator = url_for('/fminer',:full) - bbrc_uri = url_for("/fminer#BBRC_representative",:full) - feature_dataset.features << bbrc_uri + feature_dataset.add_metadata({ + DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title], + DC.creator => url_for('/fminer',:full), + OT.hasSource => url_for('/fminer', :full), + }) + feature_dataset.add_parameters({ + "dataset_uri" => params[:dataset_uri], + "prediction_feature" => params[:prediction_feature] + }) + feature_dataset.save id = 1 # fminer start id is not 0 compounds = [] - + nr_active=0 + nr_inactive=0 g_hash = Hash.new# DV: for effect calculation in regression part + @@fminer.Reset - #@@fminer.SetChisqSig(0.99) - LOGGER.debug "Fminer: initialising ..." - training_dataset.data.each do |c,features| + training_dataset.data_entries.each do |compound,entry| begin - smiles = OpenTox::Compound.new(:uri => c.to_s).smiles + smiles = OpenTox::Compound.new(compound.to_s).smiles rescue - LOGGER.warn "No resource for #{c.to_s}" + LOGGER.warn "No resource for #{compound.to_s}" next end if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{c.to_s}." - else - feature_dataset.compounds << c.to_s - features.each do |feature| - act = feature[feature_uri] - if act.nil? - LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}." + LOGGER.warn "Cannot find smiles for #{compound.to_s}." + next + end + entry.each do |feature,values| + values.each do |value| + if value.nil? + LOGGER.warn "No #{feature} activiity for #{compound.to_s}." 
else - case act.to_s + case value.to_s when "true" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s + nr_active += 1 activity = 1 when "false" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s + nr_inactive += 1 activity = 0 else - # AM: add quantitative activity - activity = act.to_f + activity = value.to_f @@fminer.SetRegression(true) end - compounds[id] = c.to_s begin @@fminer.AddCompound(smiles,id) @@fminer.AddActivity(activity, id) g_hash[id]=activity # DV: insert global information + compounds[id] = compound + id += 1 rescue - LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" + LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" end - end - end - id += 1 - end - end - g_array=g_hash.values # DV: calculation of global median for effect calculation - g_median=OpenTox::Utils.median(g_array) - minfreq = (0.02*id).round - @@fminer.SetMinfreq(minfreq) - LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" + end + end + end + end - raise "no compounds" if compounds.size==0 + g_array=g_hash.values # DV: calculation of global median for effect calculation + g_median=OpenTox::Algorithm.median(g_array) + + # TODO read from params + raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 - values = {} + features = Set.new # run @@fminer - LOGGER.debug "Fminer: mining ..." (0 .. @@fminer.GetNoRootNodes()-1).each do |j| + results = @@fminer.MineRoot(j) results.each do |result| f = YAML.load(result)[0] smarts = f[0] p_value = f[1] - # AM: f[3] missing on regression + if (!@@fminer.GetRegression) ids = f[2] + f[3] - if f[2].size > f[3].size + if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) effect = 'activating' else effect = 'deactivating' end else #regression part ids = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - f_arr.push(g_hash[id]) - end - f_median=OpenTox::Utils.median(f_arr) - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end - end - - tuple = { - url_for('/fminer#smarts',:full) => smarts, - url_for('/fminer#p_value',:full) => p_value.to_f, - url_for('/fminer#effect',:full) => effect - } - #LOGGER.debug "#{f[0]}\t#{f[1]}\t#{effect}" - ids.each do |id| - feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] - feature_dataset.data[compounds[id]] << {bbrc_uri => tuple} - end + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + f_arr.push(g_hash[id]) + end + f_median=OpenTox::Algorithm.median(f_arr) + if g_median >= f_median + effect = 'activating' + else + effect = 'deactivating' + end + end + + feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s + unless features.include? smarts + features << smarts + # TODO insert correct ontology entries + metadata = { + OT.hasSource => feature_dataset.uri, + OT.smarts => smarts, + OT.p_value => p_value.to_f, + OT.effect => effect } + feature_dataset.add_feature feature_uri, metadata + end + ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end end - - uri = feature_dataset.save - LOGGER.debug "Fminer finished, dataset #{uri} created." - uri + feature_dataset.save + feature_dataset.uri end - LOGGER.debug "Fimer task started: "+task_uri.to_s response['Content-Type'] = 'text/uri-list' halt 202,task_uri.to_s+"\n" end diff --git a/lazar.rb b/lazar.rb index b9163f4..98e0aa7 100644 --- a/lazar.rb +++ b/lazar.rb @@ -1,55 +1,35 @@ get '/lazar/?' 
do - uri = url_for('/lazar',:full) - owl = OpenTox::OwlSerializer.create 'Algorithm', uri - owl.annotation_property uri, DC.creator, "helma@in-silico.ch", XSD.string - owl.annotation_property uri, DC.contributor, "andreas@maunz.de", XSD.string - owl.annotation_property uri, DC.title, "lazar", XSD.string - owl.annotation_property uri, DC.source, "http://github.com/helma/opentox-algorithm", XSD.anyUri - owl.object_property uri, OT.parameters, File.join(uri,"dataset_uri"), XSD.anyUri - owl.object_property uri, OT.parameters, File.join(uri,"prediction_feature"), XSD.anyUri - owl.object_property uri, OT.parameters, File.join(uri,"feature_generation_uri"), XSD.anyUri - response['Content-Type'] = 'application/rdf+xml' - owl.rdf -end - -get '/lazar/prediction_feature?' do - uri = url_for('/lazar/prediction_feature',:full) - owl = OpenTox::OwlSerializer.create 'Parameter', uri - owl.annotation_property uri, DC.description, "URI of the feature to be predicted", XSD.string - owl.annotation_property uri, OT.paramScope, "mandatory", XSD.string - response['Content-Type'] = 'application/rdf+xml' - owl.rdf -end -get '/lazar/feature_generation_uri?' do - uri = url_for('/lazar/feature_generation_uri',:full) - owl = OpenTox::OwlSerializer.create 'Parameter', uri - owl.annotation_property uri, DC.description, "URI of the feature_generation_algorithm", XSD.string - owl.annotation_property uri, OT.paramScope, "mandatory", XSD.string + metadata = { + DC.title => 'lazar', + DC.identifier => url_for("",:full), + DC.creator => "helma@in-silico.ch, andreas@maunz.de", + DC.contributor => "vorgrimmlerdavid@gmx.de", + OT.isA => OTA.ClassificationLazySingleTarget + } + + parameters = [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" }, + { DC.description => "URI of feature genration service", OT.paramScope => "mandatory", OT.title => "feature_generation_uri" } + ] + + s = OpenTox::Serializer::Owl.new + s.add_algorithm(url_for('/lazar',:full),metadata,parameters) response['Content-Type'] = 'application/rdf+xml' - owl.rdf -end + s.to_rdfxml -get '/lazar/dataset_uri?' do - uri = url_for('/lazar/dataset_uri',:full) - owl = OpenTox::OwlSerializer.create 'Parameter', uri - owl.annotation_property uri, DC.description, "URI of the training dataset", XSD.string - owl.annotation_property uri, OT.paramScope, "mandatory", XSD.string - response['Content-Type'] = 'application/rdf+xml' - owl.rdf end post '/lazar/?' do # create a model - LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" - LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" - LOGGER.debug "Feature generation: '" + params[:feature_generation_uri].to_s + "'" dataset_uri = "#{params[:dataset_uri]}" begin - training_activities = OpenTox::Dataset.find(dataset_uri) - rescue - halt 404, "Dataset #{dataset_uri} not found" + training_activities = OpenTox::Dataset.new(dataset_uri) + training_activities.load_all + rescue => e + halt 404, "Dataset #{dataset_uri} not found (#{e.inspect})." end halt 404, "No prediction_feature parameter." unless params[:prediction_feature] @@ -61,69 +41,46 @@ post '/lazar/?' 
do # create a model task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task| # create features - LOGGER.debug "Starting fminer" - params[:feature_uri] = params[:prediction_feature] - fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params) - fminer_task = OpenTox::Task.find(fminer_task_uri) - fminer_task.wait_for_completion - raise "fminer failed" unless fminer_task.completed? - - LOGGER.debug "Fminer finished #{Time.now}" - feature_dataset_uri = fminer_task.resultURI.to_s - training_features = OpenTox::Dataset.find(feature_dataset_uri) + feature_dataset_uri = OpenTox::Algorithm::Fminer.new.run(params).to_s + + training_features = OpenTox::Dataset.new(feature_dataset_uri) + training_features.load_all halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? + lazar = OpenTox::Model::Lazar.new - lazar.trainingDataset = dataset_uri - lazar.feature_dataset_uri = feature_dataset_uri - halt 404, "More than one descriptor type" unless training_features.features.size == 1 - bbrc = training_features.features.first - training_features.data.each do |compound,features| - lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] - features.each do |feature| - tuple = feature[bbrc] - if tuple - smarts =nil; p_value = nil; effect = nil - tuple.each do |k,v| - case k - when /fminer#smarts/ - smarts = v - lazar.features << smarts - lazar.fingerprints[compound] << smarts - when /fminer#p_value/ - p_value = v - when /fminer#effect/ - effect = v - end - end - lazar.p_values[smarts] = p_value - lazar.effects[smarts] = effect - end - end - end - activities = {} - classification = true - training_activities.data.each do |compound,features| + # TODO: dataset method for iterating over data entries + training_features.data_entries.each do |compound,entry| + lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] + entry.keys.each do |feature| + # TODO fix URI + fminer_uri = File.join CONFIG[:services]["opentox-algorithm"], "fminer" + smarts = training_features.features[feature]["#{fminer_uri}#smarts"] + lazar.fingerprints[compound] << smarts + unless lazar.features.include? 
smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature]["#{fminer_uri}#p_value"] + lazar.effects[smarts] = training_features.features[feature]["#{fminer_uri}#effect"] + end + end + lazar.activities[compound] = [] unless lazar.activities[compound] - features.each do |feature| - case feature[params[:prediction_feature]].to_s + training_activities.data_entries[compound][params[:prediction_feature]].each do |value| + case value.to_s when "true" lazar.activities[compound] << true when "false" lazar.activities[compound] << false - # AM: handle quantitative activity values of features else - lazar.activities[compound] << feature[params[:prediction_feature]].to_f - classification = false + lazar.activities[compound] << value.to_f + lazar.type = "regression" end end - end - # TODO: insert regression - if classification - lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification" - else - lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression" - end + end + + lazar.metadata[OT.dependentVariables] = params[:prediction_feature] + lazar.metadata[OT.trainingDataset] = dataset_uri + lazar.metadata[OT.featureDataset] = feature_dataset_uri model_uri = lazar.save LOGGER.info model_uri + " created #{Time.now}" diff --git a/libfminer b/libfminer index 5a97d00..e955cc6 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 5a97d006e0ccfc48e53d5f24842a898ec9e912e2 +Subproject commit e955cc6b24d577d7187e5660716ee69d12174a8a diff --git a/smarts.rb b/smarts.rb index 2ea54d2..4ae6949 100644 --- a/smarts.rb +++ b/smarts.rb @@ -1,3 +1,3 @@ get '/match/compound/*/smarts/*/?' do - "#{OpenTox::Compound.new(:inchi => params[:splat][0]).match?(params[:splat][1])}" + "#{OpenTox::Compound.from_inchi(params[:splat][0]).match?(params[:splat][1])}" end -- cgit v1.2.3 From 397e5a2ba1913f1a66cbf85185a82d45ee942e47 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 10 Nov 2010 18:39:12 +0100 Subject: valid algorithm OWL-DL, documentation started --- application.rb | 5 +- fminer.rb | 119 ++++++++++++++++++++++---------- lazar.rb | 211 +++++++++++++++++++++++++-------------------------------- 3 files changed, 178 insertions(+), 157 deletions(-) diff --git a/application.rb b/application.rb index 8e0a573..867cf24 100644 --- a/application.rb +++ b/application.rb @@ -18,7 +18,10 @@ before do LOGGER.debug "Request: " + request.path end +# Get a list of available algorithms +# +# @return [text/uri-list] algorithm URIs get '/?' do response['Content-Type'] = 'text/uri-list' - [ url_for('/lazar', :full), url_for('/fminer', :full) ].join("\n") + "\n" + [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" end diff --git a/fminer.rb b/fminer.rb index 3ba3057..06b4e67 100644 --- a/fminer.rb +++ b/fminer.rb @@ -2,55 +2,85 @@ ENV['FMINER_SMARTS'] = 'true' ENV['FMINER_NO_AROMATIC'] = 'true' ENV['FMINER_PVALUES'] = 'true' +# Get list of fminer algorithms +# +# @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do + response['Content-Type'] = 'text/uri-list' + [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" +end - metadata = { - DC.title => 'fminer', - DC.identifier => url_for("",:full), +# Get RDF/XML representation of fminer bbrc algorithm +# +# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm +get "/fminer/bbrc/?" 
do + response['Content-Type'] = 'application/rdf+xml' + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full)) + algorithm.metadata = { + DC.title => 'fminer backbone refinement class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.PatternMiningSupervised + OT.isA => OTA.PatternMiningSupervised, + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + ] } + algorithm.to_rdfxml +end - parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" } - ] - - s = OpenTox::Serializer::Owl.new - s.add_algorithm(url_for('/fminer',:full),metadata,parameters) - response['Content-Type'] = 'application/rdf+xml' - s.to_rdfxml - +# Get RDF/XML representation of fminer last algorithm +# +# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm +get "/fminer/last/?" do + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full)) + algorithm.metadata = { + DC.title => 'fminer latent structure class representatives', + DC.creator => "andreas@maunz.de, helma@in-silico.ch", + DC.contributor => "vorgrimmlerdavid@gmx.de", + OT.isA => OTA.PatternMiningSupervised, + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + ] + } + algorithm.to_rdfxml end -#post '/fminer/?' do -['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default - post path do +# Run bbrc algorithm on dataset +# +# @param [URI] dataset_uri URI of the training dataset +# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional, Integer] min_frequency minimum frequency (defaults to 5) +# @return [text/uri-list] Task URI +post '/fminer/bbrc/?' do +#['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default + #post path do + # TODO: is this thread safe?? @@fminer = Bbrc::Bbrc.new - @@fminer.SetMinfreq(5) + minfreq = 5 unless minfreq = params[:min_frequency] + @@fminer.SetMinfreq(minfreq) @@fminer.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? 
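  # (Editor's note, not part of the original patch:) a hedged usage sketch of the optional
  # min_frequency parameter documented above for /fminer/bbrc; host and URIs are placeholders
  # and rest-client is an assumed client library, not something this patch introduces:
  #   RestClient.post("http://localhost:4567/fminer/bbrc",
  #     :dataset_uri => "http://localhost:4567/dataset/1",
  #     :prediction_feature => "http://localhost:4567/dataset/1/feature/activity",
  #     :min_frequency => 10)   # optional; the handler falls back to 5 when omitted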
prediction_feature = params[:prediction_feature] - training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}" - training_dataset.load_all + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) task_uri = OpenTox::Task.as_task("Mining BBRC features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ - DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title], + DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/bbrc',:full), OT.hasSource => url_for('/fminer/bbrc', :full), - }) - feature_dataset.add_parameters({ - "dataset_uri" => params[:dataset_uri], - "prediction_feature" => params[:prediction_feature] + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] }) feature_dataset.save @@ -63,7 +93,7 @@ end @@fminer.Reset training_dataset.data_entries.each do |compound,entry| begin - smiles = OpenTox::Compound.new(compound.to_s).smiles + smiles = OpenTox::Compound.new(compound.to_s).to_smiles rescue LOGGER.warn "No resource for #{compound.to_s}" next @@ -142,13 +172,19 @@ end feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s unless features.include? smarts features << smarts - # TODO insert correct ontology entries metadata = { - OT.hasSource => feature_dataset.uri, + OT.hasSource => url_for('/fminer/bbrc', :full), + OT.isA => OT.NominalFeature, OT.smarts => smarts, OT.p_value => p_value.to_f, - OT.effect => effect } + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + } feature_dataset.add_feature feature_uri, metadata + #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters end ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end @@ -159,8 +195,13 @@ end response['Content-Type'] = 'text/uri-list' halt 202,task_uri.to_s+"\n" end -end +#end +# Run last algorithm on a dataset +# +# @param [URI] dataset_uri URI of the training dataset +# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @return [text/uri-list] Task URI post '/fminer/last/?' do @@fminer = Last::Last.new @@ -179,13 +220,13 @@ post '/fminer/last/?' do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ - DC.title => "LAST representatives for " + training_dataset.metadata[DC.title], + DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/last',:full), OT.hasSource => url_for('/fminer/last', :full), - }) - feature_dataset.add_parameters({ - "dataset_uri" => params[:dataset_uri], - "prediction_feature" => params[:prediction_feature] + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] }) feature_dataset.save @@ -199,7 +240,7 @@ post '/fminer/last/?' 
do @@fminer.Reset training_dataset.data_entries.each do |compound,entry| begin - smiles = OpenTox::Compound.new(compound.to_s).smiles + smiles = OpenTox::Compound.new(compound.to_s).to_smiles rescue LOGGER.warn "No resource for #{compound.to_s}" next @@ -272,7 +313,11 @@ post '/fminer/last/?' do OT.hasSource => feature_dataset.uri, OT.smarts => smarts, OT.p_value => p_value.to_f, - OT.effect => effect + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] } feature_dataset.add_feature feature_uri, metadata end diff --git a/lazar.rb b/lazar.rb index 9fbc679..e1b9846 100644 --- a/lazar.rb +++ b/lazar.rb @@ -1,29 +1,35 @@ -get '/lazar/?' do +@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc") - metadata = { +# Get RDF/XML representation of the lazar algorithm +# +# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm +get '/lazar/?' do + response['Content-Type'] = 'application/rdf+xml' + algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full)) + algorithm.metadata = { DC.title => 'lazar', - DC.identifier => url_for("",:full), DC.creator => "helma@in-silico.ch, andreas@maunz.de", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.ClassificationLazySingleTarget + OT.isA => OTA.ClassificationLazySingleTarget, + OT.parameters => [ + { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, + { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, + { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" }, + { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" } + ] } - - parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" }, - { DC.description => "URI of feature genration service", OT.paramScope => "mandatory", OT.title => "feature_generation_uri" } - ] - - s = OpenTox::Serializer::Owl.new - s.add_algorithm(url_for('/lazar',:full),metadata,parameters) - response['Content-Type'] = 'application/rdf+xml' - s.to_rdfxml - + algorithm.to_rdfxml end -post '/lazar/?' do # create a model +# Create a lazar prediction model +# +# @ return [text/uri-list] Task URI +post '/lazar/?' do - dataset_uri = "#{params[:dataset_uri]}" + LOGGER.debug params.inspect + halt 404, "No dataset_uri parameter." unless params[:dataset_uri] + dataset_uri = params[:dataset_uri] begin training_activities = OpenTox::Dataset.new(dataset_uri) @@ -32,35 +38,77 @@ post '/lazar/?' do # create a model halt 404, "Dataset #{dataset_uri} not found (#{e.inspect})." end - halt 404, "No prediction_feature parameter." unless params[:prediction_feature] - halt 404, "No feature_generation_uri parameter." 
unless params[:feature_generation_uri] - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature]) + prediction_feature = params[:prediction_feature] + unless prediction_feature # try to read prediction_feature from dataset + halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 + prediction_feature = training_activities.features.keys.first + params[:prediction_feature] = prediction_feature + end + + feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] + + halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+ + training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) response['Content-Type'] = 'text/uri-list' task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task| - # create features - feature_dataset_uri = OpenTox::Algorithm::Fminer.new.run(params).to_s - - training_features = OpenTox::Dataset.new(feature_dataset_uri) + lazar = OpenTox::Model::Lazar.new + + if params[:feature_dataset_uri] + feature_dataset_uri = params[:feature_dataset_uri] + training_features = OpenTox::Dataset.new(feature_dataset_uri) + case training_features.feature_type + when "classification" + lazar.similarity_algorithm = "weighted_tanimoto" + when "regression" + lazar.similarity_algorithm = "weighted_euclid" + end + else # create features + params[:feature_generation_uri] = feature_generation_uri + if feature_generation_uri.match(/fminer/) + lazar.feature_calculation_algorithm = "substructure_match" + else + halt 404, "External feature generation services not yet supported" + end + feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s + training_features = OpenTox::Dataset.new(feature_dataset_uri) + end + training_features.load_all halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? - - lazar = OpenTox::Model::Lazar.new - # TODO: dataset method for iterating over data entries + # sorted features for index lookups + lazar.features = training_features.features.sort if training_features.feature_type == "regression" + training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] entry.keys.each do |feature| - # TODO fix URI - fminer_uri = File.join CONFIG[:services]["opentox-algorithm"], "fminer" - smarts = training_features.features[feature]["#{fminer_uri}#smarts"] - lazar.fingerprints[compound] << smarts - unless lazar.features.include? smarts - lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature]["#{fminer_uri}#p_value"] - lazar.effects[smarts] = training_features.features[feature]["#{fminer_uri}#effect"] + case training_features.feature_type + when "fminer" + # fingerprints are sets + smarts = training_features.features[feature][OT.smarts] + lazar.fingerprints[compound] << smarts + unless lazar.features.include? 
smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.p_value] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end + when "classification" + # fingerprints are sets + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP) + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + when "regression" + # fingerprints are arrays + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end end end @@ -73,7 +121,7 @@ post '/lazar/?' do # create a model lazar.activities[compound] << false else lazar.activities[compound] << value.to_f - lazar.type = "regression" + lazar.prediction_type = "regression" end end end @@ -81,88 +129,12 @@ post '/lazar/?' do # create a model lazar.metadata[OT.dependentVariables] = params[:prediction_feature] lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri - - model_uri = lazar.save - LOGGER.info model_uri + " created #{Time.now}" - model_uri - end - halt 202,task_uri -end - - -post '/property_lazar/?' do # create a model - - LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" - LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" - LOGGER.debug "Feature dataset: '" + params[:feature_dataset_uri].to_s + "'" - dataset_uri = "#{params[:dataset_uri]}" - - begin - training_activities = OpenTox::Dataset.find(dataset_uri) - rescue - halt 404, "Dataset #{dataset_uri} not found" - end - - halt 404, "No prediction_feature parameter." unless params[:prediction_feature] - halt 404, "No feature_dataset_uri parameter." unless params[:feature_dataset_uri] - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature]) - - response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/property_lazar',:full)) do |task| - - # create features - #LOGGER.debug "Starting fminer" - #params[:feature_uri] = params[:prediction_feature] - #fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params) - #fminer_task = OpenTox::Task.find(fminer_task_uri) - #fminer_task.wait_for_completion - #raise "fminer failed" unless fminer_task.completed? - - #LOGGER.debug "Fminer finished #{Time.now}" - feature_dataset_uri = params[:feature_dataset_uri] - training_features = OpenTox::Dataset.find(feature_dataset_uri) - halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? 
- lazar = OpenTox::Model::PropertyLazar.new - lazar.trainingDataset = dataset_uri - lazar.feature_dataset_uri = feature_dataset_uri - #halt 404, "More than one descriptor type" unless training_features.features.size == 1 - lazar.features = training_features.features - training_features.data.each do |compound,features| - lazar.properties[compound] = {} unless lazar.properties[compound] - LOGGER.debug features.inspect - if features - features.each do |f| - f.each do |name,value| - #lazar.features.each do |feature| - lazar.properties[compound][name] = value - #lazar.properties[compound] = features - end - end - end - end - activities = {} - classification = true - training_activities.data.each do |compound,features| - lazar.activities[compound] = [] unless lazar.activities[compound] - features.each do |feature| - case feature[params[:prediction_feature]].to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - else - lazar.activities[compound] << feature[params[:prediction_feature]].to_f - classification = false - end - end - end - if classification - lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification" - else - lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression" - end + lazar.parameters = { + "dataset_uri" => dataset_uri, + "prediction_feature" => prediction_feature, + "feature_generation_uri" => feature_generation_uri + } model_uri = lazar.save LOGGER.info model_uri + " created #{Time.now}" @@ -170,3 +142,4 @@ post '/property_lazar/?' do # create a model end halt 202,task_uri end + -- cgit v1.2.3 From 578f57d48df70dc677ceb65720831885c3af99f2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 19 Nov 2010 14:42:29 +0100 Subject: lazar predictions and toxcreate are working --- fminer.rb | 49 ++++++++++++++++++++++++++------------- lazar.rb | 79 +++++++++++++++++++++++++++++++++------------------------------ 2 files changed, 74 insertions(+), 54 deletions(-) diff --git a/fminer.rb b/fminer.rb index 06b4e67..3e39eda 100644 --- a/fminer.rb +++ b/fminer.rb @@ -11,7 +11,6 @@ get '/fminer/?' do end # Get RDF/XML representation of fminer bbrc algorithm -# # @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm get "/fminer/bbrc/?" do response['Content-Type'] = 'application/rdf+xml' @@ -23,14 +22,17 @@ get "/fminer/bbrc/?" do OT.isA => OTA.PatternMiningSupervised, OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, + { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, ] } algorithm.to_rdfxml end # Get RDF/XML representation of fminer last algorithm -# # @return [application/rdf+xml] OWL-DL representation of fminer last algorithm get "/fminer/last/?" 
do algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full)) @@ -41,7 +43,10 @@ get "/fminer/last/?" do OT.isA => OTA.PatternMiningSupervised, OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, ] } algorithm.to_rdfxml @@ -49,18 +54,23 @@ end # Run bbrc algorithm on dataset # -# @param [URI] dataset_uri URI of the training dataset -# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) -# @param [optional, Integer] min_frequency minimum frequency (defaults to 5) +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional] parameters BBRC parameters, accepted parameters are +# - minfreq Minimum frequency (default 5) +# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") +# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") +# - min_chisq_significance Significance threshold (between 0 and 1) # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do -#['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default - #post path do # TODO: is this thread safe?? @@fminer = Bbrc::Bbrc.new minfreq = 5 unless minfreq = params[:min_frequency] @@fminer.SetMinfreq(minfreq) + @@fminer.SetType(1) if params[:feature_type] == "paths" + @@fminer.SetBackbone(params[:backbone]) if params[:backbone] + @@fminer.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] @@fminer.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? @@ -70,7 +80,7 @@ post '/fminer/bbrc/?' do training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) - task_uri = OpenTox::Task.as_task("Mining BBRC features", url_for('/fminer',:full)) do + task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ @@ -193,19 +203,26 @@ post '/fminer/bbrc/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - halt 202,task_uri.to_s+"\n" + halt 202,task.uri.to_s+"\n" end #end # Run last algorithm on a dataset # -# @param [URI] dataset_uri URI of the training dataset -# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. 
dependent variable) +# @param [optional] parameters LAST parameters, accepted parameters are +# - minfreq Minimum frequency (default 5) +# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") +# - hops Maximum number of hops # @return [text/uri-list] Task URI post '/fminer/last/?' do @@fminer = Last::Last.new - @@fminer.SetMinfreq(5) + minfreq = 5 unless minfreq = params[:min_frequency] + @@fminer.SetMinfreq(minfreq) + @@fminer.SetType(1) if params[:feature_type] == "paths" + @@fminer.SetMaxHops(params[:hops]) if params[:hops] @@fminer.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? @@ -216,7 +233,7 @@ post '/fminer/last/?' do training_dataset.load_all halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) - task_uri = OpenTox::Task.as_task("Mining LAST features", url_for('/fminer',:full)) do + task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ @@ -327,5 +344,5 @@ post '/fminer/last/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - halt 202,task_uri.to_s+"\n" + halt 202,task.uri.to_s+"\n" end diff --git a/lazar.rb b/lazar.rb index e1b9846..c5a9259 100644 --- a/lazar.rb +++ b/lazar.rb @@ -1,7 +1,6 @@ @@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc") # Get RDF/XML representation of the lazar algorithm -# # @return [application/rdf+xml] OWL-DL representation of the lazar algorithm get '/lazar/?' do response['Content-Type'] = 'application/rdf+xml' @@ -23,20 +22,18 @@ get '/lazar/?' do end # Create a lazar prediction model -# +# @param [String] dataset_uri Training dataset URI +# @param [optional,String] prediction_feature URI of the feature to be predicted +# @param [optional,String] feature_generation_uri URI of the feature generation algorithm +# @param [optional] - further parameters for the feature generation service # @ return [text/uri-list] Task URI post '/lazar/?' do - LOGGER.debug params.inspect halt 404, "No dataset_uri parameter." unless params[:dataset_uri] dataset_uri = params[:dataset_uri] - begin - training_activities = OpenTox::Dataset.new(dataset_uri) - training_activities.load_all - rescue => e - halt 404, "Dataset #{dataset_uri} not found (#{e.inspect})." - end + halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) + training_activities.load_all prediction_feature = params[:prediction_feature] unless prediction_feature # try to read prediction_feature from dataset @@ -51,23 +48,24 @@ post '/lazar/?' 
do training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task| + task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| lazar = OpenTox::Model::Lazar.new + lazar.min_sim = params[:min_sim] if params[:min_sim] if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) case training_features.feature_type when "classification" - lazar.similarity_algorithm = "weighted_tanimoto" + lazar.similarity_algorithm = "Similarity.tanimoto" when "regression" - lazar.similarity_algorithm = "weighted_euclid" + lazar.similarity_algorithm = "Similarity.euclid" end else # create features params[:feature_generation_uri] = feature_generation_uri if feature_generation_uri.match(/fminer/) - lazar.feature_calculation_algorithm = "substructure_match" + lazar.feature_calculation_algorithm = "Substructure.match" else halt 404, "External feature generation services not yet supported" end @@ -82,11 +80,9 @@ post '/lazar/?' do lazar.features = training_features.features.sort if training_features.feature_type == "regression" training_features.data_entries.each do |compound,entry| - lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] + lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] entry.keys.each do |feature| - case training_features.feature_type - when "fminer" - # fingerprints are sets + if feature_generation_uri.match(/fminer/) smarts = training_features.features[feature][OT.smarts] lazar.fingerprints[compound] << smarts unless lazar.features.include? smarts @@ -94,20 +90,23 @@ post '/lazar/?' do lazar.p_values[smarts] = training_features.features[feature][OT.p_value] lazar.effects[smarts] = training_features.features[feature][OT.effect] end - when "classification" - # fingerprints are sets - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP) - lazar.features << feature unless lazar.features.include? feature - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - when "regression" - # fingerprints are arrays - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + else + case training_features.feature_type + when "classification" + # fingerprints are sets + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + when "regression" + # fingerprints are arrays + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end end end end @@ -120,26 +119,30 @@ post '/lazar/?' 
do when "false" lazar.activities[compound] << false else + halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0 lazar.activities[compound] << value.to_f - lazar.prediction_type = "regression" + lazar.prediction_algorithm = "Neighbors.local_svm_regression" end end end + lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}" + # TODO: fix dependentVariable lazar.metadata[OT.dependentVariables] = params[:prediction_feature] lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri + lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget - lazar.parameters = { - "dataset_uri" => dataset_uri, - "prediction_feature" => prediction_feature, - "feature_generation_uri" => feature_generation_uri - } + lazar.metadata[OT.parameters] = [ + {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, + {DC.title => "prediction_feature", OT.paramValue => prediction_feature}, + {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} + ] model_uri = lazar.save LOGGER.info model_uri + " created #{Time.now}" model_uri end - halt 202,task_uri + halt 202,task.uri end -- cgit v1.2.3 From 89398b94b7dd8bd99641af62844449c3030e3d7d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 19 Nov 2010 16:49:53 +0100 Subject: @@bbrc and @@last instead of @@fminer to avoid instance conflicts --- fminer.rb | 57 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/fminer.rb b/fminer.rb index 3e39eda..76d52e9 100644 --- a/fminer.rb +++ b/fminer.rb @@ -2,6 +2,9 @@ ENV['FMINER_SMARTS'] = 'true' ENV['FMINER_NO_AROMATIC'] = 'true' ENV['FMINER_PVALUES'] = 'true' +@@bbrc = Bbrc::Bbrc.new +@@last = Last::Last.new + # Get list of fminer algorithms # # @return [text/uri-list] URIs of fminer algorithms @@ -65,13 +68,13 @@ end post '/fminer/bbrc/?' do # TODO: is this thread safe?? - @@fminer = Bbrc::Bbrc.new + #@@bbrc = Bbrc::Bbrc.new minfreq = 5 unless minfreq = params[:min_frequency] - @@fminer.SetMinfreq(minfreq) - @@fminer.SetType(1) if params[:feature_type] == "paths" - @@fminer.SetBackbone(params[:backbone]) if params[:backbone] - @@fminer.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] - @@fminer.SetConsoleOut(false) + @@bbrc.SetMinfreq(minfreq) + @@bbrc.SetType(1) if params[:feature_type] == "paths" + @@bbrc.SetBackbone(params[:backbone]) if params[:backbone] + @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] + @@bbrc.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? @@ -100,7 +103,7 @@ post '/fminer/bbrc/?' do nr_inactive=0 all_activities = Hash.new# DV: for effect calculation in regression part - @@fminer.Reset + @@bbrc.Reset training_dataset.data_entries.each do |compound,entry| begin smiles = OpenTox::Compound.new(compound.to_s).to_smiles @@ -126,11 +129,11 @@ post '/fminer/bbrc/?' 
do activity = 0 else activity = value.to_f - @@fminer.SetRegression(true) + @@bbrc.SetRegression(true) end begin - @@fminer.AddCompound(smiles,id) - @@fminer.AddActivity(activity, id) + @@bbrc.AddCompound(smiles,id) + @@bbrc.AddActivity(activity, id) all_activities[id]=activity # DV: insert global information compounds[id] = compound id += 1 @@ -148,16 +151,16 @@ post '/fminer/bbrc/?' do raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 features = Set.new - # run @@fminer - (0 .. @@fminer.GetNoRootNodes()-1).each do |j| + # run @@bbrc + (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| - results = @@fminer.MineRoot(j) + results = @@bbrc.MineRoot(j) results.each do |result| f = YAML.load(result)[0] smarts = f[0] p_value = f[1] - if (!@@fminer.GetRegression) + if (!@@bbrc.GetRegression) ids = f[2] + f[3] if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) effect = 'activating' @@ -218,12 +221,12 @@ post '/fminer/bbrc/?' do # @return [text/uri-list] Task URI post '/fminer/last/?' do - @@fminer = Last::Last.new + #@@last = Last::Last.new minfreq = 5 unless minfreq = params[:min_frequency] - @@fminer.SetMinfreq(minfreq) - @@fminer.SetType(1) if params[:feature_type] == "paths" - @@fminer.SetMaxHops(params[:hops]) if params[:hops] - @@fminer.SetConsoleOut(false) + @@last.SetMinfreq(minfreq) + @@last.SetType(1) if params[:feature_type] == "paths" + @@last.SetMaxHops(params[:hops]) if params[:hops] + @@last.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? @@ -254,7 +257,7 @@ post '/fminer/last/?' do nr_inactive=0 all_activities = Hash.new# DV: for effect calculation in regression part - @@fminer.Reset + @@last.Reset training_dataset.data_entries.each do |compound,entry| begin smiles = OpenTox::Compound.new(compound.to_s).to_smiles @@ -280,11 +283,11 @@ post '/fminer/last/?' do activity = 0 else activity = value.to_f - @@fminer.SetRegression(true) + @@last.SetRegression(true) end begin - @@fminer.AddCompound(smiles,id) - @@fminer.AddActivity(activity, id) + @@last.AddCompound(smiles,id) + @@last.AddActivity(activity, id) all_activities[id]=activity # DV: insert global information compounds[id] = compound smi[id] = smiles # AM LAST: changed this to store SMILES. @@ -302,12 +305,12 @@ post '/fminer/last/?' do raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 - # run @@fminer + # run @@last features = Set.new xml = "" - (0 .. @@fminer.GetNoRootNodes()-1).each do |j| - results = @@fminer.MineRoot(j) + (0 .. @@last.GetNoRootNodes()-1).each do |j| + results = @@last.MineRoot(j) results.each do |result| xml << result end @@ -319,7 +322,7 @@ post '/fminer/last/?' do instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations instances.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - @@fminer.GetRegression() ? p_value = @@fminer.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@fminer.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test + @@last.GetRegression() ? 
p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test effect = (p_value > 0) ? "activating" : "deactivating" -- cgit v1.2.3 From 3583682a8c9921aea86bd53ed80153ae1d5fb02d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 22 Nov 2010 18:03:57 +0100 Subject: OT.p_value changed to OT.pValue --- fminer.rb | 7 ++++--- lazar.rb | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fminer.rb b/fminer.rb index 76d52e9..0f06584 100644 --- a/fminer.rb +++ b/fminer.rb @@ -187,9 +187,9 @@ post '/fminer/bbrc/?' do features << smarts metadata = { OT.hasSource => url_for('/fminer/bbrc', :full), - OT.isA => OT.NominalFeature, + OT.isA => OT.Substructure, OT.smarts => smarts, - OT.p_value => p_value.to_f, + OT.pValue => p_value.to_f, OT.effect => effect, OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, @@ -330,9 +330,10 @@ post '/fminer/last/?' do unless features.include? smarts features << smarts metadata = { + OT.isA => OT.Substructure, OT.hasSource => feature_dataset.uri, OT.smarts => smarts, - OT.p_value => p_value.to_f, + OT.pValue => p_value.to_f, OT.effect => effect, OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, diff --git a/lazar.rb b/lazar.rb index c5a9259..ee83da3 100644 --- a/lazar.rb +++ b/lazar.rb @@ -87,7 +87,7 @@ post '/lazar/?' do lazar.fingerprints[compound] << smarts unless lazar.features.include? smarts lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature][OT.p_value] + lazar.p_values[smarts] = training_features.features[feature][OT.pValue] lazar.effects[smarts] = training_features.features[feature][OT.effect] end else -- cgit v1.2.3 From 524b59bca0bebde1efdf0ad4a92528ccd51f81f1 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 24 Nov 2010 11:48:32 +0100 Subject: status 503 for rejected tasks --- fminer.rb | 2 ++ lazar.rb | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 0f06584..06d66fb 100644 --- a/fminer.rb +++ b/fminer.rb @@ -206,6 +206,7 @@ post '/fminer/bbrc/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" end #end @@ -348,5 +349,6 @@ post '/fminer/last/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" end diff --git a/lazar.rb b/lazar.rb index ee83da3..dffe47b 100644 --- a/lazar.rb +++ b/lazar.rb @@ -47,7 +47,6 @@ post '/lazar/?' do halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) - response['Content-Type'] = 'text/uri-list' task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| lazar = OpenTox::Model::Lazar.new @@ -143,6 +142,8 @@ post '/lazar/?' 
do LOGGER.info model_uri + " created #{Time.now}" model_uri end + response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" halt 202,task.uri end -- cgit v1.2.3 From 0423e0182ac4998c2b3542f9ccc678daa2bd776c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 24 Nov 2010 13:10:52 +0100 Subject: opentox-ruby-api-wrapper renamed to opentox-ruby --- application.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application.rb b/application.rb index 867cf24..15088d5 100644 --- a/application.rb +++ b/application.rb @@ -3,8 +3,8 @@ require 'rubygems' require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST -gem "opentox-ruby-api-wrapper", "= 1.6.6" -require 'opentox-ruby-api-wrapper' +gem "opentox-ruby", "~> 0" +require 'opentox-ruby' #require 'smarts.rb' #require 'similarity.rb' -- cgit v1.2.3 From 6cc34db9dfed2d49286d9c2a26fc6563c66b38a0 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 24 Nov 2010 14:43:20 +0100 Subject: opentox-ruby gem in config.ru --- Rakefile | 5 ++--- config.ru | 3 +-- last-utils | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Rakefile b/Rakefile index efadf01..d52f60d 100644 --- a/Rakefile +++ b/Rakefile @@ -1,7 +1,6 @@ require 'rubygems' require 'rake' -require 'opentox-ruby-api-wrapper' -#require 'tasks/opentox' +require 'opentox-ruby' namespace "fminer" do desc "Install required gems and fminer" @@ -40,7 +39,7 @@ namespace "fminer" do desc "Update gems and fminer" task :update do - puts `git submodule update` + puts `git submodule update --init` Dir.chdir('libfminer/libbbrc') puts `git checkout Makefile` puts `git pull` diff --git a/config.ru b/config.ru index 489932f..67d8493 100644 --- a/config.ru +++ b/config.ru @@ -1,5 +1,4 @@ require 'rubygems' -require 'opentox-ruby-api-wrapper' +require 'opentox-ruby' require 'config/config_ru' run Sinatra::Application - diff --git a/last-utils b/last-utils index d2ad4f2..9bba490 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit d2ad4f2bb82fdb5433d3f739400244ba89f07860 +Subproject commit 9bba490c02a5fea57d65b61e7f68e88ed72893c6 -- cgit v1.2.3 From e8b3984c34a193c0106d8e787c1bef0d3245cd43 Mon Sep 17 00:00:00 2001 From: mr Date: Thu, 2 Dec 2010 11:31:47 +0100 Subject: merge with helma/development --- .gitmodules | 3 + Rakefile | 101 ++++++++---- application.rb | 12 +- balancer.rb | 98 ++++++++++++ config.ru | 3 +- fminer.rb | 492 ++++++++++++++++++++++++++++++++++++++++----------------- last-utils | 1 + lazar.rb | 281 ++++++++++++++------------------ libfminer | 2 +- smarts.rb | 2 +- 10 files changed, 646 insertions(+), 349 deletions(-) create mode 100644 balancer.rb create mode 160000 last-utils diff --git a/.gitmodules b/.gitmodules index 3330d61..75218e9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "libfminer"] path = libfminer url = http://github.com/amaunz/fminer2.git +[submodule "last-utils"] + path = last-utils + url = git://github.com/amaunz/last-utils.git diff --git a/Rakefile b/Rakefile index 70139f4..d52f60d 100644 --- a/Rakefile +++ b/Rakefile @@ -1,44 +1,77 @@ require 'rubygems' require 'rake' 
-require 'opentox-ruby-api-wrapper' -#require 'tasks/opentox' +require 'opentox-ruby' namespace "fminer" do - desc "Install required gems and fminer" - task :install do - puts `git submodule init` - puts `git submodule update` - Dir.chdir('libfminer/libbbrc') - puts `git checkout master` - puts `git pull` - puts `./configure` - if $? == 0 - puts `echo "Fminer successfully configured."` - else - puts `echo "Fminer configuration failed!"` - exit - end - puts `make ruby` - end + desc "Install required gems and fminer" + task :install do + puts `git submodule init` + puts `git submodule update` + Dir.chdir('libfminer/libbbrc') + puts `git checkout master` + puts `git pull` + puts `./configure` + if $? == 0 + puts `echo "Fminer/LibBbrc successfully configured."` + else + puts `echo "Fminer/LibBbrc configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../liblast') + puts `git checkout master` + puts `git pull` + puts `./configure` + if $? == 0 + puts `echo "Fminer/LibLast successfully configured."` + else + puts `echo "Fminer/LibLast configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../../last-utils') + puts `git fetch` + # AM LAST: need branch 'experimental' until merged to master in last-utils + puts `git checkout -f -b experimental origin/experimental` + puts `git checkout experimental` + puts `git pull` + end - desc "Update gems and fminer" - task :update do - puts `git submodule update` - Dir.chdir('libfminer/libbbrc') - puts `git checkout master` - puts `git pull` - puts `./configure` - if $? == 0 - puts `echo "Fminer successfully configured."` - else - puts `echo "Fminer configuration failed!"` - exit - end - puts `make ruby` - end + desc "Update gems and fminer" + task :update do + puts `git submodule update --init` + Dir.chdir('libfminer/libbbrc') + puts `git checkout Makefile` + puts `git pull` + puts `./configure` + if $? == 0 + puts `echo "Fminer/LibBbrc successfully configured."` + else + puts `echo "Fminer/LibBbrc configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../liblast') + puts `git checkout Makefile` + puts `git pull` + puts `./configure` + if $? 
== 0 + puts `echo "Fminer/LibLast successfully configured."` + else + puts `echo "Fminer/LibLast configuration failed!"` + exit + end + puts `make ruby` + Dir.chdir('../../last-utils') + puts `git fetch` + # AM LAST: need branch 'experimental' until merged to master in last-utils + puts `git checkout -f -b experimental origin/experimental` + puts `git checkout experimental` + puts `git pull` + end end desc "Run tests" task :test do - load 'test/test.rb' + load 'test/test.rb' end diff --git a/application.rb b/application.rb index c0363d5..15088d5 100644 --- a/application.rb +++ b/application.rb @@ -1,7 +1,10 @@ require 'rubygems' +# AM LAST: can include both libs, no problems require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems -gem "opentox-ruby-api-wrapper", "= 1.6.5" -require 'opentox-ruby-api-wrapper' +require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems +require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST +gem "opentox-ruby", "~> 0" +require 'opentox-ruby' #require 'smarts.rb' #require 'similarity.rb' @@ -15,7 +18,10 @@ before do LOGGER.debug "Request: " + request.path end +# Get a list of available algorithms +# +# @return [text/uri-list] algorithm URIs get '/?' do response['Content-Type'] = 'text/uri-list' - [ url_for('/lazar', :full), url_for('/fminer', :full) ].join("\n") + "\n" + [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" end diff --git a/balancer.rb b/balancer.rb new file mode 100644 index 0000000..4ed2fd7 --- /dev/null +++ b/balancer.rb @@ -0,0 +1,98 @@ +# cuts a classification dataset into balanced pieces +# let inact_act_ratio := majority_class.size/minority_class.size +# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5 +# each piece contains the complete minority class and ceil(inact_act_ratio) majority class compounds. + +class Balancer + + attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets + + # Supply a OpenTox::Dataset here + # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given + def initialize(dataset, feature_uri, creator_url) + @act_arr = [] + @inact_arr = [] + @inact_act_ratio = 1.0/0 # trick to define +infinity + @nr_majority_splits = 1 # +/-1 means: no split + @split = [] # splitted arrays with ids + @datasets = [] # result datasets + @errors = [] + + classification = true + if dataset.features.include?(feature_uri) + dataset.data.each do |i,a| + inchi = i + acts = a + acts.each do |act| + value = act[feature_uri] + if OpenTox::Utils.is_true?(value) + @act_arr << inchi + elsif OpenTox::Utils.classification?(value) + @inact_arr << inchi + else + classification = false + break; + end + end + end + @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression + set_nr_majority_splits + # perform majority split + @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1 + @split.each do |s| + new_c = @nr_majority_splits > 0 ? 
s.concat(@act_arr) : s.concat(@inac_arr) + @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url) + end + + else + errors << "Feature not present in dataset." + end + errors << "Can not split regression dataset." unless classification + end + + + + # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values. + def set_nr_majority_splits + @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression + end + + # does the actual shuffle and split + def shuffle_split (arr) + arr = arr.shuffle + arr.chunk(@nr_majority_splits.abs) + end + + # turns a hash into a 2 col csv + def hsh2csv (hsh) + res="" + hsh.each do |k,v| + arr = [v,(@nr_majority_splits > 0 ? 0 : 1)] + res += arr.join(", ") + "\n" + end + res + end + +end + +class Array + + # cuts an array into chunks - returns a two-dimensional array + def chunk(pieces) + q, r = length.divmod(pieces) + (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \ + .map { |a, b| slice(a...b) } + end + + # shuffles the elements of an array + def shuffle( seed=nil ) + srand seed.to_i if seed + sort_by { Kernel.rand } + end + + # shuffels self + def shuffle!( seed=nil ) + self.replace shuffle( seed ) + end + +end diff --git a/config.ru b/config.ru index 489932f..67d8493 100644 --- a/config.ru +++ b/config.ru @@ -1,5 +1,4 @@ require 'rubygems' -require 'opentox-ruby-api-wrapper' +require 'opentox-ruby' require 'config/config_ru' run Sinatra::Application - diff --git a/fminer.rb b/fminer.rb index 867ea0f..e647600 100644 --- a/fminer.rb +++ b/fminer.rb @@ -1,156 +1,356 @@ ENV['FMINER_SMARTS'] = 'true' ENV['FMINER_NO_AROMATIC'] = 'true' ENV['FMINER_PVALUES'] = 'true' -@@fminer = Bbrc::Bbrc.new +@@bbrc = Bbrc::Bbrc.new +@@last = Last::Last.new + +# Get list of fminer algorithms +# +# @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do - owl = OpenTox::Owl.create 'Algorithm', url_for('/fminer',:full) - owl.set 'title',"fminer" - owl.set 'creator',"http://github.com/amaunz/fminer2" - owl.parameters = { - "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri" }, - "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" } - } - rdf = owl.rdf - File.open('public/fminer.owl', 'w') {|f| f.print rdf} + response['Content-Type'] = 'text/uri-list' + [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" +end + +# Get RDF/XML representation of fminer bbrc algorithm +# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm +get "/fminer/bbrc/?" 
do response['Content-Type'] = 'application/rdf+xml' - rdf + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full)) + algorithm.metadata = { + DC.title => 'fminer backbone refinement class representatives', + DC.creator => "andreas@maunz.de, helma@in-silico.ch", + DC.contributor => "vorgrimmlerdavid@gmx.de", + OT.isA => OTA.PatternMiningSupervised, + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, + { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, + ] + } + algorithm.to_rdfxml +end + +# Get RDF/XML representation of fminer last algorithm +# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm +get "/fminer/last/?" do + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full)) + algorithm.metadata = { + DC.title => 'fminer latent structure class representatives', + DC.creator => "andreas@maunz.de, helma@in-silico.ch", + DC.contributor => "vorgrimmlerdavid@gmx.de", + OT.isA => OTA.PatternMiningSupervised, + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, + ] + } + algorithm.to_rdfxml end -post '/fminer/?' do +# Run bbrc algorithm on dataset +# +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional] parameters BBRC parameters, accepted parameters are +# - minfreq Minimum frequency (default 5) +# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") +# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") +# - min_chisq_significance Significance threshold (between 0 and 1) +# @return [text/uri-list] Task URI +post '/fminer/bbrc/?' do - halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil? - LOGGER.debug "Dataset: " + params[:dataset_uri] - LOGGER.debug "Endpoint: " + params[:feature_uri] - feature_uri = params[:feature_uri] - begin - LOGGER.debug "Retrieving #{params[:dataset_uri]}" - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" - rescue - LOGGER.error "Dataset #{params[:dataset_uri]} not found" - halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil? 
- end - halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri]) - - task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer',:full)) do - - feature_dataset = OpenTox::Dataset.new - title = "BBRC representatives for " + training_dataset.title - feature_dataset.title = title - feature_dataset.creator = url_for('/fminer',:full) - feature_dataset.token_id = params[:token_id] if params[:token_id] - feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"] - - bbrc_uri = url_for("/fminer#BBRC_representative",:full) - feature_dataset.features << bbrc_uri - - id = 1 # fminer start id is not 0 - compounds = [] - - g_hash = Hash.new# DV: for effect calculation in regression part - @@fminer.Reset - #@@fminer.SetChisqSig(0.99) - LOGGER.debug "Fminer: initialising ..." - training_dataset.data.each do |c,features| - begin - smiles = OpenTox::Compound.new(:uri => c.to_s).smiles - rescue - LOGGER.warn "No resource for #{c.to_s}" - next - end - if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{c.to_s}." - else - feature_dataset.compounds << c.to_s - features.each do |feature| - act = feature[feature_uri] - if act.nil? - LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}." - else - case act.to_s - when "true" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s - activity = 1 - when "false" - #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s - activity = 0 - else - # AM: add quantitative activity - activity = act.to_f - @@fminer.SetRegression(true) - end - compounds[id] = c.to_s - begin - @@fminer.AddCompound(smiles,id) - @@fminer.AddActivity(activity, id) - g_hash[id]=activity # DV: insert global information - rescue - LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer" - end - end - end - id += 1 - end - end - g_array=g_hash.values # DV: calculation of global median for effect calculation - g_median=OpenTox::Utils.median(g_array) - minfreq = (0.02*id).round - @@fminer.SetMinfreq(minfreq) - LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}" - - raise "no compounds" if compounds.size==0 - - values = {} - # run @@fminer - LOGGER.debug "Fminer: mining ..." - (0 .. @@fminer.GetNoRootNodes()-1).each do |j| - results = @@fminer.MineRoot(j) - results.each do |result| - f = YAML.load(result)[0] - smarts = f[0] - p_value = f[1] - # AM: f[3] missing on regression - if (!@@fminer.GetRegression) - ids = f[2] + f[3] - if f[2].size > f[3].size - effect = 'activating' - else - effect = 'deactivating' - end - else #regression part - ids = f[2] - # DV: effect calculation - f_arr=Array.new - f[2].each do |id| - f_arr.push(g_hash[id]) - end - f_median=OpenTox::Utils.median(f_arr) - if g_median >= f_median - effect = 'activating' - else - effect = 'deactivating' - end - end - - tuple = { - url_for('/fminer#smarts',:full) => smarts, - url_for('/fminer#p_value',:full) => p_value.to_f, - url_for('/fminer#effect',:full) => effect - } - #LOGGER.debug "#{f[0]}\t#{f[1]}\t#{effect}" - ids.each do |id| - feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]] - feature_dataset.data[compounds[id]] << {bbrc_uri => tuple} - end - end - end - - uri = feature_dataset.save - LOGGER.debug "Fminer finished, dataset #{uri} created." 
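A minimal standalone illustration, separate from the patch text, of two idioms the rewritten BBRC handler below relies on: the assignment-in-condition default for the minimum frequency, and the encoding of dataset values as fminer activities. The params hash and its values here are invented for the example.

params = { :min_frequency => nil }               # hypothetical request parameters
minfreq = 5 unless minfreq = params[:min_frequency]
puts minfreq                                     # => 5, falls back to the default

def encode_activity(value)
  case value.to_s
  when "true"  then 1        # classification: active
  when "false" then 0        # classification: inactive
  else value.to_f            # numeric value: fminer is switched to regression mode
  end
end
puts encode_activity("true")   # => 1
puts encode_activity("0.35")   # => 0.35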
- uri - end - LOGGER.debug "Fimer task started: "+task_uri.to_s - response['Content-Type'] = 'text/uri-list' - halt 202,task_uri.to_s+"\n" + # TODO: is this thread safe?? + #@@bbrc = Bbrc::Bbrc.new + minfreq = 5 unless minfreq = params[:min_frequency] + @@bbrc.SetMinfreq(minfreq) + @@bbrc.SetType(1) if params[:feature_type] == "paths" + @@bbrc.SetBackbone(params[:backbone]) if params[:backbone] + @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] + @@bbrc.SetConsoleOut(false) + + halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? + prediction_feature = params[:prediction_feature] + + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" + halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) + + task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do + + feature_dataset = OpenTox::Dataset.new + feature_dataset.add_metadata({ + DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s, + DC.creator => url_for('/fminer/bbrc',:full), + OT.hasSource => url_for('/fminer/bbrc', :full), + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + }) + feature_dataset.token_id = params[:token_id] if params[:token_id] + feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"] + feature_dataset.save + + id = 1 # fminer start id is not 0 + compounds = [] + nr_active=0 + nr_inactive=0 + all_activities = Hash.new# DV: for effect calculation in regression part + + @@bbrc.Reset + training_dataset.data_entries.each do |compound,entry| + begin + smiles = OpenTox::Compound.new(compound.to_s).to_smiles + rescue + LOGGER.warn "No resource for #{compound.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{compound.to_s}." + next + end + entry.each do |feature,values| + values.each do |value| + if value.nil? + LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + else + case value.to_s + when "true" + nr_active += 1 + activity = 1 + when "false" + nr_inactive += 1 + activity = 0 + else + activity = value.to_f + @@bbrc.SetRegression(true) + end + begin + @@bbrc.AddCompound(smiles,id) + @@bbrc.AddActivity(activity, id) + all_activities[id]=activity # DV: insert global information + compounds[id] = compound + id += 1 + rescue + LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" + end + end + end + end + end + + g_array=all_activities.values # DV: calculation of global median for effect calculation + g_median=OpenTox::Algorithm.median(g_array) + + raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 + + features = Set.new + # run @@bbrc + (0 .. 
@@bbrc.GetNoRootNodes()-1).each do |j| + + results = @@bbrc.MineRoot(j) + results.each do |result| + f = YAML.load(result)[0] + smarts = f[0] + p_value = f[1] + + if (!@@bbrc.GetRegression) + ids = f[2] + f[3] + if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) + effect = 'activating' + else + effect = 'deactivating' + end + else #regression part + ids = f[2] + # DV: effect calculation + f_arr=Array.new + f[2].each do |id| + f_arr.push(all_activities[id]) + end + f_median=OpenTox::Algorithm.median(f_arr) + if g_median >= f_median + effect = 'activating' + else + effect = 'deactivating' + end + end + + feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s + unless features.include? smarts + features << smarts + metadata = { + OT.hasSource => url_for('/fminer/bbrc', :full), + OT.isA => OT.Substructure, + OT.smarts => smarts, + OT.pValue => p_value.to_f, + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + } + feature_dataset.add_feature feature_uri, metadata + #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters + end + ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + end + end + feature_dataset.save + feature_dataset.uri + end + response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" + end +#end + +# Run last algorithm on a dataset +# +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional] parameters LAST parameters, accepted parameters are +# - minfreq Minimum frequency (default 5) +# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") +# - hops Maximum number of hops +# @return [text/uri-list] Task URI +post '/fminer/last/?' do + + #@@last = Last::Last.new + minfreq = 5 unless minfreq = params[:min_frequency] + @@last.SetMinfreq(minfreq) + @@last.SetType(1) if params[:feature_type] == "paths" + @@last.SetMaxHops(params[:hops]) if params[:hops] + @@last.SetConsoleOut(false) + + halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? 
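To make the effect heuristic in the BBRC hunk above concrete: a subgraph counts as "activating" when the fraction of actives among its matching compounds exceeds the fraction of actives in the whole training set, otherwise as "deactivating". A tiny standalone sketch with invented counts:

nr_active    = 40      # actives in the whole training set
nr_inactive  = 60
ids_active   = 12      # f[2]: matching compounds that are active
ids_inactive = 3       # f[3]: matching compounds that are inactive
ids = ids_active + ids_inactive

if ids_active.to_f / ids > nr_active.to_f / (nr_active + nr_inactive)
  effect = 'activating'    # 0.8 > 0.4, the substructure is enriched in actives
else
  effect = 'deactivating'
end
puts effect                # => "activating"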
+ prediction_feature = params[:prediction_feature] + + training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}" + training_dataset.load_all + halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) + + task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do + + feature_dataset = OpenTox::Dataset.new + feature_dataset.add_metadata({ + DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s, + DC.creator => url_for('/fminer/last',:full), + OT.hasSource => url_for('/fminer/last', :full), + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + }) + feature_dataset.save + + id = 1 # fminer start id is not 0 + compounds = [] + smi = [] # AM LAST: needed for matching the patterns back + nr_active=0 + nr_inactive=0 + all_activities = Hash.new# DV: for effect calculation in regression part + + @@last.Reset + training_dataset.data_entries.each do |compound,entry| + begin + smiles = OpenTox::Compound.new(compound.to_s).to_smiles + rescue + LOGGER.warn "No resource for #{compound.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{compound.to_s}." + next + end + entry.each do |feature,values| + values.each do |value| + if value.nil? + LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + else + case value.to_s + when "true" + nr_active += 1 + activity = 1 + when "false" + nr_inactive += 1 + activity = 0 + else + activity = value.to_f + @@last.SetRegression(true) + end + begin + @@last.AddCompound(smiles,id) + @@last.AddActivity(activity, id) + all_activities[id]=activity # DV: insert global information + compounds[id] = compound + smi[id] = smiles # AM LAST: changed this to store SMILES. + id += 1 + rescue + LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" + end + end + end + end + end + + g_array=all_activities.values # DV: calculation of global median for effect calculation + g_median=OpenTox::Algorithm.median(g_array) + + raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 + + # run @@last + features = Set.new + xml = "" + + (0 .. @@last.GetNoRootNodes()-1).each do |j| + results = @@last.MineRoot(j) + results.each do |result| + xml << result + end + end + + lu = LU.new # AM LAST: uses last-utils here + dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!) + smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) + instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations + instances.each do |smarts, ids| + feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax + @@last.GetRegression() ? p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test + + + effect = (p_value > 0) ? "activating" : "deactivating" + feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s + unless features.include? 
smarts + features << smarts + metadata = { + OT.isA => OT.Substructure, + OT.hasSource => feature_dataset.uri, + OT.smarts => smarts, + OT.pValue => p_value.to_f, + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + } + feature_dataset.add_feature feature_uri, metadata + end + ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + end + feature_dataset.save + feature_dataset.uri + end + response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" end diff --git a/last-utils b/last-utils new file mode 160000 index 0000000..9bba490 --- /dev/null +++ b/last-utils @@ -0,0 +1 @@ +Subproject commit 9bba490c02a5fea57d65b61e7f68e88ed72893c6 diff --git a/lazar.rb b/lazar.rb index 649c190..8056c89 100644 --- a/lazar.rb +++ b/lazar.rb @@ -1,194 +1,151 @@ +@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc") + +# Get RDF/XML representation of the lazar algorithm +# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm get '/lazar/?' do - owl = OpenTox::Owl.create 'Algorithm', url_for('/lazar',:full) - owl.set 'title',"lazar" - owl.set 'creator',"http://github.com/helma/opentox-algorithm" - owl.parameters = { - "Dataset URI" => - { :scope => "mandatory", :value => "dataset_uri" }, - "Feature URI for dependent variable" => - { :scope => "mandatory", :value => "prediction_feature" }, - "Feature generation URI" => - { :scope => "mandatory", :value => "feature_generation_uri" } - } - rdf = owl.rdf - File.open('public/lazar.owl', 'w') {|f| f.print rdf} response['Content-Type'] = 'application/rdf+xml' - rdf + algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full)) + algorithm.metadata = { + DC.title => 'lazar', + DC.creator => "helma@in-silico.ch, andreas@maunz.de", + DC.contributor => "vorgrimmlerdavid@gmx.de", + OT.isA => OTA.ClassificationLazySingleTarget, + OT.parameters => [ + { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, + { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, + { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" }, + { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" } + ] + } + algorithm.to_rdfxml end -post '/lazar/?' do # create a model +# Create a lazar prediction model +# @param [String] dataset_uri Training dataset URI +# @param [optional,String] prediction_feature URI of the feature to be predicted +# @param [optional,String] feature_generation_uri URI of the feature generation algorithm +# @param [optional] - further parameters for the feature generation service +# @ return [text/uri-list] Task URI +post '/lazar/?' 
do - LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" - LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" - LOGGER.debug "Feature generation: '" + params[:feature_generation_uri].to_s + "'" - LOGGER.debug "Token ID: #{params[:token_id]}" - dataset_uri = "#{params[:dataset_uri]}" + halt 404, "No dataset_uri parameter." unless params[:dataset_uri] + dataset_uri = params[:dataset_uri] - begin - training_activities = OpenTox::Dataset.find(dataset_uri) - rescue - halt 404, "Dataset #{dataset_uri} not found" + halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) + training_activities.load_all + + prediction_feature = params[:prediction_feature] + unless prediction_feature # try to read prediction_feature from dataset + halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 + prediction_feature = training_activities.features.keys.first + params[:prediction_feature] = prediction_feature end - halt 404, "No prediction_feature parameter." unless params[:prediction_feature] - halt 404, "No feature_generation_uri parameter." unless params[:feature_generation_uri] - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature]) + feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] + + halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+ + training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) + + task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| - response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task| - - # create features - LOGGER.debug "Starting fminer" - params[:feature_uri] = params[:prediction_feature] - fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params) - fminer_task = OpenTox::Task.find(fminer_task_uri) - fminer_task.wait_for_completion - raise "fminer failed" unless fminer_task.completed? - - LOGGER.debug "Fminer finished #{Time.now}" - feature_dataset_uri = fminer_task.resultURI.to_s - training_features = OpenTox::Dataset.find(feature_dataset_uri) - halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? 
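The parameter list of the rewritten POST /lazar route above allows descriptors to be supplied in two ways; the following is only a sketch of the corresponding request parameters, with placeholder URIs and feature names.

params_with_feature_generation = {
  :dataset_uri        => "http://example.org/dataset/1",                   # placeholder
  :prediction_feature => "http://example.org/dataset/1/feature/endpoint"   # placeholder
  # no feature_generation_uri: the route falls back to @@feature_generation_default (fminer/bbrc)
}

params_with_precomputed_features = {
  :dataset_uri         => "http://example.org/dataset/1",                  # placeholder
  :prediction_feature  => "http://example.org/dataset/1/feature/endpoint", # placeholder
  :feature_dataset_uri => "http://example.org/dataset/2"                   # skips feature generation
}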
lazar = OpenTox::Model::Lazar.new lazar.token_id = params[:token_id] if params[:token_id] lazar.token_id = request.env["HTTP_TOKEN_ID"] if !lazar.token_id and request.env["HTTP_TOKEN_ID"] - lazar.trainingDataset = dataset_uri - lazar.feature_dataset_uri = feature_dataset_uri - halt 404, "More than one descriptor type" unless training_features.features.size == 1 - bbrc = training_features.features.first - training_features.data.each do |compound,features| - lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] - features.each do |feature| - tuple = feature[bbrc] - if tuple - smarts =nil; p_value = nil; effect = nil - tuple.each do |k,v| - case k - when /fminer#smarts/ - smarts = v - lazar.features << smarts - lazar.fingerprints[compound] << smarts - when /fminer#p_value/ - p_value = v - when /fminer#effect/ - effect = v - end - end - lazar.p_values[smarts] = p_value - lazar.effects[smarts] = effect - end - end - end - - activities = {} - classification = true - training_activities.data.each do |compound,features| - lazar.activities[compound] = [] unless lazar.activities[compound] - features.each do |feature| - case feature[params[:prediction_feature]].to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - # AM: handle quantitative activity values of features - else - lazar.activities[compound] << feature[params[:prediction_feature]].to_f - classification = false - end - end - end - # TODO: insert regression - if classification - lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification" - else - lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression" - end - - model_uri = lazar.save - LOGGER.info model_uri + " created #{Time.now}" - model_uri - end - halt 202,task_uri -end - -post '/property_lazar/?' do # create a model + lazar.min_sim = params[:min_sim] if params[:min_sim] - LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" - LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" - LOGGER.debug "Feature dataset: '" + params[:feature_dataset_uri].to_s + "'" - dataset_uri = "#{params[:dataset_uri]}" + if params[:feature_dataset_uri] + feature_dataset_uri = params[:feature_dataset_uri] + training_features = OpenTox::Dataset.new(feature_dataset_uri) + case training_features.feature_type + when "classification" + lazar.similarity_algorithm = "Similarity.tanimoto" + when "regression" + lazar.similarity_algorithm = "Similarity.euclid" + end + else # create features + params[:feature_generation_uri] = feature_generation_uri + if feature_generation_uri.match(/fminer/) + lazar.feature_calculation_algorithm = "Substructure.match" + else + halt 404, "External feature generation services not yet supported" + end + feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s + training_features = OpenTox::Dataset.new(feature_dataset_uri) + end - begin - training_activities = OpenTox::Dataset.find(dataset_uri) - rescue - halt 404, "Dataset #{dataset_uri} not found" - end + training_features.load_all + halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? - halt 404, "No prediction_feature parameter." unless params[:prediction_feature] - halt 404, "No feature_dataset_uri parameter." unless params[:feature_dataset_uri] - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. 
(features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature]) + # sorted features for index lookups + lazar.features = training_features.features.sort if training_features.feature_type == "regression" - response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/property_lazar',:full)) do |task| - - # create features - #LOGGER.debug "Starting fminer" - #params[:feature_uri] = params[:prediction_feature] - #fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params) - #fminer_task = OpenTox::Task.find(fminer_task_uri) - #fminer_task.wait_for_completion - #raise "fminer failed" unless fminer_task.completed? - - #LOGGER.debug "Fminer finished #{Time.now}" - feature_dataset_uri = params[:feature_dataset_uri] - training_features = OpenTox::Dataset.find(feature_dataset_uri) - halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? - lazar = OpenTox::Model::PropertyLazar.new - lazar.trainingDataset = dataset_uri - lazar.feature_dataset_uri = feature_dataset_uri - #halt 404, "More than one descriptor type" unless training_features.features.size == 1 - lazar.features = training_features.features - training_features.data.each do |compound,features| - lazar.properties[compound] = {} unless lazar.properties[compound] - LOGGER.debug features.inspect - if features - features.each do |f| - f.each do |name,value| - #lazar.features.each do |feature| - lazar.properties[compound][name] = value - #lazar.properties[compound] = features + training_features.data_entries.each do |compound,entry| + lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] + entry.keys.each do |feature| + if feature_generation_uri.match(/fminer/) + smarts = training_features.features[feature][OT.smarts] + lazar.fingerprints[compound] << smarts + unless lazar.features.include? smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.pValue] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end + else + case training_features.feature_type + when "classification" + # fingerprints are sets + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + when "regression" + # fingerprints are arrays + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + end end end - end - end - - activities = {} - classification = true - training_activities.data.each do |compound,features| + lazar.activities[compound] = [] unless lazar.activities[compound] - features.each do |feature| - case feature[params[:prediction_feature]].to_s + training_activities.data_entries[compound][params[:prediction_feature]].each do |value| + case value.to_s when "true" lazar.activities[compound] << true when "false" lazar.activities[compound] << false else - lazar.activities[compound] << feature[params[:prediction_feature]].to_f - classification = false + halt 404, "0 values not allowed in training dataset. log10 is calculated internally." 
if value.to_f == 0 + lazar.activities[compound] << value.to_f + lazar.prediction_algorithm = "Neighbors.local_svm_regression" end end - end - if classification - lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification" - else - lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression" - end + end + + lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}" + # TODO: fix dependentVariable + lazar.metadata[OT.dependentVariables] = params[:prediction_feature] + lazar.metadata[OT.trainingDataset] = dataset_uri + lazar.metadata[OT.featureDataset] = feature_dataset_uri + lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget + + lazar.metadata[OT.parameters] = [ + {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, + {DC.title => "prediction_feature", OT.paramValue => prediction_feature}, + {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} + ] model_uri = lazar.save LOGGER.info model_uri + " created #{Time.now}" model_uri end - halt 202,task_uri + response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri end + diff --git a/libfminer b/libfminer index 5a97d00..e0eee43 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 5a97d006e0ccfc48e53d5f24842a898ec9e912e2 +Subproject commit e0eee431ecb954328ff64e3cc48840c7003a2769 diff --git a/smarts.rb b/smarts.rb index 2ea54d2..4ae6949 100644 --- a/smarts.rb +++ b/smarts.rb @@ -1,3 +1,3 @@ get '/match/compound/*/smarts/*/?' do - "#{OpenTox::Compound.new(:inchi => params[:splat][0]).match?(params[:splat][1])}" + "#{OpenTox::Compound.from_inchi(params[:splat][0]).match?(params[:splat][1])}" end -- cgit v1.2.3 From b6df3d3f4c58e9dec6f7fedb2a9498f7443a8a03 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 3 Dec 2010 14:34:25 +0100 Subject: OpenBabel descriptors added --- fminer.rb | 2 -- libfminer | 2 +- openbabel.rb | 65 +++++++++++++++++++++++++++++++++++++----------------------- 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/fminer.rb b/fminer.rb index 06d66fb..df91701 100644 --- a/fminer.rb +++ b/fminer.rb @@ -6,7 +6,6 @@ ENV['FMINER_PVALUES'] = 'true' @@last = Last::Last.new # Get list of fminer algorithms -# # @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do response['Content-Type'] = 'text/uri-list' @@ -56,7 +55,6 @@ get "/fminer/last/?" do end # Run bbrc algorithm on dataset -# # @param [String] dataset_uri URI of the training dataset # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) # @param [optional] parameters BBRC parameters, accepted parameters are diff --git a/libfminer b/libfminer index e0eee43..6e35cac 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit e0eee431ecb954328ff64e3cc48840c7003a2769 +Subproject commit 6e35cacd8da45f21c4039591d339a74af9a44a6d diff --git a/openbabel.rb b/openbabel.rb index a261866..a5a8841 100644 --- a/openbabel.rb +++ b/openbabel.rb @@ -1,28 +1,43 @@ -get '/openbabel/:smiles/:property/?' 
do +# Calculate OpenBabel descriptors +# Supports the following OpenBabel methods (see OpenBabel API http://openbabel.org/api/2.2.0/) +# - NumAtoms Number of atoms +# - NumBonds Number of bonds +# - NumHvyAtoms Number of heavy atoms +# - NumResidues Number of residues +# - NumRotors Number of rotatable bonds +# - GetFormula Stochoimetric formula +# - GetEnergy Heat of formation for this molecule (in kcal/mol) +# - GetMolWt Standard molar mass given by IUPAC atomic masses (amu) +# - GetExactMass Mass given by isotopes (or most abundant isotope, if not specified) +# - GetTotalCharge Total charge +# - HBA1 Number of Hydrogen Bond Acceptors 1 (JoelLib) +# - HBA2 Number of Hydrogen Bond Acceptors 2 (JoelLib) +# - HBD Number of Hydrogen Bond Donors (JoelLib) +# - L5 Lipinski Rule of Five +# - logP Octanol/water partition coefficient +# - MR Molar refractivity +# - MW Molecular Weight +# - nF Number of Fluorine Atoms +# - nHal Number of halogen atoms +# - spinMult Total Spin Multiplicity +# - TPSA Topological polar surface area +# @param [URI] compound_uri Compound URI +# @return [Sting] descriptor value +post '/openbabel/:property' do obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new - obconversion.set_in_and_out_formats 'smi', 'can' - case params[:property] - when 'logP' - #logP = OpenBabel::OBLogP.new - #logP.predict(obmol) - "not yet implemented" - when 'psa' - #psa = OpenBabel::OBPSA.new - "not yet implemented" - when 'mr' - #mr = OpenBabel::OBMR.new - "not yet implemented" - else - begin - obconversion.read_string obmol, params[:smiles] - rescue - halt 404, "Incorrect Smiles string #{params[:smiles]}" - end - begin - eval("obmol.#{params[:property]}").to_s - rescue - halt 404, "Could not calculate property #{params[:property]}" - end - end + compound = OpenTox::Compound.new params[:compound_uri] + obconversion.set_in_and_out_formats 'inchi', 'can' + obconversion.read_string obmol, compound.to_inchi + obmol_methods = ["num_atoms", "num_bonds", "num_hvy_atoms", "num_residues", "num_rotors", "get_formula", "get_energy", "get_mol_wt", "get_exact_mass", "get_total_charge", "get_total_spin_multiplicity"] + + descriptor_methods = [ "HBA1", "HBA2", "HBD", "L5", "logP", "MR", "MW", "nF", "nHal", "spinMult", "TPSA" ] + if obmol_methods.include? params[:property].underscore + eval("obmol.#{params[:property].underscore}").to_s + elsif descriptor_methods.include? params[:property] + descriptor = OpenBabel::OBDescriptor.find_type(params[:property]) + descriptor.predict(obmol).to_s + else + halt 404, "Cannot calculate property #{params[:property]} with OpenBabel" + end end -- cgit v1.2.3 From 27e449c9fc43ca97fed0c090316e6e1e4305229a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 3 Dec 2010 16:47:57 +0100 Subject: batch prediction of openbabel parameters added --- fminer.rb | 3 -- openbabel.rb | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 120 insertions(+), 18 deletions(-) diff --git a/fminer.rb b/fminer.rb index df91701..d74f183 100644 --- a/fminer.rb +++ b/fminer.rb @@ -195,7 +195,6 @@ post '/fminer/bbrc/?' do ] } feature_dataset.add_feature feature_uri, metadata - #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters end ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end @@ -207,10 +206,8 @@ post '/fminer/bbrc/?' 
do halt 503,task.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" end -#end # Run last algorithm on a dataset -# # @param [String] dataset_uri URI of the training dataset # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) # @param [optional] parameters LAST parameters, accepted parameters are diff --git a/openbabel.rb b/openbabel.rb index a5a8841..3a873c0 100644 --- a/openbabel.rb +++ b/openbabel.rb @@ -1,3 +1,58 @@ +OBMOL_METHODS = { + "NumAtoms" => "Number of atoms", + "NumBonds" => "Number of bonds", + "NumHvyAtoms" => "Number of heavy atoms", + "NumResidues" => "Number of residues", + "NumRotors" => "Number of rotatable bonds", + "GetEnergy" => "Heat of formation for this molecule (in kcal/mol)", + "GetMolWt" => "Standard molar mass given by IUPAC atomic masses (amu)", + "GetExactMass" => "Mass given by isotopes (or most abundant isotope, if not specified)", + "GetTotalCharge" => "Total charge", +} + +OBDESCRIPTOR_METHODS = { + "HBA1" => "Number of hydrogen bond acceptors 1 (JoelLib)", + "HBA2" => "Number of hydrogen bond acceptors 2 (JoelLib)", + "HBD" => "Number of hydrogen bond donors (JoelLib)", + "L5" => "Lipinski rule of five", + "logP" => "Octanol/water partition coefficient", + "MR" => "Molar refractivity", + "MW" => "Molecular weight", + "nF" => "Number of fluorine atoms", + "nHal" => "Number of halogen atoms", + "spinMult" => "Total spin multiplicity", + "TPSA" => "Topological polar surface area", +} + +# Get a list of OpenBabel algorithms +# @return [text/uri-list] URIs of OpenBabel algorithms +get '/openbabel' do + algorithms = OBMOL_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)} + algorithms << OBDESCRIPTOR_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)} + response['Content-Type'] = 'text/uri-list' + algorithms.join("\n") +end + +# Get RDF/XML representation of OpenBabel algorithm +# @return [application/rdf+xml] OWL-DL representation of OpenBabel algorithm +get '/openbabel/:property' do + description = OBMOL_METHODS[params[:property]] if OBMOL_METHODS.include? params[:property] + description = OBDESCRIPTOR_METHODS[params[:property]] if OBDESCRIPTOR_METHODS.include? params[:property] + if description + algorithm = OpenTox::Algorithm::Generic.new(url_for("/openbabel/#{params[:property]}",:full)) + algorithm.metadata = { + DC.title => params[:property], + DC.creator => "helma@in-silico.ch", + DC.description => description, + OT.isA => OTA.DescriptorCalculation, + } + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + else + halt 404, "Unknown OpenBabel descriptor #{params[:property]}." 
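The POST /openbabel/:property route that follows dispatches to these calculations via eval and OBDescriptor.find_type. As a rough standalone sketch, assuming the openbabel Ruby bindings are loaded and using an arbitrary example molecule, the underlying OpenBabel calls look roughly like this:

require 'openbabel'

obconversion = OpenBabel::OBConversion.new
obmol        = OpenBabel::OBMol.new
obconversion.set_in_and_out_formats 'smi', 'can'
obconversion.read_string obmol, 'CCO'                            # ethanol, example input
puts obmol.get_mol_wt                                            # OBMol method behind "GetMolWt"
puts OpenBabel::OBDescriptor.find_type('logP').predict(obmol)    # descriptor behind "logP"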
+ end +end + # Calculate OpenBabel descriptors # Supports the following OpenBabel methods (see OpenBabel API http://openbabel.org/api/2.2.0/) # - NumAtoms Number of atoms @@ -5,39 +60,89 @@ # - NumHvyAtoms Number of heavy atoms # - NumResidues Number of residues # - NumRotors Number of rotatable bonds -# - GetFormula Stochoimetric formula # - GetEnergy Heat of formation for this molecule (in kcal/mol) # - GetMolWt Standard molar mass given by IUPAC atomic masses (amu) # - GetExactMass Mass given by isotopes (or most abundant isotope, if not specified) # - GetTotalCharge Total charge -# - HBA1 Number of Hydrogen Bond Acceptors 1 (JoelLib) -# - HBA2 Number of Hydrogen Bond Acceptors 2 (JoelLib) -# - HBD Number of Hydrogen Bond Donors (JoelLib) -# - L5 Lipinski Rule of Five +# - HBA1 Number of hydrogen bond acceptors 1 (JoelLib) +# - HBA2 Number of hydrogen bond acceptors 2 (JoelLib) +# - HBD Number of hydrogen bond donors (JoelLib) +# - L5 Lipinski rule of five # - logP Octanol/water partition coefficient # - MR Molar refractivity -# - MW Molecular Weight -# - nF Number of Fluorine Atoms +# - MW Molecular weight +# - nF Number of fluorine atoms # - nHal Number of halogen atoms -# - spinMult Total Spin Multiplicity +# - spinMult Total spin multiplicity # - TPSA Topological polar surface area -# @param [URI] compound_uri Compound URI -# @return [Sting] descriptor value +# @param [String] compound_uri Compound URI +# @return [String] descriptor value post '/openbabel/:property' do obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new compound = OpenTox::Compound.new params[:compound_uri] obconversion.set_in_and_out_formats 'inchi', 'can' obconversion.read_string obmol, compound.to_inchi - obmol_methods = ["num_atoms", "num_bonds", "num_hvy_atoms", "num_residues", "num_rotors", "get_formula", "get_energy", "get_mol_wt", "get_exact_mass", "get_total_charge", "get_total_spin_multiplicity"] - - descriptor_methods = [ "HBA1", "HBA2", "HBD", "L5", "logP", "MR", "MW", "nF", "nHal", "spinMult", "TPSA" ] - if obmol_methods.include? params[:property].underscore + if OBMOL_METHODS.keys.include? params[:property] eval("obmol.#{params[:property].underscore}").to_s - elsif descriptor_methods.include? params[:property] + elsif OBDESCRIPTOR_METHODS.keys.include? 
params[:property] descriptor = OpenBabel::OBDescriptor.find_type(params[:property]) descriptor.predict(obmol).to_s else halt 404, "Cannot calculate property #{params[:property]} with OpenBabel" end end + +# Calculate all OpenBabel descriptors for a dataset +# @param [String] dataset_uri Dataset URI +# @return [text/uri-list] Task URI +post '/openbabel' do + task = OpenTox::Task.create("Calculating OpenBabel descriptors for #{params[:dataset_uri]}", url_for('/openbabel',:full)) do + + dataset = OpenTox::Dataset.find(params[:dataset_uri]) + result_dataset = OpenTox::Dataset.create + result_dataset.add_metadata({ + DC.title => "OpenBabel descriptors for " + dataset.metadata[DC.title].to_s, + DC.creator => url_for('/openbabel',:full), + OT.hasSource => url_for('/openbabel', :full), + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + ] + }) + + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_and_out_formats 'inchi', 'can' + + OBMOL_METHODS.merge(OBDESCRIPTOR_METHODS).each do |name,description| + feature_uri = File.join result_dataset.uri, "feature", "openbabel", name + metadata = { + OT.hasSource => url_for("/openbabel/#{name}", :full), + DC.description => description, + DC.title => name, + } + result_dataset.add_feature feature_uri, metadata + end + + dataset.compounds.each do |compound_uri| + compound = OpenTox::Compound.new(compound_uri) + obconversion.read_string obmol, compound.to_inchi + #result_dataset.add_compound compound_uri + OBMOL_METHODS.keys.each do |name| + feature_uri = File.join result_dataset.uri, "feature", "openbabel", name + value = eval("obmol.#{name.underscore}").to_f + result_dataset.add compound_uri, feature_uri, value + end + OBDESCRIPTOR_METHODS.keys.each do |name| + feature_uri = File.join result_dataset.uri, "feature", "openbabel", name + value = OpenBabel::OBDescriptor.find_type(params[:property]).predict(obmol).to_f + result_dataset.add compound_uri, feature_uri, value + end + end + result_dataset.save + result_dataset.uri + end + response['Content-Type'] = 'text/uri-list' + halt 503,task.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" +end -- cgit v1.2.3 From 0ff6a589e691b88a46566fafadee5e4ff4c53bb6 Mon Sep 17 00:00:00 2001 From: mr Date: Thu, 9 Dec 2010 11:02:49 +0100 Subject: A&A implementation --- fminer.rb | 11 +++++++---- lazar.rb | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fminer.rb b/fminer.rb index e647600..093b699 100644 --- a/fminer.rb +++ b/fminer.rb @@ -9,8 +9,8 @@ ENV['FMINER_PVALUES'] = 'true' # # @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do - response['Content-Type'] = 'text/uri-list' - [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + response['Content-Type'] = 'text/uri-list' + [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" end # Get RDF/XML representation of fminer bbrc algorithm @@ -95,8 +95,8 @@ post '/fminer/bbrc/?' 
do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- feature_dataset.token_id = params[:token_id] if params[:token_id]
- feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"]
+ feature_dataset.token_id = params[:token_id] if params[:token_id]
+ feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"]
 feature_dataset.save
 id = 1 # fminer start id is not 0
@@ -236,6 +236,7 @@ post '/fminer/last/?' do
 prediction_feature = params[:prediction_feature]
 training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}"
+ training_dataset.load_all
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
@@ -251,6 +252,8 @@ post '/fminer/last/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
+ feature_dataset.token_id = params[:token_id] if params[:token_id]
+ feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"]
 feature_dataset.save
 id = 1 # fminer start id is not 0
diff --git a/lazar.rb b/lazar.rb
index 8056c89..20d169f 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -71,7 +71,7 @@ post '/lazar/?' do
 halt 404, "External feature generation services not yet supported"
 end
 feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ training_features = OpenTox::Dataset.new(feature_dataset_uri, lazar.token_id)
 end
 training_features.load_all
--
cgit v1.2.3


From 9487e82a1bd6b6cf42512d6bfed43194c109b114 Mon Sep 17 00:00:00 2001
From: mr
Date: Tue, 14 Dec 2010 12:45:03 +0100
Subject: remove token_id from saving in table

---
 fminer.rb | 16 ++++++++--------
 lazar.rb  |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/fminer.rb b/fminer.rb
index 093b699..4b92dba 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -95,9 +95,9 @@ post '/fminer/bbrc/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- feature_dataset.token_id = params[:token_id] if params[:token_id]
- feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"]
- feature_dataset.save
+ token_id = params[:token_id] if params[:token_id]
+ token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !token_id and request.env["HTTP_TOKEN_ID"]
+ feature_dataset.save(token_id)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -204,7 +204,7 @@ post '/fminer/bbrc/?' do
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
 end
- feature_dataset.save
+ feature_dataset.save(token_id)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
@@ -252,9 +252,9 @@ post '/fminer/last/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- feature_dataset.token_id = params[:token_id] if params[:token_id]
- feature_dataset.token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !feature_dataset.token_id and request.env["HTTP_TOKEN_ID"]
- feature_dataset.save
+ token_id = params[:token_id] if params[:token_id]
+ token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !token_id and request.env["HTTP_TOKEN_ID"]
+ feature_dataset.save(token_id)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -350,7 +350,7 @@ post '/fminer/last/?' do
 end
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
- feature_dataset.save
+ feature_dataset.save(token_id)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
diff --git a/lazar.rb b/lazar.rb
index 20d169f..96cf746 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -50,8 +50,8 @@ post '/lazar/?' do
 task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
 lazar = OpenTox::Model::Lazar.new
- lazar.token_id = params[:token_id] if params[:token_id]
- lazar.token_id = request.env["HTTP_TOKEN_ID"] if !lazar.token_id and request.env["HTTP_TOKEN_ID"]
+ token_id = params[:token_id] if params[:token_id]
+ token_id = request.env["HTTP_TOKEN_ID"] if !token_id and request.env["HTTP_TOKEN_ID"]
 lazar.min_sim = params[:min_sim] if params[:min_sim]
 if params[:feature_dataset_uri]
@@ -71,7 +71,7 @@ post '/lazar/?' do
 halt 404, "External feature generation services not yet supported"
 end
 feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
- training_features = OpenTox::Dataset.new(feature_dataset_uri, lazar.token_id)
+ training_features = OpenTox::Dataset.new(feature_dataset_uri)
 end
 training_features.load_all
@@ -140,7 +140,7 @@ post '/lazar/?' do
 {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
 ]
- model_uri = lazar.save
+ model_uri = lazar.save(token_id)
 LOGGER.info model_uri + " created #{Time.now}"
 model_uri
 end
--
cgit v1.2.3


From 709768fcda9e9090442f7cda4120e18e21cadc2e Mon Sep 17 00:00:00 2001
From: mr
Date: Tue, 14 Dec 2010 14:57:54 +0100
Subject: rename token_id to subjectid

---
 fminer.rb | 16 ++++++++--------
 lazar.rb  |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fminer.rb b/fminer.rb
index 4b92dba..2d0cbb0 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -95,9 +95,9 @@ post '/fminer/bbrc/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- token_id = params[:token_id] if params[:token_id]
- token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !token_id and request.env["HTTP_TOKEN_ID"]
- feature_dataset.save(token_id)
+ subjectid = params[:subjectid] if params[:subjectid]
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
+ feature_dataset.save(subjectid)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -204,7 +204,7 @@ post '/fminer/bbrc/?' do
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
 end
- feature_dataset.save(token_id)
+ feature_dataset.save(subjectid)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
@@ -252,9 +252,9 @@ post '/fminer/last/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- token_id = params[:token_id] if params[:token_id]
- token_id = CGI.unescape(request.env["HTTP_TOKEN_ID"]) if !token_id and request.env["HTTP_TOKEN_ID"]
- feature_dataset.save(token_id)
+ subjectid = params[:subjectid] if params[:subjectid]
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
+ feature_dataset.save(subjectid)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -350,7 +350,7 @@ post '/fminer/last/?' do
 end
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
- feature_dataset.save(token_id)
+ feature_dataset.save(subjectid)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
diff --git a/lazar.rb b/lazar.rb
index 96cf746..3e9b65f 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -50,8 +50,8 @@ post '/lazar/?' do
 task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
 lazar = OpenTox::Model::Lazar.new
- token_id = params[:token_id] if params[:token_id]
- token_id = request.env["HTTP_TOKEN_ID"] if !token_id and request.env["HTTP_TOKEN_ID"]
+ subjectid = params[:subjectid] if params[:subjectid]
+ subjectid = request.env["HTTP_SUBJECTID"] if !subjectid and request.env["HTTP_SUBJECTID"]
 lazar.min_sim = params[:min_sim] if params[:min_sim]
 if params[:feature_dataset_uri]
@@ -140,7 +140,7 @@ post '/lazar/?' do
 {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
 ]
- model_uri = lazar.save(token_id)
+ model_uri = lazar.save(subjectid)
 LOGGER.info model_uri + " created #{Time.now}"
 model_uri
 end
--
cgit v1.2.3


From 46764eb6ffc4dfbc9efdbcd3babf73b7b74cc331 Mon Sep 17 00:00:00 2001
From: mr
Date: Mon, 10 Jan 2011 16:57:05 +0100
Subject: typos and A&A

---
 .gitignore |  2 ++
 fminer.rb  | 15 +++++++--------
 lazar.rb   |  9 +++++----
 libfminer  |  2 +-
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index a3ee470..d7d5bc7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 tmp/*
 log/*
 public/*.owl
+*.*~
+.gitignore
diff --git a/fminer.rb b/fminer.rb
index 2d0cbb0..9608a50 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -66,7 +66,9 @@ end
 # - min_chisq_significance Significance threshold (between 0 and 1)
 # @return [text/uri-list] Task URI
 post '/fminer/bbrc/?' do
-
+
+ subjectid = params[:subjectid] ? params[:subjectid] : nil
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 # TODO: is this thread safe??
 #@@bbrc = Bbrc::Bbrc.new
 minfreq = 5 unless minfreq = params[:min_frequency]
@@ -80,7 +82,7 @@ post '/fminer/bbrc/?' do
 halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
 prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}"
+ training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
@@ -95,8 +97,6 @@ post '/fminer/bbrc/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 feature_dataset.save(subjectid)
 id = 1 # fminer start id is not 0
@@ -223,7 +223,8 @@ post '/fminer/bbrc/?' do
 # - hops Maximum number of hops
 # @return [text/uri-list] Task URI
 post '/fminer/last/?' do
-
+ subjectid = params[:subjectid] ? params[:subjectid] : nil
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 #@@last = Last::Last.new
 minfreq = 5 unless minfreq = params[:min_frequency]
 @@last.SetMinfreq(minfreq)
@@ -237,7 +238,7 @@ post '/fminer/last/?' do
 training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}"
- training_dataset.load_all
+ training_dataset.load_all(subjectid)
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
@@ -252,8 +253,6 @@ post '/fminer/last/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 feature_dataset.save(subjectid)
 id = 1 # fminer start id is not 0
diff --git a/lazar.rb b/lazar.rb
index 3e9b65f..4b69400 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -29,11 +29,14 @@ end
 # @ return [text/uri-list] Task URI
 post '/lazar/?' do
+ subjectid = params[:subjectid] ? params[:subjectid] : nil
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
+
 halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
 dataset_uri = params[:dataset_uri]
 halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all
+ training_activities.load_all(subjectid)
 prediction_feature = params[:prediction_feature]
 unless prediction_feature # try to read prediction_feature from dataset
@@ -50,8 +53,6 @@ post '/lazar/?' do
 task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
 lazar = OpenTox::Model::Lazar.new
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = request.env["HTTP_SUBJECTID"] if !subjectid and request.env["HTTP_SUBJECTID"]
 lazar.min_sim = params[:min_sim] if params[:min_sim]
 if params[:feature_dataset_uri]
@@ -74,7 +75,7 @@ post '/lazar/?' do
 training_features = OpenTox::Dataset.new(feature_dataset_uri)
 end
- training_features.load_all
+ training_features.load_all(subjectid)
 halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
 # sorted features for index lookups
diff --git a/libfminer b/libfminer
index e0eee43..c72bb7d 160000
--- a/libfminer
+++ b/libfminer
@@ -1 +1 @@
-Subproject commit e0eee431ecb954328ff64e3cc48840c7003a2769
+Subproject commit c72bb7d99bb7f583f009e44be426910fb44cd4f9
--
cgit v1.2.3


From abf4cc4ecff47728d6c5f16ed94ed20fd41a8c1f Mon Sep 17 00:00:00 2001
From: mr
Date: Wed, 12 Jan 2011 09:56:37 +0100
Subject: A&A

---
 fminer.rb | 16 ++++++++--------
 lazar.rb  |  9 +++++----
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fminer.rb b/fminer.rb
index 2d0cbb0..8e3c2c2 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -66,7 +66,9 @@ end
 # - min_chisq_significance Significance threshold (between 0 and 1)
 # @return [text/uri-list] Task URI
 post '/fminer/bbrc/?' do
-
+
+ subjectid = params[:subjectid] ? subjectid = params[:subjectid] : nil
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 # TODO: is this thread safe??
 #@@bbrc = Bbrc::Bbrc.new
 minfreq = 5 unless minfreq = params[:min_frequency]
@@ -80,7 +82,7 @@ post '/fminer/bbrc/?' do
 halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
 prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}"
+ training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
@@ -95,8 +97,6 @@ post '/fminer/bbrc/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 feature_dataset.save(subjectid)
 id = 1 # fminer start id is not 0
@@ -223,7 +223,9 @@ post '/fminer/bbrc/?' do
 # - hops Maximum number of hops
 # @return [text/uri-list] Task URI
 post '/fminer/last/?' do
-
+ subjectid = nil
+ subjectid = params[:subjectid] if params[:subjectid]
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 #@@last = Last::Last.new
 minfreq = 5 unless minfreq = params[:min_frequency]
 @@last.SetMinfreq(minfreq)
@@ -237,7 +239,7 @@ post '/fminer/last/?' do
 training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}"
- training_dataset.load_all
+ training_dataset.load_all(subjectid)
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
@@ -252,8 +254,6 @@ post '/fminer/last/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 feature_dataset.save(subjectid)
 id = 1 # fminer start id is not 0
diff --git a/lazar.rb b/lazar.rb
index 3e9b65f..b95a444 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -29,11 +29,14 @@ end
 # @ return [text/uri-list] Task URI
 post '/lazar/?' do
+ subjectid = params[:subjectid] ? subjectid = params[:subjectid] : nil
+ subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
+
 halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
 dataset_uri = params[:dataset_uri]
 halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all
+ training_activities.load_all(subjectid)
 prediction_feature = params[:prediction_feature]
 unless prediction_feature # try to read prediction_feature from dataset
@@ -50,8 +53,6 @@ post '/lazar/?' do
 task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
 lazar = OpenTox::Model::Lazar.new
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = request.env["HTTP_SUBJECTID"] if !subjectid and request.env["HTTP_SUBJECTID"]
 lazar.min_sim = params[:min_sim] if params[:min_sim]
 if params[:feature_dataset_uri]
@@ -74,7 +75,7 @@ post '/lazar/?' do
 training_features = OpenTox::Dataset.new(feature_dataset_uri)
 end
- training_features.load_all
+ training_features.load_all(subjectid)
 halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
 # sorted features for index lookups
--
cgit v1.2.3


From 44a0ae95288fdf5db78801e36b84299217be10f1 Mon Sep 17 00:00:00 2001
From: mr
Date: Wed, 12 Jan 2011 09:57:33 +0100
Subject: .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index a3ee470..e3debba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 tmp/*
 log/*
 public/*.owl
+*.*~
--
cgit v1.2.3


From fae87bece86eb78ac36d7d1a315affa01506d84a Mon Sep 17 00:00:00 2001
From: mr
Date: Wed, 12 Jan 2011 16:44:40 +0100
Subject: fminer A&A

---
 fminer.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fminer.rb b/fminer.rb
index 9608a50..9ec14c8 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -87,7 +87,7 @@ post '/fminer/bbrc/?' do
 task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
- feature_dataset = OpenTox::Dataset.new
+ feature_dataset = OpenTox::Dataset.new(nil, subjectid)
 feature_dataset.add_metadata({
 DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
 DC.creator => url_for('/fminer/bbrc',:full),
@@ -236,7 +236,7 @@ post '/fminer/last/?' do
 halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
 prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}"
+ training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", subjectid
 training_dataset.load_all(subjectid)
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
--
cgit v1.2.3


From 4100a87f3784006687a2c8787b57936c96218f79 Mon Sep 17 00:00:00 2001
From: mr
Date: Tue, 18 Jan 2011 11:03:11 +0100
Subject: get subjectid from api-wrapper helper

---
 fminer.rb | 20 ++++++++------------
 lazar.rb  |  9 +++------
 2 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/fminer.rb b/fminer.rb
index 9ec14c8..5cc83ed 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -67,8 +67,6 @@ end
 # @return [text/uri-list] Task URI
 post '/fminer/bbrc/?' do
- subjectid = params[:subjectid] ? params[:subjectid] : nil
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 # TODO: is this thread safe??
 #@@bbrc = Bbrc::Bbrc.new
 minfreq = 5 unless minfreq = params[:min_frequency]
@@ -80,12 +80,12 @@ post '/fminer/bbrc/?' do
 halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
 prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid
+ training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
- feature_dataset = OpenTox::Dataset.new(nil, subjectid)
+ feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
 feature_dataset.add_metadata({
 DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
 DC.creator => url_for('/fminer/bbrc',:full),
@@ -95,7 +95,7 @@ post '/fminer/bbrc/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -204,7 +202,7 @@ post '/fminer/bbrc/?' do
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
 end
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
@@ -223,8 +221,6 @@ post '/fminer/bbrc/?' do
 # - hops Maximum number of hops
 # @return [text/uri-list] Task URI
 post '/fminer/last/?' do
- subjectid = params[:subjectid] ? params[:subjectid] : nil
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
 #@@last = Last::Last.new
 minfreq = 5 unless minfreq = params[:min_frequency]
 @@last.SetMinfreq(minfreq)
@@ -236,9 +232,9 @@ post '/fminer/last/?' do
 halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
 prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", subjectid
+ training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid
- training_dataset.load_all(subjectid)
+ training_dataset.load_all(@subjectid)
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
@@ -252,7 +249,7 @@ post '/fminer/last/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -349,7 +345,7 @@ post '/fminer/last/?' do
 end
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
diff --git a/lazar.rb b/lazar.rb
index 4b69400..fc037a9 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -29,14 +29,11 @@ end
 # @ return [text/uri-list] Task URI
 post '/lazar/?' do
- subjectid = params[:subjectid] ? params[:subjectid] : nil
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
-
 halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
 dataset_uri = params[:dataset_uri]
 halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all(subjectid)
+ training_activities.load_all(@subjectid)
 prediction_feature = params[:prediction_feature]
 unless prediction_feature # try to read prediction_feature from dataset
@@ -75,7 +72,7 @@ post '/lazar/?' do
 training_features = OpenTox::Dataset.new(feature_dataset_uri)
 end
- training_features.load_all(subjectid)
+ training_features.load_all(@subjectid)
 halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
 # sorted features for index lookups
@@ -141,7 +138,7 @@ post '/lazar/?' do
 {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
 ]
- model_uri = lazar.save(subjectid)
+ model_uri = lazar.save(@subjectid)
 LOGGER.info model_uri + " created #{Time.now}"
 model_uri
 end
--
cgit v1.2.3


From 20d132d6d960f93558ce6e61f6cef8581f4a5e1c Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 20 Jan 2011 11:20:22 +0100
Subject: disable sinatra built in error handling to allow new ot error handling

---
 config.ru | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/config.ru b/config.ru
index 67d8493..a1aab0d 100644
--- a/config.ru
+++ b/config.ru
@@ -2,3 +2,5 @@ require 'rubygems'
 require 'opentox-ruby'
 require 'config/config_ru'
 run Sinatra::Application
+set :raise_errors, false
+set :show_exceptions, false
\ No newline at end of file
--
cgit v1.2.3


From 255f70b3bda4997b37ef3139c527ea9f481665bd Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Tue, 25 Jan 2011 17:00:17 +0100
Subject: merged (BY HAND) michas GET authorization

---
 .gitignore |  1 +
 fminer.rb  | 23 +++++++++---------------
 lazar.rb   |  8 +++-----
 3 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/.gitignore b/.gitignore
index a3ee470..e3debba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 tmp/*
 log/*
 public/*.owl
+*.*~
diff --git a/fminer.rb b/fminer.rb
index 2d0cbb0..5cc83ed 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -66,7 +66,7 @@ end
 # - min_chisq_significance Significance threshold (between 0 and 1)
 # @return [text/uri-list] Task URI
 post '/fminer/bbrc/?' do
-
+
 # TODO: is this thread safe??
 #@@bbrc = Bbrc::Bbrc.new
 minfreq = 5 unless minfreq = params[:min_frequency]
@@ -80,12 +80,12 @@ post '/fminer/bbrc/?' do
 halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
 prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}"
+ training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
- feature_dataset = OpenTox::Dataset.new
+ feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
 feature_dataset.add_metadata({
 DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
 DC.creator => url_for('/fminer/bbrc',:full),
@@ -95,9 +95,7 @@ post '/fminer/bbrc/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -204,7 +202,7 @@ post '/fminer/bbrc/?' do
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
 end
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
@@ -223,7 +221,6 @@ post '/fminer/bbrc/?' do
 # - hops Maximum number of hops
 # @return [text/uri-list] Task URI
 post '/fminer/last/?' do
-
 #@@last = Last::Last.new
 minfreq = 5 unless minfreq = params[:min_frequency]
 @@last.SetMinfreq(minfreq)
@@ -235,9 +232,9 @@ post '/fminer/last/?' do
 halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
 prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}"
+ training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid
- training_dataset.load_all
+ training_dataset.load_all(@subjectid)
 halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
 task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
@@ -252,9 +249,7 @@ post '/fminer/last/?' do
 { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
 ] })
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = CGI.unescape(request.env["HTTP_SUBJECTID"]) if !subjectid and request.env["HTTP_SUBJECTID"]
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 id = 1 # fminer start id is not 0
 compounds = []
@@ -350,7 +345,7 @@ post '/fminer/last/?' do
 end
 ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
 end
- feature_dataset.save(subjectid)
+ feature_dataset.save(@subjectid)
 feature_dataset.uri
 end
 response['Content-Type'] = 'text/uri-list'
diff --git a/lazar.rb b/lazar.rb
index 3e9b65f..fc037a9 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -33,7 +33,7 @@ post '/lazar/?' do
 dataset_uri = params[:dataset_uri]
 halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all
+ training_activities.load_all(@subjectid)
 prediction_feature = params[:prediction_feature]
 unless prediction_feature # try to read prediction_feature from dataset
@@ -50,8 +50,6 @@ post '/lazar/?' do
 task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
 lazar = OpenTox::Model::Lazar.new
- subjectid = params[:subjectid] if params[:subjectid]
- subjectid = request.env["HTTP_SUBJECTID"] if !subjectid and request.env["HTTP_SUBJECTID"]
 lazar.min_sim = params[:min_sim] if params[:min_sim]
 if params[:feature_dataset_uri]
@@ -74,7 +72,7 @@ post '/lazar/?' do
 training_features = OpenTox::Dataset.new(feature_dataset_uri)
 end
- training_features.load_all
+ training_features.load_all(@subjectid)
 halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
 # sorted features for index lookups
@@ -140,7 +138,7 @@ post '/lazar/?' do
 {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
 ]
- model_uri = lazar.save(subjectid)
+ model_uri = lazar.save(@subjectid)
 LOGGER.info model_uri + " created #{Time.now}"
 model_uri
 end
--
cgit v1.2.3


From 98f71b5587f0dfb9b5293947b7e78fde5a3e642d Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Thu, 27 Jan 2011 17:41:23 +0100
Subject: master branch for las-utils

---
 Rakefile   | 12 ++++++------
 last-utils |  2 +-
 lazar.rb   |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Rakefile b/Rakefile
index d52f60d..e60ffc7 100644
--- a/Rakefile
+++ b/Rakefile
@@ -30,10 +30,10 @@ namespace "fminer" do
 end
 puts `make ruby`
 Dir.chdir('../../last-utils')
- puts `git fetch`
+ #puts `git fetch`
 # AM LAST: need branch 'experimental' until merged to master in last-utils
- puts `git checkout -f -b experimental origin/experimental`
- puts `git checkout experimental`
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
 puts `git pull`
 end
@@ -63,10 +63,10 @@ namespace "fminer" do
 end
 puts `make ruby`
 Dir.chdir('../../last-utils')
- puts `git fetch`
+ #puts `git fetch`
 # AM LAST: need branch 'experimental' until merged to master in last-utils
- puts `git checkout -f -b experimental origin/experimental`
- puts `git checkout experimental`
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
 puts `git pull`
 end
 end
diff --git a/last-utils b/last-utils
index 9bba490..324a179 160000
--- a/last-utils
+++ b/last-utils
@@ -1 +1 @@
-Subproject commit 9bba490c02a5fea57d65b61e7f68e88ed72893c6
+Subproject commit 324a179b992c7b8b6f52963d4912ce5f92fe81cc
diff --git a/lazar.rb b/lazar.rb
index 3e9b65f..2a9455a 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -25,8 +25,8 @@ end
 # @param [String] dataset_uri Training dataset URI
 # @param [optional,String] prediction_feature URI of the feature to be predicted
 # @param [optional,String] feature_generation_uri URI of the feature generation algorithm
-# @param [optional] - further parameters for the feature generation service
-# @ return [text/uri-list] Task URI
+# @param [optional,String] - further parameters for the feature generation service
+# @return [text/uri-list] Task URI
 post '/lazar/?' do
 halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
--
cgit v1.2.3


From 7ac05a889f1c7f3bb43ef764564920ce0cd84f34 Mon Sep 17 00:00:00 2001
From: mr
Date: Mon, 31 Jan 2011 11:55:57 +0100
Subject: merge with helma/development

---
 Rakefile     |  12 ++---
 config.ru    |   2 +
 last-utils   |   2 +-
 lazar.rb     |   4 +-
 libfminer    |   2 +-
 openbabel.rb | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 6 files changed, 157 insertions(+), 35 deletions(-)

diff --git a/Rakefile b/Rakefile
index d52f60d..e60ffc7 100644
--- a/Rakefile
+++ b/Rakefile
@@ -30,10 +30,10 @@ namespace "fminer" do
 end
 puts `make ruby`
 Dir.chdir('../../last-utils')
- puts `git fetch`
+ #puts `git fetch`
 # AM LAST: need branch 'experimental' until merged to master in last-utils
- puts `git checkout -f -b experimental origin/experimental`
- puts `git checkout experimental`
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
 puts `git pull`
 end
@@ -63,10 +63,10 @@ namespace "fminer" do
 end
 puts `make ruby`
 Dir.chdir('../../last-utils')
- puts `git fetch`
+ #puts `git fetch`
 # AM LAST: need branch 'experimental' until merged to master in last-utils
- puts `git checkout -f -b experimental origin/experimental`
- puts `git checkout experimental`
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
 puts `git pull`
 end
 end
diff --git a/config.ru b/config.ru
index 67d8493..a1aab0d 100644
--- a/config.ru
+++ b/config.ru
@@ -2,3 +2,5 @@ require 'rubygems'
 require 'opentox-ruby'
 require 'config/config_ru'
 run Sinatra::Application
+set :raise_errors, false
+set :show_exceptions, false
\ No newline at end of file
diff --git a/last-utils b/last-utils
index 9bba490..324a179 160000
--- a/last-utils
+++ b/last-utils
@@ -1 +1 @@
-Subproject commit 9bba490c02a5fea57d65b61e7f68e88ed72893c6
+Subproject commit 324a179b992c7b8b6f52963d4912ce5f92fe81cc
diff --git a/lazar.rb b/lazar.rb
index fc037a9..c8cf6ea 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -25,8 +25,8 @@ end
 # @param [String] dataset_uri Training dataset URI
 # @param [optional,String] prediction_feature URI of the feature to be predicted
 # @param [optional,String] feature_generation_uri URI of the feature generation algorithm
-# @param [optional] - further parameters for the feature generation service
-# @ return [text/uri-list] Task URI
+# @param [optional,String] - further parameters for the feature generation service
+# @return [text/uri-list] Task URI
 post '/lazar/?' do
 halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
diff --git a/libfminer b/libfminer
index c72bb7d..e0eee43 160000
--- a/libfminer
+++ b/libfminer
@@ -1 +1 @@
-Subproject commit c72bb7d99bb7f583f009e44be426910fb44cd4f9
+Subproject commit e0eee431ecb954328ff64e3cc48840c7003a2769
diff --git a/openbabel.rb b/openbabel.rb
index a261866..3a873c0 100644
--- a/openbabel.rb
+++ b/openbabel.rb
@@ -1,28 +1,148 @@
-get '/openbabel/:smiles/:property/?' do
+OBMOL_METHODS = {
+ "NumAtoms" => "Number of atoms",
+ "NumBonds" => "Number of bonds",
+ "NumHvyAtoms" => "Number of heavy atoms",
+ "NumResidues" => "Number of residues",
+ "NumRotors" => "Number of rotatable bonds",
+ "GetEnergy" => "Heat of formation for this molecule (in kcal/mol)",
+ "GetMolWt" => "Standard molar mass given by IUPAC atomic masses (amu)",
+ "GetExactMass" => "Mass given by isotopes (or most abundant isotope, if not specified)",
+ "GetTotalCharge" => "Total charge",
+}
+
+OBDESCRIPTOR_METHODS = {
+ "HBA1" => "Number of hydrogen bond acceptors 1 (JoelLib)",
+ "HBA2" => "Number of hydrogen bond acceptors 2 (JoelLib)",
+ "HBD" => "Number of hydrogen bond donors (JoelLib)",
+ "L5" => "Lipinski rule of five",
+ "logP" => "Octanol/water partition coefficient",
+ "MR" => "Molar refractivity",
+ "MW" => "Molecular weight",
+ "nF" => "Number of fluorine atoms",
+ "nHal" => "Number of halogen atoms",
+ "spinMult" => "Total spin multiplicity",
+ "TPSA" => "Topological polar surface area",
+}
+
+# Get a list of OpenBabel algorithms
+# @return [text/uri-list] URIs of OpenBabel algorithms
+get '/openbabel' do
+ algorithms = OBMOL_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
+ algorithms << OBDESCRIPTOR_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
+ response['Content-Type'] = 'text/uri-list'
+ algorithms.join("\n")
+end
+
+# Get RDF/XML representation of OpenBabel algorithm
+# @return [application/rdf+xml] OWL-DL representation of OpenBabel algorithm
+get '/openbabel/:property' do
+ description = OBMOL_METHODS[params[:property]] if OBMOL_METHODS.include? params[:property]
+ description = OBDESCRIPTOR_METHODS[params[:property]] if OBDESCRIPTOR_METHODS.include? params[:property]
+ if description
+ algorithm = OpenTox::Algorithm::Generic.new(url_for("/openbabel/#{params[:property]}",:full))
+ algorithm.metadata = {
+ DC.title => params[:property],
+ DC.creator => "helma@in-silico.ch",
+ DC.description => description,
+ OT.isA => OTA.DescriptorCalculation,
+ }
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm.to_rdfxml
+ else
+ halt 404, "Unknown OpenBabel descriptor #{params[:property]}."
+ end
+end
+
+# Calculate OpenBabel descriptors
+# Supports the following OpenBabel methods (see OpenBabel API http://openbabel.org/api/2.2.0/)
+# - NumAtoms Number of atoms
+# - NumBonds Number of bonds
+# - NumHvyAtoms Number of heavy atoms
+# - NumResidues Number of residues
+# - NumRotors Number of rotatable bonds
+# - GetEnergy Heat of formation for this molecule (in kcal/mol)
+# - GetMolWt Standard molar mass given by IUPAC atomic masses (amu)
+# - GetExactMass Mass given by isotopes (or most abundant isotope, if not specified)
+# - GetTotalCharge Total charge
+# - HBA1 Number of hydrogen bond acceptors 1 (JoelLib)
+# - HBA2 Number of hydrogen bond acceptors 2 (JoelLib)
+# - HBD Number of hydrogen bond donors (JoelLib)
+# - L5 Lipinski rule of five
+# - logP Octanol/water partition coefficient
+# - MR Molar refractivity
+# - MW Molecular weight
+# - nF Number of fluorine atoms
+# - nHal Number of halogen atoms
+# - spinMult Total spin multiplicity
+# - TPSA Topological polar surface area
+# @param [String] compound_uri Compound URI
+# @return [String] descriptor value
+post '/openbabel/:property' do
 obconversion = OpenBabel::OBConversion.new
 obmol = OpenBabel::OBMol.new
- obconversion.set_in_and_out_formats 'smi', 'can'
- case params[:property]
- when 'logP'
- #logP = OpenBabel::OBLogP.new
- #logP.predict(obmol)
- "not yet implemented"
- when 'psa'
- #psa = OpenBabel::OBPSA.new
- "not yet implemented"
- when 'mr'
- #mr = OpenBabel::OBMR.new
- "not yet implemented"
- else
- begin
- obconversion.read_string obmol, params[:smiles]
- rescue
- halt 404, "Incorrect Smiles string #{params[:smiles]}"
- end
- begin
- eval("obmol.#{params[:property]}").to_s
- rescue
- halt 404, "Could not calculate property #{params[:property]}"
- end
- end
+ compound = OpenTox::Compound.new params[:compound_uri]
+ obconversion.set_in_and_out_formats 'inchi', 'can'
+ obconversion.read_string obmol, compound.to_inchi
+ if OBMOL_METHODS.keys.include? params[:property]
+ eval("obmol.#{params[:property].underscore}").to_s
+ elsif OBDESCRIPTOR_METHODS.keys.include? params[:property]
+ descriptor = OpenBabel::OBDescriptor.find_type(params[:property])
+ descriptor.predict(obmol).to_s
+ else
+ halt 404, "Cannot calculate property #{params[:property]} with OpenBabel"
+ end
+end
+
+# Calculate all OpenBabel descriptors for a dataset
+# @param [String] dataset_uri Dataset URI
+# @return [text/uri-list] Task URI
+post '/openbabel' do
+ task = OpenTox::Task.create("Calculating OpenBabel descriptors for #{params[:dataset_uri]}", url_for('/openbabel',:full)) do
+
+ dataset = OpenTox::Dataset.find(params[:dataset_uri])
+ result_dataset = OpenTox::Dataset.create
+ result_dataset.add_metadata({
+ DC.title => "OpenBabel descriptors for " + dataset.metadata[DC.title].to_s,
+ DC.creator => url_for('/openbabel',:full),
+ OT.hasSource => url_for('/openbabel', :full),
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ ]
+ })
+
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_and_out_formats 'inchi', 'can'
+
+ OBMOL_METHODS.merge(OBDESCRIPTOR_METHODS).each do |name,description|
+ feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
+ metadata = {
+ OT.hasSource => url_for("/openbabel/#{name}", :full),
+ DC.description => description,
+ DC.title => name,
+ }
+ result_dataset.add_feature feature_uri, metadata
+ end
+
+ dataset.compounds.each do |compound_uri|
+ compound = OpenTox::Compound.new(compound_uri)
+ obconversion.read_string obmol, compound.to_inchi
+ #result_dataset.add_compound compound_uri
+ OBMOL_METHODS.keys.each do |name|
+ feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
+ value = eval("obmol.#{name.underscore}").to_f
+ result_dataset.add compound_uri, feature_uri, value
+ end
+ OBDESCRIPTOR_METHODS.keys.each do |name|
+ feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
+ value = OpenBabel::OBDescriptor.find_type(params[:property]).predict(obmol).to_f
+ result_dataset.add compound_uri, feature_uri, value
+ end
+ end
+ result_dataset.save
+ result_dataset.uri
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri.to_s+"\n"
 end
--
cgit v1.2.3


From c957aae6cae380681ff2feac27222ac1decf44ae Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 10 Feb 2011 09:18:52 +0100
Subject: merged mr algorithm manually

---
 Rakefile | 12 ++++++------
 lazar.rb |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Rakefile b/Rakefile
index d52f60d..e60ffc7 100644
--- a/Rakefile
+++ b/Rakefile
@@ -30,10 +30,10 @@ namespace "fminer" do
 end
 puts `make ruby`
 Dir.chdir('../../last-utils')
- puts `git fetch`
+ #puts `git fetch`
 # AM LAST: need branch 'experimental' until merged to master in last-utils
- puts `git checkout -f -b experimental origin/experimental`
- puts `git checkout experimental`
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
 puts `git pull`
 end
@@ -63,10 +63,10 @@ namespace "fminer" do
 end
 puts `make ruby`
 Dir.chdir('../../last-utils')
- puts `git fetch`
+ #puts `git fetch`
 # AM LAST: need branch 'experimental' until merged to master in last-utils
- puts `git checkout -f -b experimental origin/experimental`
- puts `git checkout experimental`
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
 puts `git pull`
 end
 end
diff --git a/lazar.rb b/lazar.rb
index fc037a9..c8cf6ea 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -25,8 +25,8 @@ end
 # @param [String] dataset_uri Training dataset URI
 # @param [optional,String] prediction_feature URI of the feature to be predicted
 # @param [optional,String] feature_generation_uri URI of the feature generation algorithm
-# @param [optional] - further parameters for the feature generation service
-# @ return [text/uri-list] Task URI
+# @param [optional,String] - further parameters for the feature generation service
+# @return [text/uri-list] Task URI
 post '/lazar/?' do
 halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
--
cgit v1.2.3


From 20a3b56d4fde6e6bc7ccd7772aff26448888c8a0 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 10 Feb 2011 10:09:35 +0100
Subject: add missing subjectid

---
 lazar.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lazar.rb b/lazar.rb
index c8cf6ea..dc1e865 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -68,6 +68,7 @@ post '/lazar/?' do
 else
 halt 404, "External feature generation services not yet supported"
 end
+ params[:subjectid] = @subjectid
 feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
 training_features = OpenTox::Dataset.new(feature_dataset_uri)
 end
--
cgit v1.2.3


From a1c4830bff4145391c607d80dc1d29312044e98c Mon Sep 17 00:00:00 2001
From: mr
Date: Thu, 10 Feb 2011 11:09:13 +0100
Subject: add missing subjectid

---
 lazar.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lazar.rb b/lazar.rb
index c8cf6ea..9fea258 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -29,6 +29,7 @@ end
 # @return [text/uri-list] Task URI
 post '/lazar/?' do
+ params[:subjectid] = @subjectid
 halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
 dataset_uri = params[:dataset_uri]
--
cgit v1.2.3


From d1983f442a9014d66173c7ff8ab8ae0ac35a23e8 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Wed, 9 Mar 2011 12:00:09 +0100
Subject: version bumped to 1.0.0

---
 application.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application.rb b/application.rb
index 15088d5..e36643b 100644
--- a/application.rb
+++ b/application.rb
@@ -3,7 +3,7 @@ require 'rubygems'
 require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems
 require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems
 require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST
-gem "opentox-ruby", "~> 0"
+gem "opentox-ruby", "~> 1"
 require 'opentox-ruby'
 #require 'smarts.rb'
--
cgit v1.2.3