From f2ca545448ab8a6f654309f23cfce9416b2e9856 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 13 Jan 2011 14:02:58 +0100 Subject: find methods for algorithm and model, split method for dataset, feature_type method for model and feature, perform single predictions in rescue block, add to-html.rb, fix handling of rest-client-wrapper --- lib/algorithm.rb | 14 ++++++ lib/dataset.rb | 33 +++++++++++++- lib/feature.rb | 23 +++++++++- lib/model.rb | 76 +++++++++++++++++++++++++++++++- lib/opentox-ruby.rb | 2 +- lib/overwrite.rb | 18 +++++++- lib/rest_client_wrapper.rb | 18 +++----- lib/task.rb | 106 ++++++++++++++++++++++++++++++++++++++++++++- lib/to-html.rb | 80 ++++++++++++++++++++++++++++++++++ 9 files changed, 351 insertions(+), 19 deletions(-) create mode 100755 lib/to-html.rb diff --git a/lib/algorithm.rb b/lib/algorithm.rb index a2f7786..0aa86e6 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -29,6 +29,20 @@ module OpenTox # Generic Algorithm class, should work with all OpenTox webservices class Generic include Algorithm + + # Find Generic Opentox Algorithm via URI, and loads metadata + # @param [String] uri Algorithm URI + # @return [OpenTox::Algorithm::Generic] Algorithm instance, nil if algorithm was not found + def self.find(uri) + alg = Generic.new(uri) + alg.load_metadata + if alg.metadata==nil or alg.metadata.size==0 + nil + else + alg + end + end + end # Fminer algorithms (https://github.com/amaunz/fminer2) diff --git a/lib/dataset.rb b/lib/dataset.rb index aba7754..d45c821 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -46,7 +46,7 @@ module OpenTox dataset.save(subjectid) dataset end - + # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. 
# @param [String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object with all data @@ -242,6 +242,37 @@ module OpenTox def add_feature_metadata(feature,metadata) metadata.each { |k,v| @features[feature][k] = v } end + + # Add a new compound + # @param [String] compound Compound URI + def add_compound (compound) + @compounds << compound unless @compounds.include? compound + end + + # Creates a new dataset, by splitting the current dataset, i.e. using only a subset of compounds and features + # @param [Array] compounds List of compound URIs + # @param [Array] features List of feature URIs + # @param [Hash] metadata Hash containing the metadata for the new dataset + # @return [OpenTox::Dataset] newly created dataset, already saved + def split( compounds, features, metadata) + LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds" + raise "no new compounds selected" unless compounds and compounds.size>0 + dataset = OpenTox::Dataset.create + if features.size==0 + compounds.each{ |c| dataset.add_compound(c) } + else + compounds.each do |c| + features.each do |f| + @data_entries[c][f].each do |v| + dataset.add(c,f,v) + end + end + end + end + dataset.add_metadata(metadata) + dataset.save + dataset + end # Save dataset at the dataset service # - creates a new dataset if uri is not set diff --git a/lib/feature.rb b/lib/feature.rb index 9e28077..de7c757 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -1,7 +1,7 @@ module OpenTox class Feature include OpenTox - + def self.find(uri) feature = Feature.new uri if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) @@ -11,5 +11,26 @@ module OpenTox end feature end + + # provides domain (possible target values) of classification feature + # @return [Array] list with possible target values + def domain + #TODO derive from metadata / ontology + return [true, false] + end + + # provides feature type, possible types are "regression" or "classification" + # @return [String] feature 
type, unknown if OT.isA property is unknown/ not set + def feature_type + case metadata[OT.isA] + when /NominalFeature/ + "classification" + when /NumericFeature/ + "regression" + else + "unknown" + end + end + end end diff --git a/lib/model.rb b/lib/model.rb index c645bdc..fb266e0 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -24,8 +24,76 @@ module OpenTox # Generic OpenTox model class for all API compliant services class Generic include Model + + # Find Generic Opentox Model via URI, and loads metadata + # @param [String] uri Model URI + # @return [OpenTox::Model::Generic] Model instance, nil if model was not found + def self.find(uri) + model = Generic.new(uri) + model.load_metadata + if model.metadata==nil or model.metadata.size==0 + nil + else + model + end + end + + # provides feature type, possible types are "regression" or "classification" + # @return [String] feature type, "unknown" if type could not be estimated + def feature_type + # dynamically perform restcalls if necessary + load_metadata if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) + @dependentVariable = OpenTox::Feature.find( @metadata[OT.dependentVariables] ) unless @dependentVariable + + [@dependentVariable.feature_type, @metadata[OT.isA], @metadata[DC.title], @uri].each do |type| + case type + when /(?i)classification/ + return "classification" + when /(?i)regression/ + return "regression" + end + end + raise "unknown model "+[@dependentVariable.feature_type, @metadata[OT.isA], @metadata[DC.title], @uri].inspect + end + +# def classification? +# # TODO test on various services / request to ontology service needed? 
+# # TODO replace bool (for classification/regression) with string value (more types are coming) +# #raise "classification?: type: "+@type.to_s+", title: "+@title.to_s+", uri: "+@uri.to_s+" "+((@uri =~ /class/) != nil).to_s +# +# load_metadata if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) +# @dependentVariable = OpenTox::Feature.find( @metadata[OT.dependentVariables] ) unless @dependentVariable +# case @dependentVariable.feature_type +# when "classification" +# return true +# when "regression" +# return false +# end +# +# if @metadata[OT.isA] =~ /(?i)classification/ +# return true +# end +# +# if @metadata[DC.title] =~ /(?i)classification/ +# return true +# elsif @metadata[DC.title] =~ /(?i)regression/ +# return false +# elsif @uri =~/ntua/ and @metadata[DC.title] =~ /mlr/ +# return false +# elsif @uri =~/tu-muenchen/ and @metadata[DC.title] =~ /regression|M5P|GaussP/ +# return false +# elsif @uri =~/ambit2/ and @metadata[DC.title] =~ /pKa/ || @metadata[DC.title] =~ /Regression|Caco/ +# return false +# elsif @uri =~/majority/ +# return (@uri =~ /class/) != nil +# else +# raise "unknown model, uri:'"+@uri.to_s+"' title:'"+@metadata[DC.title].to_s+"'" +# end +# end +# end + end - + # Lazy Structure Activity Relationship class class Lazar @@ -101,7 +169,11 @@ module OpenTox d = Dataset.new(dataset_uri) d.load_compounds d.compounds.each do |compound_uri| - predict(compound_uri,false,subjectid) + begin + predict(compound_uri,false,subjectid) + rescue => ex + LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message + end end @prediction_dataset.save(subjectid) @prediction_dataset diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb index c0bff95..fb3803b 100644 --- a/lib/opentox-ruby.rb +++ b/lib/opentox-ruby.rb @@ -8,6 +8,6 @@ rescue LoadError puts "Please install Openbabel with 'rake openbabel:install' in the compound component" end -['opentox', 'compound','dataset', 'parser','serializer', 
'algorithm','model','task','validation','feature', 'rest_client_wrapper', 'authorization', 'policy', 'helper'].each do |lib| +['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'rest_client_wrapper', 'authorization', 'policy', 'helper', 'to-html'].each do |lib| require lib end diff --git a/lib/overwrite.rb b/lib/overwrite.rb index 8d787a6..e5ed5c3 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -2,6 +2,22 @@ # hack: store sinatra in global var to make url_for and halt methods accessible before{ $sinatra = self unless $sinatra } +# handle errors manually +# this is to return 502, when an error occurs during a rest-call (see rest_client_wrapper.rb) +set :raise_errors, Proc.new { false } +set :show_exceptions, false +error do + # try if the error is an OpenTox::Error + if OpenTox::Error.parse(request.env['sinatra.error'].to_s) + # if true, this error comes from rest_client_wrapper, halt with 502 + # (502 is defined in OT API as Error coming from other service) + halt 502,request.env['sinatra.error'] + else + # else, raise exception, this will return 500 = internal error + raise request.env['sinatra.error'] + end +end + class Sinatra::Base # overwriting halt to log halts (!= 202) def halt(*response) @@ -60,7 +76,7 @@ class OTLogger < Logger n = 2 line = lines[n] - while (line =~ /spork.rb/ or line =~ /create/ or line =~ /ot-logger.rb/) + while (line =~ /spork.rb/ or line =~ /create/ or line =~ /overwrite.rb/) n += 1 line = lines[n] end diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 5f5273b..2f0e215 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -115,7 +115,7 @@ module OpenTox task = OpenTox::Task.from_yaml(res) when /text\// raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 #if uri list contains more then one uri, its not a task - task = OpenTox::Task.find(res.to_s) if 
res.to_s.uri? + task = OpenTox::Task.find(res.to_s.chomp) if res.to_s.uri? else raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s end @@ -151,18 +151,14 @@ module OpenTox File.new(File.join(error_dir,file_name+"_"+time+"_"+count.to_s),"w").puts(body) # handle error - # we are either in a task, or in sinatra # PENDING: always return yaml for now - if $self_task #this global var in Task.create to mark that the current process is running in a task - raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.create - #elsif $sinatra #else halt sinatra - #$sinatra.halt(502,error.to_yaml) - elsif defined?(halt) - halt(502,error.to_yaml) - else #for testing purposes (if classes used directly) - raise error.to_yaml - end + # raising OpenTox::Error + # to handle the error yourself, put rest-call in begin, rescue block + # if the error is not caught: + # if we are in a task, the error is caught, logged, and task state is set to error in Task.as_task + # if we are in a default call, the error is handled in overwrite.rb to return 502 (according to OT API) + raise error.to_yaml end end end diff --git a/lib/task.rb b/lib/task.rb index 18fba6e..dcbff3f 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -1,4 +1,3 @@ -$self_task=nil module OpenTox @@ -60,7 +59,6 @@ module OpenTox task_pid = Spork.spork(:logger => LOGGER) do LOGGER.debug "Task #{task.uri} started #{Time.now}" - $self_task = task begin result = catch(:halt) do @@ -254,7 +252,111 @@ module OpenTox RestClientWrapper.raise_uri_error(ex.message, @uri) end end + + public + #hint: do not overwrite percentageCompleted=, this is used in toYaml + def progress(pct) +# #puts "task := "+pct.to_s +# raise "no numeric >= 0 and <= 100 : '"+pct.to_s+"'" unless pct.is_a?(Numeric) and pct>=0 and pct<=100 +# RestClientWrapper.put(File.join(@uri,'Running'),{:percentageCompleted => pct}) +# reload + end + + end + + # Convenience class to split a (sub)task into 
subtasks + # + # example: + # a crossvalidation is split into creating datasets and performing the validations + # creating the dataset is 1/3 of the work, perform the validations is 2/3: + # Task.as_task do |task| + # create_datasets( SubTask.new(task, 0, 33) ) + # perform_validations( SubTask.new(task, 33, 100) ) + # end + # inside the create_datasets / perform_validations you can use subtask.progress() + # with vals from 0-100 + # + # note that you can split a subtask into further subtasks + class SubTask + + def initialize(task, min, max) + raise "not a task or subtask" unless task.is_a?(Task) or task.is_a?(SubTask) + raise "invalid max ("+max.to_s+"), min ("+min.to_s+") params" unless + min.is_a?(Numeric) and max.is_a?(Numeric) and min >= 0 and max <= 100 and max > min + @task = task + @min = min + @max = max + @delta = max - min + end + # convenience method to handle null tasks + def self.create(task, min, max) + if task + SubTask.new(task, min, max) + else + nil + end + end + + def progress(pct) + raise "no numeric >= 0 and <= 100 : '"+pct.to_s+"'" unless pct.is_a?(Numeric) and pct>=0 and pct<=100 + #puts "subtask := "+pct.to_s+" -> task := "+(@min + @delta * pct.to_f * 0.01).to_s + @task.progress( @min + @delta * pct.to_f * 0.01 ) + end + + def running?() + @task.running? + end + end + + + # The David Gallagher feature: + # a fake sub task to keep the progress bar movin for external jobs + # note: param could be a subtask + # + # usage (for a call that is normally finished in under 60 seconds): + # fsk = FakeSubTask.new(task, 60) + # external_lib_call.start + # external_lib_call.wait_until_finished + # fsk.finished + # + # what happens: + # the FakeSubTask updates the task.progress each second until + # runtime is up or the finished method is called + # + # example if the param runtime is too low: + # 25% .. 50% .. 75% .. 100% .. 100% .. 100% .. 100% .. 100% + # example if the param runtime is too high: + # 5% .. 10% .. 15% .. 20% .. 25% .. 30% .. 35% .. 
100% + # the latter example is better (keep the bar movin!) + # -> better make a conservative runtime estimate + class FakeSubTask + + def initialize(task, runtime) + @task = task + @thread = Thread.new do + timeleft = runtime + while (timeleft > 0 and @task.running?) + sleep 1 + timeleft -= 1 + @task.progress( (runtime - timeleft) / runtime.to_f * 100 ) + end + end + end + + # convenience method to handle null tasks + def self.create(task, runtime) + if task + FakeSubTask.new(task, runtime) + else + nil + end + end + + def finished + @thread.exit + @task.progress(100) if @task.running? + end end end diff --git a/lib/to-html.rb b/lib/to-html.rb new file mode 100755 index 0000000..1bc1496 --- /dev/null +++ b/lib/to-html.rb @@ -0,0 +1,80 @@ + +OT_LOGO = "http://opentox.informatik.uni-freiburg.de/ot-logo.png" + + +class String + + # encloses URI in text with link tag + # @return [String] new text with marked links + def link_urls + self.gsub(/(?i)http:\/\/[^\r\n\s']*/, '\0') + end +end + +module OpenTox + + # produces a html page for making web services browser friendly + # format of text (=string params) is preserved (e.g. line breaks) + # urls are marked as links + # @example post params: + # [ [ [:mandatory_param_1], [:mandatory_param_2], [:optional_param,"default_value"] ], + # [ [:alternative_mandatory_param_1], [:alternative_mandatory_param_2] ] + # ] + # @param [String] text this is the actual content, + # @param [optional,String] related_links info on related resources + # @param [optional,String] description general info + # @param [optional,Array] post_params, array of arrays containing info on POST operation, see example + # @return [String] html page + def self.text_to_html( text, related_links=nil, description=nil, post_params=nil ) + + title = $sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra + + html = < +EOF + html.chomp! + html += ""+title+"" if title + html += < + +EOF + html.chomp! + html += "

Description

"+description.link_urls+"

" if description + html += "

Related links

"+related_links.link_urls+"

" if related_links + if post_params + html += "

POST parameters

" + count = 0 + post_params.each do |p| + html += "

alternatively:

" if count > 0 + html += "

" + p.each do |k,v| + html += "" + end + html += "
paramdefault_value
"+k.to_s+""+(v!=nil ? v.to_s : "mandatory")+"

" + count += 1 + end + end + html += "

Content

" if description || related_links + html += < +

+EOF + html.chomp! + html += text.link_urls + html += < + + + +EOF + html + end + +end + +#puts OpenTox.text_to_html("bla") \ No newline at end of file -- cgit v1.2.3