summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/algorithm.rb 14
-rw-r--r-- lib/dataset.rb 33
-rw-r--r-- lib/feature.rb 23
-rw-r--r-- lib/model.rb 76
-rw-r--r-- lib/opentox-ruby.rb 2
-rw-r--r-- lib/overwrite.rb 18
-rw-r--r-- lib/rest_client_wrapper.rb 18
-rw-r--r-- lib/task.rb 106
-rwxr-xr-x lib/to-html.rb 80
9 files changed, 351 insertions, 19 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index a2f7786..0aa86e6 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -29,6 +29,20 @@ module OpenTox
# Generic Algorithm class, should work with all OpenTox webservices
class Generic
include Algorithm
+
+ # Finds a generic OpenTox algorithm via URI and loads its metadata
+ # @param [String] uri Algorithm URI
+ # @return [OpenTox::Algorithm::Generic] Algorithm instance, nil if algorithm was not found
+ def self.find(uri)
+ alg = Generic.new(uri)
+ alg.load_metadata
+ if alg.metadata==nil or alg.metadata.size==0
+ nil
+ else
+ alg
+ end
+ end
+
end
# Fminer algorithms (https://github.com/amaunz/fminer2)
diff --git a/lib/dataset.rb b/lib/dataset.rb
index aba7754..d45c821 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -46,7 +46,7 @@ module OpenTox
dataset.save(subjectid)
dataset
end
-
+
# Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading.
# @param [String] uri Dataset URI
# @return [OpenTox::Dataset] Dataset object with all data
@@ -242,6 +242,37 @@ module OpenTox
def add_feature_metadata(feature,metadata)
metadata.each { |k,v| @features[feature][k] = v }
end
+
+ # Add a new compound
+ # @param [String] compound Compound URI
+ def add_compound (compound)
+ @compounds << compound unless @compounds.include? compound
+ end
+
+ # Creates a new dataset, by splitting the current dataset, i.e. using only a subset of compounds and features
+ # @param [Array] compounds List of compound URIs
+ # @param [Array] features List of feature URIs
+ # @param [Hash] metadata Hash containing the metadata for the new dataset
+ # @return [OpenTox::Dataset] newly created dataset, already saved
+ def split( compounds, features, metadata)
+ LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds"
+ raise "no new compounds selected" unless compounds and compounds.size>0
+ dataset = OpenTox::Dataset.create
+ if features.size==0
+ compounds.each{ |c| dataset.add_compound(c) }
+ else
+ compounds.each do |c|
+ features.each do |f|
+ @data_entries[c][f].each do |v|
+ dataset.add(c,f,v)
+ end
+ end
+ end
+ end
+ dataset.add_metadata(metadata)
+ dataset.save
+ dataset
+ end
# Save dataset at the dataset service
# - creates a new dataset if uri is not set
diff --git a/lib/feature.rb b/lib/feature.rb
index 9e28077..de7c757 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -1,7 +1,7 @@
module OpenTox
class Feature
include OpenTox
-
+
def self.find(uri)
feature = Feature.new uri
if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
@@ -11,5 +11,26 @@ module OpenTox
end
feature
end
+
+ # provides domain (possible target values) of classification feature
+ # @return [Array] list with possible target values
+ def domain
+ #TODO derive from metadata / ontology
+ return [true, false]
+ end
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type, "unknown" if the OT.isA property is unknown or not set
+ def feature_type
+ case metadata[OT.isA]
+ when /NominalFeature/
+ "classification"
+ when /NumericFeature/
+ "regression"
+ else
+ "unknown"
+ end
+ end
+
end
end
diff --git a/lib/model.rb b/lib/model.rb
index c645bdc..fb266e0 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -24,8 +24,76 @@ module OpenTox
# Generic OpenTox model class for all API compliant services
class Generic
include Model
+
+ # Finds a generic OpenTox model via URI and loads its metadata
+ # @param [String] uri Model URI
+ # @return [OpenTox::Model::Generic] Model instance, nil if model was not found
+ def self.find(uri)
+ model = Generic.new(uri)
+ model.load_metadata
+ if model.metadata==nil or model.metadata.size==0
+ nil
+ else
+ model
+ end
+ end
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type, "unknown" if type could not be estimated
+ def feature_type
+ # dynamically perform REST calls if necessary
+ load_metadata if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
+ @dependentVariable = OpenTox::Feature.find( @metadata[OT.dependentVariables] ) unless @dependentVariable
+
+ [@dependentVariable.feature_type, @metadata[OT.isA], @metadata[DC.title], @uri].each do |type|
+ case type
+ when /(?i)classification/
+ return "classification"
+ when /(?i)regression/
+ return "regression"
+ end
+ end
+ raise "unknown model "+[@dependentVariable.feature_type, @metadata[OT.isA], @metadata[DC.title], @uri].inspect
+ end
+
+# def classification?
+# # TODO test on various services / request to ontology service needed?
+# # TODO replace bool (for classification/regression) with string value (more types are coming)
+# #raise "classification?: type: "+@type.to_s+", title: "+@title.to_s+", uri: "+@uri.to_s+" "+((@uri =~ /class/) != nil).to_s
+#
+# load_metadata if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
+# @dependentVariable = OpenTox::Feature.find( @metadata[OT.dependentVariables] ) unless @dependentVariable
+# case @dependentVariable.feature_type
+# when "classification"
+# return true
+# when "regression"
+# return false
+# end
+#
+# if @metadata[OT.isA] =~ /(?i)classification/
+# return true
+# end
+#
+# if @metadata[DC.title] =~ /(?i)classification/
+# return true
+# elsif @metadata[DC.title] =~ /(?i)regression/
+# return false
+# elsif @uri =~/ntua/ and @metadata[DC.title] =~ /mlr/
+# return false
+# elsif @uri =~/tu-muenchen/ and @metadata[DC.title] =~ /regression|M5P|GaussP/
+# return false
+# elsif @uri =~/ambit2/ and @metadata[DC.title] =~ /pKa/ || @metadata[DC.title] =~ /Regression|Caco/
+# return false
+# elsif @uri =~/majority/
+# return (@uri =~ /class/) != nil
+# else
+# raise "unknown model, uri:'"+@uri.to_s+"' title:'"+@metadata[DC.title].to_s+"'"
+# end
+# end
+# end
+
end
-
+
# Lazy Structure Activity Relationship class
class Lazar
@@ -101,7 +169,11 @@ module OpenTox
d = Dataset.new(dataset_uri)
d.load_compounds
d.compounds.each do |compound_uri|
- predict(compound_uri,false,subjectid)
+ begin
+ predict(compound_uri,false,subjectid)
+ rescue => ex
+ LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
+ end
end
@prediction_dataset.save(subjectid)
@prediction_dataset
diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb
index c0bff95..fb3803b 100644
--- a/lib/opentox-ruby.rb
+++ b/lib/opentox-ruby.rb
@@ -8,6 +8,6 @@ rescue LoadError
puts "Please install Openbabel with 'rake openbabel:install' in the compound component"
end
-['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'rest_client_wrapper', 'authorization', 'policy', 'helper'].each do |lib|
+['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'rest_client_wrapper', 'authorization', 'policy', 'helper', 'to-html'].each do |lib|
require lib
end
diff --git a/lib/overwrite.rb b/lib/overwrite.rb
index 8d787a6..e5ed5c3 100644
--- a/lib/overwrite.rb
+++ b/lib/overwrite.rb
@@ -2,6 +2,22 @@
# hack: store sinatra in global var to make url_for and halt methods accessible
before{ $sinatra = self unless $sinatra }
+# handle errors manually
+# this is to return 502, when an error occurs during a rest-call (see rest_client_wrapper.rb)
+set :raise_errors, Proc.new { false }
+set :show_exceptions, false
+error do
+ # check whether the error is an OpenTox::Error
+ if OpenTox::Error.parse(request.env['sinatra.error'].to_s)
+ # if true, this error comes from rest_client_wrapper, halt with 502
+ # (502 is defined in OT API as Error coming from other service)
+ halt 502,request.env['sinatra.error']
+ else
+ # else, raise exception, this will return 500 = internal error
+ raise request.env['sinatra.error']
+ end
+end
+
class Sinatra::Base
# overwriting halt to log halts (!= 202)
def halt(*response)
@@ -60,7 +76,7 @@ class OTLogger < Logger
n = 2
line = lines[n]
- while (line =~ /spork.rb/ or line =~ /create/ or line =~ /ot-logger.rb/)
+ while (line =~ /spork.rb/ or line =~ /create/ or line =~ /overwrite.rb/)
n += 1
line = lines[n]
end
diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb
index 5f5273b..2f0e215 100644
--- a/lib/rest_client_wrapper.rb
+++ b/lib/rest_client_wrapper.rb
@@ -115,7 +115,7 @@ module OpenTox
task = OpenTox::Task.from_yaml(res)
when /text\//
raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 #if uri list contains more then one uri, its not a task
- task = OpenTox::Task.find(res.to_s) if res.to_s.uri?
+ task = OpenTox::Task.find(res.to_s.chomp) if res.to_s.uri?
else
raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s
end
@@ -151,18 +151,14 @@ module OpenTox
File.new(File.join(error_dir,file_name+"_"+time+"_"+count.to_s),"w").puts(body)
# handle error
- # we are either in a task, or in sinatra
# PENDING: always return yaml for now
- if $self_task #this global var in Task.create to mark that the current process is running in a task
- raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.create
- #elsif $sinatra #else halt sinatra
- #$sinatra.halt(502,error.to_yaml)
- elsif defined?(halt)
- halt(502,error.to_yaml)
- else #for testing purposes (if classes used directly)
- raise error.to_yaml
- end
+ # raising OpenTox::Error
+ # to handle the error yourself, put rest-call in begin, rescue block
+ # if the error is not caught:
+ # if we are in a task, the error is caught, logged, and task state is set to error in Task.as_task
+ # if we are in a default call, the error is handled in overwrite.rb to return 502 (according to OT API)
+ raise error.to_yaml
end
end
end
diff --git a/lib/task.rb b/lib/task.rb
index 18fba6e..dcbff3f 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -1,4 +1,3 @@
-$self_task=nil
module OpenTox
@@ -60,7 +59,6 @@ module OpenTox
task_pid = Spork.spork(:logger => LOGGER) do
LOGGER.debug "Task #{task.uri} started #{Time.now}"
- $self_task = task
begin
result = catch(:halt) do
@@ -254,7 +252,111 @@ module OpenTox
RestClientWrapper.raise_uri_error(ex.message, @uri)
end
end
+
+ public
+ #hint: do not overwrite percentageCompleted=, this is used in toYaml
+ def progress(pct)
+# #puts "task := "+pct.to_s
+# raise "no numeric >= 0 and <= 100 : '"+pct.to_s+"'" unless pct.is_a?(Numeric) and pct>=0 and pct<=100
+# RestClientWrapper.put(File.join(@uri,'Running'),{:percentageCompleted => pct})
+# reload
+ end
+
+ end
+
+ # Convenience class to split a (sub)task into subtasks
+ #
+ # example:
+ # a crossvalidation is split into creating datasets and performing the validations
+ # creating the dataset is 1/3 of the work, perform the validations is 2/3:
+ # Task.as_task do |task|
+ # create_datasets( SubTask.new(task, 0, 33) )
+ # perform_validations( SubTask.new(task, 33, 100) )
+ # end
+ # inside the create_datasets / perform_validations you can use subtask.progress(<val>)
+ # with vals from 0-100
+ #
+ # note that you can split a subtask into further subtasks
+ class SubTask
+
+ def initialize(task, min, max)
+ raise "not a task or subtask" unless task.is_a?(Task) or task.is_a?(SubTask)
+ raise "invalid max ("+max.to_s+"), min ("+min.to_s+") params" unless
+ min.is_a?(Numeric) and max.is_a?(Numeric) and min >= 0 and max <= 100 and max > min
+ @task = task
+ @min = min
+ @max = max
+ @delta = max - min
+ end
+ # convenience method to handle null tasks
+ def self.create(task, min, max)
+ if task
+ SubTask.new(task, min, max)
+ else
+ nil
+ end
+ end
+
+ def progress(pct)
+ raise "no numeric >= 0 and <= 100 : '"+pct.to_s+"'" unless pct.is_a?(Numeric) and pct>=0 and pct<=100
+ #puts "subtask := "+pct.to_s+" -> task := "+(@min + @delta * pct.to_f * 0.01).to_s
+ @task.progress( @min + @delta * pct.to_f * 0.01 )
+ end
+
+ def running?()
+ @task.running?
+ end
+ end
+
+
+ # The David Gallagher feature:
+ # a fake sub task to keep the progress bar movin for external jobs
+ # note: param could be a subtask
+ #
+ # usage (for a call that is normally finished in under 60 seconds):
+ # fsk = FakeSubTask.new(task, 60)
+ # external_lib_call.start
+ # external_lib_call.wait_until_finished
+ # fsk.finished
+ #
+ # what happens:
+ # the FakeSubTask updates the task.progress each second until
+ # runtime is up or the finished method is called
+ #
+ # example if the param runtime is too low:
+ # 25% .. 50% .. 75% .. 100% .. 100% .. 100% .. 100% .. 100%
+ # example if the param runtime is too high:
+ # 5% .. 10% .. 15% .. 20% .. 25% .. 30% .. 35% .. 100%
+ # the latter example is better (keep the bar movin!)
+ # -> better make a conservative runtime estimate
+ class FakeSubTask
+
+ def initialize(task, runtime)
+ @task = task
+ @thread = Thread.new do
+ timeleft = runtime
+ while (timeleft > 0 and @task.running?)
+ sleep 1
+ timeleft -= 1
+ @task.progress( (runtime - timeleft) / runtime.to_f * 100 )
+ end
+ end
+ end
+
+ # convenience method to handle null tasks
+ def self.create(task, runtime)
+ if task
+ FakeSubTask.new(task, runtime)
+ else
+ nil
+ end
+ end
+
+ def finished
+ @thread.exit
+ @task.progress(100) if @task.running?
+ end
end
end
diff --git a/lib/to-html.rb b/lib/to-html.rb
new file mode 100755
index 0000000..1bc1496
--- /dev/null
+++ b/lib/to-html.rb
@@ -0,0 +1,80 @@
+
+OT_LOGO = "http://opentox.informatik.uni-freiburg.de/ot-logo.png"
+
+
+class String
+
+ # encloses URIs in text with a link tag
+ # @return [String] new text with marked links
+ def link_urls
+ self.gsub(/(?i)http:\/\/[^\r\n\s']*/, '<a href=\0>\0</a>')
+ end
+end
+
+module OpenTox
+
+ # produces an HTML page for making web services browser friendly
+ # format of text (=string params) is preserved (e.g. line breaks)
+ # urls are marked as links
+ # @example post params:
+ # [ [ [:mandatory_param_1], [:mandatory_param_2], [:optional_param,"default_value"] ],
+ # [ [:alternative_mandatory_param_1], [:alternative_mandatory_param_2] ]
+ # ]
+ # @param [String] text this is the actual content
+ # @param [optional,String] related_links info on related resources
+ # @param [optional,String] description general info
+ # @param [optional,Array] post_params array of arrays containing info on POST operation, see example
+ # @return [String] html page
+ def self.text_to_html( text, related_links=nil, description=nil, post_params=nil )
+
+ title = $sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra
+
+ html = <<EOF
+<html>
+EOF
+ html.chomp!
+ html += "<title>"+title+"</title>" if title
+ html += <<EOF
+<img src="
+EOF
+ html.chomp!
+ html += OT_LOGO
+ html += <<EOF
+">
+<body>
+EOF
+ html.chomp!
+ html += "<h3>Description</h3><pre><p>"+description.link_urls+"</p></pre>" if description
+ html += "<h3>Related links</h3><pre><p>"+related_links.link_urls+"</p></pre>" if related_links
+ if post_params
+ html += "<h3>POST parameters</h3>"
+ count = 0
+ post_params.each do |p|
+ html += "<pre><p>alternatively:</p></pre>" if count > 0
+ html += "<pre><p><table><thead><tr><th>param</th><th>default_value</th></tr></thead>"
+ p.each do |k,v|
+ html += "<tr><th>"+k.to_s+"</th><th>"+(v!=nil ? v.to_s : "<i>mandatory</i>")+"</th></tr>"
+ end
+ html += "</table></p></pre>"
+ count += 1
+ end
+ end
+ html += "<h3>Content</h3>" if description || related_links
+ html += <<EOF
+<pre>
+<p style="padding:15px; border:10px solid #5D308A">
+EOF
+ html.chomp!
+ html += text.link_urls
+ html += <<EOF
+</p>
+</pre>
+</body>
+<html>
+EOF
+ html
+ end
+
+end
+
+#puts OpenTox.text_to_html("bla")
\ No newline at end of file