summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Martin Gütlein <martin.guetlein@gmail.com> 2010-03-25 19:39:29 +0100
committer: Martin Gütlein <martin.guetlein@gmail.com> 2010-03-25 19:39:29 +0100
commit: 0230d687322bab8c0fd24cf41e33a28554a364db (patch)
tree: 9dba7f07db99cd96e220236dbf84989441ce5f44 /lib
parent: 7c6b9a46e012a3e541f63d245c344e5876d7da5e (diff)
wrapper adjustments for validation
Diffstat (limited to 'lib')
-rw-r--r-- lib/dataset.rb 169
-rw-r--r-- lib/environment.rb 6
-rw-r--r-- lib/model.rb 51
-rw-r--r-- lib/opentox-ruby-api-wrapper.rb 2
-rw-r--r-- lib/owl.rb 13
-rw-r--r-- lib/spork.rb 2
-rw-r--r-- lib/task.rb 32
-rw-r--r-- lib/utils.rb 19
8 files changed, 195 insertions, 99 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index f416921..c472d84 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -1,7 +1,7 @@
LOGGER.progname = File.expand_path(__FILE__)
module OpenTox
-
+
class Dataset
attr_accessor :uri, :title, :source, :identifier, :data, :features, :compounds
@@ -13,26 +13,96 @@ module OpenTox
end
def self.find(uri)
- if uri.match(/webservices.in-silico.ch|localhost/) # try to get YAML first
- YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s
+
+ if uri.match(/webservices.in-silico.ch|localhost|ot.dataset.de/) # try to get YAML first
+ d = YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s
else # get default rdf+xml
owl = OpenTox::Owl.from_uri(uri)
- @title = owl.title
- @source = owl.source
- @identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'')
- @uri = @identifier
- @data = owl.data
- halt 404, "Dataset #{uri} empty!" if @data.empty?
- @data.each do |compound,features|
- @compounds << compound
+
+ d = Dataset.new
+ d.title = owl.title
+ d.source = owl.source
+ d.identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'')
+ d.uri = d.identifier
+ d.data = owl.data
+ halt 404, "Dataset #{uri} empty!" if d.data.empty?
+ d.data.each do |compound,features|
+ d.compounds << compound
features.each do |f,v|
- @features << f
+ d.features << f
end
end
- @compounds.uniq!
- @features.uniq!
- end
+ d.compounds.uniq!
+ d.features.uniq!
+ end
+ return d
end
+
+ # creates a new dataset, using only those compounds specified in new_compounds
+ # returns uri of new dataset
+ def create_new_dataset( new_compounds, new_title, new_source )
+
+ dataset = OpenTox::Dataset.new
+ dataset.title = new_title
+ dataset.source = new_source
+ dataset.features = @features
+ dataset.compounds = new_compounds
+ new_compounds.each do |c|
+ dataset.data[c] = @data[c]
+ end
+ return dataset.save
+ end
+
+ # returns classification value
+ def get_predicted_class(compound, feature)
+ v = get_value(compound, feature)
+ if v.is_a?(Hash)
+ if v.has_key?(:classification)
+ return v[:classification]
+ else
+ return "no classification key"
+ end
+ else
+ raise "invalid value type"
+ end
+
+ end
+
+ # returns prediction confidence if available
+ def get_prediction_confidence(compound, feature)
+ v = get_value(compound, feature)
+ if v.is_a?(Hash)
+ if v.has_key?(:confidence)
+ return v[:confidence].abs
+ else
+ # PENDING: return nil instead of raising an exception
+ raise "no confidence key"
+ end
+ else
+ raise "invalid value type"
+ end
+ end
+
+ # return compound-feature value
+ def get_value(compound, feature)
+ v = @data[compound]
+ raise "no values for compound "+compound.to_s if v==nil
+ if v.is_a?(Array)
+ # PENDING: why using an array here?
+ v.each do |e|
+ if e.is_a?(Hash)
+ if e.has_key?(feature)
+ return e[feature]
+ end
+ else
+ raise "invalid internal value type"
+ end
+ end
+ raise "feature value no found: "+feature.to_s
+ else
+ raise "invalid value type"
+ end
+ end
def save
@@ -120,21 +190,7 @@ module OpenTox
resource = RestClient::Resource.new(@@config[:services]["opentox-dataset"], :user => @@users[:users].keys[0], :password => @@users[:users].values[0])
uri = resource.post data, :content_type => content_type
dataset = Dataset.new
-<<<<<<< HEAD
- dataset.read uri.to_s
- dataset
- end
-
- def self.find(uri)
- dataset = Dataset.new
- LOGGER.debug "Getting data from #{uri}"
- data = `curl "#{uri}" 2> /dev/null`
- #LOGGER.debug data
- #data = RestClient.get(uri, :accept => 'application/rdf+xml') # unclear why this does not work for complex uris, Dataset.find works from irb
- dataset.rdf = data
-=======
dataset.read uri.chomp.to_s
->>>>>>> helma/development
dataset
end
@@ -146,61 +202,6 @@ module OpenTox
features
end
-<<<<<<< HEAD
- def data
- data = {}
- @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
- compound_node = @model.object(data_entry, OT['compound'])
- compound_uri = @model.object(compound_node, DC['identifier']).to_s
- @model.find(data_entry, OT['values'], nil) do |s,p,values|
- feature_node = @model.object values, OT['feature']
- feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
- type = @model.object(values, RDF['type'])
- if type == OT['FeatureValue']
- value = @model.object(values, OT['value']).to_s
- case value.to_s
- when TRUE_REGEXP # defined in environment.rb
- value = true
- when FALSE_REGEXP # defined in environment.rb
- value = false
- when /.*\^\^<.*XMLSchema#.*>/
- case value.to_s
- when /XMLSchema#string/
- value = value.to_s[0..(value.to_s.index("^^")-1)]
- when /XMLSchema#double/
- value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
- else
- LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
- value = nil
- end
- else
- LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
- value = nil
- end
- data[compound_uri] = {} unless data[compound_uri]
- data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
- data[compound_uri][feature_uri] << value unless value.nil?
- elsif type == OT['Tuple']
- entry = {}
- data[compound_uri] = {} unless data[compound_uri]
- data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
- @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
- name_node = @model.object complex_value, OT['feature']
- name = @model.object(name_node, DC['title']).to_s
- value = @model.object(complex_value, OT['value']).to_s
- v = value.sub(/\^\^.*$/,'') # remove XML datatype
- v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
- entry[name] = v
- end
- data[compound_uri][feature_uri] << entry
- end
- end
- end
- data
- end
-=======
->>>>>>> helma/development
-
def compounds
compounds = []
@model.subjects(RDF['type'], OT["Compound"]).each do |compound_node|
diff --git a/lib/environment.rb b/lib/environment.rb
index 4c501b4..2838c29 100644
--- a/lib/environment.rb
+++ b/lib/environment.rb
@@ -95,12 +95,14 @@ class MyLogger < Logger
end
-logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
+logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
LOGGER = MyLogger.new(logfile,'daily') # daily rotation
+
#LOGGER = MyLogger.new(STDOUT)
+#LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S "
-LOGGER.level = Logger::DEBUG
+#LOGGER.level = Logger::DEBUG
if File.exist?(user_file)
@@users = YAML.load_file(user_file)
diff --git a/lib/model.rb b/lib/model.rb
index 8877b84..e8eee09 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -3,11 +3,16 @@ module OpenTox
class Generic
- attr_accessor :predicted_variables, :independent_variables, :dependent_variables, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm
+ attr_accessor :uri, :title, :source, :identifier, :predicted_variables, :independent_variables, :dependent_variables, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm
def self.find(uri)
owl = OpenTox::Owl.from_uri(uri)
- @title = owl.title
+ return self.new(owl)
+ end
+
+ protected
+ def initialize(owl)
+ @title = owl.title
@source = owl.source
@identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'')
@uri = @identifier
@@ -16,8 +21,46 @@ module OpenTox
@independent_variables = owl.independentVariables
@predicted_variables = owl.predictedVariables
end
-
- end
+ end
+
+
+ class PredictionModel < Generic
+
+ def self.build( algorithm_uri, algorithm_params )
+
+ LOGGER.debug "Build model, algorithm_uri:"+algorithm_uri.to_s+", algorithm_parms: "+algorithm_params.to_s
+ uri = OpenTox::RestClientWrapper.post(algorithm_uri,algorithm_params).to_s
+ uri = OpenTox::Task.find(uri).wait_for_resource.to_s if Utils.task_uri?(uri)
+ return PredictionModel.find(uri)
+ end
+
+ def predict_dataset( dataset_uri )
+
+ LOGGER.debug "Predict dataset: "+dataset_uri.to_s+" with model "+@uri.to_s
+
+ #HACK using curl
+ uri = ""
+ IO.popen("curl -X POST -d dataset_uri='"+dataset_uri+"' "+@uri.to_s+" 2> /dev/null") do |f|
+ while line = f.gets
+ uri += line
+ end
+ end
+ uri = OpenTox::Task.find(uri).wait_for_resource.to_s if Utils.task_uri?(uri)
+ return uri if Utils.dataset_uri?(uri)
+ raise "not sure about prediction result: "+uri.to_s
+ end
+
+ def classification?
+ #HACK replace with request to ontology server
+ case @title
+ when /lazar classification/
+ return true
+ else
+ raise "unknown model: "+@title.to_s
+ end
+ end
+ end
+
class Lazar < Generic
diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb
index 849043c..c795ca8 100644
--- a/lib/opentox-ruby-api-wrapper.rb
+++ b/lib/opentox-ruby-api-wrapper.rb
@@ -8,6 +8,6 @@ rescue LoadError
puts "Please install Openbabel with 'rake openbabel:install' in the compound component"
end
-['owl', 'compound','dataset','algorithm','model','task','validation','utils','authorization'].each do |lib|
+['owl', 'compound','dataset','algorithm','model','task','validation','utils','authorization','features'].each do |lib|
require lib
end
diff --git a/lib/owl.rb b/lib/owl.rb
index 1d47d2c..2cd89c4 100644
--- a/lib/owl.rb
+++ b/lib/owl.rb
@@ -15,15 +15,18 @@ module OpenTox
owl.model.add owl.uri, RDF['type'], OT[owl.ot_class]
owl.model.add owl.uri, DC['identifier'], owl.uri
owl
- end
-
+ end
+
def self.from_uri(uri)
owl = OpenTox::Owl.new
parser = Redland::Parser.new
begin
- parser.parse_into_model(owl.model,uri)
- rescue => e
- raise "Error parsing #{uri}: #{e.message + e.backtrace}"
+ data = RestClient.get(uri,:accept => "application/rdf+xml").to_s
+ parser.parse_string_into_model(owl.model, data, uri)
+ #parser.parse_into_model(owl.model,uri)
+ rescue => e
+ raise "Error parsing #{uri}: "+e.message
+ #raise "Error parsing #{uri}: #{e.message.to_s + e.backtrace.to_s}"
end
owl.uri = Redland::Uri.new(uri.chomp)
owl
diff --git a/lib/spork.rb b/lib/spork.rb
index e8cf37f..c77b5b5 100644
--- a/lib/spork.rb
+++ b/lib/spork.rb
@@ -53,7 +53,7 @@ module Spork
yield
rescue => ex
- raise ex
+ #raise ex
logger.error "spork> Exception in child[#{Process.pid}] - #{ex.class}: #{ex.message}" if logger
ensure
logger.info "spork> child[#{Process.pid}] took #{Time.now - start} sec" if logger
diff --git a/lib/task.rb b/lib/task.rb
index 5591a34..75cc2d2 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -90,7 +90,37 @@ module OpenTox
until self.completed? or self.failed?
sleep dur
end
- end
+ end
+
+ def self.as_task
+ task = OpenTox::Task.create
+ LOGGER.debug "Starting task"
+ pid = Spork.spork(:logger => LOGGER) do
+ task.started
+ LOGGER.debug "Task #{task.uri} started #{Time.now}"
+ begin
+ result = yield
+ task.completed(result)
+ rescue => ex
+ raise ex
+ LOGGER.error ex.message
+ task.failed
+ end
+ raise "Invalid task state" unless task.completed? || task.failed?
+ end
+ LOGGER.debug "task PID: " + pid.to_s
+ task.pid = pid
+ task.uri
+ end
+
+ def wait_for_resource
+ wait_for_completion
+ if failed?
+ LOGGER.error "task failed: "+uri.to_s
+ return nil
+ end
+ return resource
+ end
end
diff --git a/lib/utils.rb b/lib/utils.rb
index cccb5ea..bbb750d 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -4,6 +4,23 @@ module OpenTox
def self.gauss(sim, sigma = 0.3)
x = 1.0 - sim
Math.exp(-(x*x)/(2*sigma*sigma))
- end
+ end
+
+ def self.task_uri?(uri)
+ is_uri?(uri) && uri.to_s =~ /task/
+ end
+
+ def self.dataset_uri?(uri)
+ is_uri?(uri) && uri.to_s =~ /dataset/
+ end
+
+ def self.is_uri?(uri)
+ begin
+ URI::parse(uri)
+ rescue URI::InvalidURIError
+ false
+ end
+ end
+
end
end