diff options
author | Martin Gütlein <martin.guetlein@gmail.com> | 2010-03-25 19:39:29 +0100 |
---|---|---|
committer | Martin Gütlein <martin.guetlein@gmail.com> | 2010-03-25 19:39:29 +0100 |
commit | 0230d687322bab8c0fd24cf41e33a28554a364db (patch) | |
tree | 9dba7f07db99cd96e220236dbf84989441ce5f44 /lib | |
parent | 7c6b9a46e012a3e541f63d245c344e5876d7da5e (diff) |
wrapper adjustments for validation
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dataset.rb | 169 | ||||
-rw-r--r-- | lib/environment.rb | 6 | ||||
-rw-r--r-- | lib/model.rb | 51 | ||||
-rw-r--r-- | lib/opentox-ruby-api-wrapper.rb | 2 | ||||
-rw-r--r-- | lib/owl.rb | 13 | ||||
-rw-r--r-- | lib/spork.rb | 2 | ||||
-rw-r--r-- | lib/task.rb | 32 | ||||
-rw-r--r-- | lib/utils.rb | 19 |
8 files changed, 195 insertions, 99 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb index f416921..c472d84 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -1,7 +1,7 @@ LOGGER.progname = File.expand_path(__FILE__) module OpenTox - + class Dataset attr_accessor :uri, :title, :source, :identifier, :data, :features, :compounds @@ -13,26 +13,96 @@ module OpenTox end def self.find(uri) - if uri.match(/webservices.in-silico.ch|localhost/) # try to get YAML first - YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s + + if uri.match(/webservices.in-silico.ch|localhost|ot.dataset.de/) # try to get YAML first + d = YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s else # get default rdf+xml owl = OpenTox::Owl.from_uri(uri) - @title = owl.title - @source = owl.source - @identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'') - @uri = @identifier - @data = owl.data - halt 404, "Dataset #{uri} empty!" if @data.empty? - @data.each do |compound,features| - @compounds << compound + + d = Dataset.new + d.title = owl.title + d.source = owl.source + d.identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'') + d.uri = d.identifier + d.data = owl.data + halt 404, "Dataset #{uri} empty!" if d.data.empty? + d.data.each do |compound,features| + d.compounds << compound features.each do |f,v| - @features << f + d.features << f end end - @compounds.uniq! - @features.uniq! - end + d.compounds.uniq! + d.features.uniq! + end + return d end + + # creates a new dataset, using only those compounsd specified in new_compounds + # returns uri of new dataset + def create_new_dataset( new_compounds, new_title, new_source ) + + dataset = OpenTox::Dataset.new + dataset.title = new_title + dataset.source = new_source + dataset.features = @features + dataset.compounds = new_compounds + new_compounds.each do |c| + dataset.data[c] = @data[c] + end + return dataset.save + end + + # returns classification value + def get_predicted_class(compound, feature) + v = get_value(compound, feature) + if v.is_a?(Hash) + if v.has_key?(:classification) + return v[:classification] + else + return "no classification key" + end + else + raise "invalid value type" + end + + end + + # returns prediction confidence if available + def get_prediction_confidence(compound, feature) + v = get_value(compound, feature) + if v.is_a?(Hash) + if v.has_key?(:confidence) + return v[:confidence].abs + else + # PENDING: return nil isntead of raising an exception + raise "no confidence key" + end + else + raise "invalid value type" + end + end + + # return compound-feature value + def get_value(compound, feature) + v = @data[compound] + raise "no values for compound "+compound.to_s if v==nil + if v.is_a?(Array) + # PENDING: why using an array here? + v.each do |e| + if e.is_a?(Hash) + if e.has_key?(feature) + return e[feature] + end + else + raise "invalid internal value type" + end + end + raise "feature value no found: "+feature.to_s + else + raise "invalid value type" + end + end def save @@ -120,21 +190,7 @@ module OpenTox resource = RestClient::Resource.new(@@config[:services]["opentox-dataset"], :user => @@users[:users].keys[0], :password => @@users[:users].values[0]) uri = resource.post data, :content_type => content_type dataset = Dataset.new -<<<<<<< HEAD - dataset.read uri.to_s - dataset - end - - def self.find(uri) - dataset = Dataset.new - LOGGER.debug "Getting data from #{uri}" - data = `curl "#{uri}" 2> /dev/null` - #LOGGER.debug data - #data = RestClient.get(uri, :accept => 'application/rdf+xml') # unclear why this does not work for complex uris, Dataset.find works from irb - dataset.rdf = data -======= dataset.read uri.chomp.to_s ->>>>>>> helma/development dataset end @@ -146,61 +202,6 @@ module OpenTox features end -<<<<<<< HEAD - def data - data = {} - @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry| - compound_node = @model.object(data_entry, OT['compound']) - compound_uri = @model.object(compound_node, DC['identifier']).to_s - @model.find(data_entry, OT['values'], nil) do |s,p,values| - feature_node = @model.object values, OT['feature'] - feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype - type = @model.object(values, RDF['type']) - if type == OT['FeatureValue'] - value = @model.object(values, OT['value']).to_s - case value.to_s - when TRUE_REGEXP # defined in environment.rb - value = true - when FALSE_REGEXP # defined in environment.rb - value = false - when /.*\^\^<.*XMLSchema#.*>/ - case value.to_s - when /XMLSchema#string/ - value = value.to_s[0..(value.to_s.index("^^")-1)] - when /XMLSchema#double/ - value = value.to_s[0..(value.to_s.index("^^")-1)].to_f - else - LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri - value = nil - end - else - LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri - value = nil - end - data[compound_uri] = {} unless data[compound_uri] - data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri] - data[compound_uri][feature_uri] << value unless value.nil? - elsif type == OT['Tuple'] - entry = {} - data[compound_uri] = {} unless data[compound_uri] - data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri] - @model.find(values, OT['complexValue'],nil) do |s,p,complex_value| - name_node = @model.object complex_value, OT['feature'] - name = @model.object(name_node, DC['title']).to_s - value = @model.object(complex_value, OT['value']).to_s - v = value.sub(/\^\^.*$/,'') # remove XML datatype - v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype - entry[name] = v - end - data[compound_uri][feature_uri] << entry - end - end - end - data - end -======= ->>>>>>> helma/development - def compounds compounds = [] @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node| diff --git a/lib/environment.rb b/lib/environment.rb index 4c501b4..2838c29 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -95,12 +95,14 @@ class MyLogger < Logger end -logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log" +logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log" LOGGER = MyLogger.new(logfile,'daily') # daily rotation + #LOGGER = MyLogger.new(STDOUT) +#LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " -LOGGER.level = Logger::DEBUG +#LOGGER.level = Logger::DEBUG if File.exist?(user_file) @@users = YAML.load_file(user_file) diff --git a/lib/model.rb b/lib/model.rb index 8877b84..e8eee09 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -3,11 +3,16 @@ module OpenTox class Generic - attr_accessor :predicted_variables, :independent_variables, :dependent_variables, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm + attr_accessor :uri, :title, :source, :identifier, :predicted_variables, :independent_variables, :dependent_variables, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm def self.find(uri) owl = OpenTox::Owl.from_uri(uri) - @title = owl.title + return self.new(owl) + end + + protected + def initialize(owl) + @title = owl.title @source = owl.source @identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'') @uri = @identifier @@ -16,8 +21,46 @@ module OpenTox @independent_variables = owl.independentVariables @predicted_variables = owl.predictedVariables end - - end + end + + + class PredictionModel < Generic + + def self.build( algorithm_uri, algorithm_params ) + + LOGGER.debug "Build model, algorithm_uri:"+algorithm_uri.to_s+", algorithm_parms: "+algorithm_params.to_s + uri = OpenTox::RestClientWrapper.post(algorithm_uri,algorithm_params).to_s + uri = OpenTox::Task.find(uri).wait_for_resource.to_s if Utils.task_uri?(uri) + return PredictionModel.find(uri) + end + + def predict_dataset( dataset_uri ) + + LOGGER.debug "Predict dataset: "+dataset_uri.to_s+" with model "+@uri.to_s + + #HACK using curl + uri = "" + IO.popen("curl -X POST -d dataset_uri='"+dataset_uri+"' "+@uri.to_s+" 2> /dev/null") do |f| + while line = f.gets + uri += line + end + end + uri = OpenTox::Task.find(uri).wait_for_resource.to_s if Utils.task_uri?(uri) + return uri if Utils.dataset_uri?(uri) + raise "not sure about prediction result: "+uri.to_s + end + + def classification? + #HACK replace with request to ontology server + case @title + when /lazar classification/ + return true + else + raise "unknown model: "+@title.to_s + end + end + end + class Lazar < Generic diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb index 849043c..c795ca8 100644 --- a/lib/opentox-ruby-api-wrapper.rb +++ b/lib/opentox-ruby-api-wrapper.rb @@ -8,6 +8,6 @@ rescue LoadError puts "Please install Openbabel with 'rake openbabel:install' in the compound component" end -['owl', 'compound','dataset','algorithm','model','task','validation','utils','authorization'].each do |lib| +['owl', 'compound','dataset','algorithm','model','task','validation','utils','authorization','features'].each do |lib| require lib end @@ -15,15 +15,18 @@ module OpenTox owl.model.add owl.uri, RDF['type'], OT[owl.ot_class] owl.model.add owl.uri, DC['identifier'], owl.uri owl - end - + end + def self.from_uri(uri) owl = OpenTox::Owl.new parser = Redland::Parser.new begin - parser.parse_into_model(owl.model,uri) - rescue => e - raise "Error parsing #{uri}: #{e.message + e.backtrace}" + data = RestClient.get(uri,:accept => "application/rdf+xml").to_s + parser.parse_string_into_model(owl.model, data, uri) + #parser.parse_into_model(owl.model,uri) + rescue => e + raise "Error parsing #{uri}: "+e.message + #raise "Error parsing #{uri}: #{e.message.to_s + e.backtrace.to_s}" end owl.uri = Redland::Uri.new(uri.chomp) owl diff --git a/lib/spork.rb b/lib/spork.rb index e8cf37f..c77b5b5 100644 --- a/lib/spork.rb +++ b/lib/spork.rb @@ -53,7 +53,7 @@ module Spork yield rescue => ex - raise ex + #raise ex logger.error "spork> Exception in child[#{Process.pid}] - #{ex.class}: #{ex.message}" if logger ensure logger.info "spork> child[#{Process.pid}] took #{Time.now - start} sec" if logger diff --git a/lib/task.rb b/lib/task.rb index 5591a34..75cc2d2 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -90,7 +90,37 @@ module OpenTox until self.completed? or self.failed? sleep dur end - end + end + + def self.as_task + task = OpenTox::Task.create + LOGGER.debug "Starting task" + pid = Spork.spork(:logger => LOGGER) do + task.started + LOGGER.debug "Task #{task.uri} started #{Time.now}" + begin + result = yield + task.completed(result) + rescue => ex + raise ex + LOGGER.error ex.message + task.failed + end + raise "Invalid task state" unless task.completed? || task.failed? + end + LOGGER.debug "task PID: " + pid.to_s + task.pid = pid + task.uri + end + + def wait_for_resource + wait_for_completion + if failed? + LOGGER.error "task failed: "+uri.to_s + return nil + end + return resource + end end diff --git a/lib/utils.rb b/lib/utils.rb index cccb5ea..bbb750d 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -4,6 +4,23 @@ module OpenTox def self.gauss(sim, sigma = 0.3) x = 1.0 - sim Math.exp(-(x*x)/(2*sigma*sigma)) - end + end + + def self.task_uri?(uri) + is_uri?(uri) && uri.to_s =~ /task/ + end + + def self.dataset_uri?(uri) + is_uri?(uri) && uri.to_s =~ /dataset/ + end + + def self.is_uri?(uri) + begin + URI::parse(uri) + rescue URI::InvalidURIError + false + end + end + end end |