From 9a523f0fb2d5ee0058af5b5b82e01f39549f68fb Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 9 Feb 2011 14:06:12 +0100 Subject: fix rdf parsing to work with ambit dataset service with a&a, minor changes --- lib/error.rb | 2 +- lib/model.rb | 9 +++++---- lib/parser.rb | 41 +++++++++++++++++++++++++++-------------- lib/rest_client_wrapper.rb | 13 ++++++++----- lib/task.rb | 26 +++++++++----------------- 5 files changed, 50 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/error.rb b/lib/error.rb index 49756d5..7ca9767 100644 --- a/lib/error.rb +++ b/lib/error.rb @@ -55,7 +55,7 @@ module OpenTox end def self.from_rdf(rdf) - metadata = OpenTox::Parser::Owl.metadata_from_rdf( rdf, OT.ErrorReport ) + metadata = OpenTox::Parser::Owl.from_rdf( rdf, OT.ErrorReport ).metadata ErrorReport.new(metadata[OT.statusCode], metadata[OT.errorCode], metadata[OT.message], metadata[OT.actor], metadata[OT.errorCause]) end diff --git a/lib/model.rb b/lib/model.rb index 64d178f..9622d65 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -43,9 +43,11 @@ module OpenTox @algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid) unless @algorithm algorithm_title = @algorithm ? @algorithm.metadata[DC.title] : nil + algorithm_type = @algorithm ? @algorithm.metadata[OT.isA] : nil @dependentVariable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid ) unless @dependentVariable - - [@dependentVariable.feature_type, @metadata[OT.isA], @metadata[DC.title], @uri, algorithm_title].each do |type| + type_indicators = [@dependentVariable.feature_type, @metadata[OT.isA], @metadata[DC.title], + @uri, algorithm_type, algorithm_title] + type_indicators.each do |type| case type when /(?i)classification/ return "classification" @@ -53,8 +55,7 @@ module OpenTox return "regression" end end - raise "unknown model "+[@dependentVariable.feature_type, @metadata[OT.isA], - @metadata[DC.title], @uri, algorithm_title].inspect + raise "unknown model "+type_indicators.inspect end end diff --git a/lib/parser.rb b/lib/parser.rb index 27dfeee..2f59d15 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -41,17 +41,17 @@ module OpenTox ##uri += "?subjectid=#{CGI.escape(subjectid)}" if subjectid ## `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line| file = Tempfile.new("ot-rdfxml") - file.puts OpenTox::RestClientWrapper.get @uri,{:subjectid => subjectid,:accept => "application/rdf+xml"} + file.puts OpenTox::RestClientWrapper.get @uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false file.close - file = "file://"+file.path statements = [] parameter_ids = [] - `rapper -i rdfxml -o ntriples #{file} 2>/dev/null`.each_line do |line| + `rapper -i rdfxml -o ntriples file://#{file.path} 2>/dev/null`.each_line do |line| triple = line.to_triple @metadata[triple[1]] = triple[2].split('^^').first if triple[0] == @uri and triple[1] != RDF['type'] statements << triple parameter_ids << triple[2] if triple[1] == OT.parameters end + File.delete(file.path) unless parameter_ids.empty? @metadata[OT.parameters] = [] parameter_ids.each do |p| @@ -63,18 +63,17 @@ module OpenTox @metadata end - # loads metadata from rdf-data + # creates owl object from rdf-data # @param [String] rdf # @param [String] type of the info (e.g. OT.Task, OT.ErrorReport) needed to get the subject-uri - # @return [Hash] metadata - def self.metadata_from_rdf( rdf, type ) + # @return [Owl] with uri and metadata set + def self.from_rdf( rdf, type ) # write to file and read convert with rapper into tripples file = Tempfile.new("ot-rdfxml") file.puts rdf file.close - file = "file://"+file.path #puts "cmd: rapper -i rdfxml -o ntriples #{file} 2>/dev/null" - triples = `rapper -i rdfxml -o ntriples #{file} 2>/dev/null` + triples = `rapper -i rdfxml -o ntriples file://#{file.path} 2>/dev/null` # load uri via type uri = nil @@ -85,19 +84,23 @@ module OpenTox uri = triple[0] end end - + File.delete(file) # load metadata metadata = {} triples.each_line do |line| triple = line.to_triple metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type'] end - metadata + owl = Owl::Generic.new(uri) + owl.metadata = metadata + owl end - + # Generic parser for all OpenTox classes class Generic include Owl + + attr_accessor :uri, :metadata end # OWL-DL parser for datasets @@ -128,12 +131,21 @@ module OpenTox # @return [Hash] Internal dataset representation def load_uri(subjectid=nil) uri = @uri - uri += "?subjectid=#{CGI.escape(subjectid)}" if subjectid + + # avoid using rapper directly because of 2 reasons: + # * http errors wont be noticed + # * subjectid cannot be sent as header + ##uri += "?subjectid=#{CGI.escape(subjectid)}" if subjectid + ##`rapper -i rdfxml -o ntriples #{file} 2>/dev/null`.each_line do |line| + file = Tempfile.new("ot-rdfxml") + file.puts OpenTox::RestClientWrapper.get @uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false + file.close + data = {} feature_values = {} feature = {} other_statements = {} - `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line| + `rapper -i rdfxml -o ntriples file://#{file.path} 2>/dev/null`.each_line do |line| triple = line.chomp.split(' ',3) triple = triple[0..2].collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')} case triple[1] @@ -150,6 +162,7 @@ module OpenTox else end end + File.delete(file.path) data.each do |id,entry| entry[:values].each do |value_id| value = feature_values[value_id].split(/\^\^/).first # remove XSD.type @@ -157,7 +170,7 @@ module OpenTox end end load_features - @dataset.metadata = load_metadata + @dataset.metadata = load_metadata(subjectid) @dataset end diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 658f111..fcc0d08 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -61,9 +61,8 @@ module OpenTox raise "__waiting_task__ must be 'nil' or '(sub)task', is "+waiting_task.class.to_s if waiting_task!=nil and !(waiting_task.is_a?(Task) || waiting_task.is_a?(SubTask)) headers.each{ |k,v| headers.delete(k) if v==nil } if headers #remove keys with empty values, as this can cause problems - - # PENDING needed for NUTA, until we finally agree on how to send subjectid - headers[:subjectid] = payload.delete(:subjectid) if uri=~/ntua/ and payload and payload.is_a?(Hash) and payload.has_key?(:subjectid) + ## PENDING partner services accept subjectid only in header + headers[:subjectid] = payload.delete(:subjectid) if payload and payload.is_a?(Hash) and payload.has_key?(:subjectid) begin #LOGGER.debug "RestCall: "+rest_call.to_s+" "+uri.to_s+" "+headers.inspect+" "+payload.inspect @@ -94,6 +93,8 @@ module OpenTox rescue RestClient::RequestTimeout => ex received_error ex.message, 408, nil, {:rest_uri => uri, :headers => headers, :payload => payload} + rescue Errno::ECONNREFUSED => ex + received_error ex.message, 500, nil, {:rest_uri => uri, :headers => headers, :payload => payload} rescue RestClient::ExceptionWithResponse => ex # error comming from a different webservice, received_error ex.http_body, ex.http_code, ex.response.net_http_res.content_type, {:rest_uri => uri, :headers => headers, :payload => payload} @@ -107,7 +108,9 @@ module OpenTox end def self.wait_for_task( res, base_uri, waiting_task=nil ) - + #TODO remove TUM hack + content_type = "text/uri-list" if base_uri =~/tu-muenchen/ and res.content_type == "application/x-www-form-urlencoded;charset=UTF-8" +s task = nil case res.content_type when /application\/rdf\+xml/ @@ -118,7 +121,7 @@ module OpenTox raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 #if uri list contains more then one uri, its not a task task = OpenTox::Task.find(res.to_s.chomp) if res.to_s.uri? else - raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s + raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" content: "+res[0..200].to_s end LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion" diff --git a/lib/task.rb b/lib/task.rb index 742afb4..27dc1c2 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -94,23 +94,11 @@ module OpenTox def self.from_yaml(yaml) @metadata = YAML.load(yaml) end - def self.from_rdfxml(rdfxml) - file = Tempfile.new("ot-rdfxml") - file.puts rdfxml - file.close - file = "file://"+file.path - - # PENDING - raise "Parse from file not working: what is the base-object-uri??? (omitted in triples)" - - parser = Parser::Owl::Generic.new file - metadata = parser.load_metadata - puts metadata.inspect - - task = Task.new(uri) - task.add_metadata(metadata) + owl = OpenTox::Parser::Owl.from_rdf(rdfxml, OT.Task) + task = Task.new(owl.uri) + task.add_metadata(owl.metadata) task end @@ -176,7 +164,7 @@ module OpenTox end def load_metadata - if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) + if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) result = RestClientWrapper.get(@uri, {:accept => 'application/x-yaml'}, nil, false) @metadata = YAML.load result.to_s @http_code = result.code @@ -184,6 +172,7 @@ module OpenTox @metadata = Parser::Owl::Generic.new(@uri).load_metadata @http_code = RestClientWrapper.get(uri, {:accept => 'application/rdf+xml'}, nil, false).code end + raise "could not load task metadata for task "+@uri.to_s if @metadata==nil || @metadata.size==0 end # create is private now, use OpenTox::Task.as_task @@ -274,11 +263,14 @@ module OpenTox end private + VALID_TASK_STATES = ["Cancelled", "Completed", "Running", "Error"] + def check_state begin + raise "illegal task state, invalid status: '"+@metadata[OT.hasStatus].to_s+"'" unless + @metadata[OT.hasStatus] unless VALID_TASK_STATES.include?(@metadata[OT.hasStatus]) raise "illegal task state, task is completed, resultURI is no URI: '"+@metadata[OT.resultURI].to_s+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? if completed? - if @http_code == 202 raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running? elsif @http_code == 201 -- cgit v1.2.3