From 9ee59712fab04f7345e3f44dd02321e50cd460c5 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 11 Jul 2013 14:59:41 +0200 Subject: duplicated and empty data_entries fixed --- application.rb | 21 +++++---------------- helper.rb | 30 ++++++++++++++++++++---------- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/application.rb b/application.rb index edfbb91..61406ec 100644 --- a/application.rb +++ b/application.rb @@ -12,21 +12,10 @@ require_relative 'test.rb' # Library code $logger.debug "Dataset booting: #{$dataset.collect{|k,v| "#{k}: '#{v}'"} }" -require 'profiler' - # Entry point module OpenTox class Application < Service - before do - #Profiler__::start_profile - end - - after do - #Profiler__::stop_profile - #Profiler__::print_profile($stdout) - end - before "/#{SERVICE}/:id/:property" do @uri = uri("/#{SERVICE}/#{params[:id]}") end @@ -73,10 +62,10 @@ module OpenTox get '/dataset/:id/metadata' do case @accept when "application/rdf+xml", "text/turtle", "text/plain" - sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE { - { ?s ?p ?o. <#{@uri}> ?p ?o. } UNION - { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> . } - FILTER (?p != <#{RDF::OT.dataEntry}>) + sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE { + { <#{@uri}> ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Dataset}>. } + UNION { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> . } + MINUS { ?s <#{RDF::OT.dataEntry}> ?o. } } " FourStore.query sparql, @accept else @@ -107,7 +96,7 @@ module OpenTox when "application/rdf+xml", "text/turtle", "text/plain" sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>; ?p ?o. }" when "text/uri-list" - sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>. ?s <#{RDF::OLO.index}> ?idx } ORDER BY ?idx" + sparql = "SELECT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>. ?s <#{RDF::OLO.index}> ?idx } ORDER BY ?idx" else bad_request_error "'#{@accept}' is not a supported content type." end diff --git a/helper.rb b/helper.rb index 54b4540..cb2e1c1 100644 --- a/helper.rb +++ b/helper.rb @@ -102,8 +102,10 @@ module OpenTox # compounds and values compound_uris = [] + r = -1 table.each_with_index do |values,j| compound = values.shift + compound_uri = nil begin case compound_format when /URI|URL/i @@ -112,6 +114,7 @@ module OpenTox c = OpenTox::Compound.from_smiles(compound) if c.inchi.empty? @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." + next else compound_uri = c.uri end @@ -119,6 +122,7 @@ module OpenTox compound = OpenTox::Compound.from_inchi(compound) if c.inchi.empty? @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." + next else compound_uri = c.uri end @@ -130,26 +134,32 @@ module OpenTox @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." # be careful with double quotes in literals! \C in smiles is an illegal Turtle string next end + + r += 1 compound_uris << compound_uri unless values.size == features.size @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored." next end - ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>." - ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ." - data_entry_node = "_:dataentry"+ j.to_s + ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>." + ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{r} ." + #data_entry_node = "<#{File.join @uri,"dataentry",j.to_s}>" # too slow or not accepted by 4store + data_entry_node = "_:dataentry"+ r.to_s ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ." ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ." - ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ." + ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{r} ." ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ." values.each_with_index do |v,i| - @warnings << "Empty value for compound '#{compound}' (row #{j+2}) and feature '#{feature_names[i]}' (column #{i+2})." if v.blank? - - value_node = data_entry_node+ "_value"+ i.to_s - ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ." - ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ." - ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ." + if v.blank? + @warnings << "Empty value for compound '#{compound}' (row #{r+2}) and feature '#{feature_names[i]}' (column #{i+2})." + next + else + value_node = data_entry_node+ "_value"+ i.to_s + ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ." + ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ." + ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ." + end end end -- cgit v1.2.3