summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2013-03-26 10:52:55 +0100
committer Christoph Helma <helma@in-silico.ch> 2013-03-26 10:52:55 +0100
commit ce7216c6da3767e3e55429eaa0b55c367939ee38 (patch)
tree 20889a1f3d404984272c13054f9f45485a1ca4c5
parent bb35f26cc0e1bc60303465f991cde2ac01a54b38 (diff)
code cleanup and refactoring.
-rw-r--r-- application.rb 353
-rw-r--r-- compound.rb 6
-rw-r--r-- helper.rb 347
-rw-r--r-- webapp/dataset.rb 125
-rw-r--r-- webapp/sinatra.rb 4
-rw-r--r-- webapp/test.rb 16
6 files changed, 366 insertions, 485 deletions
diff --git a/application.rb b/application.rb
index dfdc952..20fadad 100644
--- a/application.rb
+++ b/application.rb
@@ -4,325 +4,27 @@
require 'roo'
require 'opentox-server'
-require './compound.rb'
+require_relative 'helper.rb'
+require_relative 'compound.rb'
+# TODO: remove and find a better way to test task errors
+require_relative 'test.rb'
# Library code
$logger.debug "Dataset booting: #{$dataset.collect{|k,v| "#{k}: '#{v}'"} }"
-Dir['./lib/compound/*.rb'].each { |f| require f; also_reload f } # Libs
-Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs
-Dir['./webapp/*.rb'].each { |f| require f; also_reload f } # Webapps
-#require 'profiler'
+require 'profiler'
# Entry point
module OpenTox
class Application < Service
- @warnings = []
-
- helpers do
-
- def from_csv(csv)
- table = CSV.parse(csv)
- # CSVs with unexpected encodings may have blanks instead of nil
- table.collect! { |row|
- row.collect! { |val|
- (val.class == String and val.strip == "") ? nil : val
- }
- }
- from_table table
- end
-
- def from_spreadsheet spreadsheet
- extensions = { Excel => ".xls", Excelx => ".xlsx", Openoffice => ".ods" }
- input = params[:file][:tempfile].path + ".xls"
- csv_file = params[:file][:tempfile].path + ".csv"
- File.rename params[:file][:tempfile].path, input # roo needs "correct" extensions
- spreadsheet.new(input).to_csv csv_file # roo cannot write to strings
- @body = from_csv File.read(csv_file)
- @content_type = "text/plain"
- end
-
-=begin
- def from_sdf(sdf)
-
- #obconversion = OpenBabel::OBConversion.new
- #obmol = OpenBabel::OBMol.new
- #obconversion.set_in_and_out_formats "sdf", "inchi"
-
- table = []
-
- properties = []
- sdf.each_line { |l| properties << l.to_s if l.match(/</) }
- properties.sort!
- properties.uniq!
- properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp }
- properties.insert 0, "InChI"
- table[0] = properties
-
- rec = 0
- sdf.split(/\$\$\$\$\r*\n/).each do |s|
- rec += 1
- table << []
- begin
- # TODO: use compound service
- compound = OpenTox::Compound.from_sdf sdf
- #obconversion.read_string obmol, s
- table.last << obconversion.write_string(obmol).gsub(/\s/,'').chomp
- rescue
- # TODO: Fix, will lead to follow up errors
- table.last << "Could not convert structure at record #{rec}) have been ignored! \n#{s}"
- end
- obmol.get_data.each { |d| table.last[table.first.index(d.get_attribute)] = d.get_value }
- end
- from_table table
- end
-=end
-
- def from_table table
-
- @warnings = []
- ntriples = ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.Dataset}>."]
- ntriples << ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.OrderedDataset}>."]
-
- # features
- feature_names = table.shift.collect{|f| f.strip}
- @warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size
- compound_format = feature_names.shift.strip
- bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: URI, SMILES, InChI." unless compound_format =~ /URI|URL|SMILES|InChI/i
- features = []
- ignored_feature_indices = []
- feature_names.each_with_index do |f,i|
- values = table.collect{|row| val=row[i+1]; val.strip! unless val.nil?; val }.uniq.compact
- types = values.collect{|v| feature_type(v)}.uniq
- metadata = {}
- if values.size == 0 # empty feature
- elsif values.size <= 5 # max classes
- metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ]
- metadata[RDF::OT.acceptValue] = values
- end
- if types.size == 1 and types[0] == RDF::OT.NumericFeature
- metadata[RDF.type] = [] unless metadata[RDF.type]
- metadata[RDF.type] << RDF::OT.NumericFeature
- else
- metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] # only nominal type for mixed cases
- metadata[RDF::OT.acceptValue] = values
- end
- feature = OpenTox::Feature.find_by_title(f,metadata) # AM: find or generate
- features << feature unless feature.nil?
- ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}>."
- ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> #{i} ."
- end
-
- # compounds and values
- compound_uris = []
- table.each_with_index do |values,j|
- compound = values.shift
- begin
- case compound_format
- when /URI|URL/i
- compound_uri = compound
- when /SMILES/i
- compound_uri = OpenTox::Compound.from_smiles($compound[:uri], compound).uri
- when /InChI/i
- compound_uri = OpenTox::Compound.from_inchi($compound[:uri], compound).uri
- end
- rescue
- @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." # be careful with double quotes in literals!
- next
- end
- compound_uris << compound_uri
- unless values.size == features.size
- @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored."
- next
- end
- ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>."
- ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ."
-
- values.each_with_index do |v,i|
- @warnings << "Empty value for compound #{compound} (row #{j+2}) and feature \"#{feature_names[i]}\" (column #{i+2})." if v.blank?
- #@warnings << "Empty value in row #{j+2}, column #{i+2} (feature \"#{feature_names[i]}\")." if v.blank?
-
- data_entry_node = "_:dataentry"+ j.to_s
- value_node = data_entry_node+ "_value"+ i.to_s
- ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ."
- ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ."
- ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ."
- ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ."
- ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ."
- ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ."
- ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ."
-
- end
-
- end
- compound_uris.duplicates.each do |uri|
- positions = []
- compound_uris.each_with_index{|c,i| positions << i+1 if c == uri}
- @warnings << "Duplicate compound #{uri} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
- end
-
- ntriples << "<#{@uri}> <#{RDF::OT.Warnings}> \"#{@warnings.join('\n')}\" ."
- ntriples.join("\n")
- end
-
-=begin
- def to_xlsx
-
- # both simple_xlsx and axlsx create empty documents with OLE2 errors
- xlsx = @uri.split("/").last+".xlsx"
- p = Axlsx::Package.new
- wb = p.workbook
- wb.add_worksheet(:name => "test") do |sheet|
- to_table.each { |row| sheet.add_row row; puts row }
- end
- p.serialize("test.xlsx")
-
- p.to_stream
-#```
- #Tempfile.open(@uri.split("/").last+".xlsx") do |xlsx|
- SimpleXlsx::Serializer.new(xlsx) do |doc|
- doc.add_sheet("People") do |sheet|
- to_table.each { |row| sheet.add_row row }
- end
- end
- send_file xlsx
- #end
- end
-=end
-
- def to_csv
- csv_string = CSV.generate do |csv|
- to_table.each { |row| csv << row }
- end
- csv_string.gsub(/\"\"/,"") # AM: no quotes for missing values
- end
-
- def to_table
-=begin
- table = []
- dataset = OpenTox::Dataset.new @uri
- dataset.get
- table << ["SMILES"] + dataset.features.collect{|f| f.get; f.title}
- dataset.data_entries.each_with_index do |data_entry,i|
- table << [dataset.compounds[i]] + data_entry
- end
- table
-=end
- accept = "text/uri-list"
- table = []
- if ordered?
- features = OpenTox::Dataset.find_features_sparql(@uri)
- sparql_constraints = {:type => RDF.type, :title => RDF::DC.title}
- feature_props = OpenTox::Dataset.find_props_sparql(features.collect { |f| f.uri }, sparql_constraints)
- quoted_features = []; feature_names = []
- features.each { |feature|
- quoted_features << feature_props[feature.uri][:type].include?(RDF::OT.NominalFeature)
- feature_names << "\"#{feature_props[feature.uri][:title][0].strip}\""
- }
- compounds = OpenTox::Dataset.find_compounds_sparql(@uri)
- values = OpenTox::Dataset.find_data_entries_sparql(@uri)
- values += Array.new(compounds.size*features.size-values.size, "")
- clim=(compounds.size-1)
- cidx = fidx = 0
- num=(!quoted_features[fidx])
- table = (Array.new((features.size)*(compounds.size))).each_slice(features.size).to_a
- values.each { |val|
- unless val.blank?
- table[cidx][fidx] = (num ? val : "\"#{val}\"")
- end
- if (cidx < clim)
- cidx+=1
- else
- cidx=0
- fidx+=1
- num=(!quoted_features[fidx])
- end
- }
- table.each_with_index { |row,idx| row.unshift("\"#{compounds[idx].inchi}\"") }
- table.unshift([ "\"InChI\"" ] + feature_names)
- else
- sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Feature}>}"
- features = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Feature.new uri}.each { |f| f.get }
- quoted_features = features.each_with_index.collect { |f,idx|
- if (f[RDF.type].include?(RDF::OT.NominalFeature) or
- f[RDF.type].include?(RDF::OT.StringFeature) and
- !f[RDF.type].include?(RDF::OT.NumericFeature))
- idx+1
- end
- }.compact
- table << ["InChI"] + features.collect{ |f| "\"" + f[RDF::DC.title] + "\"" }
- sparql = "SELECT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>. }"
- compounds = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Compound.new uri}
- compounds.each do |compound|
- data_entries = []
- features.each do |feature|
- sparql = "SELECT ?value FROM <#{@uri}> WHERE {
- ?data_entry <#{RDF::OT.compound}> <#{compound.uri}>;
- <#{RDF::OT.values}> ?v .
- ?v <#{RDF::OT.feature}> <#{feature.uri}>;
- <#{RDF::OT.value}> ?value.
- } ORDER BY ?data_entry"
- FourStore.query(sparql, accept).split("\n").each do |value|
- data_entries << value
- end
- end
- row = ["\"#{compound.inchi}\""] + data_entries
- row = row.each_with_index.collect { |value,idx| (quoted_features.include?(idx) ? "\"#{value}\"" : value) }
- table << row
- end
- end
- table
- end
-
- def feature_type(value)
- if value.blank?
- nil
- elsif value.numeric?
- RDF::OT.NumericFeature
- else
- RDF::OT.NominalFeature
- end
- end
-
- def ordered?
- sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.OrderedDataset}>}"
- FourStore.query(sparql, "text/uri-list").split("\n").empty? ? false : true
- end
+ before do
+ #Profiler__::start_profile
+ end
- def parse_put
- task = OpenTox::Task.create $task[:uri], nil, RDF::DC.description => "Dataset upload: #{@uri}" do
- #Profiler__::start_profile
- case @content_type
- when "text/plain", "text/turtle", "application/rdf+xml" # no conversion needed
- when "text/csv"
- @body = from_csv @body
- @content_type = "text/plain"
- when "application/vnd.ms-excel"
- from_spreadsheet Excel
- when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
- from_spreadsheet Excelx
- when "application/vnd.oasis.opendocument.spreadsheet"
- from_spreadsheet Openoffice
- # when "chemical/x-mdl-sdfile"
- # @body = parse_sdf @body
- # @content_type = "text/plain"
- else
- bad_request_error "#{@content_type} is not a supported content type."
- end
- #puts @body
- FourStore.put @uri, @body, @content_type
- if params[:file]
- nt = "<#{@uri}> <#{RDF::DC.title}> \"#{params[:file][:filename]}\".\n<#{uri}> <#{RDF::OT.hasSource}> \"#{params[:file][:filename]}\"."
- FourStore.post(@uri, nt, "text/plain")
- end
- #Profiler__::stop_profile
- #Profiler__::print_profile($stdout)
- @uri
- end
- response['Content-Type'] = "text/uri-list"
- halt 202, task.uri
- end
+ after do
+ #Profiler__::stop_profile
+ #Profiler__::print_profile($stdout)
end
before "/#{SERVICE}/:id/:property" do
@@ -336,7 +38,6 @@ module OpenTox
end
get "/dataset/:id/?" do
- #Profiler__::start_profile
case @accept
when "application/rdf+xml", "text/turtle", "text/plain", /html/
r = FourStore.get(@uri, @accept)
@@ -355,8 +56,6 @@ module OpenTox
bad_request_error "'#{@accept}' is not a supported content type."
end
end
- #Profiler__::stop_profile
- #Profiler__::print_profile($stdout)
r
end
@@ -371,11 +70,9 @@ module OpenTox
case @accept
when "application/rdf+xml", "text/turtle", "text/plain"
sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE {
- { ?s ?p ?o. <#{@uri}> <#{RDF.type}> ?o. } UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::DC.title}> ?o.} UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::DC.creator}> ?o.} UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::OT.Warnings}> ?o.} UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::OT.hasSource}> ?o.}
+ { ?s ?p ?o. <#{@uri}> ?p ?o. } UNION
+ { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> . }
+ FILTER (?p != <#{RDF::OT.dataEntry}>)
} "
FourStore.query sparql, @accept
else
@@ -413,28 +110,6 @@ module OpenTox
FourStore.query sparql, @accept
end
- # Get everything but the data entries
- # @param [Header] Accept one of `application/rdf+xml, text/turtle, text/plain, text/uri-list` (default application/rdf+xml)
- # @return [application/rdf+xml, text/turtle, text/plain, text/uri-list] The data
- get '/dataset/:id/allnde' do
- case @accept
- when "application/rdf+xml", "text/turtle", "text/plain"
- sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE {
- { ?s ?p ?o. <#{@uri}> <#{RDF.type}> ?o. } UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::DC.title}> ?o.} UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::DC.creator}> ?o.} UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::OT.Warnings}> ?o.} UNION
- { ?s ?p ?o. <#{@uri}> <#{RDF::OT.hasSource}> ?o.} UNION
- { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Feature}> } UNION
- { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> }
- }"
- else
- bad_request_error "'#{@accept}' is not a supported content type."
- end
- FourStore.query sparql, @accept
- end
-
-
end
end
diff --git a/compound.rb b/compound.rb
index cb194ff..3373737 100644
--- a/compound.rb
+++ b/compound.rb
@@ -11,7 +11,7 @@ module OpenTox
# compound = OpenTox::Compound.from_smiles("c1ccccc1")
# @param [String] smiles Smiles string
# @return [OpenTox::Compound] Compound
- def self.from_smiles service_uri, smiles, subjectid=nil
+ def self.from_smiles smiles, subjectid=nil
inchi = obconversion(smiles,'smi','inchi')
compound = Compound.new(File.join service_uri, inchi)
compound.inchi = inchi
@@ -22,7 +22,7 @@ module OpenTox
# Create a compound from inchi string
# @param [String] smiles InChI string
# @return [OpenTox::Compound] Compound
- def self.from_inchi service_uri, inchi, subjectid=nil
+ def self.from_inchi inchi, subjectid=nil
compound = Compound.new(File.join service_uri, inchi)
compound.inchi = inchi
compound
@@ -31,7 +31,7 @@ module OpenTox
# Create a compound from sdf string
# @param [String] smiles SDF string
# @return [OpenTox::Compound] Compound
- def self.from_sdf service_uri, sdf, subjectid=nil
+ def self.from_sdf sdf, subjectid=nil
inchi = obconversion(sdf,'sdf','inchi')
compound = Compound.new(File.join service_uri, inchi)
compound.inchi = inchi
diff --git a/helper.rb b/helper.rb
new file mode 100644
index 0000000..c405963
--- /dev/null
+++ b/helper.rb
@@ -0,0 +1,347 @@
+# Author: Christoph Helma, Andreas Maunz
+
+module OpenTox
+ class Application < Service
+
+ @warnings = []
+
+ helpers do
+
+ def from_csv(csv)
+ table = CSV.parse(csv)
+ # CSVs with unexpected encodings may have blanks instead of nil
+ table.collect! { |row|
+ row.collect! { |val|
+ (val.class == String and val.strip == "") ? nil : val
+ }
+ }
+ from_table table
+ end
+
+ def from_spreadsheet spreadsheet
+ extensions = { Excel => ".xls", Excelx => ".xlsx", Openoffice => ".ods" }
+ input = params[:file][:tempfile].path + ".xls"
+ csv_file = params[:file][:tempfile].path + ".csv"
+ File.rename params[:file][:tempfile].path, input # roo needs "correct" extensions
+ spreadsheet.new(input).to_csv csv_file # roo cannot write to strings
+ @body = from_csv File.read(csv_file)
+ @content_type = "text/plain"
+ end
+
+=begin
+ def from_sdf(sdf)
+
+ #obconversion = OpenBabel::OBConversion.new
+ #obmol = OpenBabel::OBMol.new
+ #obconversion.set_in_and_out_formats "sdf", "inchi"
+
+ table = []
+
+ properties = []
+ sdf.each_line { |l| properties << l.to_s if l.match(/</) }
+ properties.sort!
+ properties.uniq!
+ properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp }
+ properties.insert 0, "InChI"
+ table[0] = properties
+
+ rec = 0
+ sdf.split(/\$\$\$\$\r*\n/).each do |s|
+ rec += 1
+ table << []
+ begin
+ # TODO: use compound service
+ compound = OpenTox::Compound.from_sdf sdf
+ #obconversion.read_string obmol, s
+ table.last << obconversion.write_string(obmol).gsub(/\s/,'').chomp
+ rescue
+ # TODO: Fix, will lead to follow up errors
+ table.last << "Could not convert structure at record #{rec}) have been ignored! \n#{s}"
+ end
+ obmol.get_data.each { |d| table.last[table.first.index(d.get_attribute)] = d.get_value }
+ end
+ from_table table
+ end
+=end
+
+ def from_table table
+
+ @warnings = []
+ ntriples = ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.Dataset}>."]
+ ntriples << ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.OrderedDataset}>."]
+
+ # features
+ feature_names = table.shift.collect{|f| f.strip}
+ @warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size
+ compound_format = feature_names.shift.strip
+ bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: URI, SMILES, InChI." unless compound_format =~ /URI|URL|SMILES|InChI/i
+ features = []
+ ignored_feature_indices = []
+ feature_names.each_with_index do |f,i|
+ values = table.collect{|row| val=row[i+1]; val.strip! unless val.nil?; val }.uniq.compact
+ types = values.collect{|v| feature_type(v)}.uniq
+ metadata = {RDF::DC.title => f}
+ if values.size == 0 # empty feature
+ elsif values.size <= 5 # max classes
+ metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ]
+ metadata[RDF::OT.acceptValue] = values
+ end
+ if types.size == 1 and types[0] == RDF::OT.NumericFeature
+ metadata[RDF.type] = [] unless metadata[RDF.type]
+ metadata[RDF.type] << RDF::OT.NumericFeature
+ else
+ metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] # only nominal type for mixed cases
+ metadata[RDF::OT.acceptValue] = values
+ end
+ feature = OpenTox::Feature.find_or_create metadata, @subjectid # AM: find or generate
+ features << feature unless feature.nil?
+ ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}>."
+ ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> #{i} ."
+ end
+
+ # compounds and values
+ compound_uris = []
+ table.each_with_index do |values,j|
+ compound = values.shift
+ begin
+ case compound_format
+ when /URI|URL/i
+ compound_uri = compound
+ when /SMILES/i
+ compound_uri = OpenTox::Compound.from_smiles(compound).uri
+ when /InChI/i
+ compound_uri = OpenTox::Compound.from_inchi(compound).uri
+ end
+ rescue
+ @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." # be careful with double quotes in literals! \C in smiles is an illegal Turtle string
+ next
+ end
+ compound_uris << compound_uri
+ unless values.size == features.size
+ @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored."
+ next
+ end
+ ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>."
+ ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ."
+
+ values.each_with_index do |v,i|
+ @warnings << "Empty value for compound '#{compound}' (row #{j+2}) and feature '#{feature_names[i]}' (column #{i+2})." if v.blank?
+
+ data_entry_node = "_:dataentry"+ j.to_s
+ value_node = data_entry_node+ "_value"+ i.to_s
+ ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ."
+ ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ."
+ ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ."
+ ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ."
+ ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ."
+ ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ."
+ ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ."
+
+ end
+
+ end
+ compound_uris.duplicates.each do |uri|
+ positions = []
+ compound_uris.each_with_index{|c,i| positions << i+1 if c == uri}
+ @warnings << "Duplicate compound #{uri} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
+ end
+
+ ntriples << "<#{@uri}> <#{RDF::OT.Warnings}> \"#{@warnings.join('\n')}\" ."
+ ntriples.join("\n")
+ end
+
+=begin
+ def to_xlsx
+
+ # both simple_xlsx and axlsx create empty documents with OLE2 errors
+ xlsx = @uri.split("/").last+".xlsx"
+ p = Axlsx::Package.new
+ wb = p.workbook
+ wb.add_worksheet(:name => "test") do |sheet|
+ to_table.each { |row| sheet.add_row row; puts row }
+ end
+ p.serialize("test.xlsx")
+
+ p.to_stream
+#```
+ #Tempfile.open(@uri.split("/").last+".xlsx") do |xlsx|
+ SimpleXlsx::Serializer.new(xlsx) do |doc|
+ doc.add_sheet("People") do |sheet|
+ to_table.each { |row| sheet.add_row row }
+ end
+ end
+ send_file xlsx
+ #end
+ end
+=end
+
+ def to_csv
+ csv_string = CSV.generate do |csv|
+ to_table.each { |row| csv << row }
+ end
+ csv_string.gsub(/\"\"/,"") # AM: no quotes for missing values
+ #to_table
+ end
+
+ def compound_uris
+ end
+
+ def features
+ end
+
+ def data_entries
+ end
+
+ def to_table
+ # TODO: fix and speed up
+ sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {
+ ?s <#{RDF.type}> <#{RDF::OT.Feature}> ;
+ <#{RDF::OLO.index}> ?fidx
+ } ORDER BY ?fidx"
+ features = FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| OpenTox::Feature.new uri }
+ sparql = "SELECT DISTINCT ?compound FROM <#{@uri}> WHERE {
+ ?compound <#{RDF.type}> <#{RDF::OT.Compound}> ;
+ <#{RDF::OLO.index}> ?cidx;
+ } ORDER BY ?cidx"
+ inchis = FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| "InChI#{uri.split("InChI").last}" }
+
+ table = [["InChI"] + features.collect{|f| f.title}]
+ inchis.each{|inchi| table << [inchi]}
+ sparql = "SELECT ?cidx ?fidx ?value FROM <#{@uri}> WHERE {
+ ?data_entry <#{RDF::OLO.index}> ?cidx ;
+ <#{RDF::OT.values}> ?v .
+ ?v <#{RDF::OT.feature}> ?f;
+ <#{RDF::OT.value}> ?value .
+ ?f <#{RDF::OLO.index}> ?fidx.
+ } ORDER BY ?fidx ?cidx"
+ FourStore.query(sparql,"text/uri-list").split("\n").each do |row|
+ r,c,v = row.split("\t")
+ table[r.to_i+1][c.to_i+1] = v.to_s
+ end
+ table#.inspect
+=begin
+ table = []
+ dataset = OpenTox::Dataset.new @uri
+ table << ["SMILES"] + dataset.features.collect{|f| f.title}
+ dataset.data_entries.each_with_index do |data_entry,i|
+ table << [dataset.compounds[i]] + data_entry
+ end
+ table
+=end
+=begin
+ accept = "text/uri-list"
+ table = []
+ if ordered?
+ features = OpenTox::Dataset.find_features_sparql(@uri)
+ sparql_constraints = {:type => RDF.type, :title => RDF::DC.title}
+ feature_props = OpenTox::Dataset.find_props_sparql(features.collect { |f| f.uri }, sparql_constraints)
+ quoted_features = []; feature_names = []
+ features.each { |feature|
+ quoted_features << feature_props[feature.uri][:type].include?(RDF::OT.NominalFeature)
+ feature_names << "\"#{feature_props[feature.uri][:title][0].strip}\""
+ }
+ compounds = OpenTox::Dataset.find_compounds_sparql(@uri)
+ values = OpenTox::Dataset.find_data_entries_sparql(@uri)
+ values += Array.new(compounds.size*features.size-values.size, "")
+ clim=(compounds.size-1)
+ cidx = fidx = 0
+ num=(!quoted_features[fidx])
+ table = (Array.new((features.size)*(compounds.size))).each_slice(features.size).to_a
+ values.each { |val|
+ unless val.blank?
+ table[cidx][fidx] = (num ? val : "\"#{val}\"")
+ end
+ if (cidx < clim)
+ cidx+=1
+ else
+ cidx=0
+ fidx+=1
+ num=(!quoted_features[fidx])
+ end
+ }
+ table.each_with_index { |row,idx| row.unshift("\"#{compounds[idx].inchi}\"") }
+ table.unshift([ "\"InChI\"" ] + feature_names)
+ else
+ sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Feature}>}"
+ features = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Feature.new uri}.each { |f| f.get }
+ quoted_features = features.each_with_index.collect { |f,idx|
+ if (f[RDF.type].include?(RDF::OT.NominalFeature) or
+ f[RDF.type].include?(RDF::OT.StringFeature) and
+ !f[RDF.type].include?(RDF::OT.NumericFeature))
+ idx+1
+ end
+ }.compact
+ table << ["InChI"] + features.collect{ |f| "\"" + f[RDF::DC.title] + "\"" }
+ sparql = "SELECT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>. }"
+ compounds = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Compound.new uri}
+ compounds.each do |compound|
+ data_entries = []
+ features.each do |feature|
+ sparql = "SELECT ?value FROM <#{@uri}> WHERE {
+ ?data_entry <#{RDF::OT.compound}> <#{compound.uri}>;
+ <#{RDF::OT.values}> ?v .
+ ?v <#{RDF::OT.feature}> <#{feature.uri}>;
+ <#{RDF::OT.value}> ?value.
+ } ORDER BY ?data_entry"
+ FourStore.query(sparql, accept).split("\n").each do |value|
+ data_entries << value
+ end
+ end
+ row = ["\"#{compound.inchi}\""] + data_entries
+ row = row.each_with_index.collect { |value,idx| (quoted_features.include?(idx) ? "\"#{value}\"" : value) }
+ table << row
+ end
+ end
+ table
+=end
+ end
+
+ def feature_type(value)
+ if value.blank?
+ nil
+ elsif value.numeric?
+ RDF::OT.NumericFeature
+ else
+ RDF::OT.NominalFeature
+ end
+ end
+
+ def ordered?
+ sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.OrderedDataset}>}"
+ FourStore.query(sparql, "text/uri-list").split("\n").empty? ? false : true
+ end
+
+ def parse_put
+ task = OpenTox::Task.run "Dataset upload", @uri, @subjectid do
+ case @content_type
+ when "text/plain", "text/turtle", "application/rdf+xml" # no conversion needed
+ when "text/csv"
+ @body = from_csv @body
+ @content_type = "text/plain"
+ when "application/vnd.ms-excel"
+ from_spreadsheet Excel
+ when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+ from_spreadsheet Excelx
+ when "application/vnd.oasis.opendocument.spreadsheet"
+ from_spreadsheet Openoffice
+ # when "chemical/x-mdl-sdfile"
+ # @body = parse_sdf @body
+ # @content_type = "text/plain"
+ else
+ bad_request_error "#{@content_type} is not a supported content type."
+ end
+ if params["file"]
+ nt = "<#{@uri}> <#{RDF::DC.title}> \"#{params["file"][:filename]}\".\n<#{uri}> <#{RDF::OT.hasSource}> \"#{params["file"][:filename]}\"."
+ FourStore.put(@uri, nt, "text/plain")
+ end
+ nt ? FourStore.post(@uri, @body, @content_type) : FourStore.put(@uri, @body, @content_type)
+ @uri
+ end
+ response['Content-Type'] = "text/uri-list"
+ halt 202, task.uri
+ end
+ end
+
+ end
+end
+
diff --git a/webapp/dataset.rb b/webapp/dataset.rb
deleted file mode 100644
index 3ee6c04..0000000
--- a/webapp/dataset.rb
+++ /dev/null
@@ -1,125 +0,0 @@
-# dataset.rb
-# OpenTox dataset
-# Author: Andreas Maunz
-
-module OpenTox
-
- class Application < Service
-
-
- # Get a list of descriptor calculation
- # @return [text/uri-list] URIs
- get '/dataset/*/pc' do
- dataset=params["captures"][0]
- algorithms = YAML::load_file RestClientWrapper.get(File.join($compound[:uri],"pc_descriptors.yaml"))
- list = (algorithms.keys.sort << "AllDescriptors").collect { |name| to("/dataset/#{dataset}/pc/#{name}",:full) }.join("\n") + "\n"
- format_output(list)
- end
-
- # Get representation of descriptor calculation
- # @return [String] Representation
- get '/dataset/*/pc/*' do
- dataset = params[:captures][0]
- params[:descriptor] = params[:captures][1]
- descriptors = YAML::load_file RestClientWrapper.get(File.join($compound[:uri],"pc_descriptors.yaml"))
- alg_params = [
- { DC.description => "Dataset URI",
- OT.paramScope => "mandatory",
- DC.title => "dataset_uri" }
- ]
- if params[:descriptor] != "AllDescriptors"
- descriptors = descriptors[params[:descriptor]]
- else
- alg_params << {
- DC.description => "Physico-chemical type, one or more of '#{descriptors.collect { |id, info| info[:pc_type] }.uniq.sort.join(",")}'",
- OT.paramScope => "optional", DC.title => "pc_type"
- }
- alg_params << {
- DC.description => "Software Library, one or more of '#{descriptors.collect { |id, info| info[:lib] }.uniq.sort.join(",")}'",
- OT.paramScope => "optional", DC.title => "lib"
- }
- descriptors = {:id => "AllDescriptors", :name => "All PC descriptors" } # Comes from pc_descriptors.yaml for single descriptors
- end
-
- if descriptors
- # Contents
- algorithm = OpenTox::Algorithm.new(to("/dataset/#{dataset}/pc/#{params[:descriptor]}",:full))
- mmdata = {
- DC.title => params[:descriptor],
- DC.creator => "andreas@maunz.de",
- DC.description => descriptors[:name],
- RDF.type => [OTA.DescriptorCalculation],
- }
- mmdata[DC.description] << (", pc_type: " + descriptors[:pc_type]) unless descriptors[:id] == "AllDescriptors"
- mmdata[DC.description] << (", lib: " + descriptors[:lib]) unless descriptors[:id] == "AllDescriptors"
- algorithm.metadata=mmdata
- algorithm.parameters = alg_params
- format_output(algorithm)
- else
- resource_not_found_error "Unknown descriptor #{params[:descriptor]}."
- end
- end
-
-
- # Calculate PC descriptors
- # Single descriptors or sets of descriptors can be selected
- # Sets are selected via lib and/or pc_type, and take precedence, when also a descriptor is submitted
- # If none of descriptor, lib, and pc_type is submitted, all descriptors are calculated
- # Set composition is induced by intersecting lib and pc_type sets, if appropriate
- # @param [optional, HEADER] accept Accept one of 'application/rdf+xml', 'text/csv', defaults to 'application/rdf+xml'
- # @param [optional, String] descriptor A single descriptor to calculate values for.
- # @param [optional, String] lib One or more descriptor libraries out of [cdk,joelib,openbabel], for whose descriptors to calculate values.
- # @param [optional, String] pc_type One or more descriptor types out of [constitutional,topological,geometrical,electronic,cpsa,hybrid], for whose descriptors to calculate values
- # @return [application/rdf+xml,text/csv] Compound descriptors and values
- post '/dataset/*/pc' do
- dataset=params["captures"][0]
- params.delete('splat')
- params.delete('captures')
- params_array = params.collect{ |k,v| [k.to_sym, v]}
- params = Hash[params_array]
- params[:dataset] = dataset
- descriptor = params[:descriptor].nil? ? "" : params[:descriptor]
- lib = params[:lib].nil? ? "" : params[:lib]
- pc_type = params[:pc_type].nil? ? "" : params[:pc_type]
-
- task = OpenTox::Task.create(
- $task[:uri],
- @subjectid,
- { RDF::DC.description => "Calculating PC descriptors",
- RDF::DC.creator => to("/dataset/#{dataset}/pc",:full)
- }
- ) do |task|
-
- result_ds = OpenTox::Dataset.new(nil,@subjectid)
- ds=OpenTox::Dataset.find("#{$dataset[:uri]}/#{dataset}",@subjectid)
- ds.compounds.each { |cmpd|
- ds_string = RestClientWrapper.post("#{$compound[:uri]}/#{cmpd.inchi}/pc", params, {:accept => "application/rdf+xml"})
- single_cmpd_ds = OpenTox::Dataset.new(OpenTox::Dataset.uri_from_rdf(ds_string),@subjectid)
- single_cmpd_ds.parse_rdfxml(ds_string)
- single_cmpd_ds.get(true)
- unless result_ds.features.size>0 # features present already?
- result_ds.features = single_cmpd_ds.features # AM: features
- result_ds.parameters = ["pc_type", "lib", "descriptor"].collect{ |key| # AM: parameters
- val = single_cmpd_ds.find_parameter_value(key)
- { DC.title => key, OT.paramValue => (val.nil? ? "" : val) }
- }
- result_ds[DC.title] = single_cmpd_ds[DC.title]
- result_ds[DC.creator] = to("/dataset/#{dataset}/pc",:full)
- result_ds[OT.hasSource] = to("/dataset/#{dataset}/pc",:full)
- end
- result_ds << [ cmpd ] + single_cmpd_ds.data_entries[0]
- }
- result_ds.put @subjectid
- $logger.debug result_ds.uri
- result_ds.uri
-
- end
- response['Content-Type'] = 'text/uri-list'
- service_unavailable_error "Service unavailable" if task.cancelled?
- halt 202,task.uri.to_s+"\n"
- end
-
- end
-
-end
-
diff --git a/webapp/sinatra.rb b/webapp/sinatra.rb
index fd0d354..b6becab 100644
--- a/webapp/sinatra.rb
+++ b/webapp/sinatra.rb
@@ -17,7 +17,7 @@ module OpenTox
case @accept
when /text\/html/
content_type "text/html"
- OpenTox.text_to_html obj
+ obj.to_html
else
content_type 'text/uri-list'
obj
@@ -31,7 +31,7 @@ module OpenTox
obj.to_rdfxml
when /text\/html/
content_type "text/html"
- OpenTox.text_to_html obj.to_turtle
+ obj.to_html
else
content_type "text/turtle"
obj.to_turtle
diff --git a/webapp/test.rb b/webapp/test.rb
deleted file mode 100644
index 63b9521..0000000
--- a/webapp/test.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-#for testing the error handling
-
-module OpenTox
- class Application < Service
-
- post '/dataset/test/error_in_task/?' do
- task = OpenTox::Task.create($task[:uri],@subjectid,{ RDF::DC.description => "error_in_task"}) do |task|
- sleep 1
- internal_server_error "error_in_task_message"
- end
- response['Content-Type'] = 'text/uri-list'
- halt 202,task.uri.to_s+"\n"
- end
-
- end
-end