From f850712765a67bf31b1327401e3eddf59e3e6f50 Mon Sep 17 00:00:00 2001
From: ch
Date: Sun, 12 Jul 2015 12:14:13 +0200
Subject: initial dataset tests

---
 lib/dataset.rb | 146 +++++++++++++++++++++++++++++++--------------------------
 1 file changed, 79 insertions(+), 67 deletions(-)

diff --git a/lib/dataset.rb b/lib/dataset.rb
index d7c4076..28133b2 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -5,63 +5,75 @@ module OpenTox
   # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
   class Dataset

-    attr_writer :features, :compounds, :data_entries
-
     def initialize uri=nil
       super uri
-      @features = []
-      @compounds = []
-      @data_entries = []
+      @data["features"] ||= []
+      @data["compounds"] ||= []
+      @data["data_entries"] ||= []
+    end
+
+    [:features, :compounds, :data_entries].each do |method|
+      send :define_method, method do
+        @data[method.to_s]
+      end
+      send :define_method, "#{method}=" do |value|
+        @data[method.to_s] = value.collect{|v| v.uri}
+      end
+      send :define_method, "#{method}<<" do |value|
+        @data[method.to_s] << value.uri
+      end
     end

     # Get data (lazy loading from dataset service)
     # overrides {OpenTox#metadata} to only load the metadata instead of the whole dataset
     # @return [Hash] the metadata
     def metadata force_update=false
-      if @metadata.empty? or force_update
-        uri = File.join(@uri,"metadata")
-        begin
-          parse_ntriples RestClientWrapper.get(uri,{},{:accept => "text/plain"})
-        rescue # fall back to rdfxml
-          parse_rdfxml RestClientWrapper.get(uri,{},{:accept => "application/rdf+xml"})
-        end
-        @metadata = @rdf.to_hash[RDF::URI.new(@uri)].inject({}) { |h, (predicate, values)| h[predicate] = values.collect{|v| v.to_s}; h }
+      if @data.empty? or force_update
+        uri = File.join(@data["uri"],"metadata")
+        #begin
+        RestClientWrapper.get(uri,{},{:accept => "application/json"})
+        @data = JSON.parse RestClientWrapper.get(uri,{},{:accept => "application/json"})
+        #parse_ntriples RestClientWrapper.get(uri,{},{:accept => "text/plain"})
+        #rescue # fall back to rdfxml
+        #parse_rdfxml RestClientWrapper.get(uri,{},{:accept => "application/rdf+xml"})
+        #end
+        #@data = @rdf.to_hash[RDF::URI.new(@data["uri"])].inject({}) { |h, (predicate, values)| h[predicate] = values.collect{|v| v.to_s}; h }
       end
-      @metadata
+      @data
     end

     # @return [Array] feature objects (NOT uris)
     def features force_update=false
-      if @features.empty? or force_update
-        uri = File.join(@uri,"features")
+      if @data["features"].empty? or force_update
+        uri = File.join(@data["uri"],"features")
         begin
           uris = RestClientWrapper.get(uri,{},{:accept => "text/uri-list"}).split("\n") # ordered datasets return ordered features
         rescue
           uris = []
         end
-        @features = uris.collect{|uri| Feature.new(uri)}
+        @data["features"] = uris.collect{|uri| Feature.new(uri)}
       end
-      @features
+      @data["features"]
     end

     # @return [Array] compound objects (NOT uris)
     def compounds force_update=false
-      if @compounds.empty? or force_update
-        uri = File.join(@uri,"compounds")
+      if @data["compounds"].empty? or force_update
+        uri = File.join(@data["uri"],"compounds")
         begin
           uris = RestClientWrapper.get(uri,{},{:accept => "text/uri-list"}).split("\n") # ordered datasets return ordered compounds
         rescue
           uris = []
         end
-        @compounds = uris.collect{|uri| Compound.new(uri)}
+        @data["compounds"] = uris.collect{|uri| Compound.new(uri)}
       end
-      @compounds
+      @data["compounds"]
     end

     # @return [Array] with two dimensions,
     # first index: compounds, second index: features, values: compound feature values
     def data_entries force_update=false
-      if @data_entries.empty? or force_update
+      if @data["data_entries"].empty? or force_update
         sparql = "SELECT ?cidx ?fidx ?value FROM <#{uri}> WHERE {
           ?data_entry <#{RDF::OLO.index}> ?cidx ;
                       <#{RDF::OT.values}> ?v .
@@ -71,16 +83,16 @@ module OpenTox
         } ORDER BY ?fidx ?cidx"
         RestClientWrapper.get(service_uri,{:query => sparql},{:accept => "text/uri-list"}).split("\n").each do |row|
           r,c,v = row.split("\t")
-          @data_entries[r.to_i] ||= []
+          @data["data_entries"][r.to_i] ||= []
           # adjust value class depending on feature type, StringFeature takes precedence over NumericFeature
           if features[c.to_i][RDF.type].include? RDF::OT.NumericFeature and ! features[c.to_i][RDF.type].include? RDF::OT.StringFeature
             v = v.to_f if v
           end
-          @data_entries[r.to_i][c.to_i] = v if v
+          @data["data_entries"][r.to_i][c.to_i] = v if v
         end
         # TODO: fallbacks for external and unordered datasets
       end
-      @data_entries
+      @data["data_entries"]
     end

     # Find data entry values for a given compound and feature
@@ -137,18 +149,18 @@ module OpenTox
     end

     # Adding data methods
-    # (Alternatively, you can directly change @features and @compounds)
+    # (Alternatively, you can directly change @data["features"] and @data["compounds"])

     # Create a dataset from file (csv,sdf,...)
     # @param filename [String]
     # @return [String] dataset uri
     def upload filename, wait=true
-      uri = RestClientWrapper.put(@uri, {:file => File.new(filename)})
+      uri = RestClientWrapper.put(@data["uri"], {:file => File.new(filename)})
       wait_for_task uri if URI.task?(uri) and wait
       compounds true
       features true
       metadata true
-      @uri
+      @data["uri"]
     end

     # @param compound [OpenTox::Compound]
     # @param value [Object] (will be converted to String)
     # @return [Array] data_entries
     def add_data_entry compound, feature, value
-      @compounds << compound unless @compounds.collect{|c| c.uri}.include?(compound.uri)
-      row = @compounds.collect{|c| c.uri}.index(compound.uri)
-      @features << feature unless @features.collect{|f| f.uri}.include?(feature.uri)
-      col = @features.collect{|f| f.uri}.index(feature.uri)
-      if @data_entries[row] and @data_entries[row][col] # duplicated values
-        @compounds << compound
-        row = @compounds.collect{|c| c.uri}.rindex(compound.uri)
+      @data["compounds"] << compound unless @data["compounds"].collect{|c| c.uri}.include?(compound.uri)
+      row = @data["compounds"].collect{|c| c.uri}.index(compound.uri)
+      @data["features"] << feature unless @data["features"].collect{|f| f.uri}.include?(feature.uri)
+      col = @data["features"].collect{|f| f.uri}.index(feature.uri)
+      if @data["data_entries"][row] and @data["data_entries"][row][col] # duplicated values
+        @data["compounds"] << compound
+        row = @data["compounds"].collect{|c| c.uri}.rindex(compound.uri)
       end
       if value
-        @data_entries[row] ||= []
-        @data_entries[row][col] = value
+        @data["data_entries"][row] ||= []
+        @data["data_entries"][row][col] = value
       end
     end
@@ -181,15 +193,15 @@ module OpenTox
     # d << [ Compound.new("c1ccccc1"), feature-value-a, feature-value-b ]
     def << row
       compound = row.shift # removes the compound from the array
-      bad_request_error "Dataset features are empty." unless @features
-      bad_request_error "Row size '#{row.size}' does not match features size '#{@features.size}'." unless row.size == @features.size
+      bad_request_error "Dataset features are empty." unless @data["features"]
+      bad_request_error "Row size '#{row.size}' does not match features size '#{@data["features"].size}'." unless row.size == @data["features"].size
       bad_request_error "First column is not a OpenTox::Compound" unless compound.class == OpenTox::Compound
-      @compounds << compound
-      @data_entries << row
+      @data["compounds"] << compound.uri
+      @data["data_entries"] << row
     end

     # Serialisation
-
+
     # converts dataset to csv format including compound smiles as first column, other column headers are feature titles
     # @return [String]
     def to_csv(inchi=false)
@@ -213,11 +225,11 @@ module OpenTox
         reader.each_statement{ |statement| @rdf << statement }
       end
       query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Compound } })
-      @compounds = query.execute(@rdf).collect { |solution| OpenTox::Compound.new solution.uri }
+      @data["compounds"] = query.execute(@rdf).collect { |solution| OpenTox::Compound.new solution.uri }
       query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Feature } })
-      @features = query.execute(@rdf).collect { |solution| OpenTox::Feature.new solution.uri }
-      @compounds.each_with_index do |c,i|
-        @features.each_with_index do |f,j|
+      @data["features"] = query.execute(@rdf).collect { |solution| OpenTox::Feature.new solution.uri }
+      @data["compounds"].each_with_index do |c,i|
+        @data["features"].each_with_index do |f,j|
         end
       end
     end
@@ -225,13 +237,13 @@ module OpenTox

       # redefine rdf serialization methods
       send :define_method, "to_#{format}".to_sym do
-        @metadata[RDF.type] = [RDF::OT.Dataset, RDF::OT.OrderedDataset]
+        @data[RDF.type] = [RDF::OT.Dataset, RDF::OT.OrderedDataset]
         create_rdf
-        @features.each_with_index do |feature,i|
+        @data["features"].each_with_index do |feature,i|
           @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Feature)]
           @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
         end
-        @compounds.each_with_index do |compound,i|
+        @data["compounds"].each_with_index do |compound,i|
           @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Compound)]
           if defined? @neighbors and neighbors.include? compound
             @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Neighbor)]
@@ -239,14 +251,14 @@ module OpenTox
           @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]

           data_entry_node = RDF::Node.new
-          @rdf << [RDF::URI.new(@uri), RDF::URI.new(RDF::OT.dataEntry), data_entry_node]
+          @rdf << [RDF::URI.new(@data["uri"]), RDF::URI.new(RDF::OT.dataEntry), data_entry_node]
           @rdf << [data_entry_node, RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.DataEntry)]
           @rdf << [data_entry_node, RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
           @rdf << [data_entry_node, RDF::URI.new(RDF::OT.compound), RDF::URI.new(compound.uri)]
-          @data_entries[i].each_with_index do |value,j|
+          @data["data_entries"][i].each_with_index do |value,j|
             value_node = RDF::Node.new
             @rdf << [data_entry_node, RDF::URI.new(RDF::OT.values), value_node]
-            @rdf << [value_node, RDF::URI.new(RDF::OT.feature), RDF::URI.new(@features[j].uri)]
+            @rdf << [value_node, RDF::URI.new(RDF::OT.feature), RDF::URI.new(@data["features"][j].uri)]
             @rdf << [value_node, RDF::URI.new(RDF::OT.value), RDF::Literal.new(value)]
           end
         end
@@ -260,24 +272,24 @@ module OpenTox
     # TODO: fix bug that affects data_entry positions
     # DG: who wrotes this comment ?
     def to_ntriples # redefined string version for better performance
       ntriples = ""
-      @metadata[RDF.type] = [ RDF::OT.Dataset, RDF::OT.OrderedDataset ]
-      @metadata.each do |predicate,values|
+      @data[RDF.type] = [ RDF::OT.Dataset, RDF::OT.OrderedDataset ]
+      @data.each do |predicate,values|
         [values].flatten.each do |value|
           URI.valid?(value) ? value = "<#{value}>" : value = "\"#{value}\""
-          ntriples << "<#{@uri}> <#{predicate}> #{value} .\n" #\n"
+          ntriples << "<#{@data["uri"]}> <#{predicate}> #{value} .\n" #\n"
         end
       end
       @parameters.each_with_index do |parameter,i|
         p_node = "_:parameter"+ i.to_s
-        ntriples << "<#{@uri}> <#{RDF::OT.parameters}> #{p_node} .\n"
+        ntriples << "<#{@data["uri"]}> <#{RDF::OT.parameters}> #{p_node} .\n"
         ntriples << "#{p_node} <#{RDF.type}> <#{RDF::OT.Parameter}> .\n"
         parameter.each { |k,v| ntriples << "#{p_node} <#{k}> \"#{v.to_s.tr('"', '\'')}\" .\n" }
       end
-      @features.each_with_index do |feature,i|
+      @data["features"].each_with_index do |feature,i|
         ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}> .\n"
         ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> \"#{i}\"^^ .\n" # sorting at dataset service does not work without type information
       end
-      @compounds.each_with_index do |compound,i|
+      @data["compounds"].each_with_index do |compound,i|
         ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Compound}> .\n"
         if defined? @neighbors and neighbors.include? compound
           ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Neighbor}> .\n"
@@ -285,16 +297,16 @@ module OpenTox
         ntriples << "<#{compound.uri}> <#{RDF::OLO.index}> \"#{i}\"^^ .\n" # sorting at dataset service does not work without type information

         data_entry_node = "_:dataentry"+ i.to_s
-        ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} .\n"
+        ntriples << "<#{@data["uri"]}> <#{RDF::OT.dataEntry}> #{data_entry_node} .\n"
         ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> .\n"
         ntriples << "#{data_entry_node} <#{RDF::OLO.index}> \"#{i}\"^^ .\n" # sorting at dataset service does not work without type information
         ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound.uri}> .\n"
-        @data_entries[i].each_with_index do |value,j|
+        @data["data_entries"][i].each_with_index do |value,j|
           value_node = data_entry_node+ "_value"+ j.to_s
           ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} .\n"
-          ntriples << "#{value_node} <#{RDF::OT.feature}> <#{@features[j].uri}> .\n"
+          ntriples << "#{value_node} <#{RDF::OT.feature}> <#{@data["features"][j].uri}> .\n"
           ntriples << "#{value_node} <#{RDF::OT.value}> \"#{value}\" .\n"
-        end unless @data_entries[i].nil?
+        end unless @data["data_entries"][i].nil?
       end

       ntriples
@@ -361,7 +373,7 @@ module OpenTox
       unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound_uri)
         @cmp_indices = {}
         compounds().size.times do |i|
-          c = @compounds[i].uri
+          c = @data["compounds"][i].uri
           if @cmp_indices[c]==nil
             @cmp_indices[c] = [i]
           else
@@ -374,9 +386,9 @@ module OpenTox

     # returns compound feature value using the compound-index and the feature_uri
     def data_entry_value(compound_index, feature_uri)
-      data_entries(true) if @data_entries.empty?
-      col = @features.collect{|f| f.uri}.index feature_uri
-      @data_entries[compound_index] ? @data_entries[compound_index][col] : nil
+      data_entries(true) if @data["data_entries"].empty?
+      col = @data["features"].collect{|f| f.uri}.index feature_uri
+      @data["data_entries"][compound_index] ? @data["data_entries"][compound_index][col] : nil
     end
   end
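The core change in this patch is that Dataset no longer keeps @features, @compounds and @data_entries in separate instance variables: everything is stored in a single @data hash, with reader and writer methods generated via define_method, and metadata is fetched as JSON instead of RDF. The following standalone sketch (not part of the patch; "Record" and "Item" are hypothetical stand-ins for Dataset and Feature/Compound) illustrates how such hash-backed, metaprogrammed accessors behave:

# Illustration only: readers return the raw entries stored under a string key
# in @data, writers convert the assigned objects to their URIs before storing.
class Record
  Item = Struct.new(:uri)

  def initialize
    @data = {}
    @data["features"]  ||= []
    @data["compounds"] ||= []
  end

  [:features, :compounds].each do |method|
    # reader: hand back the array stored under the string key
    define_method(method) { @data[method.to_s] }
    # writer: keep only the URIs of the assigned objects
    define_method("#{method}=") { |value| @data[method.to_s] = value.collect { |v| v.uri } }
  end
end

record = Record.new
record.features = [Record::Item.new("http://example.org/feature/1")]
puts record.features.inspect  # => ["http://example.org/feature/1"]

One consequence of the pattern as written in the patch: a method named "features<<" can only be reached with send("features<<", value), because dataset.features << value parses as a call to the reader followed by Array#<<, which appends the object itself rather than its URI.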