author     Christoph Helma <helma@in-silico.ch>    2012-03-15 18:23:44 +0100
committer  Christoph Helma <helma@in-silico.ch>    2012-03-15 18:23:44 +0100
commit     2fd7dcb2d011e3a2029de56f48aca5722685ee80 (patch)
tree       74629c6dcb8d67014469e81ed13b1a1630b97f67
parent     2a900b8d19887ca64061dc72a82aae4bdb449fb1 (diff)
dataset methods implemented
-rw-r--r--  lib/dataset.rb         | 323
-rw-r--r--  lib/opentox-client.rb  |   3
-rw-r--r--  lib/opentox.rb         |  36
-rw-r--r--  test/feature.rb        |   2
4 files changed, 43 insertions, 321 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 3de9d1f..8032533 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -1,318 +1,29 @@
module OpenTox
-
+
# Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
- # TODO: fix API Doc
class Dataset
- #include OpenTox
-
- #attr_reader :features, :compounds, :data_entries, :metadata
-
- # Create dataset with optional URI. Does not load data into the dataset - you will need to execute one of the load_* methods to pull data from a service or to insert it from other representations.
- # @example Create an empty dataset
- # dataset = OpenTox::Dataset.new
- # @example Create an empty dataset with URI
- # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1")
- # @param [optional, String] uri Dataset URI
- # @return [OpenTox::Dataset] Dataset object
- def initialize(uri=nil,subjectid=nil)
- super uri, subjectid
- @features = {}
- @compounds = []
- @data_entries = {}
- end
-
-=begin
- # Load YAML representation into the dataset
- # @param [String] yaml YAML representation of the dataset
- # @return [OpenTox::Dataset] Dataset object with YAML data
- def self.from_yaml service_uri, yaml, subjectid=nil
- Dataset.create(service_uri, subjectid).post yaml, :content_type => "application/x-yaml"
- end
-
- # Load RDF/XML representation from a file
- # @param [String] file File with RDF/XML representation of the dataset
- # @return [OpenTox::Dataset] Dataset object with RDF/XML data
- def self.from_rdfxml service_uri, rdfxml, subjectid=nil
- Dataset.create(service_uri, subjectid).post rdfxml, :content_type => "application/rdf+xml"
- end
-
- # Load CSV string (format specification: http://toxcreate.org/help)
- # - loads data_entries, compounds, features
- # - sets metadata (warnings) for parser errors
- # - you will have to set remaining metadata manually
- # @param [String] csv CSV representation of the dataset
- # @return [OpenTox::Dataset] Dataset object with CSV data
- def self.from_csv service_uri, csv, subjectid=nil
- Dataset.from_file(service_uri, csv, subjectid)
- end
-
- # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
- # - loads data_entries, compounds, features
- # - sets metadata (warnings) for parser errors
- # - you will have to set remaining metadata manually
- # @param [Excel] book Excel workbook object (created with roo gem)
- # @return [OpenTox::Dataset] Dataset object with Excel data
- def self.from_xls service_uri, xls, subjectid=nil
- Dataset.create(service_uri, subjectid).post xls, :content_type => "application/vnd.ms-excel"
- end
-
- def self.from_sdf service_uri, sdf, subjectid=nil
- Dataset.create(service_uri, subjectid).post sdf, :content_type => 'chemical/x-mdl-sdfile'
- end
-=end
-
- # Load all data (metadata, data_entries, compounds and features) from URI
- # TODO: move to opentox-server
- def data_entries reload=true
- if reload
- file = Tempfile.new("ot-rdfxml")
- file.puts get :accept => "application/rdf+xml"
- file.close
- to_delete = file.path
-
- data = {}
- feature_values = {}
- feature = {}
- feature_accept_values = {}
- other_statements = {}
- `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line|
- triple = line.chomp.split(' ',3)
- triple = triple[0..2].collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
- case triple[1]
- when /#{RDF::OT.values}|#{RDF::OT1.values}/i
- data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]]
- data[triple[0]][:values] << triple[2]
- when /#{RDF::OT.value}|#{RDF::OT1.value}/i
- feature_values[triple[0]] = triple[2]
- when /#{RDF::OT.compound}|#{RDF::OT1.compound}/i
- data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]]
- data[triple[0]][:compound] = triple[2]
- when /#{RDF::OT.feature}|#{RDF::OT1.feature}/i
- feature[triple[0]] = triple[2]
- when /#{RDF.type}/i
- if triple[2]=~/#{RDF::OT.Compound}|#{RDF::OT1.Compound}/i and !data[triple[0]]
- data[triple[0]] = {:compound => triple[0], :values => []}
- end
- when /#{RDF::OT.acceptValue}|#{RDF::OT1.acceptValue}/i # acceptValue in ambit datasets is only provided in dataset/<id> no in dataset/<id>/features
- feature_accept_values[triple[0]] = [] unless feature_accept_values[triple[0]]
- feature_accept_values[triple[0]] << triple[2]
- else
- end
- end
- File.delete(to_delete) if to_delete
- data.each do |id,entry|
- if entry[:values].size==0
- # no feature values add plain compounds
- @compounds << entry[:compound] unless @compounds.include? entry[:compound]
- else
- entry[:values].each do |value_id|
- if feature_values[value_id]
- split = feature_values[value_id].split(/\^\^/)
- case split[-1]
- when RDF::XSD.double, RDF::XSD.float
- value = split.first.to_f
- when RDF::XSD.boolean
- value = split.first=~/(?i)true/ ? true : false
- else
- value = split.first
- end
- end
- @compounds << entry[:compound] unless @compounds.include? entry[:compound]
- @features[feature[value_id][value_id]] = {} unless @features[feature[value_id]]
- @data_entries[entry[:compound].to_s] = {} unless @data_entries[entry[:compound].to_s]
- @data_entries[entry[:compound].to_s][feature[value_id]] = [] unless @data_entries[entry[:compound]][feature[value_id]]
- @data_entries[entry[:compound].to_s][feature[value_id]] << value if value!=nil
- end
- end
- end
- features subjectid
- #feature_accept_values.each do |feature, values|
- #self.features[feature][OT.acceptValue] = values
- #end
- self.metadata = metadata(subjectid)
- end
- @data_entries
- end
-
- # Load and return only compound URIs from the dataset service
- # @return [Array] Compound URIs in the dataset
- def compounds reload=true
- reload ? @compounds = Compound.all(File.join(@uri,"compounds")) : @compounds
- end
-
- # Load and return only features from the dataset service
- # @return [Hash] Features of the dataset
- def features reload=true
- reload ? @features = Feature.all(File.join(@uri,"features")) : @features
- end
-
-=begin
- # returns the accept_values of a feature, i.e. the classification domain / all possible feature values
- # @param [String] feature the URI of the feature
- # @return [Array] return array with strings, nil if value is not set (e.g. when feature is numeric)
- def accept_values(feature)
- load_features
- accept_values = features[feature][OT.acceptValue]
- accept_values.sort if accept_values
- accept_values
- end
-
- # Detect feature type(s) in the dataset
- # @return [String] `classification", "regression", "mixed" or unknown`
- def feature_type
- load_features
- feature_types = @features.collect{|f,metadata| metadata[RDF.type]}.flatten.uniq
- if feature_types.include?(OT.NominalFeature)
- "classification"
- elsif feature_types.include?(OT.NumericFeature)
- "regression"
- else
- "unknown"
- end
+ def data_entries
+ # TODO fix for api 1.2
+ data_entries = []
+ pull
+ @reload = false
+ metadata[RDF::OT1.dataEntry].collect{|data_entry|
+ data_entries << @rdf.to_hash[data_entry]
+ }
+ @reload = true
+ data_entries
end
-=end
- # Get Excel representation (alias for to_spreadsheet)
- # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded))
- def to_xls
- get :accept => "application/vnd.ms-excel"
+ def compounds
+ uri = File.join(@uri,"compounds")
+ RestClientWrapper.get(uri,{},{:accept => "text/uri-list", :subjectid => @subjectid}).split("\n").collect{|uri| OpenTox::Compound.new uri}
end
- # Get CSV string representation (data_entries only, metadata will be discarded)
- # @return [String] CSV representation
- def to_csv
- get :accept => "text/csv"
+ def features
+ uri = File.join(@uri,"features")
+ RestClientWrapper.get(uri,{},{:accept => "text/uri-list", :subjectid => @subjectid}).split("\n").collect{|uri| OpenTox::Feature.new uri}
end
- def to_sdf
- get :accept => 'chemical/x-mdl-sdfile'
- end
-
-
- # Get OWL-DL in ntriples format
- # @return [String] N-Triples representation
- def to_ntriples
- get :accept => "application/rdf+xml"
- end
-
- # Get OWL-DL in RDF/XML format
- # @return [String] RDF/XML representation
- def to_rdfxml
- get :accept => "application/rdf+xml"
- end
-
- # Get name (DC.title) of a feature
- # @param [String] feature Feature URI
- # @return [String] Feture title
- def feature_name(feature)
- features[feature][DC.title]
- end
-
- def title
- metadata[DC.title]
- end
-
- # Insert a statement (compound_uri,feature_uri,value)
- # @example Insert a statement (compound_uri,feature_uri,value)
- # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
- # @param [String] compound Compound URI
- # @param [String] feature Compound URI
- # @param [Boolean,Float] value Feature value
- def add (compound,feature,value)
- @compounds << compound unless @compounds.include? compound
- @features[feature] = {} unless @features[feature]
- @data_entries[compound] = {} unless @data_entries[compound]
- @data_entries[compound][feature] = [] unless @data_entries[compound][feature]
- @data_entries[compound][feature] << value if value!=nil
- end
-
- # Add a feature
- # @param [String] feature Feature URI
- # @param [Hash] metadata Hash with feature metadata
- def add_feature(feature,metadata={})
- @features[feature] = metadata
- end
-
- # Add/modify metadata for a feature
- # @param [String] feature Feature URI
- # @param [Hash] metadata Hash with feature metadata
- def add_feature_metadata(feature,metadata)
- metadata.each { |k,v| @features[feature][k] = v }
- end
-
- # Add a new compound
- # @param [String] compound Compound URI
- def add_compound (compound)
- @compounds << compound unless @compounds.include? compound
- end
-
- # Creates a new dataset, by splitting the current dataset, i.e. using only a subset of compounds and features
- # @param [Array] compounds List of compound URIs
- # @param [Array] features List of feature URIs
- # @param [Hash] metadata Hash containing the metadata for the new dataset
- # @param [String] subjectid
- # @return [OpenTox::Dataset] newly created dataset, already saved
- def split( compounds, features, metadata)
- LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds"
- raise "no new compounds selected" unless compounds and compounds.size>0
- dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],@subjectid)
- if features.size==0
- compounds.each{ |c| dataset.add_compound(c) }
- else
- compounds.each do |c|
- features.each do |f|
- if @data_entries[c]==nil or @data_entries[c][f]==nil
- dataset.add(c,f,nil)
- else
- @data_entries[c][f].each do |v|
- dataset.add(c,f,v)
- end
- end
- end
- end
- end
- # set feature metadata in new dataset accordingly (including accept values)
- features.each do |f|
- self.features[f].each do |k,v|
- dataset.features[f][k] = v
- end
- end
- dataset.add_metadata(metadata)
- dataset.save
- dataset
- end
-
- # Save dataset at the dataset service
- # - creates a new dataset if uri is not set
- # - overwrites dataset if uri exists
- # @return [String] Dataset URI
- def save
- @compounds.uniq!
- # create dataset if uri is empty
- self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:subjectid => @subjectid}).to_s.chomp unless @uri
- if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
- RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => @subjectid})
- else
- s = Serializer::Owl.new
- s.add_dataset(self)
- RestClientWrapper.post(@uri, s.to_rdfxml,{:content_type => "application/rdf+xml" , :subjectid => @subjectid})
- end
- @uri
- end
-
- private
- # Copy a dataset (rewrites URI)
- def copy(dataset)
- @metadata = dataset.metadata
- @data_entries = dataset.data_entries
- @compounds = dataset.compounds
- @features = dataset.features
- if @uri
- self.uri = @uri
- else
- @uri = dataset.metadata[XSD.anyURI]
- end
- end
end
end
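For orientation, a minimal usage sketch of the slimmed-down Dataset API above; the dataset URI is a placeholder, not taken from this commit:

    require 'opentox-client'

    # Placeholder dataset URI; any OpenTox dataset service URI would do.
    dataset = OpenTox::Dataset.new "http://example.org/dataset/1"

    # compounds and features now query the service's text/uri-list
    # representations and wrap each returned URI in an OpenTox object.
    dataset.compounds.each { |compound| puts compound.uri }
    dataset.features.each  { |feature|  puts feature.uri }

    # data_entries pulls the RDF representation once (toggling @reload)
    # and collects the ot:dataEntry resources from the parsed graph.
    entries = dataset.data_entries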
diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb
index e68fd7f..8616995 100644
--- a/lib/opentox-client.rb
+++ b/lib/opentox-client.rb
@@ -5,6 +5,7 @@ require 'rdf/raptor'
require "rest-client"
require 'uri'
require 'yaml'
+require 'json'
require 'logger'
# define constants and global variables
@@ -29,4 +30,4 @@ require File.join(File.dirname(__FILE__),"otlogger.rb") # avoid require conflict
require File.join(File.dirname(__FILE__),"opentox.rb")
require File.join(File.dirname(__FILE__),"task.rb")
require File.join(File.dirname(__FILE__),"compound.rb")
-#require File.join(File.dirname(__FILE__),"dataset.rb")
+require File.join(File.dirname(__FILE__),"dataset.rb")
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 4338302..4b43547 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -4,7 +4,7 @@ $logger.level = Logger::DEBUG
module OpenTox
- attr_accessor :uri, :subjectid, :rdf, :response
+ attr_accessor :uri, :subjectid, :rdf, :response, :reload
# Ruby interface
@@ -15,38 +15,34 @@ module OpenTox
def initialize uri=nil, subjectid=nil
@uri = uri.to_s.chomp
@subjectid = subjectid
+ @reload = true
@rdf = RDF::Graph.new
end
# Load metadata from service
def pull
- kind_of?(OpenTox::Dataset) ? uri = File.join(@uri,"metadata") : uri = @uri
# TODO generic method for all formats
- parse_rdfxml RestClientWrapper.get(uri,{},{:accept => $default_rdf, :subjectid => @subjectid})
+ parse_rdfxml RestClientWrapper.get(@uri,{},{:accept => $default_rdf, :subjectid => @subjectid})
end
# Get object metadata
# @return [Hash] Metadata
- # TODO: rename to_hash? or store in object variables
def metadata
- pull # force update
- metadata = {}
- @rdf.query([RDF::URI.new(@uri),nil,nil]).collect do |statement|
- metadata[statement.predicate] ||= []
- metadata[statement.predicate] << statement.object
- end
- metadata.each{|k,v| metadata[k] = v.first if v.size == 1}
+ pull if @reload # force update
+ @rdf.to_hash[RDF::URI.new(@uri)]
end
# Get metadata values
# @param [RDF] Key from RDF Vocabularies
# @return [Array] Values for supplied key
def [](key)
- pull # force update
+ pull if @reload # force update
result = @rdf.query([RDF::URI.new(@uri),key,nil]).collect{|statement| statement.object}
- return nil if result.empty?
+ # TODO: convert to OpenTox objects??
+ return nil if result and result.empty?
return result.first.to_s if result.size == 1
return result.collect{|r| r.to_s}
+ result
end
# Save object at service
@@ -74,6 +70,20 @@ module OpenTox
end
end
+# def to_hash
+# hash = {}
+# metadata.each{|k,v| v.is_a?(Array) ? hash[k.to_s] = v.collect{|i| i.to_s} : hash[k.to_s] = v.to_s}
+# hash
+# end
+
+ def to_yaml
+ @rdf.to_hash.to_yaml
+ end
+
+ def to_json
+ to_hash.to_json
+ end
+
# REST API
def get headers={}
headers[:subjectid] ||= @subjectid
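A rough sketch of the new @reload bookkeeping and the YAML serialization added above; the feature URI is a placeholder:

    feature = OpenTox::Feature.new "http://example.org/feature/1"  # placeholder URI

    feature[RDF::OWL.sameAs]   # pulls RDF from the service; @reload is true initially
    feature.reload = false     # subsequent lookups reuse the cached @rdf graph
    feature.metadata           # no additional HTTP request
    feature.reload = true      # the next access pulls fresh data again

    puts feature.to_yaml       # YAML dump of the parsed graph (@rdf.to_hash.to_yaml)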
diff --git a/test/feature.rb b/test/feature.rb
index 8983c11..f37f298 100644
--- a/test/feature.rb
+++ b/test/feature.rb
@@ -17,7 +17,7 @@ class FeatureTest < Test::Unit::TestCase
@features.each do |uri|
f = OpenTox::Feature.new(uri)
assert_equal RDF::OT1.TUM_CDK_nAtom, f[RDF::OWL.sameAs]
- assert_equal RDF::OT1.TUM_CDK_nAtom, f.metadata[RDF::OWL.sameAs]
+ assert_equal RDF::OT1.TUM_CDK_nAtom, f.metadata[RDF::OWL.sameAs].first.to_s
assert_equal [RDF::OT1.Feature,RDF::OT1.NumericFeature].sort, f[RDF.type].sort
end
end
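The adjusted assertion reflects that the new metadata accessor returns the raw RDF terms per predicate, while the [] accessor stringifies single values; roughly:

    f = OpenTox::Feature.new uri             # uri as iterated from @features above
    f[RDF::OWL.sameAs]                       # => single value as a String
    f.metadata[RDF::OWL.sameAs]              # => array of RDF terms from the graph
    f.metadata[RDF::OWL.sameAs].first.to_s   # => same string, hence .first.to_s in the test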