summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Maunz <andreas@maunz.de>2012-10-22 17:31:35 +0200
committerAndreas Maunz <andreas@maunz.de>2012-10-22 17:31:35 +0200
commit3b81bead90b64cad999a7611e57e347eb0e8d0fe (patch)
treea92b1be3e9f790833b0d74370c243c6f6fa428ff
parent8b908fd2b6162785056c3d6234d438737ab0b6d4 (diff)
Added feature finding for csv upload
-rw-r--r--application.rb190
-rw-r--r--lib/utils/shims/feature.rb64
2 files changed, 173 insertions, 81 deletions
diff --git a/application.rb b/application.rb
index ce267f5..c6f2ccc 100644
--- a/application.rb
+++ b/application.rb
@@ -152,95 +152,123 @@ module OpenTox
dataset.to_ntriples
=end
- @warnings = []
- ntriples = ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.Dataset}>."]
- ntriples << ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.OrderedDataset}>."]
+ begin
+ @warnings = []
+ ntriples = ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.Dataset}>."]
+ ntriples << ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.OrderedDataset}>."]
+
+ # features
+ feature_names = table.shift.collect{|f| f.strip}
+ @warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size
+ compound_format = feature_names.shift.strip
+ bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: URI, SMILES, InChI." unless compound_format =~ /URI|URL|SMILES|InChI/i
+ features = []
+ ignored_feature_indices = []
+ feature_names.each_with_index do |f,i|
+ feature_existing = OpenTox::Feature.find_by_title(f,{})
+ feature_new = OpenTox::Feature.new File.join($feature[:uri], SecureRandom.uuid)
+ feature_new[RDF::DC.title] = f
+ values = table.collect{|row| val=row[i+1]; val.strip! unless val.nil?; val }.uniq.compact
+ types = values.collect{|v| feature_type(v)}.uniq
+ if values.size == 0
+ # AM: 'Empty' feature
+ elsif values.size <= 5 # max classes
+ feature_new.append RDF.type, [ RDF::OT.NominalFeature, RDF::OT.StringFeature ]
+ feature_new.append RDF::OT.acceptValue, values
+ end
+ if types.size == 1 and types[0] == RDF::OT.NumericFeature
+ feature_new.append RDF.type, RDF::OT.NumericFeature
+ else
+ feature_new.append RDF.type, [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] # only nominal type for mixed cases
+ feature_new.append RDF::OT.acceptValue, values
+ end
- # features
- feature_names = table.shift.collect{|f| f.strip}
- @warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size
- compound_format = feature_names.shift.strip
- bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: URI, SMILES, InChI." unless compound_format =~ /URI|URL|SMILES|InChI/i
- features = []
- ignored_feature_indices = []
- feature_names.each_with_index do |f,i|
- feature = OpenTox::Feature.new File.join($feature[:uri], SecureRandom.uuid)
- feature[RDF::DC.title] = f
- features << feature
- values = table.collect{|row| val=row[i+1]; val.strip! unless val.nil?; val }.uniq.compact
- types = values.collect{|v| feature_type(v)}.uniq
- if values.size == 0
- elsif values.size <= 5 # max classes
- feature.append RDF.type, RDF::OT.NominalFeature
- feature.append RDF.type, RDF::OT.StringFeature
- feature[RDF::OT.acceptValue] = values
- end
- if types.size == 1 and types[0] == RDF::OT.NumericFeature
- feature.append RDF.type, RDF::OT.NumericFeature
- else
- feature.append RDF.type, RDF::OT.NominalFeature # only nominal type for mixed cases
- feature.append RDF.type, RDF::OT.StringFeature
- feature[RDF::OT.acceptValue] = values
- end
- feature.put
- ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}>."
- ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> #{i} ."
- end
+ # Check for equality of features
+ features_equal = true
+ if feature_new and feature_existing
+ [ RDF.type, RDF::OT.acceptValue ].each { |predicate|
+ unless (
+ ( feature_new[predicate].nil? and
+ feature_existing[predicate].nil? ) or
+ ( feature_new[predicate] and
+ feature_existing[predicate] and
+ feature_new[predicate].sort == feature_existing[predicate].sort )
+ )
+ features_equal = false
+ end
+ }
+ end
- # compounds and values
- compound_uris = []
- table.each_with_index do |values,j|
- compound = values.shift
- begin
- case compound_format
- when /URI|URL/i
- compound_uri = compound
- when /SMILES/i
- compound_uri = OpenTox::Compound.from_smiles($compound[:uri], compound).uri
- when /InChI/i
- compound_uri = OpenTox::Compound.from_inchi($compound[:uri], compound).uri
+ if features_equal
+ features << feature_existing
+ feature = feature_existing
+ else
+ features << feature_new
+ feature_new.put
+ feature = feature_new
end
- rescue
- @warnings << "Cannot parse compound \"#{compound}\" at position #{j+2}, all entries are ignored."
- next
- end
- unless compound_uri.match(/InChI=/)
- @warnings << "Cannot parse compound \"#{compound}\" at position #{j+2}, all entries are ignored."
- next
- end
- compound_uris << compound_uri
- unless values.size == features.size
- @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored."
- next
+ ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}>."
+ ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> #{i} ."
end
- ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>."
- ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ."
-
- values.each_with_index do |v,i|
- #@warnings << "Empty value for compound #{compound} (row #{j+2}) and feature \"#{feature_names[i]}\" (column #{i+2})." if v.blank?
- #@warnings << "Empty value in row #{j+2}, column #{i+2} (feature \"#{feature_names[i]}\")." if v.blank?
-
- data_entry_node = "_:dataentry"+ j.to_s
- value_node = data_entry_node+ "_value"+ i.to_s
- ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ."
- ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ."
- ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ."
- ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ."
- ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ."
- ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ."
- ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ."
+ # compounds and values
+ compound_uris = []
+ table.each_with_index do |values,j|
+ compound = values.shift
+ begin
+ case compound_format
+ when /URI|URL/i
+ compound_uri = compound
+ when /SMILES/i
+ compound_uri = OpenTox::Compound.from_smiles($compound[:uri], compound).uri
+ when /InChI/i
+ compound_uri = OpenTox::Compound.from_inchi($compound[:uri], compound).uri
+ end
+ rescue
+ @warnings << "Cannot parse compound \"#{compound}\" at position #{j+2}, all entries are ignored."
+ next
+ end
+ unless compound_uri.match(/InChI=/)
+ @warnings << "Cannot parse compound \"#{compound}\" at position #{j+2}, all entries are ignored."
+ next
+ end
+ compound_uris << compound_uri
+ unless values.size == features.size
+ @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored."
+ next
+ end
+ ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>."
+ ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ."
+
+ values.each_with_index do |v,i|
+ #@warnings << "Empty value for compound #{compound} (row #{j+2}) and feature \"#{feature_names[i]}\" (column #{i+2})." if v.blank?
+ #@warnings << "Empty value in row #{j+2}, column #{i+2} (feature \"#{feature_names[i]}\")." if v.blank?
+
+ data_entry_node = "_:dataentry"+ j.to_s
+ value_node = data_entry_node+ "_value"+ i.to_s
+ ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ."
+ ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ."
+ ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ."
+ ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ."
+ ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ."
+ ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ."
+ ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ."
+
+ end
+
+ end
+ compound_uris.duplicates.each do |uri|
+ positions = []
+ compound_uris.each_with_index{|c,i| positions << i+1 if c == uri}
+ @warnings << "Duplicate compound #{uri} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
end
+ ntriples << "<#{@uri}> <#{RDF::OT.Warnings}> \"#{@warnings.join('\n')}\" ."
+ ntriples.join("\n")
+ rescue Exception => e
+ $logger.debug "#{e.class}: #{e.message}"
+ $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end
- compound_uris.duplicates.each do |uri|
- positions = []
- compound_uris.each_with_index{|c,i| positions << i+1 if c == uri}
- @warnings << "Duplicate compound #{uri} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
- end
-
- ntriples << "<#{@uri}> <#{RDF::OT.Warnings}> \"#{@warnings.join('\n')}\" ."
- ntriples.join("\n")
=begin
=end
end
diff --git a/lib/utils/shims/feature.rb b/lib/utils/shims/feature.rb
new file mode 100644
index 0000000..d576872
--- /dev/null
+++ b/lib/utils/shims/feature.rb
@@ -0,0 +1,64 @@
+# Shims for translation to the new architecture (TM).
+# Author: Andreas Maunz, 2012
+
+module OpenTox
+
+ # Shims for the feature class
+ class Feature
+
+ # Load a feature from its URI
+ # @param [String] uri Feature URI
+ # @param [String] subjectid Optional, handed to the constructor unchanged
+ # @return [OpenTox::Feature, nil] The feature after #get was called on it, or nil if no uri was given
+ def self.find(uri, subjectid=nil)
+   return nil unless uri
+   # tap runs the block on the new feature and then returns the feature itself
+   OpenTox::Feature.new(uri, subjectid).tap { |feature| feature.get }
+ end
+
+ # Load a feature given its title, looked up with a SPARQL query against the 4store backend.
+ # If none is found, a new feature is created and put using metadata; with empty metadata nil is returned instead.
+ # @param [String] title Feature title
+ # @param [Hash] metadata Feature metadata, used only when the feature has to be created
+ # @return [OpenTox::Feature, nil] The found or newly created feature, or nil if not found and metadata is empty
+ def self.find_by_title(title, metadata)
+   escaped_title = title.to_s.gsub(/[\\']/) { |char| "\\#{char}" } # escape \ and ' so the title cannot end the quoted SPARQL literal early
+   sparql = "SELECT DISTINCT ?feature WHERE { ?feature <#{RDF.type}> <#{RDF::OT['feature'.capitalize]}>. ?feature <#{RDF::DC.title}> '#{escaped_title}' }"
+   feature_uri = OpenTox::Backend::FourStore.query(sparql,"text/uri-list").split("\n").first # is nil for non-existing feature
+   if feature_uri.nil? and metadata.size>0
+     feature = OpenTox::Feature.new feature_uri, @subjectid # NOTE(review): feature_uri is always nil here - confirm the constructor assigns a URI
+     feature.title = title
+     feature.metadata = metadata
+     feature.put
+   else
+     feature = OpenTox::Feature.find(feature_uri, @subjectid) # find returns nil for a nil URI
+   end
+   feature
+ end
+
+ # Find out feature type from the rdf types stored under RDF.type
+ # Classification takes precedence over regression when both types are present
+ # @return [String] "classification", "regression" or "unknown"
+ def feature_type
+   bad_request_error "rdf type of feature '#{@uri}' not set" unless self[RDF.type]
+   if self[RDF.type].include?(OT.NominalFeature)
+     "classification"
+   elsif self[RDF.type].to_a.flatten.include?(OT.NumericFeature) # was the literal array [RDF.type], which holds only the predicate
+     "regression"
+   else
+     "unknown"
+   end
+ end
+
+ # Get the accept values of this feature
+ # (takes no parameters; the values are read from self[OT.acceptValue])
+ # @return [Array, nil] Accept values in sorted order, or nil when self[OT.acceptValue] is nil
+ def accept_values
+   values = self[OT.acceptValue]
+   # sort returns a new array, so its result (not the unsorted original) must be returned
+   values && values.sort
+ end
+
+ end
+
+end