From 7e547fd4a296f497615a7805d565b378cb1bd7cd Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Wed, 14 Nov 2018 17:33:44 +0100 Subject: bad_request_error substituted with ArgumentError --- lib/crossvalidation.rb | 2 +- lib/dataset.rb | 8 +-- lib/enm-import.rb | 125 ++++++++++++++++++++++++++++++++++++++++ lib/error.rb | 66 --------------------- lib/import.rb~ | 125 ---------------------------------------- lib/lazar.rb | 2 +- lib/leave-one-out-validation.rb | 2 +- lib/model.rb | 16 ++--- lib/overwrite.rb | 2 +- lib/rest-client-wrapper.rb | 10 +++- lib/validation-statistics.rb | 2 +- 11 files changed, 149 insertions(+), 211 deletions(-) create mode 100644 lib/enm-import.rb delete mode 100644 lib/error.rb delete mode 100644 lib/import.rb~ (limited to 'lib') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index c866ebc..8719dca 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -15,7 +15,7 @@ module OpenTox $logger.debug model.algorithms klass = ClassificationCrossValidation if model.is_a? Model::LazarClassification klass = RegressionCrossValidation if model.is_a? Model::LazarRegression - bad_request_error "Unknown model class #{model.class}." unless klass + raise ArgumentError, "Unknown model class #{model.class}." unless klass cv = klass.new( name: model.name, diff --git a/lib/dataset.rb b/lib/dataset.rb index 90b4993..3979105 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -142,7 +142,7 @@ module OpenTox dataset = self.new(:source => file, :name => File.basename(file,".*"), :md5 => md5) dataset.parse_table table else - bad_request_error "#{file} is not a valid CSV/TSV file. Could not find "," ";" or TAB as column separator." + raise ArgumentError, "#{file} is not a valid CSV/TSV file. Could not find "," ";" or TAB as column separator." end end dataset @@ -251,7 +251,7 @@ module OpenTox # features feature_names = table.shift.collect{|f| f.strip} - bad_request_error "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size + raise ArgumentError, "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size if feature_names[0] =~ /ID/i # check ID column original_id = OriginalId.find_or_create_by(:dataset_id => self.id,:name => feature_names.shift) @@ -260,7 +260,7 @@ module OpenTox end compound_format = feature_names.shift - bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i + raise ArgumentError, "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i original_smiles = OriginalSmiles.find_or_create_by(:dataset_id => self.id) if compound_format.match(/SMILES/i) numeric = [] @@ -473,7 +473,7 @@ module OpenTox merged_feature = MergedNumericBioActivity.find_or_create_by(:name => features.collect{|f| f.name} + " merged", :original_feature_ids => features.collect{|f| f.id}) # TODO: regression transformations end else - bad_request_error "Cannot merge features of different types (#{feature_classes})." + raise ArgumentError, "Cannot merge features of different types (#{feature_classes})." end accept_values = [] diff --git a/lib/enm-import.rb b/lib/enm-import.rb new file mode 100644 index 0000000..cf1a26f --- /dev/null +++ b/lib/enm-import.rb @@ -0,0 +1,125 @@ +module OpenTox + + # Import data from external databases + module Import + + class Enanomapper + include OpenTox + + # Import from eNanoMapper + def self.import + # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%) + datasets = {} + bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle', {}, {accept: :json}))["dataset"] + bundles.each do |bundle| + datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"].strip) + $logger.debug bundle["title"].strip + nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"], {}, {accept: :json}))["dataEntry"] + nanoparticles.each_with_index do |np,n| + core_id = nil + coating_ids = [] + np["composition"].each do |c| + uri = c["component"]["compound"]["URI"] + data = JSON.parse(RestClientWrapper.get("https://data.enanomapper.net/query/compound/url/all?search=#{uri}", {}, {accept: :json})) + source = data["dataEntry"][0]["compound"]["URI"] + smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"] + names = [] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"] + if smiles + compound = Compound.find_or_create_by(:smiles => smiles) + compound.name = names.first + compound.names = names.compact + else + compound = Compound.find_or_create_by(:name => names.first,:names => names.compact) + end + compound.source = source + compound.save + if c["relation"] == "HAS_CORE" + core_id = compound.id.to_s + elsif c["relation"] == "HAS_COATING" + coating_ids << compound.id.to_s + end + end if np["composition"] + nanoparticle = Nanoparticle.find_or_create_by( + :name => np["values"]["https://data.enanomapper.net/identifier/name"], + :source => np["compound"]["URI"], + :core_id => core_id, + :coating_ids => coating_ids + ) + #np["bundles"].keys.each do |bundle_uri| + #nanoparticle.dataset_ids << datasets[bundle_uri].id + #end + + studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study"), {}, {accept: :json}))["study"] + studies.each do |study| + dataset = datasets[np["bundles"].keys.first] + proteomics_features = {} + category = study["protocol"]["topcategory"] + source = study["protocol"]["category"]["term"] + study["effects"].each do |effect| + + effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature + effect["conditions"].delete_if { |k, v| v.nil? } + + if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data + + JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step + proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true) + nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset + end + else + name = effect["endpoint"] + unit = effect["result"]["unit"] + warnings = [] + case name + when "Log2 transformed" # use a sensible name + name = "log2(Net cell association)" + warnings = ["Original name was 'Log2 transformed'"] + unit = "log2(mL/ug(Mg))" + when "Total protein (BCA assay)" + category = "P-CHEM" + warnings = ["Category changed from TOX to P-CHEM"] + end + feature = klass.find_or_create_by( + :name => name, + :unit => unit, + :category => category, + :conditions => effect["conditions"], + :source => study["protocol"]["category"]["term"], + :measured => true, + :warnings => warnings + ) + nanoparticle.parse_ambit_value feature, effect["result"], dataset + end + end + end + nanoparticle.save + print "#{n}, " + end + puts + end + datasets.each { |u,d| d.save } + end + +=begin + def self.import_ld # defunct, AMBIT JSON_LD does not have substance entries + #get list of bundle URIs + bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] + datasets = [] + bundles.each do |bundle| + uri = bundle["URI"] + study = JSON.parse(`curl -H 'Accept:application/ld+json' '#{uri}/substance'`) + study["@graph"].each do |i| + puts i.to_yaml if i.keys.include? "sio:has-value" + end + end + datasets.collect{|d| d.id} + end +=end + + end + + end + +end diff --git a/lib/error.rb b/lib/error.rb deleted file mode 100644 index 39b3c76..0000000 --- a/lib/error.rb +++ /dev/null @@ -1,66 +0,0 @@ -module OpenToxError - attr_accessor :http_code, :message, :cause - def initialize message=nil - message = message.to_s.gsub(/\A"|"\Z/, '') if message # remove quotes - super message - @http_code ||= 500 - @message = message.to_s - @cause = cut_backtrace(caller) - $logger.error("\n"+JSON.pretty_generate({ - :http_code => @http_code, - :message => @message, - :cause => @cause - })) - end - - def cut_backtrace(trace) - if trace.is_a?(Array) - cut_index = trace.find_index{|line| line.match(/sinatra|minitest/)} - cut_index ||= trace.size - cut_index -= 1 - cut_index = trace.size-1 if cut_index < 0 - trace[0..cut_index] - else - trace - end - end - -end - -class RuntimeError - include OpenToxError -end - -# clutters log file with library errors -#class NoMethodError - #include OpenToxError -#end - -module OpenTox - - class Error < RuntimeError - include OpenToxError - - def initialize(code, message=nil) - @http_code = code - super message - end - end - - # OpenTox errors - RestClientWrapper.known_errors.each do |error| - # create error classes - c = Class.new Error do - define_method :initialize do |message=nil| - super error[:code], message - end - end - OpenTox.const_set error[:class],c - - # define global methods for raising errors, eg. bad_request_error - Object.send(:define_method, error[:method]) do |message| - raise c.new(message) - end - end - -end diff --git a/lib/import.rb~ b/lib/import.rb~ deleted file mode 100644 index cf1a26f..0000000 --- a/lib/import.rb~ +++ /dev/null @@ -1,125 +0,0 @@ -module OpenTox - - # Import data from external databases - module Import - - class Enanomapper - include OpenTox - - # Import from eNanoMapper - def self.import - # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%) - datasets = {} - bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle', {}, {accept: :json}))["dataset"] - bundles.each do |bundle| - datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"].strip) - $logger.debug bundle["title"].strip - nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"], {}, {accept: :json}))["dataEntry"] - nanoparticles.each_with_index do |np,n| - core_id = nil - coating_ids = [] - np["composition"].each do |c| - uri = c["component"]["compound"]["URI"] - data = JSON.parse(RestClientWrapper.get("https://data.enanomapper.net/query/compound/url/all?search=#{uri}", {}, {accept: :json})) - source = data["dataEntry"][0]["compound"]["URI"] - smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"] - names = [] - names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"] - if smiles - compound = Compound.find_or_create_by(:smiles => smiles) - compound.name = names.first - compound.names = names.compact - else - compound = Compound.find_or_create_by(:name => names.first,:names => names.compact) - end - compound.source = source - compound.save - if c["relation"] == "HAS_CORE" - core_id = compound.id.to_s - elsif c["relation"] == "HAS_COATING" - coating_ids << compound.id.to_s - end - end if np["composition"] - nanoparticle = Nanoparticle.find_or_create_by( - :name => np["values"]["https://data.enanomapper.net/identifier/name"], - :source => np["compound"]["URI"], - :core_id => core_id, - :coating_ids => coating_ids - ) - #np["bundles"].keys.each do |bundle_uri| - #nanoparticle.dataset_ids << datasets[bundle_uri].id - #end - - studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study"), {}, {accept: :json}))["study"] - studies.each do |study| - dataset = datasets[np["bundles"].keys.first] - proteomics_features = {} - category = study["protocol"]["topcategory"] - source = study["protocol"]["category"]["term"] - study["effects"].each do |effect| - - effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature - effect["conditions"].delete_if { |k, v| v.nil? } - - if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data - - JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step - proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true) - nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset - end - else - name = effect["endpoint"] - unit = effect["result"]["unit"] - warnings = [] - case name - when "Log2 transformed" # use a sensible name - name = "log2(Net cell association)" - warnings = ["Original name was 'Log2 transformed'"] - unit = "log2(mL/ug(Mg))" - when "Total protein (BCA assay)" - category = "P-CHEM" - warnings = ["Category changed from TOX to P-CHEM"] - end - feature = klass.find_or_create_by( - :name => name, - :unit => unit, - :category => category, - :conditions => effect["conditions"], - :source => study["protocol"]["category"]["term"], - :measured => true, - :warnings => warnings - ) - nanoparticle.parse_ambit_value feature, effect["result"], dataset - end - end - end - nanoparticle.save - print "#{n}, " - end - puts - end - datasets.each { |u,d| d.save } - end - -=begin - def self.import_ld # defunct, AMBIT JSON_LD does not have substance entries - #get list of bundle URIs - bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] - datasets = [] - bundles.each do |bundle| - uri = bundle["URI"] - study = JSON.parse(`curl -H 'Accept:application/ld+json' '#{uri}/substance'`) - study["@graph"].each do |i| - puts i.to_yaml if i.keys.include? "sio:has-value" - end - end - datasets.collect{|d| d.id} - end -=end - - end - - end - -end diff --git a/lib/lazar.rb b/lib/lazar.rb index c3bbbf3..a077626 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -77,7 +77,7 @@ CLASSES = ["Feature","Substance","Dataset","CrossValidation","LeaveOneOutValidat [ # be aware of the require sequence as it affects class/method overwrites "overwrite.rb", "rest-client-wrapper.rb", - "error.rb", + #"error.rb", "opentox.rb", "feature.rb", "physchem.rb", diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index d37b6ce..7d73b89 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -9,7 +9,7 @@ module OpenTox # @param [OpenTox::Model::Lazar] # @return [OpenTox::Validation::LeaveOneOut] def self.create model - bad_request_error "Cannot create leave one out validation for models with supervised feature selection. Please use crossvalidation instead." if model.algorithms[:feature_selection] + raise ArgumentError, "Cannot create leave one out validation for models with supervised feature selection. Please use crossvalidation instead." if model.algorithms[:feature_selection] $logger.debug "#{model.name}: LOO validation started" t = Time.now model.training_dataset.features.collect{|f| f.class}.include?(NominalBioActivity) ? klass = ClassificationLeaveOneOut : klass = RegressionLeaveOneOut diff --git a/lib/model.rb b/lib/model.rb index db69120..caf8a6e 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -36,7 +36,7 @@ module OpenTox # # @return [OpenTox::Model::Lazar] def self.create prediction_feature:nil, training_dataset:, algorithms:{} - bad_request_error "Please provide a training_dataset and a optional prediction_feature." unless prediction_feature or training_dataset + raise ArgumentError, "Please provide a training_dataset and a optional prediction_feature." unless prediction_feature or training_dataset prediction_feature ||= training_dataset.features.select{|f| f.is_a? NumericBioActivity or f.is_a? NominalBioActivity}.first unless prediction_feature # guess model type @@ -62,7 +62,7 @@ module OpenTox # set defaults# substance_classes = training_dataset.substances.collect{|s| s.class.to_s}.uniq - bad_request_error "Cannot create models for mixed substance classes '#{substance_classes.join ', '}'." unless substance_classes.size == 1 + raise ArgumentError, "Cannot create models for mixed substance classes '#{substance_classes.join ', '}'." unless substance_classes.size == 1 if substance_classes.first == "OpenTox::Compound" @@ -110,7 +110,7 @@ module OpenTox }, } else - bad_request_error "Cannot create models for #{substance_classes.first}." + raise ArgumentError, "Cannot create models for #{substance_classes.first}." end # overwrite defaults with explicit parameters @@ -175,7 +175,7 @@ module OpenTox model.descriptor_ids = feature_ids & property_ids model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}} else - bad_request_error "Descriptor method '#{descriptor_method}' not implemented." + raise ArgumentError, "Descriptor method '#{descriptor_method}' not implemented." end if model.algorithms[:feature_selection] and model.algorithms[:feature_selection][:method] @@ -224,7 +224,7 @@ module OpenTox end end else - bad_request_error "Unknown descriptor type '#{descriptors}' for similarity method '#{similarity[:method]}'." + raise ArgumentError, "Unknown descriptor type '#{descriptors}' for similarity method '#{similarity[:method]}'." end prediction ||= {:warnings => [], :measurements => []} @@ -300,7 +300,7 @@ module OpenTox elsif object.is_a? Dataset substances = object.substances else - bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Substances or an OpenTox::Dataset as parameter." + raise ArgumentError, "Please provide a OpenTox::Compound an Array of OpenTox::Substances or an OpenTox::Dataset as parameter." end # make predictions @@ -486,7 +486,7 @@ module OpenTox # @return [OpenTox::Model::Validation] lazar model with five independent 10-fold crossvalidations def self.from_csv_file file metadata_file = file.sub(/csv$/,"json") - bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file + raise ArgumentError, "No metadata file #{metadata_file}" unless File.exist? metadata_file model_validation = self.new JSON.parse(File.read(metadata_file)) training_dataset = Dataset.from_csv_file file model = Lazar.create training_dataset: training_dataset @@ -510,7 +510,7 @@ module OpenTox unless training_dataset # try to import Import::Enanomapper.import training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - bad_request_error "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset + raise ArgumentError, "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset end prediction_feature ||= Feature.where(name: "log2(Net cell association)", category: "TOX").first diff --git a/lib/overwrite.rb b/lib/overwrite.rb index 0dd1c8a..d482902 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -84,7 +84,7 @@ class String def to_boolean return true if self == true || self =~ (/(true|t|yes|y|1)$/i) return false if self == false || self.nil? || self =~ (/(false|f|no|n|0)$/i) - bad_request_error "invalid value for Boolean: \"#{self}\"" + raise ArgumentError, "invalid value for Boolean: \"#{self}\"" end end diff --git a/lib/rest-client-wrapper.rb b/lib/rest-client-wrapper.rb index c9fd40f..db23e66 100644 --- a/lib/rest-client-wrapper.rb +++ b/lib/rest-client-wrapper.rb @@ -28,14 +28,14 @@ module OpenTox uri = Addressable::URI.encode(uri) # check input - bad_request_error "Headers are not a hash: #{headers.inspect} for #{uri}." unless headers==nil or headers.is_a?(Hash) + raise ArgumentError, "Headers are not a hash: #{headers.inspect} for #{uri}." unless headers==nil or headers.is_a?(Hash) headers[:subjectid] ||= @@subjectid - bad_request_error "Invalid URI: '#{uri}'" unless URI.valid? uri + raise ArgumentError, "Invalid URI: '#{uri}'" unless URI.valid? uri # make sure that no header parameters are set in the payload [:accept,:content_type,:subjectid].each do |header| if defined? $aa || URI(uri).host == URI($aa[:uri]).host else - bad_request_error "#{header} should be submitted in the headers of URI: #{uri}" if payload and payload.is_a?(Hash) and payload[header] + raise ArgumentError, "#{header} should be submitted in the headers of URI: #{uri}" if payload and payload.is_a?(Hash) and payload[header] end end @@ -56,6 +56,7 @@ module OpenTox @response = @request.execute do |response, request, result| if [301, 302, 307].include? response.code and request.method == :get response.follow_redirection(request, result) +=begin elsif response.code >= 400 and !URI.task?(uri) error = known_errors.collect{|e| e if e[:code] == response.code}.compact.first begin # errors are returned as error reports in json, try to parse @@ -68,6 +69,7 @@ module OpenTox cause = nil end Object.method(error[:method]).call "#{msg}, #{uri}, #{cause}" # call error method +=end else response end @@ -75,6 +77,7 @@ module OpenTox end end +=begin #@return [Array] of hashes with error code, method and class def self.known_errors errors = [] @@ -88,6 +91,7 @@ module OpenTox end errors end +=end end end diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index ad4c14d..f3e3af8 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -242,7 +242,7 @@ module OpenTox title = "log2(Net cell association [mL/ug(Mg)])" else title = feature.name - title += " [#{feature.unit}]" if feature.unit and !feature.unit.blank? + title += "-log10(#{feature.unit})" if feature.unit and !feature.unit.blank? end R.eval "image = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)" R.eval "image = image + geom_abline(intercept=0, slope=1)" -- cgit v1.2.3