From 9750e0309500259e9a56e267ce87984fb5bb5e53 Mon Sep 17 00:00:00 2001 From: gebele Date: Mon, 26 Nov 2018 15:29:26 +0000 Subject: clean out; better response codes; prepare for batch --- lib/aa.rb | 82 -------------------------------- lib/compound.rb | 48 ++++++++++--------- lib/dataset.rb | 122 ++++++++++++++++++++++++++++++++++++++++++------ lib/lazar-rest.rb | 69 --------------------------- lib/model.rb | 131 ++++++++++++++++++++++++++++++++++++++++++++++++---- lib/nanoparticle.rb | 30 ------------ lib/substance.rb | 24 ++++++---- 7 files changed, 270 insertions(+), 236 deletions(-) delete mode 100644 lib/aa.rb delete mode 100644 lib/lazar-rest.rb delete mode 100644 lib/nanoparticle.rb (limited to 'lib') diff --git a/lib/aa.rb b/lib/aa.rb deleted file mode 100644 index 6dfec4b..0000000 --- a/lib/aa.rb +++ /dev/null @@ -1,82 +0,0 @@ -post "/aa/authenticate/?" do - mime_types = ["text/plain"] - bad_request_error "Mime type #{@accept} not supported here. Please request data as #{mime_types.join(', ')}." unless mime_types.include? @accept - bad_request_error "Please send formdata username." unless params[:username] - bad_request_error "Please send formdata password." unless params[:password] - case @accept - when "text/plain" - if OpenTox::Authorization.authenticate(params[:username], params[:password]) - return OpenTox::RestClientWrapper.subjectid - else - return nil - end - else - bad_request_error "'#{@accept}' is not a supported content type." - end -end - -post "/aa/logout/?" do - mime_types = ["text/plain"] - bad_request_error "Mime type #{@accept} not supported here. Please request data as #{mime_types.join(', ')}." unless mime_types.include? @accept - bad_request_error "Please send formdata subjectid." unless params[:subjectid] - case @accept - when "text/plain" - if OpenTox::Authorization.logout(params[:subjectid]) - return "Successfully logged out. \n" - else - return "Logout failed.\n" - end - else - bad_request_error "'#{@accept}' is not a supported content type." - end -end - -module OpenTox - - AA = "https://opensso.in-silico.ch" - - module Authorization - #Authentication against OpenSSO. Returns token. Requires Username and Password. - # @param user [String] Username - # @param pw [String] Password - # @return [Boolean] true if successful - def self.authenticate(user, pw) - begin - res = RestClientWrapper.post("#{AA}/auth/authenticate",{:username=>user, :password => pw},{:subjectid => ""}).sub("token.id=","").sub("\n","") - if is_token_valid(res) - RestClientWrapper.subjectid = res - return true - else - bad_request_error "Authentication failed #{res.inspect}" - end - rescue - bad_request_error "Authentication failed #{res.inspect}" - end - end - - #Logout on opensso. Make token invalid. Requires token - # @param [String] subjectid the subjectid - # @return [Boolean] true if logout is OK - def self.logout(subjectid=RestClientWrapper.subjectid) - begin - out = RestClientWrapper.post("#{AA}/auth/logout", :subjectid => subjectid) - return true unless is_token_valid(subjectid) - rescue - return false - end - return false - end - - #Checks if a token is a valid token - # @param [String]subjectid subjectid from openSSO session - # @return [Boolean] subjectid is valid or not. - def self.is_token_valid(subjectid=RestClientWrapper.subjectid) - begin - return true if RestClientWrapper.post("#{AA}/auth/isTokenValid",:tokenid => subjectid) == "boolean=true\n" - rescue #do rescue because openSSO throws 401 - return false - end - return false - end - end -end \ No newline at end of file diff --git a/lib/compound.rb b/lib/compound.rb index 01ba036..77948ab 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -38,27 +38,31 @@ post "/compound/descriptor/?" do end end -get %r{/compound/(.+)} do |inchi| - bad_request_error "Input parameter #{inchi} is not an InChI" unless inchi.match(/^InChI=/) - compound = Compound.from_inchi URI.unescape(inchi) - response['Content-Type'] = @accept - case @accept - when "application/json" - return JSON.pretty_generate JSON.parse(compound.to_json) - when "chemical/x-daylight-smiles" - return compound.smiles - when "chemical/x-inchi" - return compound.inchi - when "chemical/x-mdl-sdfile" - return compound.sdf - when "chemical/x-mdl-molfile" - when "image/png" - return compound.png - when "image/svg+xml" - return compound.svg - when "text/plain" - return "#{compound.names}\n" +get %r{/compound/(InChI.+)} do |input| + compound = Compound.from_inchi URI.unescape(input) + if compound + response['Content-Type'] = @accept + case @accept + when "application/json" + c = {"compound": {"id": compound.id, "inchi": compound.inchi, "smiles": compound.smiles, "warnings": compound.warnings}} + return JSON.pretty_generate JSON.parse(c.to_json) + when "chemical/x-daylight-smiles" + return compound.smiles + when "chemical/x-inchi" + return compound.inchi + when "chemical/x-mdl-sdfile" + return compound.sdf + when "chemical/x-mdl-molfile" + when "image/png" + return compound.png + when "image/svg+xml" + return compound.svg + #when "text/plain" + #return "#{compound.names}\n" + else + halt 400, "Content type #{@accept} not supported." + end else - return compound.inspect + halt 400, "Compound with #{input} not found.".to_json end -end \ No newline at end of file +end diff --git a/lib/dataset.rb b/lib/dataset.rb index 7c74f39..749167b 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -17,28 +17,122 @@ end # Get a dataset get "/dataset/:id/?" do - dataset = Dataset.find :id => params[:id] - not_found_error "Dataset with id: #{params[:id]} not found." unless dataset - case @accept - when "application/json" - dataset.data_entries.each do |k, v| - dataset.data_entries[k][:URI] = uri("/substance/#{k}") + if Task.where(id: params[:id]).exists? + task = Task.find params[:id] + halt 404, "Dataset with id: #{params[:id]} not found." unless task.percent == 100 + $logger.debug task.inspect + response['Content-Type'] = "text/csv" + m = Model::Validation.find task.model_id + dataset = Batch.find task.dataset_id + @ids = dataset.ids + warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n") + unless warnings.nil? + @parse = [] + warnings.split("\n").each do |warning| + if warning =~ /^Cannot/ + smi = warning.split("SMILES compound").last.split("at").first + line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i + @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n" + end + end + keys_array = [] + warnings.split("\n").each do |warning| + if warning =~ /^Duplicate/ + text = warning.split("ID").first + numbers = warning.split("ID").last.split("and") + keys_array << numbers.collect{|n| n.strip.to_i} + end + end + @dups = {} + keys_array.each do |keys| + keys.each do |key| + @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n" + end + end + end + $logger.debug "dups: #{@dups}" + endpoint = "#{m.endpoint}_(#{m.species})" + tempfile = Tempfile.new + header = task.csv + lines = [] + $logger.debug task.predictions + task.predictions[m.id.to_s].each_with_index do |hash,idx| + identifier = hash.keys[0] + prediction_id = hash.values[0] + # add duplicate warning at the end of a line if ID matches + if @dups[idx+1] + if prediction_id.is_a? BSON::ObjectId + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + end + else + if @ids.blank? + lines << "#{idx+1},#{identifier},\n" + else + lines << "#{idx+1},#{@ids[idx]}#{identifier},\n" + end + end + else + if prediction_id.is_a? BSON::ObjectId + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}" + end + else + if @ids.blank? + lines << "#{idx+1},#{identifier},\n" + else + lines << "#{idx+1},#{@ids[idx]}#{identifier},\n" + end + end + end end - dataset[:URI] = uri("/dataset/#{dataset.id}") - dataset[:substances] = uri("/dataset/#{dataset.id}/substances") - dataset[:features] = uri("/dataset/#{dataset.id}/features") - return dataset.to_json - when "text/csv", "application/csv" - return dataset.to_csv + (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join("")) + #tempfile.write(header+lines.join("")) + tempfile.rewind + ######################## +=begin + header = task.csv + lines = [] + task.predictions.each_with_index do |result,idx| + identifier = result[0] + prediction_id = result[1] + prediction = Prediction.find prediction_id + lines << "#{idx+1},#{identifier},#{prediction.csv.tr("\n","")}" + end + return header+lines.join("\n") +=end + return tempfile.read else - bad_request_error "Mime type #{@accept} is not supported." + dataset = Dataset.find :id => params[:id] + halt 400, "Dataset with id: #{params[:id]} not found." unless dataset + case @accept + when "application/json" + dataset.data_entries.each do |k, v| + dataset.data_entries[k][:URI] = uri("/substance/#{k}") + end + dataset[:URI] = uri("/dataset/#{dataset.id}") + dataset[:substances] = uri("/dataset/#{dataset.id}/substances") + dataset[:features] = uri("/dataset/#{dataset.id}/features") + return dataset.to_json + when "text/csv", "application/csv" + return dataset.to_csv + else + bad_request_error "Mime type #{@accept} is not supported." + end end end # Get a dataset attribute. One of compounds, nanoparticles, substances, features get "/dataset/:id/:attribute/?" do + if Task.where(id: params[:id]).exists? + halt 400, "No attributes selection available for dataset with id: #{params[:id]}.".to_json + end dataset = Dataset.find :id => params[:id] - not_found_error "Dataset with id: #{params[:id]} not found." unless dataset + halt 400, "Dataset with id: #{params[:id]} not found." unless dataset attribs = ["compounds", "nanoparticles", "substances", "features"] return "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute] out = dataset.send("#{params[:attribute]}") diff --git a/lib/lazar-rest.rb b/lib/lazar-rest.rb deleted file mode 100644 index 255c52f..0000000 --- a/lib/lazar-rest.rb +++ /dev/null @@ -1,69 +0,0 @@ -require "sinatra" -require "sinatra/reloader" -require 'sinatra/cross_origin' - -configure do - $logger = Logger.new(STDOUT) - enable :reloader #if development? - enable :cross_origin - disable :show_exceptions - disable :raise_errors -end - -#set :protection, :except => :frame_options - -# Environment setup from unicorn -E param -ENV["LAZAR_ENV"] = ENV["RACK_ENV"] -require "../lazar/lib/lazar.rb" -require "../qsar-report/lib/qsar-report.rb" -=begin -if ENV["LAZAR_ENV"] == "development" - require "../lazar/lib/lazar.rb" - require "../qsar-report/lib/qsar-report.rb" -else - require "lazar" - require "qsar-report" -end -=end - -include OpenTox - -before do - @accept = request.env['HTTP_ACCEPT'] - response['Content-Type'] = @accept -end - -not_found do - 400 - "Path '#{request.env["REQUEST_PATH"]}' not found.\n" -end - -error do - response['Content-Type'] = "text/plain" - error = request.env['sinatra.error'] - body = error.message+"\n" - error.respond_to?(:http_code) ? code = error.http_code : code = 500 - halt code, body -end - -# https://github.com/britg/sinatra-cross_origin#responding-to-options -options "*" do - response.headers["Allow"] = "HEAD,GET,PUT,POST,DELETE,OPTIONS" - response.headers["Access-Control-Allow-Headers"] = "X-Requested-With, X-HTTP-Method-Override, Content-Type, Cache-Control, Accept" - 200 -end - -[ - "aa.rb", - "api.rb", - "compound.rb", - "dataset.rb", - "feature.rb", - "model.rb", - "nanoparticle.rb", - "report.rb", - "substance.rb", - "swagger.rb", - "validation.rb" -].each{ |f| require_relative f } - diff --git a/lib/model.rb b/lib/model.rb index 3764ee2..42f3a95 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -1,4 +1,3 @@ - # Get a list of all prediction models # @param [Header] Accept one of text/uri-list, # @return [text/uri-list] list of all prediction models @@ -24,15 +23,129 @@ get "/model/:id/?" do return model.to_json end - post "/model/:id/?" do - identifier = params[:identifier].split(",") - compounds = identifier.collect{ |i| Compound.from_smiles i.strip.gsub(/\A"|"\Z/,'') } - model = Model::Validation.find params[:id] - batch = {} - compounds.each do |compound| + if request.content_type == "application/x-www-form-urlencoded" + identifier = params[:identifier].strip.gsub(/\A"|"\Z/,'') + compound = Compound.from_smiles identifier + model = Model::Validation.find params[:id] prediction = model.predict(compound) - batch[compound] = {:id => compound.id, :inchi => compound.inchi, :smiles => compound.smiles, :model => model, :prediction => prediction} + output = {:compound => {:id => compound.id, :inchi => compound.inchi, :smiles => compound.smiles}, + :model => model, + :prediction => prediction + } + return 200, output.to_json + elsif request.content_type =~ /^multipart\/form-data/ && request.content_length.to_i > 0 + @task = Task.new + @task.save + task = Task.run do + m = Model::Validation.find params[:id] + @task.update_percent(0.1) + dataset = Batch.from_csv_file params[:fileName][:tempfile] + compounds = dataset.compounds + $logger.debug compounds.size + identifiers = dataset.identifiers + ids = dataset.ids + type = (m.regression? ? "Regression" : "Classification") + # add header for regression + if type == "Regression" + unit = (type == "Regression") ? "(#{m.unit})" : "" + converted_unit = (type == "Regression") ? "#{m.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" + if ids.blank? + header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\ + "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\ + "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\ + "inApplicabilityDomain,Note\n" + else + header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\ + "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\ + "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\ + "inApplicabilityDomain,Note\n" + end + end + # add header for classification + if type == "Classification" + av = m.prediction_feature.accept_values + if ids.blank? + header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\ + "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n" + else + header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\ + "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n" + end + end + # predict compounds + p = 100.0/compounds.size + counter = 1 + predictions = [] + compounds.each_with_index do |cid,idx| + compound = Compound.find cid + #$logger.debug compound.inspect + if Prediction.where(compound: compound.id, model: m.id).exists? + prediction_object = Prediction.find_by(compound: compound.id, model: m.id) + prediction = prediction_object.prediction + prediction_id = prediction_object.id + # in case prediction object was created by single prediction + if prediction_object.csv.blank? + prediction_object[:csv] = prediction_to_csv(m,compound,prediction) + prediction_object.save + end + # identifier + identifier = identifiers[idx] + else + prediction = m.predict(compound) + # save prediction object + prediction_object = Prediction.new + prediction_id = prediction_object.id + prediction_object[:compound] = compound.id + prediction_object[:model] = m.id + # add additionally fields for html representation + unless prediction[:value].blank? || type == "Classification" + prediction[:prediction_value] = "#{prediction[:value].delog10.signif(3)} #{unit}" + prediction["converted_prediction_value"] = "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)} #{converted_unit}" + end + unless prediction[:prediction_interval].blank? + interval = prediction[:prediction_interval] + prediction[:interval] = "#{interval[1].delog10.signif(3)} - #{interval[0].delog10.signif(3)} #{unit}" + prediction[:converted_interval] = "#{compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{compound.mmol_to_mg(interval[0].delog10).signif(3)} #{converted_unit}" + end + prediction["unit"] = unit + prediction["converted_unit"] = converted_unit + if prediction[:measurements].is_a?(Array) + prediction["measurements_string"] = (type == "Regression") ? prediction[:measurements].collect{|value| "#{value.delog10.signif(3)} #{unit}"} : prediction[:measurements].join("
") + prediction["converted_measurements"] = prediction[:measurements].collect{|value| "#{compound.mmol_to_mg(value.delog10).signif(3)} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"} if type == "Regression" + else + output["measurements_string"] = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} #{unit}}" : prediction[:measurements] + output["converted_measurements"] = "#{compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if type == "Regression" + end + + # store in prediction_object + prediction_object[:prediction] = prediction + prediction_object[:csv] = prediction_to_csv(m,compound,prediction) + prediction_object.save + + # identifier + identifier = identifiers[idx] + end + # collect prediction_object ids with identifier + predictions << {"#{identifier}" => prediction_id} + $logger.debug predictions.inspect + @task.update_percent((counter*p).ceil > 100 ? 100 : (counter*p).ceil) + counter += 1 + end + # write csv + @task[:csv] = header + # write predictions + # save task + # append predictions as last action otherwise they won't save + # mongoid works with shallow copy via #dup + @task[:predictions] = {m.id.to_s => predictions} + @task[:dataset_id] = dataset.id + @task[:model_id] = m.id + @task.save + end#main task + tid = @task.id.to_s + return 202, to("/task/#{tid}").to_json + else + bad_request_error "No accepted content type" end - return batch.to_json end diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb deleted file mode 100644 index 332493d..0000000 --- a/lib/nanoparticle.rb +++ /dev/null @@ -1,30 +0,0 @@ -# Get all Nanoparticles -get "/nanoparticle/?" do - nanoparticles = Nanoparticle.all - case @accept - when "text/uri-list" - uri_list = nanoparticles.collect{|nanoparticle| uri("/nanoparticle/#{nanoparticle.id}")} - return uri_list.join("\n") + "\n" - when "application/json" - nanoparticles = JSON.parse nanoparticles.to_json - nanoparticles.each_index do |idx| - nanoparticles[idx][:URI] = uri("/nanoparticle/#{nanoparticles[idx]["_id"]["$oid"]}") - end - return nanoparticles.to_json - else - bad_request_error "Mime type #{@accept} is not supported." - end -end - -# Get a nanoparticle -get "/nanoparticle/:id/?" do - case @accept - when "application/json" - nanoparticle = Nanoparticle.find :id => params[:id] - not_found_error "Nanoparticle with id: #{params[:id]} not found." unless nanoparticle - nanoparticle[:URI] = uri("/nanoparticle/#{nanoparticle.id}") - return nanoparticle.to_json - else - bad_request_error "Mime type #{@accept} is not supported." - end -end diff --git a/lib/substance.rb b/lib/substance.rb index fef1b7e..f493714 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -6,24 +6,28 @@ get "/substance/?" do uri_list = substances.collect{|substance| uri("/substance/#{substance.id}")} return uri_list.join("\n") + "\n" when "application/json" - substances = JSON.parse substances.to_json - substances.each_index do |idx| - substances[idx][:URI] = uri("/substance/#{substances[idx]["_id"]["$oid"]}") - end - return substances.to_json + list = substances.collect{|substance| uri("/substance/#{substance.id}")} + substances = JSON.parse list.to_json + return JSON.pretty_generate substances else bad_request_error "Mime type #{@accept} is not supported." end end -# Get a substance +# Get a substance by ID get "/substance/:id/?" do case @accept when "application/json" - substance = Substance.find :id => params[:id] - not_found_error "Substance with id: #{params[:id]} not found." unless substance - substance[:URI] = uri("/substance/#{substance.id}") - return substance.to_json + mongoid = /^[a-f\d]{24}$/i + halt 400, "Input #{params[:id]} is no valid ID.".to_json unless params[:id].match(mongoid) + substance = Substance.find params[:id] + if substance + out = {"compound": {"id": substance.id, "inchi": substance.inchi, "smiles": substance.smiles, "warnings": substance.warnings}} + response['Content-Type'] = @accept + return JSON.pretty_generate JSON.parse(out.to_json) + else + halt 400, "Substance with ID #{input} not found." + end else bad_request_error "Mime type #{@accept} is not supported." end -- cgit v1.2.3