From 9750e0309500259e9a56e267ce87984fb5bb5e53 Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Mon, 26 Nov 2018 15:29:26 +0000
Subject: clean out; better response codes; prepare for batch

---
 lib/aa.rb           |  82 --------------------------------
 lib/compound.rb     |  48 ++++++++++---------
 lib/dataset.rb      | 122 ++++++++++++++++++++++++++++++++++++++++++------
 lib/lazar-rest.rb   |  69 ---------------------------
 lib/model.rb        | 131 ++++++++++++++++++++++++++++++++++++++++++++++++----
 lib/nanoparticle.rb |  30 ------------
 lib/substance.rb    |  24 ++++++----
 7 files changed, 270 insertions(+), 236 deletions(-)
 delete mode 100644 lib/aa.rb
 delete mode 100644 lib/lazar-rest.rb
 delete mode 100644 lib/nanoparticle.rb

(limited to 'lib')

diff --git a/lib/aa.rb b/lib/aa.rb
deleted file mode 100644
index 6dfec4b..0000000
--- a/lib/aa.rb
+++ /dev/null
@@ -1,82 +0,0 @@
-post "/aa/authenticate/?" do
-  mime_types = ["text/plain"]
-  bad_request_error "Mime type #{@accept} not supported here. Please request data as  #{mime_types.join(', ')}." unless mime_types.include? @accept
-  bad_request_error "Please send formdata username." unless params[:username]
-  bad_request_error "Please send formdata password." unless params[:password]
-  case @accept
-  when "text/plain"
-    if OpenTox::Authorization.authenticate(params[:username], params[:password])
-      return OpenTox::RestClientWrapper.subjectid
-    else
-      return nil
-    end
-  else
-    bad_request_error "'#{@accept}' is not a supported content type."
-   end
-end
-
-post "/aa/logout/?" do
-  mime_types = ["text/plain"]
-  bad_request_error "Mime type #{@accept} not supported here. Please request data as  #{mime_types.join(', ')}." unless mime_types.include? @accept
-  bad_request_error "Please send formdata subjectid." unless params[:subjectid]
-  case @accept
-  when "text/plain"
-    if OpenTox::Authorization.logout(params[:subjectid])
-      return "Successfully logged out. \n"
-    else
-      return "Logout failed.\n"
-    end
-  else
-    bad_request_error "'#{@accept}' is not a supported content type."
-   end
-end
-
-module OpenTox
-
-  AA = "https://opensso.in-silico.ch"
-  
-  module Authorization
-    #Authentication against OpenSSO. Returns token. Requires Username and Password.
-    # @param user [String] Username
-    # @param pw [String] Password
-    # @return [Boolean] true if successful
-    def self.authenticate(user, pw)
-      begin
-        res = RestClientWrapper.post("#{AA}/auth/authenticate",{:username=>user, :password => pw},{:subjectid => ""}).sub("token.id=","").sub("\n","")
-        if is_token_valid(res)
-          RestClientWrapper.subjectid = res
-          return true
-        else
-          bad_request_error "Authentication failed #{res.inspect}"
-        end
-      rescue
-        bad_request_error "Authentication failed #{res.inspect}"
-      end
-    end
-
-    #Logout on opensso. Make token invalid. Requires token
-    # @param [String] subjectid the subjectid
-    # @return [Boolean] true if logout is OK
-    def self.logout(subjectid=RestClientWrapper.subjectid)
-      begin
-        out = RestClientWrapper.post("#{AA}/auth/logout", :subjectid => subjectid)
-        return true unless is_token_valid(subjectid)
-      rescue
-        return false
-      end
-      return false
-    end
-
-    #Checks if a token is a valid token
-    # @param [String]subjectid subjectid from openSSO session
-    # @return [Boolean] subjectid is valid or not.
-    def self.is_token_valid(subjectid=RestClientWrapper.subjectid)
-      begin
-        return true if RestClientWrapper.post("#{AA}/auth/isTokenValid",:tokenid => subjectid) == "boolean=true\n"
-      rescue #do rescue because openSSO throws 401
-        return false
-      end
-      return false
-    end
-  end
-end
\ No newline at end of file
diff --git a/lib/compound.rb b/lib/compound.rb
index 01ba036..77948ab 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -38,27 +38,31 @@ post "/compound/descriptor/?" do
   end
 end
 
-get %r{/compound/(.+)} do |inchi|
-  bad_request_error "Input parameter #{inchi} is not an InChI" unless inchi.match(/^InChI=/)
-  compound = Compound.from_inchi URI.unescape(inchi)
-  response['Content-Type'] = @accept
-  case @accept
-  when "application/json"
-    return JSON.pretty_generate JSON.parse(compound.to_json)
-  when "chemical/x-daylight-smiles"
-    return compound.smiles
-  when "chemical/x-inchi"
-    return compound.inchi
-  when "chemical/x-mdl-sdfile"
-    return compound.sdf
-  when "chemical/x-mdl-molfile"
-  when "image/png"
-    return compound.png
-  when "image/svg+xml"
-    return compound.svg
-  when "text/plain"
-    return "#{compound.names}\n"
+get %r{/compound/(InChI.+)} do |input|
+  compound = Compound.from_inchi URI.unescape(input)
+  if compound
+    response['Content-Type'] = @accept
+    case @accept
+    when "application/json"
+      c = {"compound": {"id": compound.id, "inchi": compound.inchi, "smiles": compound.smiles, "warnings": compound.warnings}}
+      return JSON.pretty_generate JSON.parse(c.to_json)
+    when "chemical/x-daylight-smiles"
+      return compound.smiles
+    when "chemical/x-inchi"
+      return compound.inchi
+    when "chemical/x-mdl-sdfile"
+      return compound.sdf
+    when "chemical/x-mdl-molfile"
+    when "image/png"
+      return compound.png
+    when "image/svg+xml"
+      return compound.svg
+    #when "text/plain"
+      #return "#{compound.names}\n"
+    else
+      halt 400, "Content type #{@accept} not supported."
+    end
   else
-    return compound.inspect
+    halt 400, "Compound with #{input} not found.".to_json
   end
-end
\ No newline at end of file
+end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 7c74f39..749167b 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -17,28 +17,122 @@ end
 
 # Get a dataset
 get "/dataset/:id/?" do
-  dataset = Dataset.find :id => params[:id]
-  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
-  case @accept
-  when "application/json"
-    dataset.data_entries.each do |k, v|
-      dataset.data_entries[k][:URI] = uri("/substance/#{k}")
+  if Task.where(id: params[:id]).exists?
+    task = Task.find params[:id]
+    halt 404, "Dataset with id: #{params[:id]} not found." unless task.percent == 100
+    $logger.debug task.inspect
+    response['Content-Type'] = "text/csv"
+    m = Model::Validation.find task.model_id
+    dataset = Batch.find task.dataset_id
+    @ids = dataset.ids
+    warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n")
+    unless warnings.nil?
+      @parse = []
+      warnings.split("\n").each do |warning|
+        if warning =~ /^Cannot/
+          smi = warning.split("SMILES compound").last.split("at").first
+          line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i
+          @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n"
+        end
+      end
+      keys_array = []
+      warnings.split("\n").each do |warning|
+        if warning =~ /^Duplicate/
+          # IDs follow the last "ID" in the warning and are separated by "and"
+          numbers = warning.split("ID").last.split("and")
+          keys_array << numbers.collect{|n| n.strip.to_i}
+        end
+      end
+      @dups = {}
+      keys_array.each do |keys|
+        keys.each do |key|
+          @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n"
+        end
+      end
+    end
+    $logger.debug "dups: #{@dups}"
+    endpoint = "#{m.endpoint}_(#{m.species})"
+    tempfile = Tempfile.new
+    header = task.csv
+    lines = []
+    $logger.debug task.predictions
+    task.predictions[m.id.to_s].each_with_index do |hash,idx|
+      identifier = hash.keys[0]
+      prediction_id = hash.values[0]
+      # add duplicate warning at the end of a line if the 1-based row ID matches
+      if @dups && @dups[idx+1] # @dups is only set when the dataset has warnings
+        if prediction_id.is_a? BSON::ObjectId
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
+          end
+        else
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},\n"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},\n"
+          end
+        end
+      else
+        if prediction_id.is_a? BSON::ObjectId
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}"
+          end
+        else
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},\n"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},\n"
+          end
+        end
+      end
     end
-    dataset[:URI] = uri("/dataset/#{dataset.id}")
-    dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
-    dataset[:features] = uri("/dataset/#{dataset.id}/features")
-    return dataset.to_json
-  when "text/csv", "application/csv"
-    return dataset.to_csv
+    (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join(""))
+    #tempfile.write(header+lines.join(""))
+    tempfile.rewind
+    ########################
+=begin
+    header = task.csv
+    lines = []
+    task.predictions.each_with_index do |result,idx|
+      identifier = result[0]
+      prediction_id = result[1]
+      prediction = Prediction.find prediction_id
+      lines << "#{idx+1},#{identifier},#{prediction.csv.tr("\n","")}"
+    end
+    return header+lines.join("\n")
+=end
+    return tempfile.read.tap { tempfile.close! } # close and unlink the tempfile once its content is read
   else
-    bad_request_error "Mime type #{@accept} is not supported."
+    dataset = Dataset.find :id => params[:id]
+    halt 400, "Dataset with id: #{params[:id]} not found." unless dataset
+    case @accept
+    when "application/json"
+      dataset.data_entries.each do |k, v|
+        dataset.data_entries[k][:URI] = uri("/substance/#{k}")
+      end
+      dataset[:URI] = uri("/dataset/#{dataset.id}")
+      dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
+      dataset[:features] = uri("/dataset/#{dataset.id}/features")
+      return dataset.to_json
+    when "text/csv", "application/csv"
+      return dataset.to_csv
+    else
+      bad_request_error "Mime type #{@accept} is not supported."
+    end
   end
 end
 
 # Get a dataset attribute. One of compounds, nanoparticles, substances, features 
 get "/dataset/:id/:attribute/?" do
+  if Task.where(id: params[:id]).exists?
+    halt 400, "No attributes selection available for dataset with id: #{params[:id]}.".to_json
+  end
   dataset = Dataset.find :id => params[:id]
-  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
+  halt 400,  "Dataset with id: #{params[:id]} not found." unless dataset
   attribs = ["compounds", "nanoparticles", "substances", "features"]
   return "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute]
   out = dataset.send("#{params[:attribute]}")
diff --git a/lib/lazar-rest.rb b/lib/lazar-rest.rb
deleted file mode 100644
index 255c52f..0000000
--- a/lib/lazar-rest.rb
+++ /dev/null
@@ -1,69 +0,0 @@
-require "sinatra"
-require "sinatra/reloader"
-require 'sinatra/cross_origin'
-
-configure do
-  $logger = Logger.new(STDOUT)
-  enable :reloader #if development?
-  enable :cross_origin
-  disable :show_exceptions
-  disable :raise_errors
-end
-
-#set :protection, :except => :frame_options
-
-# Environment setup from unicorn -E param
-ENV["LAZAR_ENV"] = ENV["RACK_ENV"]
-require "../lazar/lib/lazar.rb"
-require "../qsar-report/lib/qsar-report.rb"
-=begin
-if ENV["LAZAR_ENV"] == "development"
-  require "../lazar/lib/lazar.rb"
-  require "../qsar-report/lib/qsar-report.rb"
-else
-  require "lazar"
-  require "qsar-report"
-end
-=end
-
-include OpenTox
-
-before do
-  @accept = request.env['HTTP_ACCEPT']
-  response['Content-Type'] = @accept
-end
-
-not_found do
-  400
-  "Path '#{request.env["REQUEST_PATH"]}' not found.\n"
-end
-
-error do
-  response['Content-Type'] = "text/plain"
-  error = request.env['sinatra.error']
-  body = error.message+"\n"
-  error.respond_to?(:http_code) ? code = error.http_code : code = 500
-  halt code, body
-end
-
-# https://github.com/britg/sinatra-cross_origin#responding-to-options
-options "*" do
-  response.headers["Allow"] = "HEAD,GET,PUT,POST,DELETE,OPTIONS"
-  response.headers["Access-Control-Allow-Headers"] = "X-Requested-With, X-HTTP-Method-Override, Content-Type, Cache-Control, Accept"
-  200
-end
-
-[
-  "aa.rb",
-  "api.rb",
-  "compound.rb",
-  "dataset.rb",
-  "feature.rb",
-  "model.rb",
-  "nanoparticle.rb",
-  "report.rb",
-  "substance.rb",
-  "swagger.rb",
-  "validation.rb"
-].each{ |f| require_relative f }
-
diff --git a/lib/model.rb b/lib/model.rb
index 3764ee2..42f3a95 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -1,4 +1,3 @@
-
 # Get a list of all prediction models
 # @param [Header] Accept one of text/uri-list,
 # @return [text/uri-list] list of all prediction models
@@ -24,15 +23,129 @@ get "/model/:id/?" do
   return model.to_json
 end
 
-
 post "/model/:id/?" do
-  identifier = params[:identifier].split(",")
-  compounds = identifier.collect{ |i| Compound.from_smiles i.strip.gsub(/\A"|"\Z/,'') }
-  model = Model::Validation.find params[:id]
-  batch = {}
-  compounds.each do |compound|
+  if request.content_type == "application/x-www-form-urlencoded"
+    identifier = params[:identifier].strip.gsub(/\A"|"\Z/,'')
+    compound = Compound.from_smiles identifier
+    model = Model::Validation.find params[:id]
     prediction = model.predict(compound)
-    batch[compound] = {:id => compound.id, :inchi => compound.inchi, :smiles => compound.smiles, :model => model, :prediction => prediction}
+    output = {:compound => {:id => compound.id, :inchi => compound.inchi, :smiles => compound.smiles},
+              :model => model,
+              :prediction => prediction
+    }
+    return 200, output.to_json
+  elsif request.content_type =~ /^multipart\/form-data/ && request.content_length.to_i > 0
+    @task = Task.new
+    @task.save
+    task = Task.run do
+      m = Model::Validation.find params[:id]
+      @task.update_percent(0.1)
+      dataset = Batch.from_csv_file params[:fileName][:tempfile]
+      compounds = dataset.compounds
+      $logger.debug compounds.size
+      identifiers = dataset.identifiers
+      ids = dataset.ids
+      type = (m.regression? ? "Regression" : "Classification")
+      # add header for regression
+      if type == "Regression"
+        unit = (type == "Regression") ? "(#{m.unit})" : ""
+        converted_unit = (type == "Regression") ? "#{m.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : ""
+        if ids.blank?
+          header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\
+          "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\
+          "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\
+          "inApplicabilityDomain,Note\n"
+        else
+          header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\
+          "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\
+          "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\
+          "inApplicabilityDomain,Note\n"
+        end
+      end
+      # add header for classification
+      if type == "Classification"
+        av = m.prediction_feature.accept_values
+        if ids.blank?
+          header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\
+          "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n"
+        else
+          header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\
+          "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n"
+        end
+      end
+      # predict compounds
+      p = 100.0/compounds.size
+      counter = 1
+      predictions = []
+      compounds.each_with_index do |cid,idx|
+        compound = Compound.find cid
+        #$logger.debug compound.inspect
+        if Prediction.where(compound: compound.id, model: m.id).exists?
+          prediction_object = Prediction.find_by(compound: compound.id, model: m.id)
+          prediction = prediction_object.prediction
+          prediction_id = prediction_object.id
+          # in case prediction object was created by single prediction
+          if prediction_object.csv.blank?
+            prediction_object[:csv] = prediction_to_csv(m,compound,prediction)
+            prediction_object.save
+          end
+          # identifier
+          identifier = identifiers[idx]
+        else
+          prediction = m.predict(compound)
+          # save prediction object
+          prediction_object = Prediction.new
+          prediction_id = prediction_object.id
+          prediction_object[:compound] = compound.id
+          prediction_object[:model] = m.id
+          # add additional fields for html representation
+          unless prediction[:value].blank? || type == "Classification"
+            prediction[:prediction_value] = "#{prediction[:value].delog10.signif(3)} #{unit}"
+            prediction["converted_prediction_value"] = "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)} #{converted_unit}"
+          end
+          unless prediction[:prediction_interval].blank?
+            interval = prediction[:prediction_interval]
+            prediction[:interval] = "#{interval[1].delog10.signif(3)} - #{interval[0].delog10.signif(3)} #{unit}"
+            prediction[:converted_interval] = "#{compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{compound.mmol_to_mg(interval[0].delog10).signif(3)} #{converted_unit}"
+          end
+          prediction["unit"] = unit
+          prediction["converted_unit"] = converted_unit
+          if prediction[:measurements].is_a?(Array)
+            prediction["measurements_string"] = (type == "Regression") ? prediction[:measurements].collect{|value| "#{value.delog10.signif(3)} #{unit}"} : prediction[:measurements].join("</br>")
+            prediction["converted_measurements"] = prediction[:measurements].collect{|value| "#{compound.mmol_to_mg(value.delog10).signif(3)} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"} if type == "Regression"
+          else
+            prediction["measurements_string"] = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} #{unit}" : prediction[:measurements]
+            prediction["converted_measurements"] = "#{compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if type == "Regression"
+          end
+
+          # store in prediction_object
+          prediction_object[:prediction] = prediction
+          prediction_object[:csv] = prediction_to_csv(m,compound,prediction)
+          prediction_object.save
+
+          # identifier
+          identifier = identifiers[idx]
+        end
+        # collect prediction_object ids with identifier
+        predictions << {"#{identifier}" => prediction_id}
+        $logger.debug predictions.inspect
+        @task.update_percent((counter*p).ceil > 100 ? 100 : (counter*p).ceil)
+        counter += 1
+      end
+      # write csv
+      @task[:csv] = header
+      # write predictions
+      # save task
+      # append predictions as last action otherwise they won't save
+      # mongoid works with shallow copy via #dup
+      @task[:predictions] = {m.id.to_s => predictions}
+      @task[:dataset_id] = dataset.id
+      @task[:model_id] = m.id
+      @task.save
+    end#main task
+    tid = @task.id.to_s
+    return 202, to("/task/#{tid}").to_json
+  else
+    bad_request_error "No accepted content type"
   end
-  return batch.to_json
 end
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
deleted file mode 100644
index 332493d..0000000
--- a/lib/nanoparticle.rb
+++ /dev/null
@@ -1,30 +0,0 @@
-# Get all Nanoparticles
-get "/nanoparticle/?" do
-  nanoparticles = Nanoparticle.all
-  case @accept
-  when "text/uri-list"
-    uri_list = nanoparticles.collect{|nanoparticle| uri("/nanoparticle/#{nanoparticle.id}")}
-    return uri_list.join("\n") + "\n"
-  when "application/json"
-    nanoparticles = JSON.parse nanoparticles.to_json
-    nanoparticles.each_index do |idx|
-      nanoparticles[idx][:URI] = uri("/nanoparticle/#{nanoparticles[idx]["_id"]["$oid"]}")
-    end
-    return nanoparticles.to_json
-  else
-    bad_request_error "Mime type #{@accept} is not supported."
-  end
-end
-
-# Get a nanoparticle
-get "/nanoparticle/:id/?" do
-  case @accept
-  when "application/json"
-    nanoparticle = Nanoparticle.find :id => params[:id]
-    not_found_error "Nanoparticle with id: #{params[:id]} not found." unless nanoparticle
-    nanoparticle[:URI] = uri("/nanoparticle/#{nanoparticle.id}")
-    return nanoparticle.to_json
-  else
-    bad_request_error "Mime type #{@accept} is not supported."
-  end
-end
diff --git a/lib/substance.rb b/lib/substance.rb
index fef1b7e..f493714 100644
--- a/lib/substance.rb
+++ b/lib/substance.rb
@@ -6,24 +6,28 @@ get "/substance/?" do
     uri_list = substances.collect{|substance| uri("/substance/#{substance.id}")}
     return uri_list.join("\n") + "\n"
   when "application/json"
-    substances = JSON.parse substances.to_json
-    substances.each_index do |idx|
-      substances[idx][:URI] = uri("/substance/#{substances[idx]["_id"]["$oid"]}")
-    end
-    return substances.to_json
+    list = substances.collect{|substance| uri("/substance/#{substance.id}")}
+    substances = JSON.parse list.to_json
+    return JSON.pretty_generate substances
   else
     bad_request_error "Mime type #{@accept} is not supported."
   end
 end
 
-# Get a substance
+# Get a substance by ID; responds 400 for a malformed or unknown ID
 get "/substance/:id/?" do
   case @accept
   when "application/json"
-    substance = Substance.find :id => params[:id]
-    not_found_error "Substance with id: #{params[:id]} not found." unless substance
-    substance[:URI] = uri("/substance/#{substance.id}")
-    return substance.to_json
+    mongoid = /\A[a-f\d]{24}\z/i
+    halt 400, "Input #{params[:id]} is no valid ID.".to_json unless params[:id].match(mongoid)
+    substance = Substance.find params[:id]
+    if substance
+      out = {"compound": {"id": substance.id, "inchi": substance.inchi, "smiles": substance.smiles, "warnings": substance.warnings}}
+      response['Content-Type'] = @accept
+      return JSON.pretty_generate JSON.parse(out.to_json)
+    else
+      halt 400, "Substance with ID #{params[:id]} not found.".to_json
+    end
   else
     bad_request_error "Mime type #{@accept} is not supported."
   end
-- 
cgit v1.2.3