diff options
author | Christoph Helma <helma@in-silico.ch> | 2019-09-03 13:45:36 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2019-09-03 13:45:36 +0200 |
commit | d1032e4f40d9fbb212e85e0db4f0ecd2e8ac9a88 (patch) | |
tree | 48922d60d750839dacd5d0a4a6e50ea3fe68da63 /application.rb | |
parent | 5bb4c24c6cfc1ddfae14eb9543b283baae2d75be (diff) | |
parent | a84d9eabf1b921086a688f81df28b0f21ba4df19 (diff) |
development merged, git links in FAQ.md fixed (tag: 1.4.0)
Diffstat (limited to 'application.rb')
-rw-r--r-- | application.rb | 471 |
1 files changed, 270 insertions, 201 deletions
diff --git a/application.rb b/application.rb index 895b0c2..e23c042 100644 --- a/application.rb +++ b/application.rb @@ -1,20 +1,59 @@ require 'rdiscount' require_relative 'qmrf_report.rb' +require_relative 'task.rb' +require_relative 'helper.rb' include OpenTox +PUBCHEM_CID_URI = PUBCHEM_URI.split("/")[0..-3].join("/")+"/compound/" +[ + "api.rb", + "compound.rb", + "dataset.rb", + "endpoint.rb", + "feature.rb", + "model.rb", + "report.rb", + "substance.rb", + "swagger.rb", + "validation.rb" +].each{ |f| require_relative "./lib/#{f}" } configure :production do + STDOUT.sync = true $logger = Logger.new(STDOUT) - enable :reloader end configure :development do + STDOUT.sync = true $logger = Logger.new(STDOUT) - enable :reloader + $logger.level = Logger::DEBUG end before do - @version = File.read("VERSION").chomp + # use this hostname method instead to('/') + # allowes to set https for xhr requests + #$host_with_port = request.host =~ /localhost/ ? request.host_with_port : request.host + $host_with_port = request.host_with_port + $paths = [ + "api", + "compound", + "dataset", + "endpoint", + "feature", + "model", + "report", + "substance", + "swagger", + "validation"] + if request.path =~ /predict/ + @accept = request.env['HTTP_ACCEPT'].split(",").first + response['Content-Type'] = @accept + halt 400, "Mime type #{@accept} is not supported." unless @accept == "text/html" or @accept == "*/*" + @version = File.read("VERSION").chomp + else + @accept = request.env['HTTP_ACCEPT'].split(",").first + response['Content-Type'] = @accept + end end not_found do @@ -22,258 +61,279 @@ not_found do end error do - @error = request.env['sinatra.error'] - haml :error + # API errors + if request.path.split("/")[1] == "api" || $paths.include?(request.path.split("/")[2]) + @accept = request.env['HTTP_ACCEPT'] + response['Content-Type'] = @accept + @accept == "text/plain" ? 
request.env['sinatra.error'] : request.env['sinatra.error'].to_json + # batch dataset error + elsif request.env['sinatra.error.params']['batchfile'] && request.env['REQUEST_METHOD'] == "POST" + @error = request.env['sinatra.error'] + response['Content-Type'] = "text/html" + status 200 + return haml :error + # basic error + else + @error = request.env['sinatra.error'] + return haml :error + end end -get '/?' do - redirect to('/predict') +# https://github.com/britg/sinatra-cross_origin#responding-to-options +options "*" do + response.headers["Allow"] = "HEAD,GET,PUT,POST,DELETE,OPTIONS" + response.headers["Access-Control-Allow-Headers"] = "X-Requested-With, X-HTTP-Method-Override, Content-Type, Cache-Control, Accept" + 200 end get '/predict/?' do + # handle user click on back button while batch prediction + if params[:tpid] + begin + Process.kill(9,params[:tpid].to_i) if !params[:tpid].blank? + rescue + nil + end + # remove data helper method + remove_task_data(params[:tpid]) + end + # regular request on '/predict' page @models = OpenTox::Model::Validation.all - @models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/} @endpoints = @models.collect{|m| m.endpoint}.sort.uniq - if @models.count > 0 - rodent_index = 0 - @models.each_with_index{|model,idx| rodent_index = idx if model.species =~ /Rodent/} - @models.insert(rodent_index-1,@models.delete_at(rodent_index)) - end @models.count > 0 ? (haml :predict) : (haml :info) end get '/predict/modeldetails/:model' do model = OpenTox::Model::Validation.find params[:model] - crossvalidations = OpenTox::Validation::RepeatedCrossValidation.find(model.repeated_crossvalidation_id).crossvalidations + training_dataset = model.model.training_dataset + data_entries = training_dataset.data_entries + crossvalidations = model.crossvalidations + if model.classification? 
+ crossvalidations.each do |cv| + File.open(File.join('public', "#{cv.id}.png"), 'w') do |file| + file.write(cv.probability_plot(format: "png")) + end unless File.exists? File.join('public', "#{cv.id}.png") + end + else + crossvalidations.each do |cv| + File.open(File.join('public', "#{cv.id}.png"), 'w') do |file| + file.write(cv.correlation_plot(format: "png")) + end unless File.exists? File.join('public', "#{cv.id}.png") + end + end - return haml :model_details, :layout=> false, :locals => {:model => model, :crossvalidations => crossvalidations} + response['Content-Type'] = "text/html" + return haml :model_details, :layout=> false, :locals => {:model => model, + :crossvalidations => crossvalidations, + :training_dataset => training_dataset, + :data_entries => data_entries + } end -# get individual compound details -get '/prediction/:neighbor/details/?' do - @compound = OpenTox::Compound.find params[:neighbor] - @smiles = @compound.smiles - begin - @names = @compound.names.nil? ? "No names for this compound available." : @compound.names - rescue - @names = "No names for this compound available." - end - @inchi = @compound.inchi.gsub("InChI=", "") - - haml :details, :layout => false +get "/predict/report/:id/?" do + prediction_model = Model::Validation.find params[:id] + bad_request_error "model with id: '#{params[:id]}' not found." unless prediction_model + report = qmrf_report params[:id] + # output + t = Tempfile.new + t << report.to_xml + name = prediction_model.species.sub(/\s/,"-")+"-"+prediction_model.endpoint.downcase.sub(/\s/,"-") + send_file t.path, :filename => "QMRF_report_#{name.gsub!(/[^0-9A-Za-z]/, '_')}.xml", :type => "application/xml", :disposition => "attachment" end -get '/jme_help/?' do +get '/predict/jme_help/?' 
do File.read(File.join('views','jme_help.html')) end +# download training dataset get '/predict/dataset/:name' do - response['Content-Type'] = "text/csv" dataset = Dataset.find_by(:name=>params[:name]) - csv = dataset.to_csv - csv + csv = File.read dataset.source + name = params[:name] + ".csv" + t = Tempfile.new + t << csv + t.rewind + response['Content-Type'] = "text/csv" + send_file t.path, :filename => name, :type => "text/csv", :disposition => "attachment" end -get '/predict/:tmppath/:filename/?' do +# download batch predicton file +get '/predict/batch/download/?' do + task = Task.find params[:tid] + dataset = Dataset.find task.dataset_id + name = dataset.name + ".csv" + t = Tempfile.new + # to_prediction_csv takes too much time; use task.csv instead which is the same + #t << dataset.to_prediction_csv + t << task.csv + t.rewind response['Content-Type'] = "text/csv" - path = "/tmp/#{params[:tmppath]}" - send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment" + send_file t.path, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{name}", :type => "text/csv", :disposition => "attachment" end post '/predict/?' do - # process batch prediction - if !params[:fileselect].blank? + unless params[:fileselect].blank? if params[:fileselect][:filename] !~ /\.csv$/ - bad_request_error "Please submit a csv file." - end - File.open('tmp/' + params[:fileselect][:filename], "w") do |f| - f.write(params[:fileselect][:tempfile].read) + raise "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a CSV file." end @filename = params[:fileselect][:filename] - begin - input = OpenTox::Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename]), true - if input.class == OpenTox::Dataset - dataset = OpenTox::Dataset.find input - else - bad_request_error "Could not serialize file '#{@filename}'." 
- end - rescue - bad_request_error "Could not serialize file '#{@filename}'." - end - @compounds = dataset.compounds - if @compounds.size == 0 - message = dataset[:warnings] - dataset.delete - bad_request_error message + File.open('tmp/' + @filename, "w") do |f| + f.write(params[:fileselect][:tempfile].read) end + # check CSV structure by parsing and header check + csv = CSV.read File.join("tmp", @filename) + header = csv.shift + accepted = ["SMILES","InChI"] + raise "CSV header does not include 'SMILES' or 'InChI'. Please read the <a href='https://dg.in-silico.ch/predict/help' rel='external'> HELP </a> page." unless header.any?(/smiles|inchi/i) + @models = params[:selection].keys.join(",") + return haml :upload + end - # for csv export - @batch = {} - # for haml table - @view = {} + unless params[:batchfile].blank? + dataset = Dataset.from_csv_file File.join("tmp", params[:batchfile]) + raise "No compounds in Dataset. Please read the <a href='https://dg.in-silico.ch/predict/help' rel='external'> HELP </a> page." if dataset.compounds.size == 0 + response['Content-Type'] = "application/json" + return {:dataset_id => dataset.id.to_s, :models => params[:models]}.to_json + end - @compounds.each{|c| @view[c] = []} - params[:selection].keys.each do |model_id| - model = OpenTox::Model::Validation.find model_id - @batch[model] = [] - @compounds.each_with_index do |compound,idx| - prediction = model.predict(compound) - @batch[model] << [compound, prediction] - @view[compound] << [model,prediction] - end - end + unless params[:models].blank? + dataset = Dataset.find params[:dataset_id] + @compounds_size = dataset.compounds.size + @models = params[:models].split(",") + @tasks = [] + @models.each{|m| t = Task.new; t.save; @tasks << t} + @predictions = {} - @csvhash = {} - @warnings = dataset[:warnings] - dupEntries = {} - delEntries = "" - - # split duplicates and deleted entries - @warnings.each do |w| - substring = w.match(/line .* of/) - unless substring.nil? 
- delEntries += "\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n" - end - substring = w.match(/rows .* Entries/) - unless substring.nil? - lines = [] - substring[0].split(",").each{|s| lines << s[/\d+/]} - lines.shift - lines.each{|l| dupEntries[l.to_i] = w.split(".").first} + maintask = Task.run do + @models.each_with_index do |model_id,idx| + t = @tasks[idx] + t.update_percent(1) + prediction = {} + model = Model::Validation.find model_id + t.update_percent(10) + prediction_dataset = model.predict dataset + t.update_percent(70) + t[:dataset_id] = prediction_dataset.id + t.update_percent(75) + prediction[model_id] = prediction_dataset.id.to_s + t.update_percent(80) + t[:predictions] = prediction + t.update_percent(90) + t[:csv] = prediction_dataset.to_prediction_csv + t.update_percent(100) + t.save end end - - @batch.each_with_index do |hash, idx| - @csvhash[idx] = "" - model = hash[0] - # create header - if model.regression? - predAunit = "(#{model.unit})" - predBunit = "(#{model.unit =~ /mmol\/L/ ? "(mol/L)" : "(mg/kg_bw/day)"})" - @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction #{predAunit}\",\"Prediction #{predBunit}\",\"95% Prediction interval (low) #{predAunit}\",\"95% Prediction interval (high) #{predAunit}\",\"95% Prediction interval (low) #{predBunit}\",\"95% Prediction interval (high) #{predBunit}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n" - else #classification - av = model.prediction_feature.accept_values - probFirst = av[0].capitalize - probLast = av[1].capitalize - @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n" + maintask[:subTasks] = @tasks.collect{|t| t.id} + maintask.save + @pid = maintask.pid + response['Content-Type'] = "text/html" + return haml :batch + else + # single compound prediction + # validate identifier input + if !params[:identifier].blank? 
+ @identifier = params[:identifier].strip + $logger.debug "input:#{@identifier}" + # get compound from SMILES + begin + @compound = Compound.from_smiles @identifier + rescue + @error = "'#{@identifier}' is not a valid SMILES string." unless @compound + return haml :error end - values = hash[1] - dupEntries.keys.each{|k| values.insert(k-1, dupEntries[k])}.compact! - - values.each_with_index do |array, id| - type = (model.regression? ? "Regression" : "Classification") - endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})" - - if id == 0 - @csvhash[idx] += delEntries unless delEntries.blank? - end - unless array.kind_of? String - compound = array[0] - prediction = array[1] - smiles = compound.smiles - - if prediction[:neighbors] - if prediction[:value] - pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value] - predA = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value] - predAunit = prediction[:value].numeric? ? "(#{model.unit})" : "" - predB = prediction[:value].numeric? ? "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" : prediction[:value] - predBunit = prediction[:value].numeric? ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" - int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval]) - intervalLow = (int.nil? ? "" : "#{int[1].delog10.signif(3)}") - intervalHigh = (int.nil? ? "" : "#{int[0].delog10.signif(3)}") - intervalLowMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[1].delog10).signif(3)}") - intervalHighMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[0].delog10).signif(3)}") - inApp = "yes" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - - unless prediction[:probabilities].nil? 
- av = model.prediction_feature.accept_values - propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}" - propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}" - end - else - # no prediction value only one neighbor - inApp = "no" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - end - else - # no prediction value - inApp = "no" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - end - if @warnings - @warnings.each do |w| - note += (w.split(".").first + ".") if /\b(#{Regexp.escape(smiles)})\b/ === w - end - end - else - # string note for duplicates - endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = "" - note = array - end - if model.regression? - @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{predA}\",\"#{predB}\",\"#{intervalLow}\",\"#{intervalHigh}\",\"#{intervalLowMg}\",\"#{intervalHighMg}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n" - else - @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n" - end + @models = [] + @predictions = [] + params[:selection].keys.each do |model_id| + model = Model::Validation.find model_id + @models << model + prediction = model.predict(@compound) + @predictions << prediction end + haml :prediction end - t = Tempfile.new - @csvhash.each do |model, csv| - t.write(csv) - t.write("\n") - end - t.rewind - @tmppath = t.path.split("/").last - - dataset.delete - File.delete File.join("tmp", params[:fileselect][:filename]) - return haml :batch end +end - # validate identifier input - if !params[:identifier].blank? 
- @identifier = params[:identifier] - $logger.debug "input:#{@identifier}" - # get compound from SMILES - @compound = Compound.from_smiles @identifier - bad_request_error "'#{@identifier}' is not a valid SMILES string." if @compound.blank? - - @models = [] - @predictions = [] - params[:selection].keys.each do |model_id| - model = OpenTox::Model::Validation.find model_id - @models << model - @predictions << model.predict(@compound) +get '/prediction/task/?' do + # returns task progress in percent + if params[:turi] + task = Task.find(params[:turi].to_s) + response['Content-Type'] = "application/json" + return JSON.pretty_generate(:percent => task.percent) + # kills task process id + elsif params[:ktpid] + begin + Process.kill(9,params[:ktpid].to_i) if !params[:ktpid].blank? + rescue + nil + end + #remove_task_data(params[:ktpid]) deletes also the source file + response['Content-Type'] = "application/json" + return JSON.pretty_generate(:ktpid => params[:ktpid]) + # returns task details + elsif params[:predictions] + task = Task.find(params[:predictions]) + pageSize = params[:pageSize].to_i - 1 + pageNumber= params[:pageNumber].to_i - 1 + csv = CSV.parse(task.csv) + header = csv.shift + string = "<td><table class=\"table table-bordered\">" + # find canonical smiles column + cansmi = 0 + header.each_with_index do |h,idx| + cansmi = idx if h =~ /Canonical SMILES/ + string += "<th class=\"fit\">#{h}</th>" + end + string += "</tr>" + string += "<tr>" + csv[pageNumber].each_with_index do |line,idx| + if idx == cansmi + c = Compound.from_smiles line + string += "<td class=\"fit\">#{line}</br>" \ + "<a class=\"btn btn-link\" data-id=\"link\" " \ + "data-remote=\"#{to("/prediction/#{c.id}/details")}\" data-toggle=\"modal\" " \ + "href=#details>" \ + "#{embedded_svg(c.svg, title: "click for details")}" \ + "</td>" + else + string += "<td nowrap>#{line.numeric? && line.include?(".") ? line.to_f.signif(3) : (line.nil? ? 
line : line.gsub(" ","<br />"))}</td>" + end end - haml :prediction + string += "</tr>" + string += "</table></td>" + response['Content-Type'] = "application/json" + return JSON.pretty_generate(:prediction => [string]) end end -get "/report/:id/?" do - prediction_model = Model::Validation.find params[:id] - bad_request_error "model with id: '#{params[:id]}' not found." unless prediction_model - report = qmrf_report params[:id] - # output - t = Tempfile.new - t << report.to_xml - name = prediction_model.species.sub(/\s/,"-")+"-"+prediction_model.endpoint.downcase.sub(/\s/,"-") - send_file t.path, :filename => "QMRF_report_#{name.gsub!(/[^0-9A-Za-z]/, '_')}.xml", :type => "application/xml", :disposition => "attachment" +# get individual compound details +get '/prediction/:neighbor/details/?' do + @compound = OpenTox::Compound.find params[:neighbor] + @smiles = @compound.smiles + begin + @names = @compound.names.nil? ? "No names for this compound available." : @compound.names + rescue + @names = "No names for this compound available." + end + @inchi = @compound.inchi + + haml :details, :layout => false end -get '/license' do +get '/predict/license' do @license = RDiscount.new(File.read("LICENSE.md")).to_html haml :license, :layout => false end -get '/faq' do +get '/predict/faq' do @faq = RDiscount.new(File.read("FAQ.md")).to_html - haml :faq, :layout => false + haml :faq#, :layout => false +end + +get '/predict/help' do + haml :help end get '/style.css' do @@ -281,3 +341,12 @@ get '/style.css' do scss :style end +# for swagger representation +get '/api/swagger-ui.css' do + headers 'Content-Type' => 'text/css; charset=utf-8' + scss :style +end + +get '/IST_logo_s.png' do + redirect to('/images/IST_logo_s.png') +end |