author     gebele <gebele@in-silico.ch>  2019-05-28 14:25:52 +0000
committer  gebele <gebele@in-silico.ch>  2019-05-28 14:25:52 +0000
commit     5a3be4190688bc8240327930b3e953b09ecc9d9e (patch)
tree       c26ae148d18bcb0195a092b9339dbfcd16718a9e /application.rb
parent     750e91ae181c06c2f9d067d540d2d336274049b0 (diff)
before clean up
Diffstat (limited to 'application.rb')
-rw-r--r--  application.rb  382
1 file changed, 93 insertions(+), 289 deletions(-)
diff --git a/application.rb b/application.rb
index aaa18ae..508a8c6 100644
--- a/application.rb
+++ b/application.rb
@@ -1,11 +1,9 @@
require 'rdiscount'
require_relative 'qmrf_report.rb'
require_relative 'task.rb'
-require_relative 'prediction.rb'
-require_relative 'batch.rb'
require_relative 'helper.rb'
include OpenTox
-
+=begin
[
"api.rb",
"compound.rb",
@@ -18,7 +16,7 @@ include OpenTox
"swagger.rb",
"validation.rb"
].each{ |f| require_relative "./lib/#{f}" }
-
+=end
configure :production, :development do
STDOUT.sync = true
@@ -26,8 +24,8 @@ configure :production, :development do
$logger.level = Logger::DEBUG
enable :reloader
also_reload './helper.rb'
- also_reload './prediction.rb'
- also_reload './batch.rb'
+ also_reload './qmrf_report.rb'
+=begin
[
"api.rb",
"compound.rb",
@@ -40,8 +38,9 @@ configure :production, :development do
"swagger.rb",
"validation.rb"
].each{ |f| also_reload "./lib/#{f}" }
+=end
end
-
+=begin
before do
$paths = [
"/",
@@ -62,11 +61,15 @@ before do
@version = File.read("VERSION").chomp
end
end
+=end
+before do
+ @version = File.read("VERSION").chomp
+end
not_found do
redirect to('/predict')
end
-
+=begin
error do
if request.path == "/" || $paths.include?(request.path.split("/")[1])
@accept = request.env['HTTP_ACCEPT']
@@ -77,33 +80,40 @@ error do
haml :error
end
end
+=end
+error do
+ @error = request.env['sinatra.error']
+ haml :error
+end
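Note that the simplified handler above depends on Sinatra storing the raised exception in request.env['sinatra.error']. A minimal, self-contained sketch of that mechanism (FailingApp and the /boom route are illustrative only, not part of this application):

    # illustrative sketch only -- not part of this commit
    require 'sinatra/base'
    require 'rack/test'

    class FailingApp < Sinatra::Base
      set :show_exceptions, false   # render the error block instead of the debug page
      set :raise_errors, false      # route exceptions to the error block

      get '/boom' do
        raise 'something went wrong'
      end

      error do
        # same lookup as in the handler above, rendered as plain text here
        "error: #{request.env['sinatra.error'].message}"
      end
    end

    include Rack::Test::Methods
    def app; FailingApp; end

    get '/boom'
    puts last_response.body   # => "error: something went wrong"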
+=begin
# https://github.com/britg/sinatra-cross_origin#responding-to-options
options "*" do
response.headers["Allow"] = "HEAD,GET,PUT,POST,DELETE,OPTIONS"
response.headers["Access-Control-Allow-Headers"] = "X-Requested-With, X-HTTP-Method-Override, Content-Type, Cache-Control, Accept"
200
end
-
+=end
get '/predict/?' do
@models = OpenTox::Model::Validation.all
- @models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/}
@endpoints = @models.collect{|m| m.endpoint}.sort.uniq
- if @models.count > 0
- rodent_index = 0
- @models.each_with_index{|model,idx| rodent_index = idx if model.species =~ /Rodent/}
- @models.insert(rodent_index-1,@models.delete_at(rodent_index))
- end
@models.count > 0 ? (haml :predict) : (haml :info)
end
get '/predict/modeldetails/:model' do
model = OpenTox::Model::Validation.find params[:model]
+ training_dataset = model.model.training_dataset
+ data_entries = training_dataset.data_entries
crossvalidations = OpenTox::Validation::RepeatedCrossValidation.find(model.repeated_crossvalidation_id).crossvalidations
- return haml :model_details, :layout=> false, :locals => {:model => model, :crossvalidations => crossvalidations}
+ return haml :model_details, :layout=> false, :locals => {:model => model,
+ :crossvalidations => crossvalidations,
+ :training_dataset => training_dataset,
+ :data_entries => data_entries
+ }
end
+#TODO fix update
get "/predict/report/:id/?" do
prediction_model = Model::Validation.find params[:id]
bad_request_error "model with id: '#{params[:id]}' not found." unless prediction_model
@@ -119,6 +129,7 @@ get '/predict/jme_help/?' do
File.read(File.join('views','jme_help.html'))
end
+# download training dataset
get '/predict/dataset/:name' do
response['Content-Type'] = "text/csv"
dataset = Dataset.find_by(:name=>params[:name])
@@ -129,226 +140,54 @@ get '/predict/dataset/:name' do
send_file t.path, :filename => name, :type => "text/csv", :disposition => "attachment"
end
-get '/predict/:tmppath/:filename/?' do
- response['Content-Type'] = "text/csv"
- path = "/tmp/#{params[:tmppath]}"
- send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment"
-end
-
-get '/predict/csv/:task/:model/:filename/?' do
- response['Content-Type'] = "text/csv"
- filename = params[:filename] =~ /\.csv$/ ? params[:filename].gsub(/\.csv$/,"") : params[:filename]
- task = Task.find params[:task].to_s
- m = Model::Validation.find params[:model].to_s
- dataset = Batch.find_by(:name => filename)
- @ids = dataset.ids
- warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n")
- unless warnings.nil?
- @parse = []
- warnings.split("\n").each do |warning|
- if warning =~ /^Cannot/
- smi = warning.split("SMILES compound").last.split("at").first
- line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i
- @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n"
- end
- end
- keys_array = []
- warnings.split("\n").each do |warning|
- if warning =~ /^Duplicate/
- text = warning.split("ID").first
- numbers = warning.split("ID").last.split("and")
- keys_array << numbers.collect{|n| n.strip.to_i}
- end
- end
- @dups = {}
- keys_array.each do |keys|
- keys.each do |key|
- @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n"
- end
- end
- end
- endpoint = "#{m.endpoint}_(#{m.species})"
+# download batch prediction file
+get '/predict/batch/download/?' do
+ task = Task.find params[:tid]
+ prediction_dataset = Dataset.find task.dataset_id
+ filename = prediction_dataset.name
tempfile = Tempfile.new
- header = task.csv
- lines = []
- task.predictions[params[:model]].each_with_index do |hash,idx|
- identifier = hash.keys[0]
- prediction_id = hash.values[0]
- # add duplicate warning at the end of a line if ID matches
- if @dups && @dups[idx+1]
- if prediction_id.is_a? BSON::ObjectId
- if @ids.blank?
- lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
- else
- lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
- end
- end
- else
- if prediction_id.is_a? BSON::ObjectId
- if @ids.blank?
- lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}"
- else
- lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}"
- end
- else
- if @ids.blank?
- lines << "#{idx+1},#{identifier},#{p}\n"
- else
- lines << "#{idx+1},#{@ids[idx]}#{identifier},#{p}\n"
- end
- end
- end
- end
- (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join(""))
- #tempfile.write(header+lines.join(""))
+ tempfile << prediction_dataset.to_csv
tempfile.rewind
- send_file tempfile, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{endpoint}_#{filename}.csv", :type => "text/csv", :disposition => "attachment"
+ response['Content-Type'] = "text/csv"
+ send_file tempfile, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{filename}.csv", :type => "text/csv", :disposition => "attachment"
end
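For context, a minimal client-side sketch of fetching this route; host, port, and the task id are placeholders, and tid is the query parameter read by the handler above:

    # illustrative client sketch -- host, port and task id are placeholders
    require 'net/http'
    require 'uri'

    uri = URI("http://localhost:4567/predict/batch/download")
    uri.query = URI.encode_www_form(tid: "5cec0123abcd0000c0ffee00")

    response = Net::HTTP.get_response(uri)
    # the route streams the prediction dataset as CSV
    File.write("lazar_batch_prediction.csv", response.body) if response.is_a?(Net::HTTPSuccess)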
post '/predict/?' do
# process batch prediction
if !params[:fileselect].blank?
- next
if params[:fileselect][:filename] !~ /\.csv$/
bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a CSV file."
end
@filename = params[:fileselect][:filename]
- begin
- File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
- f.write(params[:fileselect][:tempfile].read)
- end
- input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename])
- $logger.debug "Processing '#{params[:fileselect][:filename]}'"
- if input.class == OpenTox::Batch
- @dataset = input
- @compounds = @dataset.compounds
- @identifiers = @dataset.identifiers
- @ids = @dataset.ids
- else
- File.delete File.join("tmp", params[:fileselect][:filename])
- bad_request_error "Could not serialize file '#{@filename}'."
- end
- rescue
- File.delete File.join("tmp", params[:fileselect][:filename])
- bad_request_error "Could not serialize file '#{@filename}'."
+ File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
+ f.write(params[:fileselect][:tempfile].read)
end
-
- if @compounds.size == 0
- message = @dataset.warnings
- @dataset.delete
- bad_request_error message
- end
-
+ input = Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename])
+ $logger.debug "Processing '#{params[:fileselect][:filename]}'"
+ @compounds_size = input.compounds.size
@models = params[:selection].keys
- # for single predictions in batch
@tasks = []
@models.each{|m| t = Task.new; t.save; @tasks << t}
@predictions = {}
- task = Task.run do
- @models.each_with_index do |model,idx|
- t = @tasks[idx]
- m = Model::Validation.find model
- type = (m.regression? ? "Regression" : "Classification")
- # add header for regression
- if type == "Regression"
- unit = (type == "Regression") ? "(#{m.unit})" : ""
- converted_unit = (type == "Regression") ? "#{m.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : ""
- if @ids.blank?
- header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\
- "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\
- "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\
- "inApplicabilityDomain,Note\n"
- else
- header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\
- "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\
- "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\
- "inApplicabilityDomain,Note\n"
- end
- end
- # add header for classification
- if type == "Classification"
- av = m.prediction_feature.accept_values
- if @ids.blank?
- header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\
- "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n"
- else
- header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\
- "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n"
- end
- end
- # predict compounds
- p = 100.0/@compounds.size
- counter = 1
- predictions = []
- @compounds.each_with_index do |cid,idx|
- compound = Compound.find cid
- if Prediction.where(compound: compound.id, model: m.id).exists?
- prediction_object = Prediction.find_by(compound: compound.id, model: m.id)
- prediction = prediction_object.prediction
- prediction_id = prediction_object.id
- # in case prediction object was created by single prediction
- if prediction_object.csv.blank?
- prediction_object[:csv] = prediction_to_csv(m,compound,prediction)
- prediction_object.save
- end
- # identifier
- identifier = @identifiers[idx]
- else
- prediction = m.predict(compound)
- # save prediction object
- prediction_object = Prediction.new
- prediction_id = prediction_object.id
- prediction_object[:compound] = compound.id
- prediction_object[:model] = m.id
- # add additionally fields for html representation
- unless prediction[:value].blank? || type == "Classification"
- prediction[:prediction_value] = "#{prediction[:value].delog10.signif(3)} #{unit}"
- prediction["converted_prediction_value"] = "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)} #{converted_unit}"
- end
- unless prediction[:prediction_interval].blank?
- interval = prediction[:prediction_interval]
- prediction[:interval] = "#{interval[1].delog10.signif(3)} - #{interval[0].delog10.signif(3)} #{unit}"
- prediction[:converted_interval] = "#{compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{compound.mmol_to_mg(interval[0].delog10).signif(3)} #{converted_unit}"
- end
- prediction["unit"] = unit
- prediction["converted_unit"] = converted_unit
- if prediction[:measurements].is_a?(Array)
- prediction["measurements_string"] = (type == "Regression") ? prediction[:measurements].collect{|value| "#{value.delog10.signif(3)} #{unit}"} : prediction[:measurements].join("</br>")
- prediction["converted_measurements"] = prediction[:measurements].collect{|value| "#{compound.mmol_to_mg(value.delog10).signif(3)} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"} if type == "Regression"
- else
- output["measurements_string"] = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} #{unit}}" : prediction[:measurements]
- output["converted_measurements"] = "#{compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if type == "Regression"
- end
- # store in prediction_object
- prediction_object[:prediction] = prediction
- prediction_object[:csv] = prediction_to_csv(m,compound,prediction)
- prediction_object.save
-
- # identifier
- identifier = @identifiers[idx]
- end
- # collect prediction_object ids with identifier
- predictions << {identifier => prediction_id}
- t.update_percent((counter*p).ceil > 100 ? 100 : (counter*p).ceil)
- counter += 1
- end
- # write csv
- t[:csv] = header
- # write predictions
- @predictions["#{model}"] = predictions
- # save task
- # append predictions as last action otherwise they won't save
- # mongoid works with shallow copy via #dup
- t[:predictions] = @predictions
+ maintask = Task.run do
+ @models.each_with_index do |model_id,idx|
+ t = @tasks[idx]
+ t.update_percent(1)
+ prediction = {}
+ model = Model::Validation.find model_id
+ t.update_percent(10)
+ prediction_dataset = model.predict input
+ t[:dataset_id] = prediction_dataset.id
+ t.update_percent(90)
+ prediction[model_id] = prediction_dataset.id.to_s
+ t[:predictions] = prediction
+ t[:csv] = prediction_dataset.to_csv
+ t.update_percent(100)
t.save
- end#models
-
- end#main task
- @pid = task.pid
-
- #@dataset.delete
- #File.delete File.join("tmp", params[:fileselect][:filename])
+ end
+ end
+ @pid = maintask.pid
return haml :batch
else
# single compound prediction
@@ -357,30 +196,20 @@ post '/predict/?' do
@identifier = params[:identifier].strip
$logger.debug "input:#{@identifier}"
# get compound from SMILES
- @compound = Compound.from_smiles @identifier
- bad_request_error "'#{@identifier}' is not a valid SMILES string." if @compound.blank?
-
+ begin
+ @compound = Compound.from_smiles @identifier
+ rescue
+ @error = "'#{@identifier}' is not a valid SMILES string." unless @compound
+ return haml :error
+ end
@models = []
@predictions = []
- @toxtree = false
params[:selection].keys.each do |model_id|
model = Model::Validation.find model_id
@models << model
- if Prediction.where(compound: @compound.id, model: model.id).exists?
- prediction_object = Prediction.find_by(compound: @compound.id, model: model.id)
- prediction = prediction_object.prediction
- @predictions << prediction
- else
- prediction_object = Prediction.new
- prediction = model.predict(@compound)
- prediction_object[:compound] = @compound.id
- prediction_object[:model] = model.id
- prediction_object[:prediction] = prediction
- prediction_object.save
- @predictions << prediction
- end
+ prediction = model.predict(@compound)
+ @predictions << prediction
end
-
haml :prediction
end
end
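The batch branch above now boils down to: load the uploaded CSV as a Dataset, run each selected validation model against it in a background Task, and keep the resulting prediction dataset id. A condensed sketch of that flow outside the web layer, assuming the same OpenTox API used above (the file path and model id are placeholders):

    # condensed sketch of the batch prediction flow -- path and model id are placeholders
    input = Dataset.from_csv_file File.join("tmp", "batch_input.csv")

    ["5ae2dd885f1c2d01323270ee"].each do |model_id|
      model = Model::Validation.find model_id
      prediction_dataset = model.predict input          # returns a prediction Dataset
      File.write("#{model.endpoint}_predictions.csv", prediction_dataset.to_csv)
    end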
@@ -394,62 +223,33 @@ get '/prediction/task/?' do
task = Task.find(params[:predictions])
pageSize = params[:pageSize].to_i - 1
pageNumber= params[:pageNumber].to_i - 1
- predictions = task.predictions[params[:model]].collect{|hash| hash.values[0]}
- prediction_object = Prediction.find predictions[pageNumber]
- prediction = prediction_object.prediction
- compound = Compound.find prediction_object.compound
- model = Model::Validation.find prediction_object.model
- image = compound.svg
- smiles = compound.smiles
- type = (model.regression? ? "Regression" : "Classification")
- html = "<table class=\"table table-bordered single-batch\"><tr>"
- html += "<td>#{image}</br>#{smiles}</br></td>"
- string = "<td><table class=\"table\">"
- sorter = []
- if prediction[:info]
- prediction[:info] = "This compound was part of the training dataset. All information from this compound was "\
- "removed from the training data before the prediction to obtain unbiased results."
- sorter << {"Info" => prediction[:info]}
- if prediction["measurements_string"].kind_of?(Array)
- sorter << {"Measured activity" => "#{prediction["measurements_string"].join(";")}</br>#{prediction["converted_measurements"].join(";")}"}
+ csv = CSV.parse(task.csv)
+ header = csv.shift
+ string = "<td><table class=\"table table-bordered\">"
+ # find canonical smiles column
+ cansmi = 0
+ header.each_with_index do |h,idx|
+ cansmi = idx if h =~ /Canonical SMILES/
+ string += "<th>#{h}</th>"
+ end
+ string += "</tr>"
+ string += "<tr>"
+ csv[pageNumber].each_with_index do |line,idx|
+ if idx == cansmi
+ c = Compound.from_smiles line
+ string += "<td>#{line}</br>" \
+ "<a class=\"btn btn-link\" data-id=\"link\" " \
+ "data-remote=\"#{to("/prediction/#{c.id}/details")}\" data-toggle=\"modal\" " \
+ "href=#details>" \
+ "#{embedded_svg(c.svg, title: "click for details")}" \
+ "</td>"
else
- sorter << {"Measured activity" => "#{prediction["measurements_string"]}</br>#{prediction["converted_measurements"]}"}
+ string += "<td>#{line.numeric? && line.include?(".") ? line.to_f.signif(3) : line}</td>"
end
end
-
- # regression
- if prediction[:value] && type == "Regression"
- sorter << {"Prediction" => "#{prediction["prediction_value"]}</br>#{prediction["converted_prediction_value"]}"}
- sorter << {"95% Prediction interval" => "#{prediction[:interval]}</br>#{prediction["converted_interval"]}"}
- sorter << {"Warnings" => prediction[:warnings].join("</br>")}
- elsif !prediction[:value] && type == "Regression"
- sorter << {"Prediction" => ""}
- sorter << {"95% Prediction interval" => ""}
- sorter << {"Warnings" => prediction[:warnings].join("</br>")}
- # classification
- elsif prediction[:value] && type == "Classification"
- sorter << {"Prediction" => prediction[:value]}
- sorter << {"Probability" => prediction[:probabilities].collect{|k,v| "#{k}: #{v.signif(3)}"}.join("</br>")}
- elsif !prediction[:value] && type == "Classification"
- sorter << {"Prediction" => ""}
- sorter << {"Probability" => ""}
- #else
- sorter << {"Warnings" => prediction[:warnings].join("</br>")}
- end
- sorter.each_with_index do |hash,idx|
- k = hash.keys[0]
- v = hash.values[0]
- string += (idx == 0 ? "<tr class=\"hide-top\">" : "<tr>")+(k =~ /lazar/i ? "<td colspan=\"2\">" : "<td>")
- # keyword
- string += "#{k}:"
- string += "</td><td>"
- # values
- string += "#{v}"
- string += "</td></tr>"
- end
+ string += "</tr>"
string += "</table></td>"
- html += "#{string}</tr></table>"
- return JSON.pretty_generate(:prediction => [html])
+ return JSON.pretty_generate(:prediction => [string])
end
end
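The rewrite above also leans on a String#numeric? helper to decide which CSV cells get rounded; that helper is defined elsewhere (presumably helper.rb or the lazar core), so the following is only an assumed sketch of its shape:

    # assumed shape of the String#numeric? helper used above -- not taken from this commit
    class String
      def numeric?
        true if Float(self) rescue false
      end
    end

    "4.2".numeric?     # => true
    "42".numeric?      # => true
    "SMILES".numeric?  # => false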
@@ -462,7 +262,7 @@ get '/prediction/:neighbor/details/?' do
rescue
@names = "No names for this compound available."
end
- @inchi = @compound.inchi.gsub("InChI=", "")
+ @inchi = @compound.inchi
haml :details, :layout => false
end
@@ -477,6 +277,10 @@ get '/predict/faq' do
haml :faq#, :layout => false
end
+get '/help' do
+ haml :help
+end
+
get '/style.css' do
headers 'Content-Type' => 'text/css; charset=utf-8'
scss :style