before new batch

author: gebele <gebele@in-silico.ch> 2017-09-25 11:29:26 +0000
committer: gebele <gebele@in-silico.ch> 2017-09-25 11:29:26 +0000
commit: e22513f460eeb42af5164537a7ecea9d21035cea (patch)
tree: 1a36c019ca4fce7e92c994ae9587f267b072974c /application.rb
parent: 437b78faf4bcd820ea3193da18abf6b71eda8153 (diff)
1 files changed, 194 insertions, 74 deletions
diff --git a/application.rb b/application.rb
index eefd833..70eecb5 100644
--- a/application.rb
+++ b/application.rb
@@ -1,38 +1,61 @@
-#require_relative 'helper.rb'
-require 'rdiscount'
 include OpenTox
 
-configure :development do
+
+configure :production do
   $logger = Logger.new(STDOUT)
+  enable :reloader
 end
 
-helpers do
-  class Numeric
-    def percent_of(n)
-      self.to_f / n.to_f * 100.0
-    end
-  end
-
+configure :development do
+  $logger = Logger.new(STDOUT)
+  enable :reloader
 end
 
 before do
   @version = File.read("VERSION").chomp
 end
 
+not_found do
+  redirect to('/predict')
+end
+
+error do
+  @error = request.env['sinatra.error']
+  haml :error
+end
+
+helpers do
+  def embedded_svg image, options={}
+    doc = Nokogiri::HTML::DocumentFragment.parse image
+    svg = doc.at_css 'svg'
+    title = doc.at_css 'title'
+    if options[:class].present?
+      svg['class'] = options[:class]
+    end
+    if options[:title].present?
+      title.children.remove
+      text_node = Nokogiri::XML::Text.new(options[:title], doc)
+      title.add_child(text_node)
+    end
+    doc.to_html.html_safe
+  end
+end
+
 get '/?' do
   redirect to('/predict') 
 end
 
 get '/predict/?' do
-  @models = OpenTox::Model::Prediction.all
+  @models = Model::Validation.all
   @models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/}
   @endpoints = @models.collect{|m| m.endpoint}.sort.uniq
+  @endpoints << "Oral toxicity (Cramer rules)"
   @models.count <= 0 ? (haml :info) : (haml :predict)
 end
 
 get '/predict/modeldetails/:model' do
-  model = OpenTox::Model::Prediction.find params[:model]
-  crossvalidations = OpenTox::Validation::RepeatedCrossValidation.find(model.repeated_crossvalidation_id).crossvalidations
+  model = Model::Validation.find params[:model]
+  crossvalidations = Validation::RepeatedCrossValidation.find(model.repeated_crossvalidation_id).crossvalidations
 
   return haml :model_details, :layout=> false, :locals => {:model => model, :crossvalidations => crossvalidations}
 end
@@ -48,43 +71,10 @@ get '/predict/dataset/:name' do
   csv
 end
 
-get '/predict/?:csv?' do
+get '/predict/:tmppath/:filename/?' do
   response['Content-Type'] = "text/csv"
-  @csv = "\"Compound\",\"Endpoint\",\"Type\",\"Prediction\",\"95% Prediction interval\"\n"
-  @@batch.each do |key, values|
-    compound = key
-    smiles = compound.smiles
-    values.each do |array|
-      model = array[0]
-      type = model.model.class.to_s.match("Classification") ? "Classification" : "Regression"
-      prediction = array[1]
-      endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
-      if prediction[:confidence] == "measured"
-        if prediction[:value].is_a?(Array)
-          prediction[:value].each do |value|
-            pred = value.numeric? ? "#{value} (#{model.unit}), #{compound.mmol_to_mg(value.delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : value
-            int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
-            interval = (int.nil? ? "--" : "#{int[1].delog10} - #{int[0].delog10} (#{model.unit})")
-            @csv += "\"#{smiles}\",\"#{endpoint}\",\"#{type}\",\"#{pred}\",\"#{interval}\"\n"
-          end
-        else
-          pred = prediction[:value].numeric? ? "#{prediction[:value]} (#{model.unit}), #{compound.mmol_to_mg(prediction[:value].delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
-          confidence = "measured activity"
-        end
-      elsif prediction[:neighbors].size > 0
-        type = model.model.class.to_s.match("Classification") ? "Classification" : "Regression"
-        pred = prediction[:value].numeric? ? "#{prediction[:value].delog10} (#{model.unit}), #{compound.mmol_to_mg(prediction[:value].delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
-        int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
-        interval = (int.nil? ? "--" : "#{int[1].delog10} - #{int[0].delog10} (#{model.unit})")
-      else
-        type = ""
-        pred = "Not enough similar compounds in training dataset."
-        interval = ""
-      end
-      @csv += "\"#{smiles}\",\"#{endpoint}\",\"#{type}\",\"#{pred}\",\"#{interval}\"\n" unless prediction[:value].is_a?(Array)
-    end
-  end
-  @csv
+  path = "/tmp/#{params[:tmppath]}"
+  send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment"
 end
 
 post '/predict/?' do
@@ -92,66 +82,196 @@ post '/predict/?' do
   # process batch prediction
   if !params[:fileselect].blank?
     if params[:fileselect][:filename] !~ /\.csv$/
-      @error_report = "Please submit a csv file."
-      return haml :error
+      bad_request_error "Please submit a csv file."
     end
     File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
       f.write(params[:fileselect][:tempfile].read)
     end
     @filename = params[:fileselect][:filename]
     begin
-      input = OpenTox::Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename]), true
+      input = Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename]), true
       if input.class == OpenTox::Dataset
-        dataset = OpenTox::Dataset.find input
+        dataset = Dataset.find input
       else
-        @error_report = "Could not serialize file '#{@filename}' ."
-        return haml :error
+        bad_request_error "Could not serialize file '#{@filename}'."
       end
     rescue
-      @error_report = "Could not serialize file '#{@filename}' ."
-      return haml :error
+      bad_request_error "Could not serialize file '#{@filename}'."
     end
     @compounds = dataset.compounds
     if @compounds.size == 0
-      @error_report = dataset[:warnings]
+      message = dataset[:warnings]
       dataset.delete
-      return haml :error
+      bad_request_error message
     end
+
+    # for csv export
     @batch = {}
-    @compounds.each do |compound|
-      @batch[compound] = []
-      params[:selection].keys.each do |model_id|
-        model = Model::Prediction.find model_id
+    # for haml table
+    @view = {}
+
+    @compounds.each{|c| @view[c] = []}
+    params[:selection].keys.each do |model_id|
+      model = Model::Validation.find model_id
+      @batch[model] = []
+      @compounds.each_with_index do |compound,idx|
         prediction = model.predict(compound)
-        @batch[compound] << [model, prediction]
+        @batch[model] << [compound, prediction]
+        @view[compound] << [model,prediction]
       end
     end
-    @@batch = @batch
+
+    @csvhash = {}
     @warnings = dataset[:warnings]
+    dupEntries = {}
+    delEntries = ""
+    
+    # split duplicates and deleted entries
+    @warnings.each do |w|
+      substring = w.match(/line .* of/)
+      unless substring.nil?
+        delEntries += "\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n"
+      end
+      substring = w.match(/rows .* Entries/)
+      unless substring.nil?
+        lines = []
+        substring[0].split(",").each{|s| lines << s[/\d+/]}
+        lines.shift
+        lines.each{|l| dupEntries[l.to_i] = w.split(".").first}
+      end
+    end
+
+    @batch.each_with_index do |hash, idx|
+      @csvhash[idx] = ""
+      model = hash[0]
+      # create header
+      if model.regression?
+        predAunit = "(#{model.unit})"
+        predBunit = "(#{model.unit =~ /mmol\/L/ ? "(mol/L)" : "(mg/kg_bw/day)"})"
+        @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction #{predAunit}\",\"Prediction #{predBunit}\",\"95% Prediction interval (low) #{predAunit}\",\"95% Prediction interval (high) #{predAunit}\",\"95% Prediction interval (low) #{predBunit}\",\"95% Prediction interval (high) #{predBunit}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
+      else #classification
+        av = model.prediction_feature.accept_values
+        probFirst = av[0].capitalize
+        probLast = av[1].capitalize
+        @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
+      end
+      values = hash[1]
+      dupEntries.keys.each{|k| values.insert(k-1, dupEntries[k])}.compact!
+      
+      values.each_with_index do |array, id|
+        type = (model.regression? ? "Regression" : "Classification")
+        endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
+      
+        if id == 0
+          @csvhash[idx] += delEntries unless delEntries.blank?
+        end
+        unless array.kind_of? String
+          compound = array[0]
+          prediction = array[1]
+          smiles = compound.smiles
+          
+          if prediction[:neighbors]
+            if prediction[:value]
+              pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
+              predA = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
+              predAunit = prediction[:value].numeric? ? "(#{model.unit})" : ""
+              predB = prediction[:value].numeric? ? "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" : prediction[:value]
+              predBunit = prediction[:value].numeric? ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : ""
+              int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
+              intervalLow = (int.nil? ? "" : "#{int[1].delog10.signif(3)}")
+              intervalHigh = (int.nil? ? "" : "#{int[0].delog10.signif(3)}")
+              intervalLowMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[1].delog10).signif(3)}")
+              intervalHighMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[0].delog10).signif(3)}")
+              inApp = "yes"
+              inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
+              note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
+              
+              unless prediction[:probabilities].nil?
+                av = model.prediction_feature.accept_values
+                propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}"
+                propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}"
+              end
+            else
+              # no prediction value only one neighbor
+              inApp = "no"
+              inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
+              note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
+            end
+          else
+            # no prediction value
+            inApp = "no"
+            inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
+            note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
+          end
+          if @warnings
+            @warnings.each do |w|
+              note += (w.split(".").first + ".") if /\b(#{Regexp.escape(smiles)})\b/ === w
+            end
+          end
+        else
+          # string note for duplicates
+          endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = ""
+          note = array
+        end
+        if model.regression?
+          @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{predA}\",\"#{predB}\",\"#{intervalLow}\",\"#{intervalHigh}\",\"#{intervalLowMg}\",\"#{intervalHighMg}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
+        else
+          @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
+        end
+      end
+    end
+    t = Tempfile.new
+    @csvhash.each do |model, csv|
+      t.write(csv)
+      t.write("\n")
+    end
+    t.rewind
+    @tmppath = t.path.split("/").last
+
     dataset.delete
     File.delete File.join("tmp", params[:fileselect][:filename])
     return haml :batch
   end
 
   # validate identifier input
-  # transfered input
   if !params[:identifier].blank?
     @identifier = params[:identifier]
     $logger.debug "input:#{@identifier}"
     # get compound from SMILES
     @compound = Compound.from_smiles @identifier
-    if @compound.blank?
-      @error_report = "'#{@identifier}' is not a valid SMILES string."
-      return haml :error
-    end
-
+    bad_request_error "'#{@identifier}' is not a valid SMILES string." if @compound.blank?
+    
     @models = []
     @predictions = []
+    @toxtree = false
     params[:selection].keys.each do |model_id|
-      model = Model::Prediction.find model_id
-      @models << model
-      @predictions << model.predict(@compound)
+      if model_id == "Cramer"
+        @toxtree = true
+        @predictions << [Toxtree.predict(@compound.smiles, "Cramer rules"), Toxtree.predict(@compound.smiles, "Cramer rules with extensions")]
+      else
+        model = Model::Validation.find model_id
+        @models << model
+        if model.model.name =~ /kazius/
+          sa_prediction = KaziusAlerts.predict(@compound.smiles)
+          lazar_mutagenicity = model.predict(@compound)
+          confidence = 0
+          lazar_mutagenicity_val = (lazar_mutagenicity[:value] == "non-mutagenic" ? false : true)
+          if sa_prediction[:prediction] == false && lazar_mutagenicity_val == false
+            confidence = 0.85
+          elsif sa_prediction[:prediction] == true && lazar_mutagenicity_val == true
+            confidence = 0.85 * ( 1 - sa_prediction[:error_product] )
+          elsif sa_prediction[:prediction] == false && lazar_mutagenicity_val == true
+            confidence = 0.11
+          elsif sa_prediction[:prediction] == true && lazar_mutagenicity_val == false
+            confidence = ( 1 - sa_prediction[:error_product] ) - 0.57
+          end
+          @predictions << [lazar_mutagenicity, {:prediction => sa_prediction, :confidence => confidence}]
+        else
+          @predictions << model.predict(@compound)
+        end
+      end
     end
+
     haml :prediction
   end
 end
author	gebele <gebele@in-silico.ch>	2017-09-25 11:29:26 +0000
committer	gebele <gebele@in-silico.ch>	2017-09-25 11:29:26 +0000
commit	e22513f460eeb42af5164537a7ecea9d21035cea (patch)
tree	1a36c019ca4fce7e92c994ae9587f267b072974c /application.rb
parent	437b78faf4bcd820ea3193da18abf6b71eda8153 (diff)