allow tab separated smi files

author: gebele <gebele@in-silico.ch> 2018-09-24 14:33:43 +0000
committer: gebele <gebele@in-silico.ch> 2018-09-24 14:33:43 +0000
commit: 9b28faf75acb5a74ac0fac37433fbed4d1d39c7c (patch)
tree: ba0c5faf576933cb92455ec060430b3f56816f97
parent: 427d0fc4e4190f201ff6a3a1ee29c6f6bf56bc56 (diff)
4 files changed, 48 insertions, 41 deletions
diff --git a/application.rb b/application.rb
index f30361e..e6ca7c2 100644
--- a/application.rb
+++ b/application.rb
@@ -38,7 +38,6 @@ get '/predict/?' do
   @existing_datasets = dataset_storage
   @models = Model::Validation.all
   @models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/}
-  #endpoints = @models.collect{|m| m.endpoint =~ /LOAEL/ ? m.endpoint+" (lazar)" : m.endpoint}
   endpoints = @models.collect{|m| m.endpoint if m.endpoint != "Mutagenicity"}.compact
   endpoints << "Oral toxicity (Cramer rules)"
   endpoints << "Lowest observed adverse effect level (LOAEL) (Mazzatorta)"
@@ -173,21 +172,21 @@ get '/download/dataset/:id' do
   response['Content-Type'] = "text/csv"
   dataset = Batch.find params[:id]
   tempfile = Tempfile.new
-  tempfile.write(File.read("tmp/"+dataset.name+".csv"))
+  tempfile.write(File.read(dataset.source))
   tempfile.rewind
-  send_file tempfile, :filename => dataset.name+".csv", :type => "text/csv", :disposition => "attachment"
+  send_file tempfile, :filename => dataset.source, :type => (dataset.source =~ /\.smi$/ ? "chemical/x-daylight-smiles" : "text/csv"), :disposition => "attachment"
 end
 
 get '/delete/dataset/:id' do
   dataset = Batch.find params[:id]
   dataset.delete
-  File.delete File.join("tmp/"+dataset.name+".csv")
+  File.delete File.join(dataset.source)
   redirect to("/")
 end
 
 get '/predict/csv/:task/:model/:filename/?' do
   response['Content-Type'] = "text/csv"
-  filename = params[:filename] =~ /\.csv$/ ? params[:filename].gsub(/\.csv$/,"") : params[:filename]
+  filename = params[:filename]
   task = Task.find params[:task].to_s
   m = Model::Validation.find params[:model].to_s unless params[:model] =~ /Cramer|Mazzatorta/
   dataset = Batch.find_by(:name => filename)
@@ -218,7 +217,7 @@ get '/predict/csv/:task/:model/:filename/?' do
     end
   end
   if params[:model] == "Mazzatorta"
-    endpoint = "Lowest observed adverse effect level (LOAEL) (Rats) (Mazzatorta)"
+    endpoint = "Lowest observed adverse effect level (LOAEL) (Rat) (Mazzatorta)"
   elsif params[:model] == "Cramer"
     endpoint = "Oral_toxicity_(Cramer_rules)"
   else
@@ -293,38 +292,32 @@ post '/predict/?' do
       @filename = @dataset.name
     end
     if !params[:fileselect].blank?
-      if params[:fileselect][:filename] !~ /\.csv$/
-        bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a CSV file."
+      if params[:fileselect][:filename] !~ /\.csv$|\.smi$/
+        bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a .csv or .smi file."
       end
-      @filename = params[:fileselect][:filename]
-      begin
-        @dataset = Batch.find_by(:name => params[:fileselect][:filename].sub(/\.csv$/,""))
-        if @dataset
-          $logger.debug "Take file from database."
+      @filename = params[:fileselect][:filename].gsub(/\.csv$|\.smi$/,"")
+      @dataset = Batch.find_by(:name => @filename)
+      if @dataset
+        $logger.debug "Take file from database."
+        @compounds = @dataset.compounds
+        @identifiers = @dataset.identifiers
+        @ids = @dataset.ids
+      else
+        File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
+          f.write(params[:fileselect][:tempfile].read)
+        end
+        input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename])
+        $logger.debug "Processing '#{params[:fileselect][:filename]}'"
+        if input.class == OpenTox::Batch
+          @dataset = input
           @compounds = @dataset.compounds
           @identifiers = @dataset.identifiers
           @ids = @dataset.ids
         else
-          File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
-            f.write(params[:fileselect][:tempfile].read)
-          end
-          input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename])
-          $logger.debug "Processing '#{params[:fileselect][:filename]}'"
-          if input.class == OpenTox::Batch
-            @dataset = input
-            @compounds = @dataset.compounds
-            @identifiers = @dataset.identifiers
-            @ids = @dataset.ids
-          else
-            File.delete File.join("tmp", params[:fileselect][:filename])
-            bad_request_error "Could not serialize file '#{@filename}'."
-          end
+          File.delete File.join("tmp", params[:fileselect][:filename])
+          bad_request_error "Could not serialize file '#{@filename}'."
         end
-      rescue
-        File.delete File.join("tmp", params[:fileselect][:filename])
-        bad_request_error "Could not serialize file '#{@filename}'."
       end
-
       if @compounds.size == 0
         message = @dataset.warnings
         @dataset.delete
@@ -455,7 +448,7 @@ post '/predict/?' do
           compounds = @compounds.collect{|cid| c = Compound.find cid; c.smiles}
           prediction = LoaelMazzatorta.predict(compounds)
           output = {}
-          output["model_name"] = "Lowest observed adverse effect level (LOAEL) (Rats) (Mazzatorta)"
+          output["model_name"] = "Lowest observed adverse effect level (LOAEL) (Rat) (Mazzatorta)"
           output["mazzatorta"] = []
           #output["mazzatorta"] = prediction
           # header
diff --git a/batch.rb b/batch.rb
index 2e72396..02fca2a 100644
--- a/batch.rb
+++ b/batch.rb
@@ -1,6 +1,10 @@
 require 'csv'
 require 'tempfile'
 
+def has_tab?(line) # Top-level helper: reports whether +line+ contains a tab character.
+  !!(line =~ /\t/) # =~ yields a match index or nil; !! turns that into a strict true/false
+end
+
 module OpenTox
 
   class Batch
@@ -24,7 +28,13 @@ module OpenTox
         $logger.debug "Skipping import of #{file}, it is already in the database (id: #{batch.id})."
       else
         $logger.debug "Parsing #{file}."
-        table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8'
+        # check delimiter
+        line = File.readlines(file).first
+        if has_tab?(line)
+          table = CSV.read file, :col_sep => "\t", :skip_blanks => true, :encoding => 'windows-1251:utf-8'
+        else
+          table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8'
+        end
         batch = self.new(:source => source, :name => name, :identifiers => [], :ids => [], :compounds => [])
 
         # original IDs
@@ -37,7 +47,11 @@ module OpenTox
         feature_names = table.shift.collect{|f| f.strip}
         warnings << "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size
         compound_format = feature_names.shift.strip
-        bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i
+        unless compound_format =~ /SMILES|InChI/i
+          File.delete file
+          bad_request_error "'#{compound_format}' is not a supported compound format in the header. " \
+          "Accepted formats: SMILES, InChI. Please take a look on the help page."
+        end
         numeric = []
         features = []
         # guess feature types
diff --git a/views/help.haml b/views/help.haml
index 267f0ce..0e54eff 100644
--- a/views/help.haml
+++ b/views/help.haml
@@ -2,7 +2,7 @@
   %h3 How to use batch prediction
 
   %p
-    You have two options to format your comma sperated spreadsheet (CSV) for batch predictions:
+    You have two options to format your comma- or tab-separated spreadsheet for batch predictions:
 
   %br
   %p
@@ -66,7 +66,7 @@
               ="..."
   %br
   %p
-    Uploaded CSV files will be stored until you delete them. You can repeat the prediction by selecting a file in the list of uploads.
+    Uploaded files will be stored until you delete them. You can repeat the prediction by selecting a file in the list of uploads.
     %br
     If you upload a file with the same name as one of the existing files, the existing file will be used. 
     If you want to replace an existing file delete it first!
diff --git a/views/predict.haml b/views/predict.haml
index 0afcc99..a776a93 100644
--- a/views/predict.haml
+++ b/views/predict.haml
@@ -142,14 +142,14 @@
           %input{:type => 'text', :name => 'identifier', :id => 'identifier', :size => '60'}
           %p
           %label{:for=>"fileselect"}
-            or upload a CSV file for batch predictions
+            or upload a CSV or SMI file for batch predictions
           %br
           %span.btn.btn-default.btn-file
-            %input{:type=>"file", :name=> "fileselect", :id=>"fileselect", :autocomplete=>"off", :accept=>"text/csv"}
+            %input{:type=>"file", :name=> "fileselect", :id=>"fileselect", :autocomplete=>"off", :accept=>"text/csv,chemical/x-daylight-smiles"}
           %a.btn.btn-warning{:href => to("/help"), :rel => "external", :style=>"margin-left: 1em;"} Help
       %div.col-md-6
         - if !@existing_datasets.blank?
-          %label{:for=>"storage"} or select an uploaded CSV file
+          %label{:for=>"storage"} or select an uploaded file
           #storage.storage-list
             - @existing_datasets.each do |id,values|
               %div.p2
@@ -187,7 +187,7 @@
         %div{:id=>endpoint.gsub(/\s+/, "_")}
           %h4.head-back=endpoint
           - if endpoint !~ /^Oral|Mazzatorta/
-            - @models.select{|m| endpoint = endpoint.gsub("(lazar)","").strip if endpoint =~ /lazar/; m.endpoint == endpoint}.each do |model|
+            - @models.select{|m| m.endpoint == endpoint}.each do |model|
               %div.row{:id => model.id,:style=>"margin-bottom:1em;"}
                 %span.col-lg-4.col-md-4.col-sm-4.col-xs-4
                   %input.check{:type => "checkbox", :name => "selection[#{model.id}]", :id => "selection[#{model.species.gsub(/\s+/, "_")}]", :value => true, :disabled => false}
@@ -234,7 +234,7 @@
               %span.col-lg-4.col-md-4.col-sm-4.col-xs-4
                 %input.check{:type => "checkbox", :name => "selection[Mazzatorta]", :id => "selection[Mazzatorta]", :value => true, :disabled => false}
                 %label{:for => "selection[Mazzatorta]"}
-                  Rats
+                  Rat
   %fieldset#bottom.well
     %div.row
       %div.col-md-2
author	gebele <gebele@in-silico.ch>	2018-09-24 14:33:43 +0000
committer	gebele <gebele@in-silico.ch>	2018-09-24 14:33:43 +0000
commit	9b28faf75acb5a74ac0fac37433fbed4d1d39c7c (patch)
tree	ba0c5faf576933cb92455ec060430b3f56816f97
parent	427d0fc4e4190f201ff6a3a1ee29c6f6bf56bc56 (diff)