From 9b28faf75acb5a74ac0fac37433fbed4d1d39c7c Mon Sep 17 00:00:00 2001 From: gebele Date: Mon, 24 Sep 2018 14:33:43 +0000 Subject: allow tab separated smi files --- application.rb | 57 ++++++++++++++++++++++++------------------------------ batch.rb | 18 +++++++++++++++-- views/help.haml | 4 ++-- views/predict.haml | 10 +++++----- 4 files changed, 48 insertions(+), 41 deletions(-) diff --git a/application.rb b/application.rb index f30361e..e6ca7c2 100644 --- a/application.rb +++ b/application.rb @@ -38,7 +38,6 @@ get '/predict/?' do @existing_datasets = dataset_storage @models = Model::Validation.all @models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/} - #endpoints = @models.collect{|m| m.endpoint =~ /LOAEL/ ? m.endpoint+" (lazar)" : m.endpoint} endpoints = @models.collect{|m| m.endpoint if m.endpoint != "Mutagenicity"}.compact endpoints << "Oral toxicity (Cramer rules)" endpoints << "Lowest observed adverse effect level (LOAEL) (Mazzatorta)" @@ -173,21 +172,21 @@ get '/download/dataset/:id' do response['Content-Type'] = "text/csv" dataset = Batch.find params[:id] tempfile = Tempfile.new - tempfile.write(File.read("tmp/"+dataset.name+".csv")) + tempfile.write(File.read(dataset.source)) tempfile.rewind - send_file tempfile, :filename => dataset.name+".csv", :type => "text/csv", :disposition => "attachment" + send_file tempfile, :filename => dataset.source, :type => (dataset.source =~ /\.smi$/ ? "chemical/x-daylight-smiles" : "text/csv"), :disposition => "attachment" end get '/delete/dataset/:id' do dataset = Batch.find params[:id] dataset.delete - File.delete File.join("tmp/"+dataset.name+".csv") + File.delete File.join(dataset.source) redirect to("/") end get '/predict/csv/:task/:model/:filename/?' do response['Content-Type'] = "text/csv" - filename = params[:filename] =~ /\.csv$/ ? params[:filename].gsub(/\.csv$/,"") : params[:filename] + filename = params[:filename] task = Task.find params[:task].to_s m = Model::Validation.find params[:model].to_s unless params[:model] =~ /Cramer|Mazzatorta/ dataset = Batch.find_by(:name => filename) @@ -218,7 +217,7 @@ get '/predict/csv/:task/:model/:filename/?' do end end if params[:model] == "Mazzatorta" - endpoint = "Lowest observed adverse effect level (LOAEL) (Rats) (Mazzatorta)" + endpoint = "Lowest observed adverse effect level (LOAEL) (Rat) (Mazzatorta)" elsif params[:model] == "Cramer" endpoint = "Oral_toxicity_(Cramer_rules)" else @@ -293,38 +292,32 @@ post '/predict/?' do @filename = @dataset.name end if !params[:fileselect].blank? - if params[:fileselect][:filename] !~ /\.csv$/ - bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a CSV file." + if params[:fileselect][:filename] !~ /\.csv$|\.smi$/ + bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a .csv or .smi file." end - @filename = params[:fileselect][:filename] - begin - @dataset = Batch.find_by(:name => params[:fileselect][:filename].sub(/\.csv$/,"")) - if @dataset - $logger.debug "Take file from database." + @filename = params[:fileselect][:filename].gsub(/\.csv$|\.smi$/,"") + @dataset = Batch.find_by(:name => @filename) + if @dataset + $logger.debug "Take file from database." + @compounds = @dataset.compounds + @identifiers = @dataset.identifiers + @ids = @dataset.ids + else + File.open('tmp/' + params[:fileselect][:filename], "w") do |f| + f.write(params[:fileselect][:tempfile].read) + end + input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename]) + $logger.debug "Processing '#{params[:fileselect][:filename]}'" + if input.class == OpenTox::Batch + @dataset = input @compounds = @dataset.compounds @identifiers = @dataset.identifiers @ids = @dataset.ids else - File.open('tmp/' + params[:fileselect][:filename], "w") do |f| - f.write(params[:fileselect][:tempfile].read) - end - input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename]) - $logger.debug "Processing '#{params[:fileselect][:filename]}'" - if input.class == OpenTox::Batch - @dataset = input - @compounds = @dataset.compounds - @identifiers = @dataset.identifiers - @ids = @dataset.ids - else - File.delete File.join("tmp", params[:fileselect][:filename]) - bad_request_error "Could not serialize file '#{@filename}'." - end + File.delete File.join("tmp", params[:fileselect][:filename]) + bad_request_error "Could not serialize file '#{@filename}'." end - rescue - File.delete File.join("tmp", params[:fileselect][:filename]) - bad_request_error "Could not serialize file '#{@filename}'." end - if @compounds.size == 0 message = @dataset.warnings @dataset.delete @@ -455,7 +448,7 @@ post '/predict/?' do compounds = @compounds.collect{|cid| c = Compound.find cid; c.smiles} prediction = LoaelMazzatorta.predict(compounds) output = {} - output["model_name"] = "Lowest observed adverse effect level (LOAEL) (Rats) (Mazzatorta)" + output["model_name"] = "Lowest observed adverse effect level (LOAEL) (Rat) (Mazzatorta)" output["mazzatorta"] = [] #output["mazzatorta"] = prediction # header diff --git a/batch.rb b/batch.rb index 2e72396..02fca2a 100644 --- a/batch.rb +++ b/batch.rb @@ -1,6 +1,10 @@ require 'csv' require 'tempfile' +def has_tab?(line) + !!(line =~ /\t/) +end + module OpenTox class Batch @@ -24,7 +28,13 @@ module OpenTox $logger.debug "Skipping import of #{file}, it is already in the database (id: #{batch.id})." else $logger.debug "Parsing #{file}." - table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8' + # check delimiter + line = File.readlines(file).first + if has_tab?(line) + table = CSV.read file, :col_sep => "\t", :skip_blanks => true, :encoding => 'windows-1251:utf-8' + else + table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8' + end batch = self.new(:source => source, :name => name, :identifiers => [], :ids => [], :compounds => []) # original IDs @@ -37,7 +47,11 @@ module OpenTox feature_names = table.shift.collect{|f| f.strip} warnings << "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size compound_format = feature_names.shift.strip - bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i + unless compound_format =~ /SMILES|InChI/i + File.delete file + bad_request_error "'#{compound_format}' is not a supported compound format in the header. " \ + "Accepted formats: SMILES, InChI. Please take a look on the help page." + end numeric = [] features = [] # guess feature types diff --git a/views/help.haml b/views/help.haml index 267f0ce..0e54eff 100644 --- a/views/help.haml +++ b/views/help.haml @@ -2,7 +2,7 @@ %h3 How to use batch prediction %p - You have two options to format your comma sperated spreadsheet (CSV) for batch predictions: + You have two options to format your comma or tab sperated spreadsheet for batch predictions: %br %p @@ -66,7 +66,7 @@ ="..." %br %p - Uploaded CSV files will be stored until you delete them. You can repeat the prediction by selecting a file in the list of uploads. + Uploaded files will be stored until you delete them. You can repeat the prediction by selecting a file in the list of uploads. %br If you upload a file with the same name as one of the existing files, the existing file will be used. If you want to replace an existing file delete it first! diff --git a/views/predict.haml b/views/predict.haml index 0afcc99..a776a93 100644 --- a/views/predict.haml +++ b/views/predict.haml @@ -142,14 +142,14 @@ %input{:type => 'text', :name => 'identifier', :id => 'identifier', :size => '60'} %p %label{:for=>"fileselect"} - or upload a CSV file for batch predictions + or upload a CSV or SMI file for batch predictions %br %span.btn.btn-default.btn-file - %input{:type=>"file", :name=> "fileselect", :id=>"fileselect", :autocomplete=>"off", :accept=>"text/csv"} + %input{:type=>"file", :name=> "fileselect", :id=>"fileselect", :autocomplete=>"off", :accept=>"text/csv,chemical/x-daylight-smiles"} %a.btn.btn-warning{:href => to("/help"), :rel => "external", :style=>"margin-left: 1em;"} Help %div.col-md-6 - if !@existing_datasets.blank? - %label{:for=>"storage"} or select an uploaded CSV file + %label{:for=>"storage"} or select an uploaded file #storage.storage-list - @existing_datasets.each do |id,values| %div.p2 @@ -187,7 +187,7 @@ %div{:id=>endpoint.gsub(/\s+/, "_")} %h4.head-back=endpoint - if endpoint !~ /^Oral|Mazzatorta/ - - @models.select{|m| endpoint = endpoint.gsub("(lazar)","").strip if endpoint =~ /lazar/; m.endpoint == endpoint}.each do |model| + - @models.select{|m| m.endpoint == endpoint}.each do |model| %div.row{:id => model.id,:style=>"margin-bottom:1em;"} %span.col-lg-4.col-md-4.col-sm-4.col-xs-4 %input.check{:type => "checkbox", :name => "selection[#{model.id}]", :id => "selection[#{model.species.gsub(/\s+/, "_")}]", :value => true, :disabled => false} @@ -234,7 +234,7 @@ %span.col-lg-4.col-md-4.col-sm-4.col-xs-4 %input.check{:type => "checkbox", :name => "selection[Mazzatorta]", :id => "selection[Mazzatorta]", :value => true, :disabled => false} %label{:for => "selection[Mazzatorta]"} - Rats + Rat %fieldset#bottom.well %div.row %div.col-md-2 -- cgit v1.2.3