summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgebele <gebele@in-silico.ch>2018-09-24 14:33:43 +0000
committergebele <gebele@in-silico.ch>2018-09-24 14:33:43 +0000
commit9b28faf75acb5a74ac0fac37433fbed4d1d39c7c (patch)
treeba0c5faf576933cb92455ec060430b3f56816f97
parent427d0fc4e4190f201ff6a3a1ee29c6f6bf56bc56 (diff)
allow tab separated smi files
-rw-r--r--application.rb57
-rw-r--r--batch.rb18
-rw-r--r--views/help.haml4
-rw-r--r--views/predict.haml10
4 files changed, 48 insertions, 41 deletions
diff --git a/application.rb b/application.rb
index f30361e..e6ca7c2 100644
--- a/application.rb
+++ b/application.rb
@@ -38,7 +38,6 @@ get '/predict/?' do
@existing_datasets = dataset_storage
@models = Model::Validation.all
@models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/}
- #endpoints = @models.collect{|m| m.endpoint =~ /LOAEL/ ? m.endpoint+" (lazar)" : m.endpoint}
endpoints = @models.collect{|m| m.endpoint if m.endpoint != "Mutagenicity"}.compact
endpoints << "Oral toxicity (Cramer rules)"
endpoints << "Lowest observed adverse effect level (LOAEL) (Mazzatorta)"
@@ -173,21 +172,21 @@ get '/download/dataset/:id' do
response['Content-Type'] = "text/csv"
dataset = Batch.find params[:id]
tempfile = Tempfile.new
- tempfile.write(File.read("tmp/"+dataset.name+".csv"))
+ tempfile.write(File.read(dataset.source))
tempfile.rewind
- send_file tempfile, :filename => dataset.name+".csv", :type => "text/csv", :disposition => "attachment"
+ send_file tempfile, :filename => dataset.source, :type => (dataset.source =~ /\.smi$/ ? "chemical/x-daylight-smiles" : "text/csv"), :disposition => "attachment"
end
get '/delete/dataset/:id' do
dataset = Batch.find params[:id]
dataset.delete
- File.delete File.join("tmp/"+dataset.name+".csv")
+ File.delete File.join(dataset.source)
redirect to("/")
end
get '/predict/csv/:task/:model/:filename/?' do
response['Content-Type'] = "text/csv"
- filename = params[:filename] =~ /\.csv$/ ? params[:filename].gsub(/\.csv$/,"") : params[:filename]
+ filename = params[:filename]
task = Task.find params[:task].to_s
m = Model::Validation.find params[:model].to_s unless params[:model] =~ /Cramer|Mazzatorta/
dataset = Batch.find_by(:name => filename)
@@ -218,7 +217,7 @@ get '/predict/csv/:task/:model/:filename/?' do
end
end
if params[:model] == "Mazzatorta"
- endpoint = "Lowest observed adverse effect level (LOAEL) (Rats) (Mazzatorta)"
+ endpoint = "Lowest observed adverse effect level (LOAEL) (Rat) (Mazzatorta)"
elsif params[:model] == "Cramer"
endpoint = "Oral_toxicity_(Cramer_rules)"
else
@@ -293,38 +292,32 @@ post '/predict/?' do
@filename = @dataset.name
end
if !params[:fileselect].blank?
- if params[:fileselect][:filename] !~ /\.csv$/
- bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a CSV file."
+ if params[:fileselect][:filename] !~ /\.csv$|\.smi$/
+ bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a .csv or .smi file."
end
- @filename = params[:fileselect][:filename]
- begin
- @dataset = Batch.find_by(:name => params[:fileselect][:filename].sub(/\.csv$/,""))
- if @dataset
- $logger.debug "Take file from database."
+ @filename = params[:fileselect][:filename].gsub(/\.csv$|\.smi$/,"")
+ @dataset = Batch.find_by(:name => @filename)
+ if @dataset
+ $logger.debug "Take file from database."
+ @compounds = @dataset.compounds
+ @identifiers = @dataset.identifiers
+ @ids = @dataset.ids
+ else
+ File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
+ f.write(params[:fileselect][:tempfile].read)
+ end
+ input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename])
+ $logger.debug "Processing '#{params[:fileselect][:filename]}'"
+ if input.class == OpenTox::Batch
+ @dataset = input
@compounds = @dataset.compounds
@identifiers = @dataset.identifiers
@ids = @dataset.ids
else
- File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
- f.write(params[:fileselect][:tempfile].read)
- end
- input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename])
- $logger.debug "Processing '#{params[:fileselect][:filename]}'"
- if input.class == OpenTox::Batch
- @dataset = input
- @compounds = @dataset.compounds
- @identifiers = @dataset.identifiers
- @ids = @dataset.ids
- else
- File.delete File.join("tmp", params[:fileselect][:filename])
- bad_request_error "Could not serialize file '#{@filename}'."
- end
+ File.delete File.join("tmp", params[:fileselect][:filename])
+ bad_request_error "Could not serialize file '#{@filename}'."
end
- rescue
- File.delete File.join("tmp", params[:fileselect][:filename])
- bad_request_error "Could not serialize file '#{@filename}'."
end
-
if @compounds.size == 0
message = @dataset.warnings
@dataset.delete
@@ -455,7 +448,7 @@ post '/predict/?' do
compounds = @compounds.collect{|cid| c = Compound.find cid; c.smiles}
prediction = LoaelMazzatorta.predict(compounds)
output = {}
- output["model_name"] = "Lowest observed adverse effect level (LOAEL) (Rats) (Mazzatorta)"
+ output["model_name"] = "Lowest observed adverse effect level (LOAEL) (Rat) (Mazzatorta)"
output["mazzatorta"] = []
#output["mazzatorta"] = prediction
# header
diff --git a/batch.rb b/batch.rb
index 2e72396..02fca2a 100644
--- a/batch.rb
+++ b/batch.rb
@@ -1,6 +1,10 @@
require 'csv'
require 'tempfile'
+def has_tab?(line)
+ !!(line =~ /\t/)
+end
+
module OpenTox
class Batch
@@ -24,7 +28,13 @@ module OpenTox
$logger.debug "Skipping import of #{file}, it is already in the database (id: #{batch.id})."
else
$logger.debug "Parsing #{file}."
- table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8'
+ # check delimiter
+ line = File.readlines(file).first
+ if has_tab?(line)
+ table = CSV.read file, :col_sep => "\t", :skip_blanks => true, :encoding => 'windows-1251:utf-8'
+ else
+ table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8'
+ end
batch = self.new(:source => source, :name => name, :identifiers => [], :ids => [], :compounds => [])
# original IDs
@@ -37,7 +47,11 @@ module OpenTox
feature_names = table.shift.collect{|f| f.strip}
warnings << "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size
compound_format = feature_names.shift.strip
- bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i
+ unless compound_format =~ /SMILES|InChI/i
+ File.delete file
+ bad_request_error "'#{compound_format}' is not a supported compound format in the header. " \
+ "Accepted formats: SMILES, InChI. Please take a look on the help page."
+ end
numeric = []
features = []
# guess feature types
diff --git a/views/help.haml b/views/help.haml
index 267f0ce..0e54eff 100644
--- a/views/help.haml
+++ b/views/help.haml
@@ -2,7 +2,7 @@
%h3 How to use batch prediction
%p
- You have two options to format your comma sperated spreadsheet (CSV) for batch predictions:
+ You have two options to format your comma or tab separated spreadsheet for batch predictions:
%br
%p
@@ -66,7 +66,7 @@
="..."
%br
%p
- Uploaded CSV files will be stored until you delete them. You can repeat the prediction by selecting a file in the list of uploads.
+ Uploaded files will be stored until you delete them. You can repeat the prediction by selecting a file in the list of uploads.
%br
If you upload a file with the same name as one of the existing files, the existing file will be used.
If you want to replace an existing file delete it first!
diff --git a/views/predict.haml b/views/predict.haml
index 0afcc99..a776a93 100644
--- a/views/predict.haml
+++ b/views/predict.haml
@@ -142,14 +142,14 @@
%input{:type => 'text', :name => 'identifier', :id => 'identifier', :size => '60'}
%p
%label{:for=>"fileselect"}
- or upload a CSV file for batch predictions
+ or upload a CSV or SMI file for batch predictions
%br
%span.btn.btn-default.btn-file
- %input{:type=>"file", :name=> "fileselect", :id=>"fileselect", :autocomplete=>"off", :accept=>"text/csv"}
+ %input{:type=>"file", :name=> "fileselect", :id=>"fileselect", :autocomplete=>"off", :accept=>"text/csv,chemical/x-daylight-smiles"}
%a.btn.btn-warning{:href => to("/help"), :rel => "external", :style=>"margin-left: 1em;"} Help
%div.col-md-6
- if !@existing_datasets.blank?
- %label{:for=>"storage"} or select an uploaded CSV file
+ %label{:for=>"storage"} or select an uploaded file
#storage.storage-list
- @existing_datasets.each do |id,values|
%div.p2
@@ -187,7 +187,7 @@
%div{:id=>endpoint.gsub(/\s+/, "_")}
%h4.head-back=endpoint
- if endpoint !~ /^Oral|Mazzatorta/
- - @models.select{|m| endpoint = endpoint.gsub("(lazar)","").strip if endpoint =~ /lazar/; m.endpoint == endpoint}.each do |model|
+ - @models.select{|m| m.endpoint == endpoint}.each do |model|
%div.row{:id => model.id,:style=>"margin-bottom:1em;"}
%span.col-lg-4.col-md-4.col-sm-4.col-xs-4
%input.check{:type => "checkbox", :name => "selection[#{model.id}]", :id => "selection[#{model.species.gsub(/\s+/, "_")}]", :value => true, :disabled => false}
@@ -234,7 +234,7 @@
%span.col-lg-4.col-md-4.col-sm-4.col-xs-4
%input.check{:type => "checkbox", :name => "selection[Mazzatorta]", :id => "selection[Mazzatorta]", :value => true, :disabled => false}
%label{:for => "selection[Mazzatorta]"}
- Rats
+ Rat
%fieldset#bottom.well
%div.row
%div.col-md-2