summary | refs | log | tree | commit | diff
path: root/application.rb
diff options
context:
space:
mode:
author: gebele <gebele@in-silico.ch> 2017-09-25 11:29:26 +0000
committer: gebele <gebele@in-silico.ch> 2017-09-25 11:29:26 +0000
commit: e22513f460eeb42af5164537a7ecea9d21035cea (patch)
tree: 1a36c019ca4fce7e92c994ae9587f267b072974c /application.rb
parent: 437b78faf4bcd820ea3193da18abf6b71eda8153 (diff)
before new batch
Diffstat (limited to 'application.rb')
-rw-r--r-- application.rb 268
1 files changed, 194 insertions, 74 deletions
diff --git a/application.rb b/application.rb
index eefd833..70eecb5 100644
--- a/application.rb
+++ b/application.rb
@@ -1,38 +1,61 @@
-#require_relative 'helper.rb'
-require 'rdiscount'
include OpenTox
-configure :development do
+
+configure :production do
$logger = Logger.new(STDOUT)
+ enable :reloader
end
-helpers do
- class Numeric
- def percent_of(n)
- self.to_f / n.to_f * 100.0
- end
- end
-
+configure :development do
+ $logger = Logger.new(STDOUT)
+ enable :reloader
end
before do
@version = File.read("VERSION").chomp
end
+not_found do
+ redirect to('/predict')
+end
+
+error do
+ @error = request.env['sinatra.error']
+ haml :error
+end
+
+helpers do
+ def embedded_svg image, options={}
+ doc = Nokogiri::HTML::DocumentFragment.parse image
+ svg = doc.at_css 'svg'
+ title = doc.at_css 'title'
+ if options[:class].present?
+ svg['class'] = options[:class]
+ end
+ if options[:title].present?
+ title.children.remove
+ text_node = Nokogiri::XML::Text.new(options[:title], doc)
+ title.add_child(text_node)
+ end
+ doc.to_html.html_safe
+ end
+end
+
get '/?' do
redirect to('/predict')
end
get '/predict/?' do
- @models = OpenTox::Model::Prediction.all
+ @models = Model::Validation.all
@models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/}
@endpoints = @models.collect{|m| m.endpoint}.sort.uniq
+ @endpoints << "Oral toxicity (Cramer rules)"
@models.count <= 0 ? (haml :info) : (haml :predict)
end
get '/predict/modeldetails/:model' do
- model = OpenTox::Model::Prediction.find params[:model]
- crossvalidations = OpenTox::Validation::RepeatedCrossValidation.find(model.repeated_crossvalidation_id).crossvalidations
+ model = Model::Validation.find params[:model]
+ crossvalidations = Validation::RepeatedCrossValidation.find(model.repeated_crossvalidation_id).crossvalidations
return haml :model_details, :layout=> false, :locals => {:model => model, :crossvalidations => crossvalidations}
end
@@ -48,43 +71,10 @@ get '/predict/dataset/:name' do
csv
end
-get '/predict/?:csv?' do
+get '/predict/:tmppath/:filename/?' do
response['Content-Type'] = "text/csv"
- @csv = "\"Compound\",\"Endpoint\",\"Type\",\"Prediction\",\"95% Prediction interval\"\n"
- @@batch.each do |key, values|
- compound = key
- smiles = compound.smiles
- values.each do |array|
- model = array[0]
- type = model.model.class.to_s.match("Classification") ? "Classification" : "Regression"
- prediction = array[1]
- endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
- if prediction[:confidence] == "measured"
- if prediction[:value].is_a?(Array)
- prediction[:value].each do |value|
- pred = value.numeric? ? "#{value} (#{model.unit}), #{compound.mmol_to_mg(value.delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : value
- int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
- interval = (int.nil? ? "--" : "#{int[1].delog10} - #{int[0].delog10} (#{model.unit})")
- @csv += "\"#{smiles}\",\"#{endpoint}\",\"#{type}\",\"#{pred}\",\"#{interval}\"\n"
- end
- else
- pred = prediction[:value].numeric? ? "#{prediction[:value]} (#{model.unit}), #{compound.mmol_to_mg(prediction[:value].delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
- confidence = "measured activity"
- end
- elsif prediction[:neighbors].size > 0
- type = model.model.class.to_s.match("Classification") ? "Classification" : "Regression"
- pred = prediction[:value].numeric? ? "#{prediction[:value].delog10} (#{model.unit}), #{compound.mmol_to_mg(prediction[:value].delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
- int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
- interval = (int.nil? ? "--" : "#{int[1].delog10} - #{int[0].delog10} (#{model.unit})")
- else
- type = ""
- pred = "Not enough similar compounds in training dataset."
- interval = ""
- end
- @csv += "\"#{smiles}\",\"#{endpoint}\",\"#{type}\",\"#{pred}\",\"#{interval}\"\n" unless prediction[:value].is_a?(Array)
- end
- end
- @csv
+ path = "/tmp/#{params[:tmppath]}"
+ send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment"
end
post '/predict/?' do
@@ -92,66 +82,196 @@ post '/predict/?' do
# process batch prediction
if !params[:fileselect].blank?
if params[:fileselect][:filename] !~ /\.csv$/
- @error_report = "Please submit a csv file."
- return haml :error
+ bad_request_error "Please submit a csv file."
end
File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
f.write(params[:fileselect][:tempfile].read)
end
@filename = params[:fileselect][:filename]
begin
- input = OpenTox::Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename]), true
+ input = Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename]), true
if input.class == OpenTox::Dataset
- dataset = OpenTox::Dataset.find input
+ dataset = Dataset.find input
else
- @error_report = "Could not serialize file '#{@filename}' ."
- return haml :error
+ bad_request_error "Could not serialize file '#{@filename}'."
end
rescue
- @error_report = "Could not serialize file '#{@filename}' ."
- return haml :error
+ bad_request_error "Could not serialize file '#{@filename}'."
end
@compounds = dataset.compounds
if @compounds.size == 0
- @error_report = dataset[:warnings]
+ message = dataset[:warnings]
dataset.delete
- return haml :error
+ bad_request_error message
end
+
+ # for csv export
@batch = {}
- @compounds.each do |compound|
- @batch[compound] = []
- params[:selection].keys.each do |model_id|
- model = Model::Prediction.find model_id
+ # for haml table
+ @view = {}
+
+ @compounds.each{|c| @view[c] = []}
+ params[:selection].keys.each do |model_id|
+ model = Model::Validation.find model_id
+ @batch[model] = []
+ @compounds.each_with_index do |compound,idx|
prediction = model.predict(compound)
- @batch[compound] << [model, prediction]
+ @batch[model] << [compound, prediction]
+ @view[compound] << [model,prediction]
end
end
- @@batch = @batch
+
+ @csvhash = {}
@warnings = dataset[:warnings]
+ dupEntries = {}
+ delEntries = ""
+
+ # split duplicates and deleted entries
+ @warnings.each do |w|
+ substring = w.match(/line .* of/)
+ unless substring.nil?
+ delEntries += "\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n"
+ end
+ substring = w.match(/rows .* Entries/)
+ unless substring.nil?
+ lines = []
+ substring[0].split(",").each{|s| lines << s[/\d+/]}
+ lines.shift
+ lines.each{|l| dupEntries[l.to_i] = w.split(".").first}
+ end
+ end
+
+ @batch.each_with_index do |hash, idx|
+ @csvhash[idx] = ""
+ model = hash[0]
+ # create header
+ if model.regression?
+ predAunit = "(#{model.unit})"
+ predBunit = "(#{model.unit =~ /mmol\/L/ ? "(mol/L)" : "(mg/kg_bw/day)"})"
+ @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction #{predAunit}\",\"Prediction #{predBunit}\",\"95% Prediction interval (low) #{predAunit}\",\"95% Prediction interval (high) #{predAunit}\",\"95% Prediction interval (low) #{predBunit}\",\"95% Prediction interval (high) #{predBunit}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
+ else #classification
+ av = model.prediction_feature.accept_values
+ probFirst = av[0].capitalize
+ probLast = av[1].capitalize
+ @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
+ end
+ values = hash[1]
+ dupEntries.keys.each{|k| values.insert(k-1, dupEntries[k])}.compact!
+
+ values.each_with_index do |array, id|
+ type = (model.regression? ? "Regression" : "Classification")
+ endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
+
+ if id == 0
+ @csvhash[idx] += delEntries unless delEntries.blank?
+ end
+ unless array.kind_of? String
+ compound = array[0]
+ prediction = array[1]
+ smiles = compound.smiles
+
+ if prediction[:neighbors]
+ if prediction[:value]
+ pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
+ predA = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
+ predAunit = prediction[:value].numeric? ? "(#{model.unit})" : ""
+ predB = prediction[:value].numeric? ? "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" : prediction[:value]
+ predBunit = prediction[:value].numeric? ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : ""
+ int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
+ intervalLow = (int.nil? ? "" : "#{int[1].delog10.signif(3)}")
+ intervalHigh = (int.nil? ? "" : "#{int[0].delog10.signif(3)}")
+ intervalLowMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[1].delog10).signif(3)}")
+ intervalHighMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[0].delog10).signif(3)}")
+ inApp = "yes"
+ inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
+ note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
+
+ unless prediction[:probabilities].nil?
+ av = model.prediction_feature.accept_values
+ propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}"
+ propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}"
+ end
+ else
+ # no prediction value only one neighbor
+ inApp = "no"
+ inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
+ note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
+ end
+ else
+ # no prediction value
+ inApp = "no"
+ inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
+ note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
+ end
+ if @warnings
+ @warnings.each do |w|
+ note += (w.split(".").first + ".") if /\b(#{Regexp.escape(smiles)})\b/ === w
+ end
+ end
+ else
+ # string note for duplicates
+ endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = ""
+ note = array
+ end
+ if model.regression?
+ @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{predA}\",\"#{predB}\",\"#{intervalLow}\",\"#{intervalHigh}\",\"#{intervalLowMg}\",\"#{intervalHighMg}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
+ else
+ @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
+ end
+ end
+ end
+ t = Tempfile.new
+ @csvhash.each do |model, csv|
+ t.write(csv)
+ t.write("\n")
+ end
+ t.rewind
+ @tmppath = t.path.split("/").last
+
dataset.delete
File.delete File.join("tmp", params[:fileselect][:filename])
return haml :batch
end
# validate identifier input
- # transfered input
if !params[:identifier].blank?
@identifier = params[:identifier]
$logger.debug "input:#{@identifier}"
# get compound from SMILES
@compound = Compound.from_smiles @identifier
- if @compound.blank?
- @error_report = "'#{@identifier}' is not a valid SMILES string."
- return haml :error
- end
-
+ bad_request_error "'#{@identifier}' is not a valid SMILES string." if @compound.blank?
+
@models = []
@predictions = []
+ @toxtree = false
params[:selection].keys.each do |model_id|
- model = Model::Prediction.find model_id
- @models << model
- @predictions << model.predict(@compound)
+ if model_id == "Cramer"
+ @toxtree = true
+ @predictions << [Toxtree.predict(@compound.smiles, "Cramer rules"), Toxtree.predict(@compound.smiles, "Cramer rules with extensions")]
+ else
+ model = Model::Validation.find model_id
+ @models << model
+ if model.model.name =~ /kazius/
+ sa_prediction = KaziusAlerts.predict(@compound.smiles)
+ lazar_mutagenicity = model.predict(@compound)
+ confidence = 0
+ lazar_mutagenicity_val = (lazar_mutagenicity[:value] == "non-mutagenic" ? false : true)
+ if sa_prediction[:prediction] == false && lazar_mutagenicity_val == false
+ confidence = 0.85
+ elsif sa_prediction[:prediction] == true && lazar_mutagenicity_val == true
+ confidence = 0.85 * ( 1 - sa_prediction[:error_product] )
+ elsif sa_prediction[:prediction] == false && lazar_mutagenicity_val == true
+ confidence = 0.11
+ elsif sa_prediction[:prediction] == true && lazar_mutagenicity_val == false
+ confidence = ( 1 - sa_prediction[:error_product] ) - 0.57
+ end
+ @predictions << [lazar_mutagenicity, {:prediction => sa_prediction, :confidence => confidence}]
+ else
+ @predictions << model.predict(@compound)
+ end
+ end
end
+
haml :prediction
end
end