From 9750e0309500259e9a56e267ce87984fb5bb5e53 Mon Sep 17 00:00:00 2001 From: gebele Date: Mon, 26 Nov 2018 15:29:26 +0000 Subject: clean out; better response codes; prepare for batch --- application.rb | 504 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 330 insertions(+), 174 deletions(-) (limited to 'application.rb') diff --git a/application.rb b/application.rb index f2d2e32..dd62fb6 100644 --- a/application.rb +++ b/application.rb @@ -1,16 +1,18 @@ require 'rdiscount' require_relative 'qmrf_report.rb' +require_relative 'task.rb' +require_relative 'prediction.rb' +require_relative 'batch.rb' +require_relative 'helper.rb' include OpenTox [ - "aa.rb", "api.rb", "compound.rb", "dataset.rb", "endpoint.rb", "feature.rb", "model.rb", - "nanoparticle.rb", "report.rb", "substance.rb", "swagger.rb", @@ -18,27 +20,37 @@ include OpenTox ].each{ |f| require_relative "./lib/#{f}" } -configure :production do - $logger = Logger.new(STDOUT) - enable :reloader -end - -configure :development do +configure :production, :development do + STDOUT.sync = true $logger = Logger.new(STDOUT) + $logger.level = Logger::DEBUG enable :reloader + also_reload './helper.rb' + also_reload './prediction.rb' + also_reload './batch.rb' + [ + "api.rb", + "compound.rb", + "dataset.rb", + "endpoint.rb", + "feature.rb", + "model.rb", + "report.rb", + "substance.rb", + "swagger.rb", + "validation.rb" + ].each{ |f| also_reload "./lib/#{f}" } end before do $paths = [ "/", - "aa", "api", "compound", "dataset", "endpoint", "feature", "model", - "nanoparticle", "report", "substance", "swagger", @@ -92,21 +104,18 @@ get '/predict/modeldetails/:model' do return haml :model_details, :layout=> false, :locals => {:model => model, :crossvalidations => crossvalidations} end -# get individual compound details -get '/prediction/:neighbor/details/?' do - @compound = OpenTox::Compound.find params[:neighbor] - @smiles = @compound.smiles - begin - @names = @compound.names.nil? ? "No names for this compound available." : @compound.names - rescue - @names = "No names for this compound available." - end - @inchi = @compound.inchi.gsub("InChI=", "") - - haml :details, :layout => false +get "/predict/report/:id/?" do + prediction_model = Model::Validation.find params[:id] + bad_request_error "model with id: '#{params[:id]}' not found." unless prediction_model + report = qmrf_report params[:id] + # output + t = Tempfile.new + t << report.to_xml + name = prediction_model.species.sub(/\s/,"-")+"-"+prediction_model.endpoint.downcase.sub(/\s/,"-") + send_file t.path, :filename => "QMRF_report_#{name.gsub!(/[^0-9A-Za-z]/, '_')}.xml", :type => "application/xml", :disposition => "attachment" end -get '/jme_help/?' do +get '/predict/jme_help/?' do File.read(File.join('views','jme_help.html')) end @@ -123,190 +132,336 @@ get '/predict/:tmppath/:filename/?' do send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment" end -post '/predict/?' do +get '/predict/csv/:task/:model/:filename/?' do + response['Content-Type'] = "text/csv" + filename = params[:filename] =~ /\.csv$/ ? params[:filename].gsub(/\.csv$/,"") : params[:filename] + task = Task.find params[:task].to_s + m = Model::Validation.find params[:model].to_s + dataset = Batch.find_by(:name => filename) + @ids = dataset.ids + warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n") + unless warnings.nil? + @parse = [] + warnings.split("\n").each do |warning| + if warning =~ /^Cannot/ + smi = warning.split("SMILES compound").last.split("at").first + line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i + @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n" + end + end + keys_array = [] + warnings.split("\n").each do |warning| + if warning =~ /^Duplicate/ + text = warning.split("ID").first + numbers = warning.split("ID").last.split("and") + keys_array << numbers.collect{|n| n.strip.to_i} + end + end + @dups = {} + keys_array.each do |keys| + keys.each do |key| + @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n" + end + end + end + endpoint = "#{m.endpoint}_(#{m.species})" + tempfile = Tempfile.new + header = task.csv + lines = [] + task.predictions[params[:model]].each_with_index do |hash,idx| + identifier = hash.keys[0] + prediction_id = hash.values[0] + # add duplicate warning at the end of a line if ID matches + if @dups && @dups[idx+1] + if prediction_id.is_a? BSON::ObjectId + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + end + end + else + if prediction_id.is_a? BSON::ObjectId + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}" + end + else + if @ids.blank? + lines << "#{idx+1},#{identifier},#{p}\n" + else + lines << "#{idx+1},#{@ids[idx]}#{identifier},#{p}\n" + end + end + end + end + (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join("")) + #tempfile.write(header+lines.join("")) + tempfile.rewind + send_file tempfile, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{endpoint}_#{filename}.csv", :type => "text/csv", :disposition => "attachment" +end +post '/predict/?' do # process batch prediction if !params[:fileselect].blank? + next if params[:fileselect][:filename] !~ /\.csv$/ - bad_request_error "Please submit a csv file." - end - File.open('tmp/' + params[:fileselect][:filename], "w") do |f| - f.write(params[:fileselect][:tempfile].read) + bad_request_error "Wrong file extension for '#{params[:fileselect][:filename]}'. Please upload a CSV file." end @filename = params[:fileselect][:filename] begin - input = OpenTox::Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename]), true - if input.class == OpenTox::Dataset - dataset = OpenTox::Dataset.find input + File.open('tmp/' + params[:fileselect][:filename], "w") do |f| + f.write(params[:fileselect][:tempfile].read) + end + input = Batch.from_csv_file File.join("tmp", params[:fileselect][:filename]) + $logger.debug "Processing '#{params[:fileselect][:filename]}'" + if input.class == OpenTox::Batch + @dataset = input + @compounds = @dataset.compounds + @identifiers = @dataset.identifiers + @ids = @dataset.ids else + File.delete File.join("tmp", params[:fileselect][:filename]) bad_request_error "Could not serialize file '#{@filename}'." end rescue + File.delete File.join("tmp", params[:fileselect][:filename]) bad_request_error "Could not serialize file '#{@filename}'." end - @compounds = dataset.compounds + if @compounds.size == 0 - message = dataset[:warnings] - dataset.delete + message = @dataset.warnings + @dataset.delete bad_request_error message end - - # for csv export - @batch = {} - # for haml table - @view = {} - - @compounds.each{|c| @view[c] = []} - params[:selection].keys.each do |model_id| - model = OpenTox::Model::Validation.find model_id - @batch[model] = [] - @compounds.each_with_index do |compound,idx| - prediction = model.predict(compound) - @batch[model] << [compound, prediction] - @view[compound] << [model,prediction] - end - end - - @csvhash = {} - @warnings = dataset[:warnings] - dupEntries = {} - delEntries = "" - - # split duplicates and deleted entries - @warnings.each do |w| - substring = w.match(/line .* of/) - unless substring.nil? - delEntries += "\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n" - end - substring = w.match(/rows .* Entries/) - unless substring.nil? - lines = [] - substring[0].split(",").each{|s| lines << s[/\d+/]} - lines.shift - lines.each{|l| dupEntries[l.to_i] = w.split(".").first} - end - end - - @batch.each_with_index do |hash, idx| - @csvhash[idx] = "" - model = hash[0] - # create header - if model.regression? - predAunit = "(#{model.unit})" - predBunit = "(#{model.unit =~ /mmol\/L/ ? "(mol/L)" : "(mg/kg_bw/day)"})" - @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction #{predAunit}\",\"Prediction #{predBunit}\",\"95% Prediction interval (low) #{predAunit}\",\"95% Prediction interval (high) #{predAunit}\",\"95% Prediction interval (low) #{predBunit}\",\"95% Prediction interval (high) #{predBunit}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n" - else #classification - av = model.prediction_feature.accept_values - probFirst = av[0].capitalize - probLast = av[1].capitalize - @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n" - end - values = hash[1] - dupEntries.keys.each{|k| values.insert(k-1, dupEntries[k])}.compact! - - values.each_with_index do |array, id| - type = (model.regression? ? "Regression" : "Classification") - endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})" - if id == 0 - @csvhash[idx] += delEntries unless delEntries.blank? + @models = params[:selection].keys + # for single predictions in batch + @tasks = [] + @models.each{|m| t = Task.new; t.save; @tasks << t} + @predictions = {} + task = Task.run do + @models.each_with_index do |model,idx| + t = @tasks[idx] + m = Model::Validation.find model + type = (m.regression? ? "Regression" : "Classification") + # add header for regression + if type == "Regression" + unit = (type == "Regression") ? "(#{m.unit})" : "" + converted_unit = (type == "Regression") ? "#{m.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" + if @ids.blank? + header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\ + "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\ + "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\ + "inApplicabilityDomain,Note\n" + else + header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\ + "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\ + "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\ + "inApplicabilityDomain,Note\n" + end end - unless array.kind_of? String - compound = array[0] - prediction = array[1] - smiles = compound.smiles - - if prediction[:neighbors] - if prediction[:value] - pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value] - predA = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value] - predAunit = prediction[:value].numeric? ? "(#{model.unit})" : "" - predB = prediction[:value].numeric? ? "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" : prediction[:value] - predBunit = prediction[:value].numeric? ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" - int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval]) - intervalLow = (int.nil? ? "" : "#{int[1].delog10.signif(3)}") - intervalHigh = (int.nil? ? "" : "#{int[0].delog10.signif(3)}") - intervalLowMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[1].delog10).signif(3)}") - intervalHighMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[0].delog10).signif(3)}") - inApp = "yes" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - - unless prediction[:probabilities].nil? - av = model.prediction_feature.accept_values - propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}" - propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}" - end - else - # no prediction value only one neighbor - inApp = "no" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - end + # add header for classification + if type == "Classification" + av = m.prediction_feature.accept_values + if @ids.blank? + header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\ + "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n" else - # no prediction value - inApp = "no" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) + header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Prediction,"\ + "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,Note\n" end - if @warnings - @warnings.each do |w| - note += (w.split(".").first + ".") if /\b(#{Regexp.escape(smiles)})\b/ === w + end + # predict compounds + p = 100.0/@compounds.size + counter = 1 + predictions = [] + @compounds.each_with_index do |cid,idx| + compound = Compound.find cid + if Prediction.where(compound: compound.id, model: m.id).exists? + prediction_object = Prediction.find_by(compound: compound.id, model: m.id) + prediction = prediction_object.prediction + prediction_id = prediction_object.id + # in case prediction object was created by single prediction + if prediction_object.csv.blank? + prediction_object[:csv] = prediction_to_csv(m,compound,prediction) + prediction_object.save + end + # identifier + identifier = @identifiers[idx] + else + prediction = m.predict(compound) + # save prediction object + prediction_object = Prediction.new + prediction_id = prediction_object.id + prediction_object[:compound] = compound.id + prediction_object[:model] = m.id + # add additionally fields for html representation + unless prediction[:value].blank? || type == "Classification" + prediction[:prediction_value] = "#{prediction[:value].delog10.signif(3)} #{unit}" + prediction["converted_prediction_value"] = "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)} #{converted_unit}" + end + unless prediction[:prediction_interval].blank? + interval = prediction[:prediction_interval] + prediction[:interval] = "#{interval[1].delog10.signif(3)} - #{interval[0].delog10.signif(3)} #{unit}" + prediction[:converted_interval] = "#{compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{compound.mmol_to_mg(interval[0].delog10).signif(3)} #{converted_unit}" end + prediction["unit"] = unit + prediction["converted_unit"] = converted_unit + if prediction[:measurements].is_a?(Array) + prediction["measurements_string"] = (type == "Regression") ? prediction[:measurements].collect{|value| "#{value.delog10.signif(3)} #{unit}"} : prediction[:measurements].join("
") + prediction["converted_measurements"] = prediction[:measurements].collect{|value| "#{compound.mmol_to_mg(value.delog10).signif(3)} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"} if type == "Regression" + else + output["measurements_string"] = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} #{unit}}" : prediction[:measurements] + output["converted_measurements"] = "#{compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if type == "Regression" + end + + # store in prediction_object + prediction_object[:prediction] = prediction + prediction_object[:csv] = prediction_to_csv(m,compound,prediction) + prediction_object.save + + # identifier + identifier = @identifiers[idx] end - else - # string note for duplicates - endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = "" - note = array + # collect prediction_object ids with identifier + predictions << {identifier => prediction_id} + t.update_percent((counter*p).ceil > 100 ? 100 : (counter*p).ceil) + counter += 1 end - if model.regression? - @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{predA}\",\"#{predB}\",\"#{intervalLow}\",\"#{intervalHigh}\",\"#{intervalLowMg}\",\"#{intervalHighMg}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n" + # write csv + t[:csv] = header + # write predictions + @predictions["#{model}"] = predictions + # save task + # append predictions as last action otherwise they won't save + # mongoid works with shallow copy via #dup + t[:predictions] = @predictions + t.save + end#models + + end#main task + @pid = task.pid + + #@dataset.delete + #File.delete File.join("tmp", params[:fileselect][:filename]) + return haml :batch + else + # single compound prediction + # validate identifier input + if !params[:identifier].blank? + @identifier = params[:identifier].strip + $logger.debug "input:#{@identifier}" + # get compound from SMILES + @compound = Compound.from_smiles @identifier + bad_request_error "'#{@identifier}' is not a valid SMILES string." if @compound.blank? + + @models = [] + @predictions = [] + @toxtree = false + params[:selection].keys.each do |model_id| + model = Model::Validation.find model_id + @models << model + if Prediction.where(compound: @compound.id, model: model.id).exists? + prediction_object = Prediction.find_by(compound: @compound.id, model: model.id) + prediction = prediction_object.prediction + @predictions << prediction else - @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n" + prediction_object = Prediction.new + prediction = model.predict(@compound) + prediction_object[:compound] = @compound.id + prediction_object[:model] = model.id + prediction_object[:prediction] = prediction + prediction_object.save + @predictions << prediction end end - end - t = Tempfile.new - @csvhash.each do |model, csv| - t.write(csv) - t.write("\n") - end - t.rewind - @tmppath = t.path.split("/").last - dataset.delete - File.delete File.join("tmp", params[:fileselect][:filename]) - return haml :batch + haml :prediction + end end +end - # validate identifier input - if !params[:identifier].blank? - @identifier = params[:identifier] - $logger.debug "input:#{@identifier}" - # get compound from SMILES - @compound = Compound.from_smiles @identifier - bad_request_error "'#{@identifier}' is not a valid SMILES string." if @compound.blank? +get '/prediction/task/?' do + if params[:turi] + task = Task.find(params[:turi].to_s) + return JSON.pretty_generate(:percent => task.percent) + elsif params[:predictions] + task = Task.find(params[:predictions]) + pageSize = params[:pageSize].to_i - 1 + pageNumber= params[:pageNumber].to_i - 1 + predictions = task.predictions[params[:model]].collect{|hash| hash.values[0]} + prediction_object = Prediction.find predictions[pageNumber] + prediction = prediction_object.prediction + compound = Compound.find prediction_object.compound + model = Model::Validation.find prediction_object.model + image = compound.svg + smiles = compound.smiles + type = (model.regression? ? "Regression" : "Classification") + html = "" + html += "" + string = "" + html += "#{string}
#{image}
#{smiles}
" + sorter = [] + if prediction[:info] + prediction[:info] = "This compound was part of the training dataset. All information from this compound was "\ + "removed from the training data before the prediction to obtain unbiased results." + sorter << {"Info" => prediction[:info]} + if prediction["measurements_string"].kind_of?(Array) + sorter << {"Measured activity" => "#{prediction["measurements_string"].join(";")}
#{prediction["converted_measurements"].join(";")}"} + else + sorter << {"Measured activity" => "#{prediction["measurements_string"]}
#{prediction["converted_measurements"]}"} + end + end - @models = [] - @predictions = [] - params[:selection].keys.each do |model_id| - model = OpenTox::Model::Validation.find model_id - @models << model - @predictions << model.predict(@compound) + # regression + if prediction[:value] && type == "Regression" + sorter << {"Prediction" => "#{prediction["prediction_value"]}
#{prediction["converted_prediction_value"]}"} + sorter << {"95% Prediction interval" => "#{prediction[:interval]}
#{prediction["converted_interval"]}"} + sorter << {"Warnings" => prediction[:warnings].join("
")} + elsif !prediction[:value] && type == "Regression" + sorter << {"Prediction" => ""} + sorter << {"95% Prediction interval" => ""} + sorter << {"Warnings" => prediction[:warnings].join("
")} + # classification + elsif prediction[:value] && type == "Classification" + sorter << {"Prediction" => prediction[:value]} + sorter << {"Probability" => prediction[:probabilities].collect{|k,v| "#{k}: #{v.signif(3)}"}.join("
")} + elsif !prediction[:value] && type == "Classification" + sorter << {"Prediction" => ""} + sorter << {"Probability" => ""} + #else + sorter << {"Warnings" => prediction[:warnings].join("
")} + end + sorter.each_with_index do |hash,idx| + k = hash.keys[0] + v = hash.values[0] + string += (idx == 0 ? "" : "")+(k =~ /lazar/i ? "" end - haml :prediction + string += "
" : "") + # keyword + string += "#{k}:" + string += "" + # values + string += "#{v}" + string += "
" + return JSON.pretty_generate(:prediction => [html]) end end -get "/report/:id/?" do - prediction_model = Model::Validation.find params[:id] - bad_request_error "model with id: '#{params[:id]}' not found." unless prediction_model - report = qmrf_report params[:id] - # output - t = Tempfile.new - t << report.to_xml - name = prediction_model.species.sub(/\s/,"-")+"-"+prediction_model.endpoint.downcase.sub(/\s/,"-") - send_file t.path, :filename => "QMRF_report_#{name.gsub!(/[^0-9A-Za-z]/, '_')}.xml", :type => "application/xml", :disposition => "attachment" +# get individual compound details +get '/prediction/:neighbor/details/?' do + @compound = OpenTox::Compound.find params[:neighbor] + @smiles = @compound.smiles + begin + @names = @compound.names.nil? ? "No names for this compound available." : @compound.names + rescue + @names = "No names for this compound available." + end + @inchi = @compound.inchi.gsub("InChI=", "") + + haml :details, :layout => false end get '/license' do @@ -314,7 +469,7 @@ get '/license' do haml :license, :layout => false end -get '/faq' do +get '/predict/faq' do @faq = RDiscount.new(File.read("FAQ.md")).to_html haml :faq#, :layout => false end @@ -324,6 +479,7 @@ get '/style.css' do scss :style end +# for swagger representation get '/swagger-ui.css' do headers 'Content-Type' => 'text/css; charset=utf-8' scss :style -- cgit v1.2.3