From 315db036a63defb5465178279cbc3cbffde375eb Mon Sep 17 00:00:00 2001 From: gebele Date: Tue, 10 Oct 2017 15:49:36 +0000 Subject: new batch mode with single calls --- application.rb | 321 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 194 insertions(+), 127 deletions(-) (limited to 'application.rb') diff --git a/application.rb b/application.rb index 70eecb5..a4f632a 100644 --- a/application.rb +++ b/application.rb @@ -71,10 +71,173 @@ get '/predict/dataset/:name' do csv end -get '/predict/:tmppath/:filename/?' do +get '/predict/:tmppath/:model/:filename?' do response['Content-Type'] = "text/csv" - path = "/tmp/#{params[:tmppath]}" - send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment" + path = File.join("tmp", params[:tmppath]) + `sort -gk1 #{path} -o #{path}` + + send_file path, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{params[:model]}_#{params[:filename]}", :type => "text/csv", :disposition => "attachment" +end + +get '/batch/:model/' do + + if params[:model] == "Cramer" + dataset = Dataset.find params[:dataset] + compounds = dataset.compounds.collect{|c| c.smiles} + + prediction = [Toxtree.predict(compounds, "Cramer rules"), Toxtree.predict(compounds, "Cramer rules with extensions")] + output = {} + output["model_name"] = "Oral toxicity (Cramer rules)" + output["model_type"] = false + output["model_unit"] = false + ["measurements", "converted_measurements", "prediction_value", "converted_value", "interval", "converted_interval", "probability", "db_hit", "warnings", "info", "toxtree", "sa_prediction", "sa_matches", "confidence"].each do |key| + output["#{key}"] = false + end + output["toxtree"] = true + output["cramer_rules"] = prediction.collect{|array| array.collect{|hash| hash["Cramer rules"]}}.flatten.compact + output["cramer_rules_extensions"] = prediction.collect{|array| array.collect{|hash| hash["Cramer rules, with extensions"]}}.flatten.compact + + # td paths to insert results in GUI + compound_ids = dataset.compounds.collect{|c| c.id} + output["tds"] = compound_ids.each_with_index.map{|cid,idx| "prediction_#{cid}_Cramer_#{idx}"} + + # write to file + # header + csv = "ID,Endpoint,Unique SMILES,Cramer rules,Cramer rules with extensions\n" + + compounds.each_with_index do |smiles, idx| + csv << "#{idx+1},#{output["model_name"]},#{smiles},"\ + "#{output["cramer_rules"][idx] != "nil" ? output["cramer_rules"][idx] : "none" },"\ + "#{output["cramer_rules_extensions"][idx] != "nil" ? output["cramer_rules_extensions"][idx] : "none"}\n" + end + File.open(File.join("tmp", params[:tmppath]),"a+"){|file| file.write(csv)} + + # cleanup + dataset.delete + + # return output + response['Content-Type'] = "application/json" + return JSON.pretty_generate output + + else + idx = params[:idx].to_i + compound = Compound.find params[:compound] + + model = Model::Validation.find params[:model] + prediction = model.predict(compound) + output = {} + output["model_name"] = "#{model.endpoint.gsub('_', ' ')} (#{model.species})" + output["model_type"] = model.model.class.to_s.match("Classification") ? type = "Classification" : type = "Regression" + output["model_unit"] = (type == "Regression") ? "(#{model.unit})" : "" + output["converted_model_unit"] = (type == "Regression") ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" + ["measurements", "converted_measurements", "prediction_value", "converted_value", "interval", "converted_interval", "probability", "db_hit", "warnings", "info", "toxtree", "sa_prediction", "sa_matches", "confidence"].each do |key| + output["#{key}"] = false + end + + if prediction[:value] + inApp = prediction[:neighbors] ? "yes" : "no" + inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" + if prediction[:info] =~ /\b(identical)\b/i + prediction[:info] = "This compound was part of the training dataset. All information "\ + "from this compound was removed from the training data before the "\ + "prediction, to obtain unbiased results." + end + note = "\"#{prediction[:warnings].uniq.join(" ")}" + ( prediction[:info] ? "#{prediction[:info]}\"" : "\"" ) + + output["prediction_value"] = (type == "Regression") ? "#{prediction[:value].delog10.signif(3)}" : "#{prediction[:value]}" + output["converted_value"] = "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" if type == "Regression" + + output["db_hit"] = prediction[:info] if prediction[:info] + + if prediction[:measurements].is_a?(Array) + output["measurements"] = (type == "Regression") ? prediction[:measurements].collect{|value| "#{value.delog10.signif(3)} (#{model.unit})"} : prediction[:measurements].collect{|value| "#{value}"} + output["converted_measurements"] = (type == "Regression") ? prediction[:measurements].collect{|value| "#{compound.mmol_to_mg(value.delog10).signif(3)} #{model.unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"} : false + else + output["measurements"] = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} (#{model.unit})}" : "#{prediction[:measurements]}" + output["converted_measurements"] = (type == "Regression") ? "#{compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(model.unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : false + + end #db_hit + + if type == "Regression" + + if !prediction[:prediction_interval].nil? + interval = prediction[:prediction_interval] + output['interval'] = "#{interval[1].delog10.signif(3)} - #{interval[0].delog10.signif(3)}" + output['converted_interval'] = "#{compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{compound.mmol_to_mg(interval[0].delog10).signif(3)}" + end #prediction interval + + csv = "#{idx+1},#{output['model_name']},#{output['model_type']},#{compound.smiles},"\ + "#{output['prediction_value'] != false ? output['prediction_value'] : "-"},"\ + "#{output['converted_value'] != false ? output['converted_value'] : "-"},"\ + "#{output['interval'].split(" - ").first.strip unless output['interval'] == false},"\ + "#{output['interval'].split(" - ").last.strip unless output['interval'] == false},"\ + "#{output['converted_interval'].split(" - ").first.strip unless output['converted_interval'] == false},"\ + "#{output['converted_interval'].split(" - ").last.strip unless output['converted_interval'] == false},"\ + "#{inApp},#{inT},#{note.nil? ? "" : note.chomp}\n" + else # Classification + + # consensus mutagenicity + + sa_prediction = KaziusAlerts.predict(compound.smiles) + lazar_mutagenicity = prediction + confidence = 0 + lazar_mutagenicity_val = (lazar_mutagenicity[:value] == "non-mutagenic" ? false : true) + if sa_prediction[:prediction] == false && lazar_mutagenicity_val == false + confidence = 0.85 + elsif sa_prediction[:prediction] == true && lazar_mutagenicity_val == true + confidence = 0.85 * ( 1 - sa_prediction[:error_product] ) + elsif sa_prediction[:prediction] == false && lazar_mutagenicity_val == true + confidence = 0.11 + elsif sa_prediction[:prediction] == true && lazar_mutagenicity_val == false + confidence = ( 1 - sa_prediction[:error_product] ) - 0.57 + end + output["sa_prediction"] = sa_prediction + output["sa_matches"] = sa_prediction[:matches].flatten.first unless sa_prediction[:matches].blank? + output["confidence"] = confidence.signif(3) + output["model_name"] = "Lazar #{model.endpoint.gsub('_', ' ').downcase} (#{model.species}):" + output["probability"] = prediction[:probabilities] ? prediction[:probabilities].collect{|k,v| "#{k}: #{v.signif(3)}"} : false + + csv = "#{idx+1},Consensus mutagenicity,#{compound.smiles},"\ + "#{output['sa_prediction']['prediction'] == false ? "non-mutagenic" : "mutagenic"},"\ + "#{output['confidence']},#{output['sa_matches'] != false ? "\"#{output['sa_matches']}\"" : "none"}, ,"\ + "#{output['model_type']},#{output['prediction_value']},"\ + "#{output['probability'][0] != false ? output['probability'][0].split(":").last : ""},"\ + "#{output['probability'][1] != false ? output['probability'][1].split(":").last : ""},"\ + "#{inApp},#{inT},#{note.nil? ? "" : note}\n" + + end + + output["warnings"] = prediction[:warnings] if prediction[:warnings] + + else #no prediction value + inApp = "no" + inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" + if prediction[:info] =~ /\b(identical)\b/i + prediction[:info] = "This compound was part of the training dataset. All information "\ + "from this compound was removed from the training data before the "\ + "prediction, to obtain unbiased results." + end + note = "\"#{prediction[:warnings].join(" ")}\"" + ( prediction[:info] ? "\"#{prediction[:info]}\"" : "" ) + + output["warnings"] = prediction[:warnings] + output["info"] = prediction[:info] if prediction[:info] + + if type == "Regression" + csv = "#{idx+1},#{output['model_name']},#{output['model_type']},#{compound.smiles},,,,,,,"+ [inApp,inT,note].join(",")+"\n" + else + csv = "#{idx+1},Consensus mutagenicity,#{compound.smiles},,,,,#{output['model_type']},,,,"+ [inApp,inT,note].join(",")+"\n" + end + + end #prediction value + + # write to file + File.open(File.join("tmp", params[:tmppath]),"a"){|file| file.write(csv)} + + # return output + response['Content-Type'] = "application/json" + return JSON.pretty_generate output + + end# if Cramer end post '/predict/?' do @@ -90,152 +253,56 @@ post '/predict/?' do @filename = params[:fileselect][:filename] begin input = Dataset.from_csv_file File.join("tmp", params[:fileselect][:filename]), true + $logger.debug "save dataset #{params[:fileselect][:filename]}" if input.class == OpenTox::Dataset - dataset = Dataset.find input + @dataset = Dataset.find input + @compounds = @dataset.compounds else bad_request_error "Could not serialize file '#{@filename}'." end rescue bad_request_error "Could not serialize file '#{@filename}'." end - @compounds = dataset.compounds + if @compounds.size == 0 message = dataset[:warnings] - dataset.delete + @dataset.delete bad_request_error message end - - # for csv export - @batch = {} - # for haml table - @view = {} - - @compounds.each{|c| @view[c] = []} - params[:selection].keys.each do |model_id| - model = Model::Validation.find model_id - @batch[model] = [] - @compounds.each_with_index do |compound,idx| - prediction = model.predict(compound) - @batch[model] << [compound, prediction] - @view[compound] << [model,prediction] - end - end - - @csvhash = {} - @warnings = dataset[:warnings] - dupEntries = {} - delEntries = "" - # split duplicates and deleted entries - @warnings.each do |w| - substring = w.match(/line .* of/) - unless substring.nil? - delEntries += "\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n" + @models = params[:selection].keys + @tmppaths = {} + @models.each do |model| + m = Model::Validation.find model + type = (m.regression? ? "Regression" : "Classification") unless model == "Cramer" + # add header for regression + if type == "Regression" + unit = (type == "Regression") ? "(#{m.unit})" : "" + converted_unit = (type == "Regression") ? "#{m.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" + header = "ID,Endpoint,Type,Unique SMILES,Prediction #{unit},Prediction #{converted_unit},"\ + "Interval Low #{unit},Interval High #{unit},Interval Low #{converted_unit},Interval High #{converted_unit},"\ + "inApplicabilityDomain,inTrainningSet,Note\n" end - substring = w.match(/rows .* Entries/) - unless substring.nil? - lines = [] - substring[0].split(",").each{|s| lines << s[/\d+/]} - lines.shift - lines.each{|l| dupEntries[l.to_i] = w.split(".").first} + # add header for classification + if type == "Classification" + av = m.prediction_feature.accept_values + header = "ID,Endpoint,Unique SMILES,Structural alerts prediction,Structural alerts confidence,"\ + "Structural alerts for mutagenicity,Lazar mutagenicity (Salmonella typhimurium),Type,Prediction,"\ + "predProbability #{av[0]},predProbability #{av[1]},inApplicabilityDomain,inTrainningSet,Note\n" end + path = File.join("tmp", "#{Time.now.strftime("%Y-%m-%d")}_#{SecureRandom.urlsafe_base64(5)}") + File.open(path, "w"){|f| f.write(header) if header} + @tmppaths[model] = path.split("/").last end - @batch.each_with_index do |hash, idx| - @csvhash[idx] = "" - model = hash[0] - # create header - if model.regression? - predAunit = "(#{model.unit})" - predBunit = "(#{model.unit =~ /mmol\/L/ ? "(mol/L)" : "(mg/kg_bw/day)"})" - @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction #{predAunit}\",\"Prediction #{predBunit}\",\"95% Prediction interval (low) #{predAunit}\",\"95% Prediction interval (high) #{predAunit}\",\"95% Prediction interval (low) #{predBunit}\",\"95% Prediction interval (high) #{predBunit}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n" - else #classification - av = model.prediction_feature.accept_values - probFirst = av[0].capitalize - probLast = av[1].capitalize - @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n" - end - values = hash[1] - dupEntries.keys.each{|k| values.insert(k-1, dupEntries[k])}.compact! - - values.each_with_index do |array, id| - type = (model.regression? ? "Regression" : "Classification") - endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})" - - if id == 0 - @csvhash[idx] += delEntries unless delEntries.blank? - end - unless array.kind_of? String - compound = array[0] - prediction = array[1] - smiles = compound.smiles - - if prediction[:neighbors] - if prediction[:value] - pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value] - predA = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value] - predAunit = prediction[:value].numeric? ? "(#{model.unit})" : "" - predB = prediction[:value].numeric? ? "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" : prediction[:value] - predBunit = prediction[:value].numeric? ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" - int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval]) - intervalLow = (int.nil? ? "" : "#{int[1].delog10.signif(3)}") - intervalHigh = (int.nil? ? "" : "#{int[0].delog10.signif(3)}") - intervalLowMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[1].delog10).signif(3)}") - intervalHighMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[0].delog10).signif(3)}") - inApp = "yes" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - - unless prediction[:probabilities].nil? - av = model.prediction_feature.accept_values - propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}" - propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}" - end - else - # no prediction value only one neighbor - inApp = "no" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - end - else - # no prediction value - inApp = "no" - inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no" - note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" ) - end - if @warnings - @warnings.each do |w| - note += (w.split(".").first + ".") if /\b(#{Regexp.escape(smiles)})\b/ === w - end - end - else - # string note for duplicates - endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = "" - note = array - end - if model.regression? - @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{predA}\",\"#{predB}\",\"#{intervalLow}\",\"#{intervalHigh}\",\"#{intervalLowMg}\",\"#{intervalHighMg}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n" - else - @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n" - end - end - end - t = Tempfile.new - @csvhash.each do |model, csv| - t.write(csv) - t.write("\n") - end - t.rewind - @tmppath = t.path.split("/").last - - dataset.delete File.delete File.join("tmp", params[:fileselect][:filename]) return haml :batch end + # single compound prediction # validate identifier input if !params[:identifier].blank? - @identifier = params[:identifier] + @identifier = params[:identifier].strip $logger.debug "input:#{@identifier}" # get compound from SMILES @compound = Compound.from_smiles @identifier -- cgit v1.2.3