# Sinatra front end for lazar toxicity predictions.
#
# Routes defined here:
#   GET  /predict                      model selection / prediction form
#   GET  /predict/modeldetails/:model  validation details for one model
#   GET  /prediction/:neighbor/details details for a single compound
#   GET  /predict/dataset/:name        training dataset as CSV
#   GET  /predict/:tmppath/:filename   download of a batch prediction CSV
#   POST /predict                      single (SMILES) or batch (CSV upload) prediction
#   GET  /report/:id                   QMRF report (XML) for a model
#   GET  /license, /faq, /style.css, /jme_help

#require_relative 'helper.rb'
require 'rdiscount'
include OpenTox

# Production and development use the same logger and reloader setup.
configure :production, :development do
  $logger = Logger.new(STDOUT)
  enable :reloader
end

helpers do
  class Numeric
    # Returns self as a percentage of n (both converted to Float).
    def percent_of(n)
      self.to_f / n.to_f * 100.0
    end
  end
end

# Make the application version (contents of the VERSION file) available to every request.
before do
  @version = File.read("VERSION").chomp
end

# Unknown URLs fall back to the prediction form.
not_found do
  redirect to('/predict')
end

get '/?' do
  redirect to('/predict')
end

# Prediction form; renders the info page instead when no models are available.
get '/predict/?' do
  @models = OpenTox::Model::Validation.all
  # Models whose name contains "Net cell association" are not offered in the form.
  @models = @models.delete_if { |m| m.model.name =~ /\b(Net cell association)\b/ }
  @endpoints = @models.collect { |m| m.endpoint }.sort.uniq
  @models.count <= 0 ? (haml :info) : (haml :predict)
end

# Validation details (repeated crossvalidations) for a single model, rendered without layout.
get '/predict/modeldetails/:model' do
  model = OpenTox::Model::Validation.find params[:model]
  crossvalidations = OpenTox::Validation::RepeatedCrossValidation.find(model.repeated_crossvalidation_id).crossvalidations

  haml :model_details, :layout => false, :locals => { :model => model, :crossvalidations => crossvalidations }
end

# get individual compound details
get '/prediction/:neighbor/details/?' do
  no_names = "No names for this compound available."
  @compound = OpenTox::Compound.find params[:neighbor]
  @smiles = @compound.smiles
  # Name lookup is best effort: any failure is shown as "no names available".
  @names = begin
    @compound.names || no_names
  rescue
    no_names
  end
  @inchi = @compound.inchi.gsub("InChI=", "")

  haml :details, :layout => false
end

get '/jme_help/?' do
  File.read(File.join('views', 'jme_help.html'))
end

# Dataset with the given name, serialized as CSV.
get '/predict/dataset/:name' do
  response['Content-Type'] = "text/csv"
  dataset = Dataset.find_by(:name => params[:name])
  dataset.to_csv
end

# Download of a batch prediction CSV that POST /predict wrote to a temp file.
# :tmppath is the temp file's base name, :filename the name of the uploaded CSV.
get '/predict/:tmppath/:filename/?' do
  response['Content-Type'] = "text/csv"
  # Only the base name is used so that encoded path separators in the request
  # cannot point outside of /tmp.
  path = File.join("/tmp", File.basename(params[:tmppath].to_s))
  send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment"
end

# Runs predictions for the selected models, either for an uploaded CSV file
# (params[:fileselect], batch mode) or for a single SMILES string
# (params[:identifier]). Batch mode takes precedence when both are given.
post '/predict/?' do
  # Ids of the models chosen in the form; empty when nothing was selected.
  selected_models = (params[:selection] || {}).keys
  # Formats one CSV line with every field wrapped in double quotes (no escaping).
  csv_line = lambda { |fields| fields.map { |field| "\"#{field}\"" }.join(",") + "\n" }

  # process batch prediction
  if !params[:fileselect].blank?
    # Strip any directory part sent by the client before touching the file system.
    upload_name = File.basename(params[:fileselect][:filename].to_s)
    if upload_name !~ /\.csv\z/
      @error_report = "Please submit a csv file."
      return haml :error
    end
    if selected_models.empty?
      @error_report = "Please select at least one model."
      return haml :error
    end
    upload_path = File.join("tmp", upload_name)
    File.open(upload_path, "w") do |f|
      f.write(params[:fileselect][:tempfile].read)
    end
    @filename = upload_name
    begin
      input = OpenTox::Dataset.from_csv_file upload_path, true
      if input.class == OpenTox::Dataset
        dataset = OpenTox::Dataset.find input
      else
        @error_report = "Could not serialize file '#{@filename}' ."
        return haml :error
      end
    rescue
      @error_report = "Could not serialize file '#{@filename}' ."
      return haml :error
    end
    @compounds = dataset.compounds
    if @compounds.size == 0
      @error_report = dataset[:warnings]
      dataset.delete
      return haml :error
    end

    # for csv export
    @batch = {}
    # for haml table
    @view = {}

    @compounds.each { |c| @view[c] = [] }
    selected_models.each do |model_id|
      model = OpenTox::Model::Validation.find model_id
      @batch[model] = []
      @compounds.each do |compound|
        prediction = model.predict(compound)
        @batch[model] << [compound, prediction]
        @view[compound] << [model, prediction]
      end
    end

    @csvhash = {}
    @warnings = dataset[:warnings]
    dup_entries = {}   # row number => duplicate note
    del_entries = ""   # quoted CSV lines for entries reported as "line ... of"

    # split duplicates and deleted entries
    (@warnings || []).each do |w|
      unless w.match(/line .* of/).nil?
        del_entries += "\"#{w.sub(/\b(tmp\/)\b/, "")}\"\n"
      end
      rows = w.match(/rows .* Entries/)
      unless rows.nil?
        numbers = rows[0].split(",").map { |s| s[/\d+/] }
        # The first row number is dropped; only the remaining ones get a note.
        numbers.shift
        numbers.each { |l| dup_entries[l.to_i] = w.split(".").first }
      end
    end

    @batch.each_with_index do |(model, values), idx|
      # create header
      if model.regression?
        pred_a_unit = "(#{model.unit})"
        # Second prediction column holds the mmol_to_mg converted value.
        pred_b_unit = model.unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"
        @csvhash[idx] = csv_line.call([
          "ID", "Endpoint", "Type", "Unique SMILES",
          "Prediction #{pred_a_unit}", "Prediction #{pred_b_unit}",
          "95% Prediction interval (low) #{pred_a_unit}", "95% Prediction interval (high) #{pred_a_unit}",
          "95% Prediction interval (low) #{pred_b_unit}", "95% Prediction interval (high) #{pred_b_unit}",
          "inApplicabilityDomain", "inTrainningSet", "Note"
        ])
      else # classification
        av = model.prediction_feature.accept_values
        @csvhash[idx] = csv_line.call([
          "ID", "Endpoint", "Type", "Unique SMILES", "Prediction",
          "predProbability#{av[0].capitalize}", "predProbability#{av[1].capitalize}",
          "inApplicabilityDomain", "inTrainningSet", "Note"
        ])
      end

      # Put the duplicate notes back at their row positions (mutates @batch[model]).
      # NOTE(review): an index beyond the end pads `values` with nil, which the
      # row loop below does not handle - confirm whether nils should be removed.
      dup_entries.each { |row, text| values.insert(row - 1, text) }

      values.each_with_index do |array, id|
        type = (model.regression? ? "Regression" : "Classification")
        endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"

        # Deleted-entry lines are written once, right after the header.
        @csvhash[idx] += del_entries if id == 0 && !del_entries.blank?

        if array.kind_of? String
          # string note for duplicates
          endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = ""
          note = array
        else
          compound = array[0]
          prediction = array[1]
          smiles = compound.smiles
          value = prediction[:value]
          # Without neighbors or without a value there is no prediction to report.
          predicted = prediction[:neighbors] && value

          inApp = predicted ? "yes" : "no"
          inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
          note = prediction[:warnings].join("\n") + (prediction[:info] ? prediction[:info].sub(/\'.*\'/, "") : "\n")

          if predicted
            pred = value.numeric? ? "#{value.delog10.signif(3)}" : value
            predA = pred
            predB = value.numeric? ? "#{compound.mmol_to_mg(value.delog10).signif(3)}" : value
            interval = prediction[:prediction_interval]
            # Low is taken from interval[1] and high from interval[0]
            # (presumably because the values are on a -log10 scale; verify).
            intervalLow = (interval.nil? ? "" : "#{interval[1].delog10.signif(3)}")
            intervalHigh = (interval.nil? ? "" : "#{interval[0].delog10.signif(3)}")
            intervalLowMg = (interval.nil? ? "" : "#{compound.mmol_to_mg(interval[1].delog10).signif(3)}")
            intervalHighMg = (interval.nil? ? "" : "#{compound.mmol_to_mg(interval[0].delog10).signif(3)}")

            unless prediction[:probabilities].nil?
              av = model.prediction_feature.accept_values
              propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}"
              propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}"
            end
          end

          # Append the first sentence of every dataset warning mentioning this SMILES.
          if @warnings
            @warnings.each do |w|
              note += (w.split(".").first + ".") if /\b(#{Regexp.escape(smiles)})\b/ === w
            end
          end
        end

        # Variables not assigned above are nil here and render as empty fields.
        if model.regression?
          @csvhash[idx] += csv_line.call([id + 1, endpoint, type, smiles, predA, predB, intervalLow, intervalHigh, intervalLowMg, intervalHighMg, inApp, inT, note.chomp])
        else
          @csvhash[idx] += csv_line.call([id + 1, endpoint, type, smiles, pred, propA, propB, inApp, inT, note.chomp])
        end
      end
    end

    # One CSV section per model, separated by an empty line.
    # NOTE(review): Tempfile removes its file when the object is garbage
    # collected, so the download link may stop working - confirm this is acceptable.
    t = Tempfile.new
    @csvhash.each do |_idx, csv|
      t.write(csv)
      t.write("\n")
    end
    t.rewind
    @tmppath = File.basename(t.path)

    dataset.delete
    File.delete upload_path
    return haml :batch
  end

  # validate identifier input
  if !params[:identifier].blank?
    @identifier = params[:identifier]
    $logger.debug "input:#{@identifier}"
    # get compound from SMILES
    @compound = Compound.from_smiles @identifier
    if @compound.blank?
      @error_report = "'#{@identifier}' is not a valid SMILES string."
      return haml :error
    end
    if selected_models.empty?
      @error_report = "Please select at least one model."
      return haml :error
    end

    @models = []
    @predictions = []
    selected_models.each do |model_id|
      model = OpenTox::Model::Validation.find model_id
      @models << model
      @predictions << model.predict(@compound)
    end
    haml :prediction
  end
end

# Builds a QMRF report for the validated model with the given id and sends it
# as an XML attachment. The numbers in the comments are QMRF section numbers.
get "/report/:id/?" do
  # Directory two levels above the path printed by `gem path lazar`.
  lazarpath = `gem path lazar`
  lazarpath = File.dirname lazarpath
  lazarpath = File.dirname lazarpath
  prediction_model = Model::Validation.find params[:id]
  model = prediction_model.model

  # Link to the exact commit when that directory exists, otherwise to the release tag.
  if File.directory?(lazarpath)
    lazar_commit = `cd #{lazarpath}; git rev-parse HEAD`.strip
    lazar_commit = "https://github.com/opentox/lazar/tree/#{lazar_commit}"
  else
    lazar_commit = "https://github.com/opentox/lazar/releases/tag/v#{Gem.loaded_specs["lazar"].version}"
  end

  report = OpenTox::QMRFReport.new

  # QSAR Identifier Title 1.1
  report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint.downcase}"

  # Software coding the model 1.3
  report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships. See #{lazar_commit}", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
  report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware

  # Date of QMRF 2.1
  report.value "qmrf_date", Time.now.strftime('%d %B %Y')

  # QMRF author(s) and contact details 2.2
  report.change_catalog :authors_catalog, :firstauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"}
  report.ref_catalog :qmrf_authors, :authors_catalog, :firstauthor

  # Model developer(s) and contact details 2.5
  report.change_catalog :authors_catalog, :modelauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"}
  report.ref_catalog :model_authors, :authors_catalog, :modelauthor

  # Date of model development and/or publication 2.6
  report.value "model_date", Time.parse(model.created_at.to_s).strftime('%Y')

  # Reference(s) to main scientific papers and/or software package 2.7
  report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz A., Guetlein M., Rautenberg M., Vorgrimmler D., Gebele D. and Helma C. (2013), lazar: a modular predictive toxicology framework  ", :number => "1", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"}
  report.ref_catalog :references, :publications_catalog, :publications_catalog_1
  report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Helma C, Gebele D, Rautenberg M (2017) lazar, software available at https://lazar.in-silico.ch,source code available at #{lazar_commit}", :number => "2", :url => "https://doi.org/10.5281/zenodo.215483"}
  report.ref_catalog :references, :publications_catalog, :publications_catalog_2

  # Availability of information about the model 2.8
  report.value "info_availability", "Prediction interface and validation results available at https://lazar.in-silico.ch"

  # Species 3.1
  report.value "model_species", prediction_model.species

  # Endpoint 3.2
  report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.qmrf["name"], :group => "#{prediction_model.qmrf["group"]}"}
  report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1

  # Endpoint Units 3.4
  report.value "endpoint_units", "#{prediction_model.unit}"

  # Dependent variable 3.5
  report.value "endpoint_variable", "#{prediction_model.endpoint} #{prediction_model.regression? ? "regression" : "classification"}"

  # Type of model 4.1
  # Class name with the "OpenTox::Model::Lazar" prefix removed.
  model_type = model.class.to_s.gsub('OpenTox::Model::Lazar', '')
  report.value "algorithm_type", "#{model_type}"

  # Explicit algorithm 4.2
  report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1
  report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "", :description => "modified k-nearest neighbor #{model_type.downcase} (#{model_type =~ /regression/i ? "local random forest" : "weighted majority vote"}), see #{lazar_commit}" }

  # Descriptors in the model 4.3
  if model.algorithms["descriptors"][:type]
    report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "Molprint 2D (Bender et al. 2004)", :name => "#{model.algorithms["descriptors"][:type]} fingerprints", :publication_ref => "", :units => ""}
    report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1
  end

  # Descriptor selection 4.4
  report.value "descriptors_selection", (model.class == OpenTox::Model::LazarRegression ? "Correlation with dependent variable (Pearson p <= 0.05)" : "none")

  # Algorithm and descriptor generation 4.5
  report.value "descriptors_generation", "lazar"

  # Software name and version for descriptor generation 4.6
  report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "2", :url => "", :contact => ""}
  report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2

  # Chemicals/Descriptors ratio 4.7
  report.value "descriptors_chemicals_ratio", (model.class == OpenTox::Model::LazarRegression ? "variable (local regression models)" : "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)")

  # Description of the applicability domain of the model 5.1
  report.value "app_domain_description",
    "\n\nNo predictions are made for query compounds without similar structures in the training data. Similarity is determined as the Tanimoto coefficient of Molprint 2D fingerprints with a threshold of 0.1." \
    "\n\nPredictions based on a low number and/or very dissimilar neighbors or on neighbors with conflicting experimental measurements should be treated with caution.\n\n"

  # Method used to assess the applicability domain 5.2
  report.value "app_domain_method", "Number and similarity of training set compounds (part of the main lazar algorithm)"

  # Software name and version for applicability domain assessment 5.3
  report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "3", :url => "", :contact => ""}
  report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3

  # Limits of applicability 5.4
  report.value "applicability_limits", "Compounds without similar substances in the training dataset"

  # Availability of the training set 6.1
  report.change_attributes "training_set_availability", {:answer => "Yes"}

  # Available information for the training set 6.2
  report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"}

  # Data for each descriptor variable for the training set 6.3
  report.change_attributes "training_set_descriptors", {:answer => "on demand"}

  # Data for the dependent variable for the training set 6.4
  report.change_attributes "dependent_var_availability", {:answer => "Yes"}

  # Other information about the training set 6.5
  report.value "other_info", "Original data from: #{prediction_model.source}"

  # Pre-processing of data before modelling 6.6
  report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none")

  # Robustness - Statistics obtained by leave-many-out cross-validation 6.9
  if prediction_model.repeated_crossvalidation
    crossvalidations = prediction_model.crossvalidations
    block = ""
    crossvalidations.each do |cv|
      block += "\n\nNum folds: #{cv.folds}" \
               "\n\nNum instances: #{cv.nr_instances}" \
               "\n\nNum unpredicted: #{cv.nr_unpredicted}\n\n"
      if model_type =~ /classification/i
        block += "\n\nAccuracy: #{cv.accuracy.signif(3)}" \
                 "\n\nWeighted accuracy: #{cv.weighted_accuracy.signif(3)}" \
                 "\n\nTrue positive rate: #{cv.true_rate[cv.accept_values[0]].signif(3)}" \
                 "\n\nTrue negative rate: #{cv.true_rate[cv.accept_values[1]].signif(3)}" \
                 "\n\nPositive predictive value: #{cv.predictivity[cv.accept_values[0]].signif(3)}" \
                 "\n\nNegative predictive value: #{cv.predictivity[cv.accept_values[1]].signif(3)}\n\n"
      end
      if model_type =~ /regression/i
        block += "\n\nRMSE: #{cv.rmse.signif(3)}" \
                 "\n\nMAE: #{cv.mae.signif(3)}" \
                 "\n\nR2: #{cv.r_squared.signif(3)}\n\n"
      end
      block += "\n\n"
    end
    report.value "lmo", "3 independent 10-fold crossvalidations:" + block + ""
  end

  # Availability of the external validation set 7.1
  report.change_attributes "validation_set_availability", {:answer => "No"}

  # Available information for the external validation set 7.2
  report.change_attributes "validation_set_data", {:cas => "", :chemname => "", :formula => "", :inchi => "", :mol => "", :smiles => ""}

  # Data for each descriptor variable for the external validation set 7.3
  report.change_attributes "validation_set_descriptors", {:answer => "Unknown"}

  # Data for the dependent variable for the external validation set 7.4
  report.change_attributes "validation_dependent_var_availability", {:answer => "Unknown"}

  # Mechanistic basis of the model 8.1
  report.value "mechanistic_basis", "\n\nCompounds with similar structures (neighbors) are assumed to have similar activities as the query compound.\n\n"

  # A priori or a posteriori mechanistic interpretation 8.2
  report.value "mechanistic_basis_comments", "A posteriori for individual predictions"

  # Other information about the mechanistic interpretation 8.3
  report.value "mechanistic_basis_info",
    "\n\nHypothesis about biochemical mechanisms can be derived from individual predictions by inspecting neighbors and relevant descriptors." \
    "\n\nNeighbors are compounds that are similar in respect to a certain endpoint and it is likely that compounds with high similarity act by similar mechanisms as the query compound. Links at the webinterface prove an easy access to additional experimental data and literature citations for the neighbors and the query structure." \
    "\n\nPlease note that lazar predictions are based on neighbors. Descriptors are only used for the calculation of similarities.\n\n"

  # Comments 9.1
  report.value "comments",
    "\n\nPublic model interface: https://lazar.in-silico.ch" \
    "\n\nSource code: #{lazar_commit}" \
    "\n\nDocker image: https://hub.docker.com/r/insilicotox/lazar/\n\n"

  # Bibliography 9.2
  # NOTE(review): ids publications_catalog_1 and _2 were already set for
  # section 2.7 above; confirm that changing them again here is intended.
  report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Helma (2017), Nano-Lazar: Read across Predictions for Nanoparticle Toxicities with Calculated and Measured Properties", :url => "https://dx.doi.org/10.3389%2Ffphar.2017.00377"}
  report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Lo Piparo (2014), Automated and reproducible read-across like models for predicting carcinogenic potency", :url => "https://doi.org/10.1016/j.yrtph.2014.07.010"}
  report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"}
  report.change_catalog :publications_catalog, :publications_catalog_4, {:title => "Bender et al. (2004), Molecular similarity searching using atom environments, information-based feature selection, and a nave bayesian classifier.", :url => "https://doi.org/10.1021/ci034207y"}
  report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1
  report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2
  report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3
  report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_4

  # output
  t = Tempfile.new
  t << report.to_xml
  # send_file reads the file by path, so buffered data has to be on disk first.
  t.flush
  name = prediction_model.species.sub(/\s/, "-") + "-" + prediction_model.endpoint.downcase.sub(/\s/, "-")
  # Non-bang gsub: gsub! returns nil when nothing is replaced.
  send_file t.path, :filename => "QMRF_report_#{name.gsub(/[^0-9A-Za-z]/, '_')}.xml", :type => "application/xml", :disposition => "attachment"
end

get '/license' do
  @license = RDiscount.new(File.read("LICENSE.md")).to_html
  haml :license, :layout => false
end

get '/faq' do
  @faq = RDiscount.new(File.read("FAQ.md")).to_html
  haml :faq, :layout => false
end

get '/style.css' do
  headers 'Content-Type' => 'text/css; charset=utf-8'
  scss :style
end