From 8775a27a2bd41e16efb1cc627f915977a10916bb Mon Sep 17 00:00:00 2001
From: gebele
- No predictions are made for query compounds without similar structures
- in the training data. Similarity is determined as the Tanimoto coefficient of
- Molprint 2D fingerprints with a threshold of 0.1.
-
- Predictions based on a low number and/or very dissimilar neighbors or
- on neighbors with conflicting experimental measurements
- should be treated with caution.
-
- Num folds: #{cv.folds} Num instances: #{cv.nr_instances} Num unpredicted: #{cv.nr_unpredicted} Accuracy: #{cv.accuracy.signif(3)} Weighted accuracy: #{cv.weighted_accuracy.signif(3)} True positive rate: #{cv.true_rate[cv.accept_values[0]].signif(3)} True negative rate: #{cv.true_rate[cv.accept_values[1]].signif(3)} Positive predictive value: #{cv.predictivity[cv.accept_values[0]].signif(3)} Negative predictive value: #{cv.predictivity[cv.accept_values[1]].signif(3)} RMSE: #{cv.rmse.signif(3)} MAE: #{cv.mae.signif(3)} R2: #{cv.r_squared.signif(3)}
- Compounds with similar structures (neighbors) are assumed to have - similar activities as the query compound. -
- -" - - # A priori or a posteriori mechanistic interpretation 8.2 - report.value "mechanistic_basis_comments","A posteriori for individual predictions" - - # Other information about the mechanistic interpretation 8.3 - report.value "mechanistic_basis_info"," -- Hypothesis about biochemical mechanisms can be derived from individual - predictions by inspecting neighbors and relevant descriptors. -
-- Neighbors are compounds that are similar in respect to a certain - endpoint and it is likely that compounds with high similarity act by - similar mechanisms as the query compound. Links at the webinterface - prove an easy access to additional experimental data and literature - citations for the neighbors and the query structure. -
-- Please note that lazar predictions are based on neighbors. - Descriptors are only used for the calculation of similarities. -
- -" - - # Comments 9.1 - report.value "comments", " -- Public model interface: https://lazar.in-silico.ch -
-- Source code: #{lazar_commit} -
-- Docker image: https://hub.docker.com/r/insilicotox/lazar/ -
- -" - - # Bibliography 9.2 - report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Helma (2017), Nano-Lazar: Read across Predictions for Nanoparticle Toxicities with Calculated and Measured Properties", :url => "https://dx.doi.org/10.3389%2Ffphar.2017.00377"} - report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Lo Piparo (2014), Automated and reproducible read-across like models for predicting carcinogenic potency", :url => "https://doi.org/10.1016/j.yrtph.2014.07.010"} - report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"} - report.change_catalog :publications_catalog, :publications_catalog_4, {:title => "Bender et al. (2004), Molecular similarity searching using atom environments, information-based feature selection, and a nave bayesian classifier.", :url => "https://doi.org/10.1021/ci034207y"} - - report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1 - report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2 - report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3 - report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_4 - - + bad_request_error "model with id: '#{params[:id]}' not found." unless prediction_model + report = qmrf_report params[:id] # output t = Tempfile.new t << report.to_xml diff --git a/qmrf_report.rb b/qmrf_report.rb new file mode 100644 index 0000000..a23607f --- /dev/null +++ b/qmrf_report.rb @@ -0,0 +1,233 @@ +def qmrf_report id + lazarpath = `gem path lazar` + lazarpath = File.dirname lazarpath + lazarpath = File.dirname lazarpath + qmrfpath = `gem path qsar-report` + qmrfpath = File.dirname qmrfpath + qmrfpath = File.dirname qmrfpath + prediction_model = Model::Validation.find id + model = prediction_model.model + #validation_template = "./views/model_details.haml" + + if File.directory?(lazarpath) + lazar_commit = `cd #{lazarpath}; git rev-parse HEAD`.strip + lazar_commit = "https://github.com/opentox/lazar/tree/#{lazar_commit}" + else + lazar_commit = "https://github.com/opentox/lazar/releases/tag/v#{Gem.loaded_specs["lazar"].version}" + end + + report = OpenTox::QMRFReport.new + + # QSAR Identifier Title 1.1 + report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint.downcase}" + + # Software coding the model 1.3 + report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships. See #{lazar_commit}", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"} + report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware + + # Date of QMRF 2.1 + report.value "qmrf_date", "#{Time.now.strftime('%d %B %Y')}" + + # QMRF author(s) and contact details 2.1 + report.change_catalog :authors_catalog, :firstauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"} + report.ref_catalog :qmrf_authors, :authors_catalog, :firstauthor + + # Model developer(s) and contact details 2.5 + report.change_catalog :authors_catalog, :modelauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"} + report.ref_catalog :model_authors, :authors_catalog, :modelauthor + + # Date of model development and/or publication 2.6 + report.value "model_date", "#{Time.parse(model.created_at.to_s).strftime('%Y')}" + + # Reference(s) to main scientific papers and/or software package 2.7 + report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz A., Guetlein M., Rautenberg M., Vorgrimmler D., Gebele D. and Helma C. (2013), lazar: a modular predictive toxicology framework ", :number => "1", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"} + report.ref_catalog :references, :publications_catalog, :publications_catalog_1 + + report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Helma C, Gebele D, Rautenberg M (2017) lazar, software available at https://lazar.in-silico.ch,source code available at #{lazar_commit}", :number => "2", :url => "https://doi.org/10.5281/zenodo.215483"} + report.ref_catalog :references, :publications_catalog, :publications_catalog_2 + + # Availability of information about the model 2.8 + report.value "info_availability", "Prediction interface and validation results available at https://lazar.in-silico.ch" + + # Species 3.1 + report.value "model_species", prediction_model.species + + # Endpoint 3.2 + report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.qmrf["name"], :group => "#{prediction_model.qmrf["group"]}"} + report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1 + + # Endpoint Units 3.4 + report.value "endpoint_units", "#{prediction_model.unit}" + + # Dependent variable 3.5 + report.value "endpoint_variable", "#{prediction_model.endpoint} #{prediction_model.regression? ? "regression" : "classification"}" + + # Type of model 4.1 + model_type = model.class.to_s.gsub('OpenTox::Model::Lazar','') + report.value "algorithm_type", "#{model_type}" + + # Explicit algorithm 4.2 + report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1 + report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "", :description => "modified k-nearest neighbor #{model_type.downcase} (#{model_type =~ /regression/i ? "local random forest" : "weighted majority vote"}), see #{lazar_commit}" } + + # Descriptors in the model 4.3 + if model.algorithms["descriptors"][:type] + report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "Molprint 2D (Bender et al. 2004)", :name => "#{model.algorithms["descriptors"][:type]} fingerprints", :publication_ref => "", :units => ""} + report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1 + end + + # Descriptor selection 4.4 + report.value "descriptors_selection", (model.class == OpenTox::Model::LazarRegression ? "Correlation with dependent variable (Pearson p <= 0.05)" : "none") + + # Algorithm and descriptor generation 4.5 + report.value "descriptors_generation", "lazar" + + # Software name and version for descriptor generation 4.6 + report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "2", :url => "", :contact => ""} + report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2 + + # Chemicals/Descriptors ratio 4.7 + report.value "descriptors_chemicals_ratio", (model.class == OpenTox::Model::LazarRegression ? "variable (local regression models)" : "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)") + + # Description of the applicability domain of the model 5.1 + report.value "app_domain_description", " ++ No predictions are made for query compounds without similar structures + in the training data. Similarity is determined as the Tanimoto coefficient of + Molprint 2D fingerprints with a threshold of 0.1. +
++ Predictions based on a low number and/or very dissimilar neighbors or + on neighbors with conflicting experimental measurements + should be treated with caution. +
+ + " + + # Method used to assess the applicability domain 5.2 + report.value "app_domain_method", "Number and similarity of training set compounds (part of the main lazar algorithm)" + + # Software name and version for applicability domain assessment 5.3 + report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "3", :url => "", :contact => ""} + report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3 + + # Limits of applicability 5.4 + report.value "applicability_limits", "Compounds without similar substances in the training dataset" + + # Availability of the training set 6.1 + report.change_attributes "training_set_availability", {:answer => "Yes"} + + # Available information for the training set 6.2 + report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"} + + # Data for each descriptor variable for the training set 6.3 + report.change_attributes "training_set_descriptors", {:answer => "on demand"} + + # Data for the dependent variable for the training set 6.4 + report.change_attributes "dependent_var_availability", {:answer => "Yes"} + + # Other information about the training set 6.5 + report.value "other_info", "Original data from: #{prediction_model.source}" + + # Pre-processing of data before modelling 6.6 + report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none") + + # Robustness - Statistics obtained by leave-many-out cross-validation 6.9 + if prediction_model.repeated_crossvalidation + crossvalidations = prediction_model.crossvalidations + block = "" + crossvalidations.each do |cv| + block += "+
Num folds: #{cv.folds}
+Num instances: #{cv.nr_instances}
+Num unpredicted: #{cv.nr_unpredicted}
" + if model_type =~ /classification/i + block += "Accuracy: #{cv.accuracy.signif(3)}
+Weighted accuracy: #{cv.weighted_accuracy.signif(3)}
+True positive rate: #{cv.true_rate[cv.accept_values[0]].signif(3)}
+True negative rate: #{cv.true_rate[cv.accept_values[1]].signif(3)}
+Positive predictive value: #{cv.predictivity[cv.accept_values[0]].signif(3)}
+Negative predictive value: #{cv.predictivity[cv.accept_values[1]].signif(3)}
" + end + if model_type =~ /regression/i + block += "RMSE: #{cv.rmse.signif(3)}
+MAE: #{cv.mae.signif(3)}
+R2: #{cv.r_squared.signif(3)}
" + end + block += "" + end + report.value "lmo", "3 independent 10-fold crossvalidations:"+block+"" + end + + # Availability of the external validation set 7.1 + report.change_attributes "validation_set_availability", {:answer => "No"} + + # Available information for the external validation set 7.2 + report.change_attributes "validation_set_data", {:cas => "", :chemname => "", :formula => "", :inchi => "", :mol => "", :smiles => ""} + + # Data for each descriptor variable for the external validation set 7.3 + report.change_attributes "validation_set_descriptors", {:answer => "Unknown"} + + # Data for the dependent variable for the external validation set 7.4 + report.change_attributes "validation_dependent_var_availability", {:answer => "Unknown"} + + # Mechanistic basis of the model 8.1 + report.value "mechanistic_basis"," ++ Compounds with similar structures (neighbors) are assumed to have + similar activities as the query compound. +
+ +" + + # A priori or a posteriori mechanistic interpretation 8.2 + report.value "mechanistic_basis_comments","A posteriori for individual predictions" + + # Other information about the mechanistic interpretation 8.3 + report.value "mechanistic_basis_info"," ++ Hypothesis about biochemical mechanisms can be derived from individual + predictions by inspecting neighbors and relevant descriptors. +
++ Neighbors are compounds that are similar in respect to a certain + endpoint and it is likely that compounds with high similarity act by + similar mechanisms as the query compound. Links at the webinterface + prove an easy access to additional experimental data and literature + citations for the neighbors and the query structure. +
++ Please note that lazar predictions are based on neighbors. + Descriptors are only used for the calculation of similarities. +
+ +" + + # Comments 9.1 + report.value "comments", " ++ Public model interface: https://lazar.in-silico.ch +
++ Source code: #{lazar_commit} +
++ Docker image: https://hub.docker.com/r/insilicotox/lazar/ +
+ +" + + # Bibliography 9.2 + report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Helma (2017), Nano-Lazar: Read across Predictions for Nanoparticle Toxicities with Calculated and Measured Properties", :url => "https://dx.doi.org/10.3389%2Ffphar.2017.00377"} + report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Lo Piparo (2014), Automated and reproducible read-across like models for predicting carcinogenic potency", :url => "https://doi.org/10.1016/j.yrtph.2014.07.010"} + report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"} + report.change_catalog :publications_catalog, :publications_catalog_4, {:title => "Bender et al. (2004), Molecular similarity searching using atom environments, information-based feature selection, and a nave bayesian classifier.", :url => "https://doi.org/10.1021/ci034207y"} + + report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1 + report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2 + report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3 + report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_4 + + report + +end diff --git a/views/error.haml b/views/error.haml index 555982b..05cb0da 100644 --- a/views/error.haml +++ b/views/error.haml @@ -4,5 +4,4 @@ Back %hr %div.well{:style=>"width:100%;margin-bottom:2em;"} - = @error_report - + = @error -- cgit v1.2.3