From f4873aac173c2d0c36cd5752185ad6f91429793c Mon Sep 17 00:00:00 2001 From: rautenberg Date: Fri, 16 Sep 2016 17:03:29 +0200 Subject: add example file , refine documentation --- .yardopts | 2 +- README.md | 8 ++- example/example.rb | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/qmrf-report.rb | 26 +++++--- test/qmrf-report.rb | 1 - 5 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 example/example.rb diff --git a/.yardopts b/.yardopts index aaca9b0..1515f00 100644 --- a/.yardopts +++ b/.yardopts @@ -1,4 +1,4 @@ --readme README.md --title 'QMRF and QPRF reporting class for OpenTox' --charset utf-8 -yardoc lib/*.rb - README.md ChangeLog VERSION LICENSE +yardoc lib/*.rb - README.md ChangeLog VERSION LICENSE lib/template/LICENSE_qmrf.xsd.txt diff --git a/README.md b/README.md index b13b4e0..225822b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # QSAR-Report -QMRF and QPRF reporting extension to OpenTox ruby modules and lazar +QMRF and QPRF reporting extension to OpenTox ruby modules and lazar. ## About Class for QMRF and QPRF reporting. * QMRF: @@ -77,3 +77,9 @@ Copyright --------- Copyright (c) 2016 Christoph Helma, Micha Rautenberg, Denis Gebele. See LICENSE for details. +Lazar QMRF Example +------------------ +Here is an advanced example of the usage of the gem in lazar code: + +{include:file:example/example.rb} + diff --git a/example/example.rb b/example/example.rb new file mode 100644 index 0000000..e268a5c --- /dev/null +++ b/example/example.rb @@ -0,0 +1,175 @@ +# require the qsar-report gem +require "qsar-report" + +# generate a new report +report = OpenTox::QMRFReport.new + +# lazar specific data resources for model information +model = Model::Lazar.find params[:id] +prediction_model = Model::Prediction.find_by :model_id => params[:id] +validation_template = File.join(File.dirname(__FILE__),"../views/model_details.haml") #detailed validation results from lazar-rest + + +# Start filling the QMRF report + +# QSAR identifier (title) 1.1 +report.value "QSAR_title", "Model for #{prediction_model.species} #{prediction_model.endpoint}" + +# Software coding the model 1.3 +report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "Contact Email"} +report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware + +# Date of QMRF 2.1 +report.value "qmrf_date", "#{Time.now.strftime('%d %B %Y')}" + +# QMRF author(s) and contact details 2.1 +report.change_catalog :authors_catalog, :firstauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Contact Address", :email => "Contact Email", :number => "1", :url => "Web Page"} +report.ref_catalog :qmrf_authors, :authors_catalog, :firstauthor + +# Model developer(s) and contact details 2.5 +report.change_catalog :authors_catalog, :modelauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Contact Address", :email => "Contact Email", :number => "1", :url => "Web Page"} +report.ref_catalog :model_authors, :authors_catalog, :modelauthor + +# Date of model development and/or publication 2.6 +report.value "model_date", "#{Time.parse(model.created_at.to_s).strftime('%Y')}" + +# Reference(s) to main scientific papers and/or software package 2.7 +report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz, Guetlein, Rautenberg, Vorgrimmler, Gebele and Helma (2013), lazar: a modular predictive toxicology framework ", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"} +report.ref_catalog :references, :publications_catalog, :publications_catalog_1 + +# Reference(s) to main scientific papers and/or software package 2.7 +report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Maunz A and Helma C (2008) Prediction of chemical toxicity with local support vector regression and activity-specific kernels. SAR & QSAR in Environmental Research 19 (5-6), 413-431", :url => "http://dx.doi.org/10.1080/10629360802358430"} +report.ref_catalog :references, :publications_catalog, :publications_catalog_2 + +# Species 3.1 +report.value "model_species", prediction_model.species + +# Endpoint 3.2 +report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.endpoint, :group => ""} +report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1 + +# Endpoint Units 3.4 +report.value "endpoint_units", "#{prediction_model.unit}" + +# Type of model 4.1 +report.value "algorithm_type", "#{model.class.to_s.gsub('OpenTox::Model::Lazar','')}" + +# Explicit algorithm 4.2 +report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "modified k-nearest neighbor classification with activity specific similarities, weighted voting and exhaustive enumeration of fragments and neighbors"} +report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1 + +# Descriptors in the model 4.3 +report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "all statistically relevant paths are used for similarity calculation", :name => "linear fragmens (paths)", :publication_ref => "", :units => "true/false (i.e. present/absent)"} +report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1 + +# Descriptor selection 4.4 +report.value "descriptors_selection", "statistical filter (chi-square with Yates correction)" + +# Algorithm and descriptor generation 4.5 +report.value "descriptors_generation", "exhaustive breadth first search for paths in chemical graphs (simplified MolFea algorithm)" + +# Software name and version for descriptor generation 4.6 +report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "simplified MolFea algorithm", :number => "2", :url => "https://lazar.in-silico.ch", :contact => "Contact Email"} +report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2 + +# Chemicals/Descriptors ratio 4.7 +report.value "descriptors_chemicals_ratio", "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)" + +# Some QMRF fields (tag contents) can have formatted HTML contents. +# These contents have to have a complete HTML document structure with basic surrounding tags like . +# see Example below + +# Description of the applicability domain of the model 5.1 +report.value "app_domain_description", " +

+ The applicability domain (AD) of the training set is characterized by + the confidence index of a prediction (high confidence index: close to + the applicability domain of the training set/reliable prediction, low + confidence: far from the applicability domain of the + trainingset/unreliable prediction). The confidence index considers (i) + the similarity and number of neighbors and (ii) contradictory examples + within the neighbors. A formal definition can be found in Helma 2006. +

+

+ The reliability of predictions decreases gradually with increasing + distance from the applicability domain (i.e. decreasing confidence index) +

+ +" + +# Method used to assess the applicability domain 5.2 +report.value "app_domain_method", "see Helma 2006 and Maunz 2008" + +# Software name and version for applicability domain assessment 5.3 +report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "integrated into main lazar algorithm", :number => "3", :url => "https://lazar.in-silico.ch", :contact => "Contact Email"} +report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3 + +# Limits of applicability 5.4 +report.value "applicability_limits", "Predictions with low confidence index, unknown substructures and neighbors that might act by different mechanisms" + +# Availability of the training set 6.1 +report.change_attributes "training_set_availability", {:answer => "Yes"} + +# Available information for the training set 6.2 +report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"} + +# Data for each descriptor variable for the training set 6.3 +report.change_attributes "training_set_descriptors", {:answer => "No"} + +# Data for the dependent variable for the training set 6.4 +report.change_attributes "dependent_var_availability", {:answer => "All"} + +# Other information about the training set 6.5 +report.value "other_info", "#{prediction_model.source}" + +# Pre-processing of data before modelling 6.6 +report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none") + +# Robustness - Statistics obtained by leave-many-out cross-validation 6.9 +if prediction_model.crossvalidations + crossvalidations = prediction_model.crossvalidations + out = haml File.read(validation_template), :layout=> false, :locals => {:model => prediction_model} + report.value "lmo", out +end + +# Mechanistic basis of the model 8.1 +report.value "mechanistic_basis"," +

+ Compounds with similar structures (neighbors) are assumed to have + similar activities as the query compound. For the determination of + activity specific similarities only statistically relevant subtructures + (paths) are used. For this reason there is a priori no bias towards + specific mechanistic hypothesis. +

+ +" + +# A priori or a posteriori mechanistic interpretation 8.2 +report.value "mechanistic_basis_comments","a posteriori for individual predictions" + +# Other information about the mechanistic interpretation 8.3 +report.value "mechanistic_basis_info","

Hypothesis about biochemical mechanisms can be derived from individual + predictions by inspecting neighbors and relevant fragments.

+

Neighbors are compounds that are similar in respect to a certain + endpoint and it is likely that compounds with high similarity act by + similar mechanisms as the query compound. Links at the webinterface + prove an easy access to additional experimental data and literature + citations for the neighbors and the query structure.

+

Activating and deactivating parts of the query compound are highlighted + in red and green on the webinterface. Fragments that are unknown (or too + infrequent for statistical evaluation are marked in yellow and + additional statistical information about the individual fragments can be + retrieved. Please note that lazar predictions are based on neighbors and + not on fragments. Fragments and their statistical significance are used + for the calculation of activity specific similarities.

" + +# Reference all publications to the bibliography +# Bibliography 9.2 +report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1 +report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2 +report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"} +report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3 + +# output +response['Content-Type'] = "application/xml" +return report.to_xml \ No newline at end of file diff --git a/lib/qmrf-report.rb b/lib/qmrf-report.rb index 2c6cde8..a5cb86b 100644 --- a/lib/qmrf-report.rb +++ b/lib/qmrf-report.rb @@ -21,6 +21,8 @@ module OpenTox SCHEMA_FILE = File.join(File.dirname(__FILE__),"template/qmrf.xsd") # QMRF XML Template file TEMPLATE_FILE = File.join(File.dirname(__FILE__),"template/qmrf.xml") + # QMRF catalogs. Entries of a catalog can be referenced in certain tags. + CATALOGS = ["software_catalog", "algorithms_catalog", "descriptors_catalog", "endpoints_catalog", "publications_catalog", "authors_catalog"] # QMRF XML tags with attributes to edit ATTRIBUTE_TAGS = ["training_set_availability", "training_set_data", "training_set_descriptors", "dependent_var_availability", "validation_set_availability", "validation_set_data", "validation_set_descriptors", "validation_dependent_var_availability"] @@ -46,7 +48,11 @@ module OpenTox end # Get or Set a value - # e.G.: Title of My QSAR + # e.G.: + #@example change the title + # report.value "QSAR_title", "Title of My QSAR" + # #changes the QSAR_title tag to: + # #Title of My QSAR # @param [String] key Nodename e.g.: "QSAR_title" # @param [String] value Value to change. If not set the function returns the current value # @return [Error] returns Error message if fails @@ -58,11 +64,13 @@ module OpenTox t.content end - # Set attributes of an report XML tag - # e.G.: + # Set attributes of an report XML tag. + # Some of the QMRF XML tags have attributes to be edited. This applies to 6.1 to 6.4 and 7.1 to 7.4 see also: {OpenTox::QMRFReport::ATTRIBUTE_TAGS ATTRIBUTE_TAGS}. + # e.G. "Available information for the training set" at 6.2 of the report: + # #@example change_attributes # report.change_attributes "training_set_data", {:inchi => "Yes", :smiles => "Yes"} - # @param [String] key Nodename e.g.: "training_set_data" + # @param [String] tagname Nodename e.g.: "training_set_data" # @param [Hash] valuehash Key-Value Hash of tag attributes to change. # @return [Error] returns Error message if fails def change_attributes tagname, valuehash @@ -74,7 +82,7 @@ module OpenTox end # Change a catalog - # @param [String] catalog Name of the catalog - One of "software_catalog", "algorithms_catalog", "descriptors_catalog", "endpoints_catalog", "publications_catalog", "authors_catalog" in QMRF v1.3 + # @param [String] catalog Name of the catalog - One of {OpenTox::QMRFReport::CATALOGS CATALOGS}. # @param [String] id Single entry node in the catalog e.G.: " # @param [Hash] valuehash Key-Value Hash with attributes for a single catalog node # @return [Error] returns Error message if fails @@ -100,7 +108,7 @@ module OpenTox #@example ref_catalog # report.ref_catalog 'qmrf_authors', 'authors_catalog', 'firstauthor' # @param [String] chapter Name of the chapter to add the catalog reference. e.g.: qmrf_authors, model_authors, QSAR_software, ... - # @param [String] catalog Name of the catalog + # @param [String] catalog Name of the catalog. One of {OpenTox::QMRFReport::CATALOGS CATALOGS}. # @param [String] id entry node in the catalog def ref_catalog chapter, catalog, id catalog_exists? catalog @@ -117,7 +125,7 @@ module OpenTox end # get an attribute from a catalog entry - # @param [String] catalog Name of the catalog + # @param [String] catalog Name of the catalog. One of {OpenTox::QMRFReport::CATALOGS CATALOGS}. # @param [String] id entry id in the catalog # @param [String] key returns value of a key in a catalog node # @return [String, false] returns value of a key in a catalog node or false if catalog entry do not exists. @@ -132,9 +140,9 @@ module OpenTox # Check if a catalog exists in this QMRF version # @param [String] catalog Catalog - # @return [Error, true] returns true or Error if a catalog do not exists + # @return [Error, true] returns true or Error if a catalog do not exists. See also {OpenTox::QMRFReport::CATALOGS CATALOGS}. def catalog_exists? catalog - raise "Unknown catalog: #{catalog}" unless ["software_catalog", "algorithms_catalog", "descriptors_catalog", "endpoints_catalog", "publications_catalog", "authors_catalog"].include? catalog.to_s + raise "Unknown catalog: #{catalog}" unless CATALOGS.include? catalog.to_s true end diff --git a/test/qmrf-report.rb b/test/qmrf-report.rb index c973549..682485e 100644 --- a/test/qmrf-report.rb +++ b/test/qmrf-report.rb @@ -77,7 +77,6 @@ class QMRFReportTest < MiniTest::Test assert_equal report.to_xml, File.read(File.join(File.join(DATA_DIR, "qmrf_t9y.xml"))) report.change_attributes "training_set_data", {:inchi => "No", :smiles => "No"} assert_equal report.to_xml, File.read(File.join(File.join(DATA_DIR, "qmrf_t9n.xml"))) - end end -- cgit v1.2.3