diff options
author | Christoph Helma <helma@in-silico.ch> | 2017-12-18 17:13:03 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2017-12-18 17:13:03 +0100 |
commit | d467b34ca9ea79095205d022b9a62888294b543d (patch) | |
tree | c8473d4d8ae8db7eb6e30b440a05b0c92899a5e0 /loael.tex | |
parent | 155f553dd90a5f21c18ffc306f0e9b90ab595ade (diff) |
abstract, tex file added
Diffstat (limited to 'loael.tex')
-rw-r--r-- | loael.tex | 931 |
1 files changed, 931 insertions, 0 deletions
diff --git a/loael.tex b/loael.tex new file mode 100644 index 0000000..738fea5 --- /dev/null +++ b/loael.tex @@ -0,0 +1,931 @@ +\documentclass[]{article} +\usepackage{lmodern} +\usepackage{amssymb,amsmath} +\usepackage{ifxetex,ifluatex} +\usepackage{fixltx2e} % provides \textsubscript +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} +\else % if luatex or xelatex + \ifxetex + \usepackage{mathspec} + \else + \usepackage{fontspec} + \fi + \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} +\fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +% use microtype if available +\IfFileExists{microtype.sty}{% +\usepackage{microtype} +\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts +}{} +\usepackage[unicode=true]{hyperref} +\PassOptionsToPackage{usenames,dvipsnames}{color} % color is loaded by hyperref +\hypersetup{ + pdftitle={Modeling Chronic Toxicity: A comparison of experimental variability with read across predictions}, + pdfauthor={Christoph Helma1; David Vorgrimmler1; Denis Gebele1; Martin Gütlein2; Benoit Schilter3; Elena Lo Piparo3}, + pdfkeywords={(Q)SAR, read-across, LOAEL, experimental variability}, + colorlinks=true, + linkcolor=Maroon, + citecolor=Blue, + urlcolor=Blue, + breaklinks=true} +\urlstyle{same} % don't use monospace font for urls +\usepackage{longtable,booktabs} +% Fix footnotes in tables (requires footnote package) +\IfFileExists{footnote.sty}{\usepackage{footnote}\makesavenoteenv{long table}}{} +\usepackage{graphicx,grffile} +\makeatletter +\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} +\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} +\makeatother +% Scale images if necessary, so that they will not overflow the page +% margins by default, and it is still possible to overwrite the defaults +% using explicit options in \includegraphics[width, height, ...]{} +\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} +\IfFileExists{parskip.sty}{% +\usepackage{parskip} +}{% else +\setlength{\parindent}{0pt} +\setlength{\parskip}{6pt plus 2pt minus 1pt} +} +\setlength{\emergencystretch}{3em} % prevent overfull lines +\providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +\setcounter{secnumdepth}{0} +% Redefines (sub)paragraphs to behave more like sections +\ifx\paragraph\undefined\else +\let\oldparagraph\paragraph +\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} +\fi +\ifx\subparagraph\undefined\else +\let\oldsubparagraph\subparagraph +\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} +\fi + +% set default figure placement to htbp +\makeatletter +\def\fps@figure{htbp} +\makeatother + +\usepackage{a4wide} +\linespread{2} +\usepackage{lineno} +\linenumbers +\usepackage{subfig} +\AtBeginDocument{% +\renewcommand*\figurename{Figure} +\renewcommand*\tablename{Table} +} +\AtBeginDocument{% +\renewcommand*\listfigurename{List of Figures} +\renewcommand*\listtablename{List of Tables} +} +\usepackage{float} +\floatstyle{ruled} +\makeatletter +\@ifundefined{c@chapter}{\newfloat{codelisting}{h}{lop}}{\newfloat{codelisting}{h}{lop}[chapter]} +\makeatother +\floatname{codelisting}{Listing} +\newcommand*\listoflistings{\listof{codelisting}{List of Listings}} + +\title{Modeling Chronic Toxicity: A comparison of experimental variability with +read across predictions} +\author{Christoph Helma\textsuperscript{1} \and David Vorgrimmler\textsuperscript{1} \and Denis Gebele\textsuperscript{1} \and Martin Gütlein\textsuperscript{2} \and Benoit Schilter\textsuperscript{3} \and Elena Lo Piparo\textsuperscript{3}} +\date{\today} + +\begin{document} +\maketitle +\begin{abstract} +This study compares the accuracy of (Q)SAR/read-across predictions with +the experimental variability of chronic LOAEL values from \emph{in vivo} +experiments. We could demonstrate that predictions of the \texttt{lazar} +lazar algrorithm within the applicability domain of the training data +have the same variability as the experimental training data. Predictions +with a lower similarity threshold (i.e.~a larger distance from the +applicability domain) are also significantly better than random +guessing, but the errors to be expected are higher and a manual +inspection of prediction results is highly recommended. +\end{abstract} + +\textsuperscript{1} in silico toxicology gmbh, Basel, +Switzerland\newline\textsuperscript{2} Inst. f. Computer Science, +Johannes Gutenberg Universität Mainz, Germany\newline\textsuperscript{3} +Chemical Food Safety Group, Nestlé Research Center, Lausanne, +Switzerland + +\section{Introduction}\label{introduction} + +Relying on standard animal toxicological testing for chemical hazard +identification and characterization is increasingly questioned on both +scientific and ethical grounds. In addition, it appears obvious that +from a resource perspective, the capacity of standard toxicology to +address the safety of thousands of untested chemicals (Fowler, Savage, +and Mendez 2011) to which human may be exposed is very limited. It has +also been recognized that getting rapid insight on toxicity of chemicals +in case of emergency safety incidents or for early prioritization in +research and development (safety by design) is a big challenge mainly +because of the time and cost constraints associated with the generation +of relevant animal data. In this context, alternative approaches to +obtain timely and fit-for-purpose toxicological information are being +developed. Amongst others, non-testing, structure-activity based +\emph{in silico} toxicology methods (also called computational +toxicology) are considered highly promising. Importantly, they are +raising more and more interests and getting increased acceptance in +various regulatory (e.g. (ECHA 2008, EFSA (2016), EFSA (2014), Health +Canada (2016), OECD (2015))) and industrial (e.g. (Stanton and +Krusezewski 2016, Lo Piparo et al. (2011))) frameworks. + +For a long time already, computational methods have been an integral +part of pharmaceutical discovery pipelines, while in chemical food +safety their actual potentials emerged only recently (Lo Piparo et al. +2011). In this later field, an application considered critical is in the +establishment of levels of safety concern in order to rapidly and +efficiently manage toxicologically uncharacterized chemicals identified +in food. This requires a risk-based approach to benchmark exposure with +a quantitative value of toxicity relevant for risk assessment (Schilter +et al. 2014). Since most of the time chemical food safety deals with +life-long exposures to relatively low levels of chemicals, and because +long-term toxicity studies are often the most sensitive in food +toxicology databases, predicting chronic toxicity is of prime +importance. Up to now, read across and quantitative structure-activity +relationship (QSAR) have been the most used \emph{in silico} approaches +to obtain quantitative predictions of chronic toxicity. + +The quality and reproducibility of (Q)SAR and read-across predictions +has been a continuous and controversial topic in the toxicological +risk-assessment community. Although model predictions can be validated +with various procedures, to review results in context of experimental +variability has actually been rarely done or attempted. With missing +information about the variability of experimental toxicity data it is +hard to judge the performance of predictive models objectively and it is +tempting for model developers to use aggressive model optimisation +methods that lead to impressive validation results, but also to +overfitted models with little practical relevance. + +In the present study, automatic read-across like models were built to +generate quantitative predictions of long-term toxicity. Two databases +compiling chronic oral rat lowest adverse effect levels (LOAEL) as +endpoint were used. An early review of the databases revealed that many +chemicals had at least two independent studies/LOAELs. These studies +were exploited to generate information on the reproducibility of chronic +animal studies and were used to evaluate prediction performance of the +models in the context of experimental variability. + +An important limitation often raised for computational toxicology is the +lack of transparency on published models and consequently on the +difficulty for the scientific community to reproduce and apply them. To +overcome these issues, source code for all programs and libraries and +the databases that have been used to generate this manuscript are made +available under GPL3 licenses. Databases and compiled programs with all +dependencies for the reproduction of results in this manuscript are +available as a self-contained docker image. All data, tables and figures +in this manuscript was generated directly from experimental results +using the \texttt{R} package \texttt{knitR}. A single command repeats +all experiments (possibly with different settings) and updates the +manuscript with the new results. + +\section{Materials and Methods}\label{materials-and-methods} + +The following sections give a high level overview about algorithms and +datasets used for this study. In order to provide unambiguous references +to algorithms and datasets, links to source code and data sources are +included in the text. + +\subsection{Datasets}\label{datasets} + +\subsubsection{Nestlé database}\label{nestluxe9-database} + +The first database (Nestlé database for further reference) originates +from the publication of (P. Mazzatorta et al. 2008). It contains chronic +(\textgreater{} 180 days) lowest observed effect levels (LOAEL) for rats +(\emph{Rattus norvegicus}) after oral (gavage, diet, drinking water) +administration. The Nestlé database consists of 567 LOAEL values for 445 +unique chemical structures. The Nestlé database can be obtained from the +following GitHub links: + +\begin{itemize} +\tightlist +\item + original data: + \url{https://github.com/opentox/loael-paper/blob/submission/data/LOAEL_mg_corrected_smiles_mmol.csv} +\item + unique smiles: + \url{https://github.com/opentox/loael-paper/blob/submission/data/mazzatorta.csv} +\item + -log10 transfomed LOAEL: + \url{https://github.com/opentox/loael-paper/blob/submission/data/mazzatorta_log10.csv}. +\end{itemize} + +\subsubsection{Swiss Food Safety and Veterinary Office (FSVO) +database}\label{swiss-food-safety-and-veterinary-office-fsvo-database} + +Publicly available data from pesticide evaluations of chronic rat +toxicity studies from the European Food Safety Authority (EFSA) (EFSA +2014), the Joint FAO/WHO Meeting on Pesticide Residues (JMPR) (WHO 2011) +and the US EPA (US EPA 2011) were compiled to form the FSVO-database. +Only studies providing both an experimental NOAEL and an experimental +LOAEL were included. The LOAELs were taken as they were reported in the +evaluations. Further details on the database are described elsewhere +(Zarn, Engeli, and Schlatter 2011, Zarn, Engeli, and Schlatter (2013)). +The FSVO-database consists of 493 rat LOAEL values for 381 unique +chemical structures. It can be obtained from the following GitHub links: + +\begin{itemize} +\tightlist +\item + original data: + \url{https://github.com/opentox/loael-paper/blob/submission/data/NOAEL-LOAEL_SMILES_rat_chron.csv} +\item + unique smiles and mmol/kg\_bw/day units: + \url{https://github.com/opentox/loael-paper/blob/submission/data/swiss.csv} +\item + -log10 transfomed LOAEL: + \url{https://github.com/opentox/loael-paper/blob/submission/data/swiss_log10.csv} +\end{itemize} + +\subsubsection{Preprocessing}\label{preprocessing} + +Chemical structures (represented as SMILES (Weininger 1988)) in both +datasets were checked for correctness. When syntactically incorrect or +missing SMILES were generated from other identifiers (e.g names, CAS +numbers). Unique smiles from the OpenBabel library (OBoyle et al. 2011) +were used for the identification of duplicated structures. + +Studies with undefined or empty LOAEL entries were removed from the +datasets. LOAEL values were converted to mmol/kg\_bw/day units and +rounded to five significant digits. For prediction, validation and +visualisation purposes -log10 transformations are used. + +\subsubsection{Derived datasets}\label{derived-datasets} + +Two derived datasets were obtained from the original databases: + +The +\href{https://github.com/opentox/loael-paper/blob/submission/data/test_log10.csv}{\emph{test} +dataset} contains data from compounds that occur in both databases. +LOAEL values equal at five significant digits were considered as +duplicates originating from the same study/publication and only one +instance was kept in the test dataset. The test dataset has 375 LOAEL +values for 155 unique chemical structures and was used for + +\begin{itemize} +\tightlist +\item + evaluating experimental variability +\item + comparing model predictions with experimental variability. +\end{itemize} + +The +\href{https://github.com/opentox/loael-paper/blob/submission/data/training_log10.csv}{\emph{training} +dataset} is the union of the Nestlé and the FSVO databases and it was +used to build predictive models. LOAEL duplicates were removed using the +same criteria as for the test dataset. The training dataset has 998 +LOAEL values for 671 unique chemical structures. + +\subsection{Algorithms}\label{algorithms} + +In this study we are using the modular lazar (\emph{la}zy +\emph{s}tructure \emph{a}ctivity \emph{r}elationships) framework (A. +Maunz et al. 2013) for model development and validation. The complete +\texttt{lazar} source code can be found on +\href{https://github.com/opentox/lazar}{GitHub}. + +lazar follows the following basic +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/model.rb\#L180-L257}{workflow}: + +For a given chemical structure lazar + +\begin{itemize} +\tightlist +\item + searches in a database for similar structures (\emph{neighbors}) with + experimental data, +\item + builds a local QSAR model with these neighbors and +\item + uses this model to predict the unknown activity of the query compound. +\end{itemize} + +This procedure resembles an automated version of \emph{read across} +predictions in toxicology, in machine learning terms it would be +classified as a \emph{k-nearest-neighbor} algorithm. + +Apart from this basic workflow lazar is completely modular and allows +the researcher to use any algorithm for similarity searches and local +QSAR modelling. Within this study we are using the following algorithms: + +\subsubsection{Neighbor identification}\label{neighbor-identification} + +Similarity calculations are based on +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/nanoparticle.rb\#L17-L21}{MolPrint2D +fingerprints} (Bender et al. 2004) from the OpenBabel chemoinformatics +library (OBoyle et al. 2011). + +The MolPrint2D fingerprint uses atom environments as molecular +representation, which resemble basically the chemical concept of +functional groups. For each atom in a molecule it represents the +chemical environment using the atom types of connected atoms. + +MolPrint2D fingerprints are generated dynamically from chemical +structures and do not rely on predefined lists of fragments (such as +OpenBabel FP3, FP4 or MACCs fingerprints or lists of +toxocophores/toxicophobes). This has the advantage the they may capture +substructures of toxicological relevance that are not included in other +fingerprints. Unpublished experiments have shown that predictions with +MolPrint2D fingerprints are indeed more accurate than other OpenBabel +fingerprints. + +From MolPrint2D fingerprints we can construct a feature vector with all +atom environments of a compound, which can be used to calculate chemical +similarities. + +The +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/similarity.rb\#L18-L20}{chemical +similarity} between two compounds A and B is expressed as the proportion +between atom environments common in both structures \(A \cap B\) and the +total number of atom environments \(A \cup B\) (Jaccard/Tanimoto index, +Equation~\ref{eq:jaccard}). + +\begin{equation} sim = \frac{|A \cap B|}{|A \cup B|} \label{eq:jaccard}\end{equation} + +The threshold selection is a trade-off between prediction accuracy (high +threshold) and the number of predictable compounds (low threshold). As +it is in many practical cases desirable to make predictions even in the +absence of closely related neighbors, we follow a tiered approach: + +First a similarity threshold of 0.5 is used to collect neighbors, to +create a local QSAR model and to make a prediction for the query +compound. If any of this steps fail, the procedure is repeated with a +similarity threshold of 0.2 and the prediction is flagged with a warning +that it might be out of the applicability domain of the training data. + +Compounds with the same structure as the query structure are +automatically +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/model.rb\#L180-L257}{eliminated +from neighbors} to obtain unbiased predictions in the presence of +duplicates. + +\subsubsection{Local QSAR models and +predictions}\label{local-qsar-models-and-predictions} + +Only similar compounds (\emph{neighbors}) above the threshold are used +for local QSAR models. In this investigation we are using +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/caret.rb\#L7-L78}{weighted +random forests regression (RF)} for the prediction of quantitative +properties. First all uninformative fingerprints (i.e.~features with +identical values across all neighbors) are removed. The remaining set of +features is used as descriptors for creating a local weighted RF model +with atom environments as descriptors and model similarities as weights. +The RF method from the \texttt{caret} R package (Kuhn 2008) is used for +this purpose. Models are trained with the default \texttt{caret} +settings, optimizing the number of RF components by bootstrap +resampling. + +Finally the local RF model is applied to +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/model.rb\#L194-L272}{predict +the activity} of the query compound. The RMSE of bootstrapped local +model predictions is used to construct 95\% prediction intervals at +1.96*RMSE. + +If RF modelling or prediction fails, the program resorts to using the +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/regression.rb\#L6-L16}{weighted +mean} of the neighbors LOAEL values, where the contribution of each +neighbor is weighted by its similarity to the query compound. In this +case the prediction is also flagged with a warning. + +\subsubsection{Applicability domain}\label{applicability-domain} + +The applicability domain (AD) of lazar models is determined by the +structural diversity of the training data. If no similar compounds are +found in the training data no predictions will be generated. Warnings +are issued if the similarity threshold has to be lowered from 0.5 to 0.2 +in order to enable predictions and if lazar has to resort to weighted +average predictions, because local random forests fail. Thus predictions +without warnings can be considered as close to the applicability domain +and predictions with warnings as more distant from the applicability +domain. Quantitative applicability domain information can be obtained +from the similarities of individual neighbors. + +Local regression models consider neighbor similarities to the query +compound, by weighting the contribution of each neighbor is by its +similarity. The variability of local model predictions is reflected in +the 95\% prediction interval associated with each prediction. + +\subsubsection{Validation}\label{validation} + +For the comparison of experimental variability with predictive +accuracies we are using a test set of compounds that occur in both +databases. Unbiased read across predictions are obtained from the +\emph{training} dataset, by +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/model.rb\#L234-L238}{removing +\emph{all} information} from the test compound from the training set +prior to predictions. This procedure is hardcoded into the prediction +algorithm in order to prevent validation errors. As we have only a +single test set no model or parameter optimisations were performed in +order to avoid overfitting a single dataset. + +Results from 3 repeated +\href{https://github.com/opentox/lazar/blob/loael-paper.submission/lib/crossvalidation.rb\#L85-L93}{10-fold +crossvalidations} with independent training/test set splits are provided +as additional information to the test set results. + +The final model for production purposes was trained with all available +LOAEL data (Nestlé and FSVO databases combined). + +\subsection{Availability}\label{availability} + +\begin{description} +\tightlist +\item[Public webinterface] +\url{https://lazar.in-silico.ch} +\item[\texttt{lazar} framework] +\url{https://github.com/opentox/lazar} (source code) +\item[\texttt{lazar} GUI] +\url{https://github.com/opentox/lazar-gui} (source code) +\item[Manuscript] +\url{https://github.com/opentox/loael-paper} (source code for the +manuscript and validation experiments) +\item[Docker image] +\url{https://hub.docker.com/r/insilicotox/loael-paper/} (container with +manuscript, validation experiments, \texttt{lazar} libraries and third +party dependencies) +\end{description} + +\section{Results}\label{results} + +\subsubsection{Dataset comparison}\label{dataset-comparison} + +The main objective of this section is to compare the content of both +databases in terms of structural composition and LOAEL values, to +estimate the experimental variability of LOAEL values and to establish a +baseline for evaluating prediction performance. + +\subparagraph{Structural diversity}\label{structural-diversity} + +In order to compare the structural diversity of both datasets we +evaluated the frequency of functional groups from the OpenBabel FP4 +fingerprint. Figure~\ref{fig:fg} shows the frequency of functional +groups in both datasets. 139 functional groups with a frequency +\textgreater{} 25 are depicted, the complete table for all functional +groups can be found in the supplemental material at +\href{https://github.com/opentox/loael-paper/blob/submission/data/functional-groups.csv}{GitHub}. + +\begin{figure} +\centering +\includegraphics{figures/functional-groups.pdf} +\caption{Frequency of functional groups.}\label{fig:fg} +\end{figure} + +This result was confirmed with a visual inspection using the +\href{http://ches-mapper.org}{CheS-Mapper} (Chemical Space Mapping and +Visualization in 3D, Gütlein, Karwath, and Kramer (2012)) tool. +CheS-Mapper can be used to analyze the relationship between the +structure of chemical compounds, their physico-chemical properties, and +biological or toxic effects. It depicts closely related (similar) +compounds in 3D space and can be used with different kinds of features. +We have investigated structural as well as physico-chemical properties +and concluded that both datasets are very similar, both in terms of +chemical structures and physico-chemical properties. + +The only statistically significant difference between both datasets, is +that the Nestlé database contains more small compounds (61 structures +with less than 11 atoms) than the FSVO-database (19 small structures, +p-value 3.7E-7). + +\subsubsection{Experimental variability versus prediction +uncertainty}\label{experimental-variability-versus-prediction-uncertainty} + +Duplicated LOAEL values can be found in both datasets and there is a +substantial number of 155 compounds with more than one LOAEL. These +chemicals allow us to estimate the variability of experimental results +within individual datasets and between datasets. Data with +\emph{identical} values (at five significant digits) in both datasets +were excluded from variability analysis, because it it likely that they +originate from the same experiments. + +\subparagraph{Intra database +variability}\label{intra-database-variability} + +The Nestlé database has 567 LOAEL values for 445 unique structures, 93 +compounds have multiple measurements with a mean standard deviation +(-log10 transformed values) of 0.32 (0.56 mg/kg\_bw/day, 0.56 +mmol/kg\_bw/day) (P. Mazzatorta et al. (2008), Figure~\ref{fig:intra}). + +The FSVO database has 493 rat LOAEL values for 381 unique structures, 91 +compounds have multiple measurements with a mean standard deviation +(-log10 transformed values) of 0.29 (0.57 mg/kg\_bw/day, 0.59 +mmol/kg\_bw/day) (Figure~\ref{fig:intra}). + +Standard deviations of both datasets do not show a statistically +significant difference with a p-value (t-test) of 0.21. The combined +test set has a mean standard deviation (-log10 transformed values) of +0.33 (0.56 mg/kg\_bw/day, 0.55 mmol/kg\_bw/day) +(Figure~\ref{fig:intra}). + +\begin{figure} +\centering +\includegraphics{figures/dataset-variability.pdf} +\caption{Distribution and variability of LOAEL values in both datasets. +Each vertical line represents a compound, dots are individual LOAEL +values.}\label{fig:intra} +\end{figure} + +\subparagraph{Inter database +variability}\label{inter-database-variability} + +Figure~\ref{fig:comp} shows the experimental LOAEL variability of +compounds occurring in both datasets (i.e.~the \emph{test} dataset) +colored in red (experimental). This is the baseline reference for the +comparison with predicted values. + +Figure~\ref{fig:datacorr} depicts the correlation between LOAEL values +from both datasets. As both datasets contain duplicates medians were +used for the correlation plot and statistics. It should be kept in mind +that the aggregation of duplicated measurements into a single median +value hides a substantial portion of the experimental variability. +Correlation analysis shows a significant (p-value \textless{} 2.2e-16) +correlation between the experimental data in both datasets with r\^{}2: +0.52, RMSE: 0.59 + +\begin{figure} +\centering +\includegraphics{figures/median-correlation.pdf} +\caption{Correlation of median LOAEL values from Nestlé and FSVO +databases. Data with identical values in both databases was removed from +analysis.}\label{fig:datacorr} +\end{figure} + +\subsubsection{Local QSAR models}\label{local-qsar-models} + +In order to compare the performance of \emph{in silico} read across +models with experimental variability we are using compounds that occur +in both datasets as a test set (375 measurements, 155 compounds). +\texttt{lazar} read across predictions were obtained for 155 compounds, +37 predictions failed, because no similar compounds were found in the +training data (i.e.~they were not covered by the applicability domain of +the training data). + +Experimental data and 95\% prediction intervals overlapped in 100\% of +the test examples. + +Figure~\ref{fig:comp} shows a comparison of predicted with experimental +values. Most predicted values were located within the experimental +variability. + +\begin{figure} +\centering +\includegraphics{figures/test-prediction.pdf} +\caption{Comparison of experimental with predicted LOAEL values. Each +vertical line represents a compound, dots are individual measurements +(blue), predictions (green) or predictions far from the applicability +domain, i.e.~with warnings (red).}\label{fig:comp} +\end{figure} + +Correlation analysis was performed between individual predictions and +the median of experimental data. All correlations are statistically +highly significant with a p-value \textless{} 2.2e-16. These results are +presented in Figure~\ref{fig:corr} and Table~\ref{tbl:cv}. Please bear +in mind that the aggregation of multiple measurements into a single +median value hides experimental variability. + +\hypertarget{tbl:common-pred}{} +\begin{longtable}[]{@{}llll@{}} +\caption{\label{tbl:common-pred}Comparison of model predictions with +experimental variability. }\tabularnewline +\toprule +Comparison & \(r^2\) & RMSE & Nr. predicted\tabularnewline +\midrule +\endfirsthead +\toprule +Comparison & \(r^2\) & RMSE & Nr. predicted\tabularnewline +\midrule +\endhead +Nestlé vs.~FSVO database & 0.52 & 0.59\tabularnewline +AD close predictions vs.~test median & 0.48 & 0.56 & +34/155\tabularnewline +AD distant predictions vs.~test median & 0.38 & 0.68 & +84/155\tabularnewline +All predictions vs.~test median & 0.4 & 0.65 & 118/155\tabularnewline +\bottomrule +\end{longtable} + +\begin{figure} +\centering +\includegraphics{figures/prediction-test-correlation.pdf} +\caption{Correlation of experimental with predicted LOAEL values (test +set). Green dots indicate predictions close to the applicability domain +(i.e.~without warnings), red dots indicate predictions far from the +applicability domain (i.e.~with warnings).}\label{fig:corr} +\end{figure} + +For a further assessment of model performance three independent 10-fold +cross-validations were performed. Results are summarised in +Table~\ref{tbl:cv} and Figure~\ref{fig:cv}. All correlations of +predicted with experimental values are statistically highly significant +with a p-value \textless{} 2.2e-16. This is observed for compounds close +and more distant to the applicability domain. + +\hypertarget{tbl:cv}{} +\begin{longtable}[]{@{}llll@{}} +\caption{\label{tbl:cv}Results from 3 independent 10-fold +crossvalidations }\tabularnewline +\toprule +Predictions & \(r^2\) & RMSE & Nr. predicted\tabularnewline +\midrule +\endfirsthead +\toprule +Predictions & \(r^2\) & RMSE & Nr. predicted\tabularnewline +\midrule +\endhead +AD close & 0.61 & 0.58 & 102/671\tabularnewline +AD distant & 0.45 & 0.78 & 374/671\tabularnewline +All & 0.47 & 0.74 & 476/671\tabularnewline +& &\tabularnewline +AD close & 0.59 & 0.6 & 101/671\tabularnewline +AD distant & 0.45 & 0.77 & 376/671\tabularnewline +All & 0.47 & 0.74 & 477/671\tabularnewline +& &\tabularnewline +AD close & 0.59 & 0.57 & 93/671\tabularnewline +AD distant & 0.43 & 0.81 & 384/671\tabularnewline +All & 0.45 & 0.77 & 477/671\tabularnewline +\bottomrule +\end{longtable} + +\begin{figure} + +\subfloat[]{\includegraphics[height=0.30000\textwidth]{figures/crossvalidation0.pdf}\label{fig:cv0}} + +\subfloat[]{\includegraphics[height=0.30000\textwidth]{figures/crossvalidation1.pdf}\label{fig:cv1}} + +\subfloat[]{\includegraphics[height=0.30000\textwidth]{figures/crossvalidation2.pdf}\label{fig:cv2}} + +\caption{Correlation of predicted vs.~measured values for three +independent crossvalidations with MP2D fingerprint descriptors and local +random forest models.} + +\label{fig:cv} + +\end{figure} + +\section{Discussion}\label{discussion} + +It is currently acknowledged that there is a strong need for +toxicological information on the multiple thousands of chemicals to +which human may be exposed through food. These include for examples many +chemicals in commerce, which could potentially find their way into food +(Stanton and Krusezewski 2016, Fowler, Savage, and Mendez (2011)), but +also substances migrating from food contact materials (Grob et al. +2006), chemicals generated over food processing (Cotterill et al. 2008), +environmental contaminants as well as inherent plant toxicants +(Schilter, Constable, and Perrin 2013). For the vast majority of these +chemicals, no toxicological data is available and consequently insight +on their potential health risks is very difficult to obtain. It is +recognized that testing all of them in standard animal studies is +neither feasible from a resource perspective nor desirable because of +ethical issues associated with animal experimentation. In addition, for +many of these chemicals, risk may be very low and therefore testing may +actually be irrelevant. In this context, the identification of chemicals +of most concern on which limited resource available should focused is +essential and computational toxicology is thought to play an important +role for that. + +In order to establish the level of safety concern of food chemicals +toxicologically not characterized, a methodology mimicking the process +of chemical risk assessment, and supported by computational toxicology, +was proposed (Schilter et al. 2014). It is based on the calculation of +margins of exposure (MoE) between predicted values of toxicity and +exposure estimates. The level of safety concern of a chemical is then +determined by the size of the MoE and its suitability to cover the +uncertainties of the assessment. To be applicable, such an approach +requires quantitative predictions of toxicological endpoints relevant +for risk assessment. The present work focuses on prediction of chronic +toxicity, a major and often pivotal endpoints of toxicological databases +used for hazard identification and characterization of food chemicals. + +In a previous study, automated read-across like models for predicting +carcinogenic potency were developed. In these models, substances in the +training dataset similar to the query compounds are automatically +identified and used to derive a quantitative TD50 value. The errors +observed in these models were within the published estimation of +experimental variability (Lo Piparo et al. 2014). In the present study, +a similar approach was applied to build models generating quantitative +predictions of long-term toxicity. Two databases compiling chronic oral +rat lowest adverse effect levels (LOAEL) as endpoint were available from +different sources. \protect\hypertarget{dataset-comparison-1}{}{}Our +investigations clearly indicated that the Nestlé and FSVO databases are +very similar in terms of chemical structures and properties as well as +distribution of experimental LOAEL values. The only significant +difference that we observed was that the Nestlé one has larger amount of +small molecules, than the FSVO database. For this reason we pooled both +dataset into a single training dataset for read across predictions. + +An early review of the databases revealed that 155 out of the 671 +chemicals available in the training datasets had at least two +independent studies/LOAELs. These studies were exploited to generate +information on the reproducibility of chronic animal studies and were +used to evaluate prediction performance of the models in the context of +experimental variability.Considerable variability in the experimental +data was observed. Study design differences, including dose selection, +dose spacing and route of administration are likely explanation of +experimental variability. High experimental variability has an impact on +model building and on model validation. First it influences model +quality by introducing noise into the training data, secondly it +influences accuracy estimates because predictions have to be compared +against noisy data where ``true'' experimental values are unknown. This +will become obvious in the next section, where comparison of predictions +with experimental data is +discussed.\protect\hypertarget{lazar-predictions}{}{}The data obtained +in the present study indicate that \texttt{lazar} generates reliable +predictions for compounds within the applicability domain of the +training data (i.e.~predictions without warnings, which indicates a +sufficient number of neighbors with similarity \textgreater{} 0.5 to +create local random forest models). Correlation analysis shows that +errors (\(\text{RMSE}\)) and explained variance (\(r^{2}\)) are +comparable to experimental variability of the training data. + +Predictions with a warning (neighbor similarity \textless{} 0.5 and +\textgreater{} 0.2 or weighted average predictions) are more uncertain. +However, they still show a strong correlation with experimental data, +but the errors are larger than for compounds within the applicability +domain. Expected errors are displayed as 95\% prediction intervals, +which covers 100\% of the experimental data. The main advantage of +lowering the similarity threshold is that it allows to predict a much +larger number of substances than with more rigorous applicability domain +criteria. As each of this prediction could be problematic, they are +flagged with a warning to alert risk assessors that further inspection +is required. This can be done in the graphical interface +(\url{https://lazar.in-silico.ch}) which provides intuitive means of +inspecting the rationales and data used for read across predictions. + +Finally there is a substantial number of chemicals (37), where no +predictions can be made, because no similar compounds in the training +data are available. These compounds clearly fall beyond the +applicability domain of the training dataset and in such cases +predictions should not be used. In order to expand the domain of +applicability, the possibility to design models based on shorter, less +than chonic studies should be studied. It is likely that more substances +reflecting a wider chemical domain may be available. To predict such +shorter duration endpoints would also be valuable for chronic toxicy +since evidence suggest that exposure duration has little impact on the +levels of NOAELs/LOAELs (Zarn, Engeli, and Schlatter 2011, Zarn, Engeli, +and Schlatter (2013)). + +Elena: Should we add a GUI screenshot? + +\section{Summary}\label{summary} + +In conclusion, we could demonstrate that \texttt{lazar} predictions +within the applicability domain of the training data have the same +variability as the experimental training data. In such cases +experimental investigations can be substituted with \emph{in silico} +predictions. Predictions with a lower similarity threshold can still +give usable results, but the errors to be expected are higher and a +manual inspection of prediction results is highly recommended. + +\section*{References}\label{references} +\addcontentsline{toc}{section}{References} + +\hypertarget{refs}{} +\hypertarget{ref-doi:10.1021ux2fci034207y}{} +Bender, Andreas, Hamse Y. Mussa, Robert C. Glen, and Stephan Reiling. +2004. ``Molecular Similarity Searching Using Atom Environments, +Information-Based Feature Selection, and a Naïve Bayesian Classifier.'' +\emph{Journal of Chemical Information and Computer Sciences} 44 (1): +170--78. +doi:\href{https://doi.org/10.1021/ci034207y}{10.1021/ci034207y}. + +\hypertarget{ref-Cotterill2008}{} +Cotterill, J.V., M.Q. Chaudry, W. Mattews, and R. W. Watkins. 2008. ``In +Silico Assessment of Toxicity of Heat-Generated Food Contaminants.'' +\emph{Food Chemical Toxicology}, no. 46(6): 1905--18. + +\hypertarget{ref-ECHA2008}{} +ECHA. 2008. ``Guidance on Information Requirements and Chemical Safety +Assessment, Chapter R.6: QSARs and Grouping of Chemicals.'' ECHA. + +\hypertarget{ref-EFSA2014}{} +EFSA. 2014. ``Rapporteur Member State Assessment Reports Submitted for +the EU Peer Review of Active Substances Used in Plant Protection +Products.'' \url{http://dar.efsa.europa.eu/dar-web/provision}. + +\hypertarget{ref-EFSA2016}{} +EFSA. 2016. ``Guidance on the Establishment of the Residue Definition +for Dietary Assessment: EFSA Panel on Plant Protect Products and Their +Residues (PPR).'' \emph{EFSA Journal}, no. 14: 1--12. + +\hypertarget{ref-Fowler2011}{} +Fowler, B., S. Savage, and B. Mendez. 2011. ``White Paper: Protecting +Public Health in the 21st Century: The Case for Computational +Toxicology.'' ICF International, Inc.icfi.com. + +\hypertarget{ref-Grob2006}{} +Grob, K., M. Biedermann, E. Scherbaum, M. Roth, and K. Rieger. 2006. +``Food Contamination with Organic Materials in Perspective: Packaging +Materials as the Largest and Least Controlled Source? A View Focusing on +the European Situation.'' \emph{Crit. Rev. Food. Sci. Nutr.}, no. 46: +529--35. +doi:\href{https://doi.org/10.1080/10408390500295490}{10.1080/10408390500295490}. + +\hypertarget{ref-Guetlein2012}{} +Gütlein, Martin, Andreas Karwath, and Stefan Kramer. 2012. ``CheS-Mapper +- Chemical Space Mapping and Visualization in 3D.'' \emph{Journal of +Cheminformatics} 4 (1): 7. +doi:\href{https://doi.org/10.1186/1758-2946-4-7}{10.1186/1758-2946-4-7}. + +\hypertarget{ref-HealthCanada2016}{} +Health Canada. 2016. +\url{https://www.canada.ca/en/health-canada/services/chemical-substances/chemicals-management-plan.html}. + +\hypertarget{ref-Kuhn08}{} +Kuhn, Max. 2008. ``Building Predictive Models in R Using the Caret +Package.'' \emph{J. of Stat. Soft}. + +\hypertarget{ref-LoPiparo2014}{} +Lo Piparo, E., A. Maunz, C. Helma, D. Vorgrimmler, and B. Schilter. +2014. ``Automated and Reproducible Read-Across Like Models for +Predicting Carcinogenic Potency.'' \emph{Regulatory Toxicology and +Pharmacology}, no. 70: 370--78. + +\hypertarget{ref-LoPiparo2011}{} +Lo Piparo, E., A. Worth, A. Manibusan, C. Yang, B. Schilter, P. +Mazzatorta, M.N. Jacobs, H. Steinkelner, and L. Mohimont. 2011. ``Use of +Computational Tools in the Field of Food Safety.'' \emph{Regulatory +Toxicology and Pharmacology}, no. 60(3): 354--62. + +\hypertarget{ref-Maunz2013}{} +Maunz, Andreas, Martin Gütlein, Micha Rautenberg, David Vorgrimmler, +Denis Gebele, and Christoph Helma. 2013. ``Lazar: A Modular Predictive +Toxicology Framework.'' \emph{Frontiers in Pharmacology} 4. Frontiers +Media SA. +doi:\href{https://doi.org/10.3389/fphar.2013.00038}{10.3389/fphar.2013.00038}. + +\hypertarget{ref-mazzatorta08}{} +Mazzatorta, Paolo, Manuel Dominguez Estevez, Myriam Coulet, and Benoit +Schilter. 2008. ``Modeling Oral Rat Chronic Toxicity.'' \emph{Journal of +Chemical Information and Modeling} 48 (10): 1949--54. +doi:\href{https://doi.org/10.1021/ci8001974}{10.1021/ci8001974}. + +\hypertarget{ref-OBoyle2011}{} +OBoyle, Noel M, Michael Banck, Craig A James, Chris Morley, Tim +Vandermeersch, and Geoffrey R Hutchison. 2011. ``Open Babel: An Open +Chemical Toolbox.'' \emph{Journal of Cheminformatics} 3 (1). Springer +Science and Business Media: 33. +doi:\href{https://doi.org/10.1186/1758-2946-3-33}{10.1186/1758-2946-3-33}. + +\hypertarget{ref-OECD2015}{} +OECD. 2015. ``Fundamental and Guiding Principles for (Q)SAR Analysis of +Chemicals Carcinogens with Mechanistic Considerations Monograph 229 +ENV/JM/MONO(2015)46.'' In \emph{Series on Testing and Assessment No +229}. + +\hypertarget{ref-Schilter2014}{} +Schilter, B., R. Benigni, A. Boobis, A. Chiodini, A. Cockburn, M.T. +Cronin, E. Lo Piparo, S. Modi, Thiel A., and A. Worth. 2014. +``Establishing the Level of Safety Concern for Chemicals in Food Without +the Need for Toxicity Testing.'' \emph{Regulatory Toxicology and +Pharmacology}, no. 68: 275--98. + +\hypertarget{ref-Schilter2013}{} +Schilter, B., A. Constable, and I. Perrin. 2013. ``Naturally Occurring +Toxicants of Plant Origin: Risk Assessment and Management +Considerations.'' In \emph{Food Safety Management: A Practical Guide for +Industry}, edited by Y. Motarjemi, 45--57. Elsevier. + +\hypertarget{ref-Stanton2016}{} +Stanton, K., and F.H. Krusezewski. 2016. ``Quantifying the Benefits of +Using Read-Across and in Silico Techniques to Fullfill Hazard Data +Requirements for Chemical Categories.'' \emph{Regulatory Toxicology and +Pharmacology}, no. 81: 250--59. +doi:\href{https://doi.org/10.1016/j-yrtph.2016.09.004.}{10.1016/j-yrtph.2016.09.004.} + +\hypertarget{ref-EPA2011}{} +US EPA. 2011. ``Fact Sheets on New Active Ingredients.'' + +\hypertarget{ref-doi:10.1021ux2fci00057a005}{} +Weininger, David. 1988. ``SMILES, a Chemical Language and Information +System. 1. Introduction to Methodology and Encoding Rules.'' +\emph{Journal of Chemical Information and Computer Sciences} 28 (1): +31--36. +doi:\href{https://doi.org/10.1021/ci00057a005}{10.1021/ci00057a005}. + +\hypertarget{ref-WHO2011}{} +WHO. 2011. ``Joint FAO/WHO Meeting on Pesticide Residues (JMPR) +Publications.'' +\url{http://www.who.int/foodsafety/publications/jmpr-monographs/en/}. + +\hypertarget{ref-Zarn2011}{} +Zarn, J.A., B.E. Engeli, and J.R. Schlatter. 2011. ``Study Parameters +Influencing NOAEL and LOAEL in Toxicity Feeding Studies for Pesticides: +Exposure Duration Versus Dose Decrement, Dose Spacing, Group Size and +Chemical Class.'' \emph{Regul. Toxicol. Pharmacol.}, no. 61: 243--50. + +\hypertarget{ref-Zarn2013}{} +---------. 2013. ``Characterization of the Dose Decrement in Regulatory +Rat Pesticide Toxicity Feeding Studies.'' \emph{Regul. Toxicol. +Pharmacol.}, no. 67: 215--20. + +\end{document} |