From 1f26844dd7b25ac9f8891e745eefcce17239b16c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 29 Mar 2021 15:11:49 +0200 Subject: introduction, nixpkgs pinned --- bibliography.bib | 17 +++++++++++++ mutagenicity.md | 72 ++++++++++++++++++++++++++++++++----------------------- mutagenicity.pdf | Bin 3176503 -> 3178373 bytes shell.nix | 3 ++- 4 files changed, 61 insertions(+), 31 deletions(-) diff --git a/bibliography.bib b/bibliography.bib index 45c5852..ca5c3da 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -1,3 +1,20 @@ +@misc{ICH2017, + author = {International Council for Harmonisation of Technical Requirements for Pharmaceuticals for Human Use (ICH)}, + title = {Assessment and control of DNA reactive (mutagenic) impurities in pharmaceuticals to limit potential carcinogenic risk M7(R1)}, + year = 2017, + note = {\url{https://database.ich.org/sites/default/files/M7_R1_Guideline.pdf}}, + note = {Accessed: 29-03-2021}, +} + +@misc{ECHA2017, + author ={European Chemicals Agency (ECHA)}, + title = {Guidance on Information Requirements and Chemical Safety Assessment, Chapter R.7a: Endpoint specific guidance}, + year = 2017, + note ={\url{https://echa.europa.eu/documents/10162/13632/information_requirements_r6_en.pdf}}, + note = {Accessed: 29-03-2021}, + isbn = {978-92-9495-970-6}, + doi = {10.2823/337352}, +} @article{Rubiolo1992, author = {Rubiolo, P. and Pieters, L. and Calomme, M. and Bicchi, C. and Vlietinck, A. and Vanden Berghe, D.}, year = 1992, diff --git a/mutagenicity.md b/mutagenicity.md index 3911799..9c5f427 100644 --- a/mutagenicity.md +++ b/mutagenicity.md @@ -52,28 +52,46 @@ MolPrint2D and Chemistry Development Kit (CDK) descriptors. Crossvalidation accuracies of all investigated models ranged from 80-85% which is comparable with the interlaboratory variability of the *Salmonella* mutagenicity assay. 
Pyrrolizidine alkaloid predictions showed a clear distinction between chemical -groups, where Otonecines had the highest proportion of positive mutagenicity -predictions and Monoesters the lowest. +groups, where otonecines had the highest proportion of positive mutagenicity +predictions and monoesters the lowest. Introduction ============ -**TODO**: rationale for investigation - +The assessment of mutagenicity is an important part of the safety assessment of +chemical structures, because genomic changes may lead to cancer and germ +cell damage. The *Salmonella typhimurium* bacterial reverse mutation +test (Ames test) is capable of identifying substances that cause mutations (e.g., +base-pair substitutions, frameshifts, insertions, deletions) and is generally +used as the first step in genotoxicity and carcinogenicity assessments. + +Computer-based (*in silico*) mutagenicity predictions can be used in the early +screening of novel compounds (e.g. drug candidates), but they are also gaining +regulatory acceptance e.g. for the registration of industrial chemicals within +REACH (@ECHA2017) or the assessment of impurities in pharmaceuticals (ICH M7 +guideline, @ICH2017). + +*Salmonella* mutagenicity is at the moment the toxicological endpoint with the +largest amount of public data for almost 10000 structures, whereas datasets for +other endpoints contain typically only a few hundred compounds. The Ames test +itself is relatively reproducible with an interlaboratory variability of 80-85% +(@Benigni1988). + +This makes the development of mutagenicity models also interesting from a +computational chemistry and machine learning point of view. The relatively +large amount of public data reduces the probability of chance effects due to +small sample sizes and the reliability of the underlying assay reduces the risk +of overfitting experimental errors. 
+ +Within this study we attempted + + - to generate a new public mutagenicity training dataset by combining the most comprehensive public datasets + - to compare the performance of MolPrint2D (*MP2D*) fingerprints with Chemistry Development Kit (*CDK*) descriptors for mutagenicity predictions + - to compare the performance of global QSAR models (random forests (*RF*), support vector machines (*SVM*), logistic regression (*LR*), neural nets (*NN*)) with local models (`lazar`) -As case study we decided to apply these mutagenicity models to {{pa.nr}} -Pyrrolizidines alkaloids (PAs) in order to highlight potentials and problems -with the applicability of mutagenicity models for compounds with very limited -experimental data. +In order to highlight potentials and problems with the application of +mutagenicity models to compounds with limited experimental data, we decided to +apply these mutagenicity models to {{pa.nr}} pyrrolizidine alkaloids (PAs). Pyrrolizidine alkaloids (PAs) are characteristic metabolites of some plant families, mainly: *Asteraceae*, *Boraginaceae*, *Fabaceae* and *Orchidaceae* @@ -87,14 +105,8 @@ base and necic acid (@Hadi2021; @Allemang2018, @Louisse2019). However, due to limited availability of pure substances, only a limited number of PAs have been investigated with regards to their structure-specific mutagenicity. To overcome this bottleneck, the prediction of structure-specific mutagenic potential of -PAs with different machine learning models could provide further inside in the mechanisms. 
- -Summing up the main objectives of this study were - - - to generate a new mutagenicity training dataset, by combining the most comprehensive public datasets - - to compare the performance of MolPrint2D (*MP2D*) fingerprints with Chemistry Development Kit (*CDK*) descriptors - - to compare the performance of global QSAR models (random forests (*RF*), support vector machines (*SVM*), logistic regression (*LR*), neural nets (*NN*)) with local models (`lazar`) - - to apply these models for the prediction of pyrrolizidine alkaloid mutagenicity +PAs with different machine learning models could provide further insight into the +mechanisms. Materials and Methods ===================== @@ -585,12 +597,12 @@ models ({{pa.mp2d_svm.mut_perc}}-{{pa.mp2d_lazar_high_confidence.mut_perc}}%, @tbl:pa-summary, @fig:pa-groups).  Over all models, the mean value of mutagenic predicted PAs was highest for -Otonecines ({{pa.groups.Otonecine.mut_perc}}%, +otonecines ({{pa.groups.Otonecine.mut_perc}}%, {{pa.groups.Otonecine.mut}}/{{pa.groups.Otonecine.n_pred}}), -followed by Macrocyclic diesters ({{pa.groups.Macrocyclic_diester.mut_perc}}%, {{pa.groups.Macrocyclic_diester.mut}}/{{pa.groups.Macrocyclic_diester.n_pred}}), -Dehydropyrrolizidine ({{pa.groups.Dehydropyrrolizidine.mut_perc}}%, {{pa.groups.Dehydropyrrolizidine.mut}}/{{pa.groups.Dehydropyrrolizidine.n_pred}}), -Tertiary PAs ({{pa.groups.Tertiary_PA.mut_perc}}%, {{pa.groups.Tertiary_PA.mut}}/{{pa.groups.Tertiary_PA.n_pred}}) and -Retronecines ({{pa.groups.Retronecine.mut_perc}}%, {{pa.groups.Retronecine.mut}}/{{pa.groups.Retronecine.n_pred}}). 
+followed by macrocyclic diesters ({{pa.groups.Macrocyclic_diester.mut_perc}}%, {{pa.groups.Macrocyclic_diester.mut}}/{{pa.groups.Macrocyclic_diester.n_pred}}), +dehydropyrrolizidines ({{pa.groups.Dehydropyrrolizidine.mut_perc}}%, {{pa.groups.Dehydropyrrolizidine.mut}}/{{pa.groups.Dehydropyrrolizidine.n_pred}}), +tertiary PAs ({{pa.groups.Tertiary_PA.mut_perc}}%, {{pa.groups.Tertiary_PA.mut}}/{{pa.groups.Tertiary_PA.n_pred}}) and +retronecines ({{pa.groups.Retronecine.mut_perc}}%, {{pa.groups.Retronecine.mut}}/{{pa.groups.Retronecine.n_pred}}). When excluding the aforementioned three deviating models, the rank order stays the same, but the percentage of mutagenic PAs is higher. diff --git a/mutagenicity.pdf b/mutagenicity.pdf index 5a333b1..e46da4d 100644 Binary files a/mutagenicity.pdf and b/mutagenicity.pdf differ diff --git a/shell.nix b/shell.nix index 1ee567f..ab0ed26 100644 --- a/shell.nix +++ b/shell.nix @@ -1,4 +1,5 @@ -with import <nixpkgs> { }; +#with import <nixpkgs> { }; +with import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/d88cdc7bc1a7b3d5a50369bf4d2c7844e4868be2.tar.gz") {}; let R-packages = rWrapper.override { packages = with rPackages; [ ggplot2 Rtsne ]; }; gems = bundlerEnv { name = "mustache"; gemdir = ./.; }; -- cgit v1.2.3