summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2019-06-05 17:28:10 +0200
committerChristoph Helma <helma@in-silico.ch>2019-06-05 17:28:10 +0200
commit9a217185e791d6abbe46549cd4e87c1d1a643c05 (patch)
tree1968aebd8a38acd7784e58d478e0f6c3fb08ab76
parentfeb1f82356da50a1ebf63b1eda434c388ab009e1 (diff)
first manuscript version
-rw-r--r--paper/190501_Genotox-PA.docxbin0 -> 569964 bytes
-rw-r--r--paper/190501_Genotox-PA.md1325
-rw-r--r--paper/LICENSE339
-rw-r--r--paper/Makefile102
-rw-r--r--paper/README.md269
-rw-r--r--paper/bibliography.bib366
-rw-r--r--paper/example/article.md136
-rw-r--r--paper/example/bibliography.bib13
-rw-r--r--paper/lua-filters/.travis.yml55
-rw-r--r--paper/lua-filters/LICENSE21
-rw-r--r--paper/lua-filters/Makefile5
-rw-r--r--paper/lua-filters/README.md21
-rw-r--r--paper/lua-filters/abstract-to-meta/Makefile8
-rw-r--r--paper/lua-filters/abstract-to-meta/README.md37
-rw-r--r--paper/lua-filters/abstract-to-meta/abstract-to-meta.lua23
-rw-r--r--paper/lua-filters/abstract-to-meta/expected.md19
-rw-r--r--paper/lua-filters/abstract-to-meta/sample.md17
-rw-r--r--paper/lua-filters/author-info-blocks/Makefile8
-rw-r--r--paper/lua-filters/author-info-blocks/README.md59
-rw-r--r--paper/lua-filters/author-info-blocks/author-info-blocks.lua176
-rw-r--r--paper/lua-filters/author-info-blocks/document-screenshot.jpgbin0 -> 14584 bytes
-rw-r--r--paper/lua-filters/author-info-blocks/expected.native5
-rw-r--r--paper/lua-filters/author-info-blocks/sample.md28
-rw-r--r--paper/lua-filters/bibexport/Makefile5
-rw-r--r--paper/lua-filters/bibexport/README.md31
-rw-r--r--paper/lua-filters/bibexport/bibexport.lua82
-rw-r--r--paper/lua-filters/bibexport/coffee.bib48
-rw-r--r--paper/lua-filters/bibexport/expected.bibexport.aux3
-rw-r--r--paper/lua-filters/bibexport/expected.bibexport.bib27
-rw-r--r--paper/lua-filters/bibexport/sample.bib13
-rw-r--r--paper/lua-filters/bibexport/sample.md18
-rw-r--r--paper/lua-filters/cito/Makefile6
-rw-r--r--paper/lua-filters/cito/README.md76
-rw-r--r--paper/lua-filters/cito/cito.lua138
-rw-r--r--paper/lua-filters/cito/expected.md19
-rw-r--r--paper/lua-filters/cito/sample.bib24
-rw-r--r--paper/lua-filters/cito/sample.md10
-rw-r--r--paper/lua-filters/diagram-generator/.gitignore2
-rw-r--r--paper/lua-filters/diagram-generator/Makefile13
-rw-r--r--paper/lua-filters/diagram-generator/README.md252
-rw-r--r--paper/lua-filters/diagram-generator/diagram-generator.lua295
-rw-r--r--paper/lua-filters/diagram-generator/sample.md244
-rw-r--r--paper/lua-filters/minted/Makefile65
-rw-r--r--paper/lua-filters/minted/README.md316
-rwxr-xr-xpaper/lua-filters/minted/background_color.py76
-rw-r--r--paper/lua-filters/minted/minted.lua456
-rwxr-xr-xpaper/lua-filters/minted/run_minted_tests.py522
-rw-r--r--paper/lua-filters/minted/sample.md135
-rw-r--r--paper/lua-filters/multiple-bibliographies/Makefile6
-rw-r--r--paper/lua-filters/multiple-bibliographies/README.md33
-rw-r--r--paper/lua-filters/multiple-bibliographies/expected.native14
-rw-r--r--paper/lua-filters/multiple-bibliographies/multiple-bibliographies.lua110
-rw-r--r--paper/lua-filters/multiple-bibliographies/primary.bib10
-rw-r--r--paper/lua-filters/multiple-bibliographies/sample.md17
-rw-r--r--paper/lua-filters/multiple-bibliographies/secondary.bib10
-rw-r--r--paper/lua-filters/pagebreak/Makefile4
-rw-r--r--paper/lua-filters/pagebreak/README.md68
-rw-r--r--paper/lua-filters/pagebreak/expected.html6
-rw-r--r--paper/lua-filters/pagebreak/pagebreak.lua97
-rw-r--r--paper/lua-filters/pagebreak/sample.md14
-rw-r--r--paper/lua-filters/plantuml/Makefile3
-rw-r--r--paper/lua-filters/plantuml/output.html45
-rw-r--r--paper/lua-filters/plantuml/plantuml.lua56
-rw-r--r--paper/lua-filters/plantuml/readme.md30
-rwxr-xr-xpaper/lua-filters/runtests.sh16
-rw-r--r--paper/lua-filters/scholarly-metadata/Makefile8
-rw-r--r--paper/lua-filters/scholarly-metadata/README.md91
-rw-r--r--paper/lua-filters/scholarly-metadata/expected.md41
-rw-r--r--paper/lua-filters/scholarly-metadata/sample.md30
-rw-r--r--paper/lua-filters/scholarly-metadata/scholarly-metadata.lua180
-rw-r--r--paper/lua-filters/scrlttr2/Makefile9
-rw-r--r--paper/lua-filters/scrlttr2/README.md60
-rw-r--r--paper/lua-filters/scrlttr2/expected-strings.sh31
-rw-r--r--paper/lua-filters/scrlttr2/sample.md16
-rw-r--r--paper/lua-filters/scrlttr2/scrlttr2.lua161
-rw-r--r--paper/lua-filters/section-refs/Makefile26
-rw-r--r--paper/lua-filters/section-refs/README.md19
-rw-r--r--paper/lua-filters/section-refs/bibliography.bib70
-rw-r--r--paper/lua-filters/section-refs/expected_default.native25
-rw-r--r--paper/lua-filters/section-refs/expected_no_citeproc.native7
-rw-r--r--paper/lua-filters/section-refs/expected_refs_name.native27
-rw-r--r--paper/lua-filters/section-refs/expected_section_level.native31
-rw-r--r--paper/lua-filters/section-refs/sample.md18
-rw-r--r--paper/lua-filters/section-refs/section-refs.lua138
-rw-r--r--paper/lua-filters/short-captions/Makefile18
-rw-r--r--paper/lua-filters/short-captions/README.md52
-rw-r--r--paper/lua-filters/short-captions/expected-1.tex67
-rw-r--r--paper/lua-filters/short-captions/expected-2.tex67
-rw-r--r--paper/lua-filters/short-captions/fig.pdfbin0 -> 56971 bytes
-rw-r--r--paper/lua-filters/short-captions/short-captions.lua37
-rw-r--r--paper/lua-filters/spellcheck/Makefile2
-rw-r--r--paper/lua-filters/spellcheck/README.md42
-rw-r--r--paper/lua-filters/spellcheck/expected.txt2
-rw-r--r--paper/lua-filters/spellcheck/sample.md15
-rw-r--r--paper/lua-filters/spellcheck/spellcheck.lua70
-rw-r--r--paper/lua-filters/table-short-captions/Makefile24
-rw-r--r--paper/lua-filters/table-short-captions/README.md66
-rw-r--r--paper/lua-filters/table-short-captions/expected-sample.native68
-rw-r--r--paper/lua-filters/table-short-captions/expected-sample.tex291
-rw-r--r--paper/lua-filters/table-short-captions/sample.md84
-rw-r--r--paper/lua-filters/table-short-captions/table-short-captions.lua160
-rw-r--r--paper/lua-filters/track-changes/.gitignore2
-rw-r--r--paper/lua-filters/track-changes/Makefile26
-rw-r--r--paper/lua-filters/track-changes/README.md18
-rw-r--r--paper/lua-filters/track-changes/TODO.md18
-rw-r--r--paper/lua-filters/track-changes/expected_accept.markdown29
-rw-r--r--paper/lua-filters/track-changes/expected_draft.html41
-rw-r--r--paper/lua-filters/track-changes/expected_draft.tex159
-rw-r--r--paper/lua-filters/track-changes/expected_reject.markdown29
-rw-r--r--paper/lua-filters/track-changes/sample.md43
-rw-r--r--paper/lua-filters/track-changes/test-track-changes.sh25
-rw-r--r--paper/lua-filters/track-changes/track-changes.lua247
-rw-r--r--paper/lua-filters/wordcount/Makefile2
-rw-r--r--paper/lua-filters/wordcount/README.md11
-rw-r--r--paper/lua-filters/wordcount/expected.txt1
-rw-r--r--paper/lua-filters/wordcount/sample.md12
-rw-r--r--paper/lua-filters/wordcount/wordcount.lua29
-rw-r--r--paper/media/image1.pngbin0 -> 150987 bytes
-rw-r--r--paper/media/image2.pngbin0 -> 50793 bytes
-rw-r--r--paper/media/image3.pngbin0 -> 51318 bytes
-rw-r--r--paper/media/image4.pngbin0 -> 48737 bytes
-rw-r--r--paper/media/image5.pngbin0 -> 48732 bytes
-rw-r--r--paper/media/image6.pngbin0 -> 48818 bytes
-rw-r--r--paper/media/image7.pngbin0 -> 36233 bytes
-rw-r--r--paper/media/image8.pngbin0 -> 18746 bytes
-rw-r--r--paper/mutagenicity.md532
-rw-r--r--paper/outfile.docxbin0 -> 204123 bytes
-rw-r--r--paper/outfile.enriched.json1
-rw-r--r--paper/outfile.epubbin0 -> 199245 bytes
-rw-r--r--paper/outfile.html822
-rw-r--r--paper/outfile.latex779
-rw-r--r--paper/outfile.odtbin0 -> 204439 bytes
-rw-r--r--paper/outfile.pdfbin0 -> 241723 bytes
-rw-r--r--paper/pandoc-options.inc.mk47
-rw-r--r--paper/scholar-filters/dkjson.lua714
-rw-r--r--paper/scholar-filters/json-ld.lua233
-rw-r--r--paper/scholar-filters/template-helper.lua43
-rw-r--r--paper/templates/images/arrow-down.pngbin0 -> 216 bytes
-rw-r--r--paper/templates/images/octocat-small.pngbin0 -> 357 bytes
-rw-r--r--paper/templates/images/pdf.pngbin0 -> 1029 bytes
-rw-r--r--paper/templates/pandoc-scholar.html119
-rw-r--r--paper/templates/pandoc-scholar.latex309
-rw-r--r--paper/templates/styles/pandoc-scholar.css478
-rw-r--r--paper/writers/jsonld.lua14
144 files changed, 13283 insertions, 0 deletions
diff --git a/paper/190501_Genotox-PA.docx b/paper/190501_Genotox-PA.docx
new file mode 100644
index 0000000..bb46cfb
--- /dev/null
+++ b/paper/190501_Genotox-PA.docx
Binary files differ
diff --git a/paper/190501_Genotox-PA.md b/paper/190501_Genotox-PA.md
new file mode 100644
index 0000000..acf64e9
--- /dev/null
+++ b/paper/190501_Genotox-PA.md
@@ -0,0 +1,1325 @@
+Prediction of the mutagenic potential of different pyrrolizidine
+alkaloids using LAZAR, Random Forest, Support Vector Machines, and Deep
+Learning
+
+Authors
+
+Verena Schöning, Christoph Helma, Philipp Boss, Jürgen Drewe
+
+**Manuscript in preparation.**
+
+Corresponding author:
+
+Prof. Dr. Jürgen Drewe, MSc
+
+Abstract
+========
+
+Pyrrolizidine alkaloids (PAs) are secondary plant metabolites of some
+plant families, which protect against predators and are generally
+considered genotoxic and mutagenic. This mutagenicity is also the point
+of concern in regulatory risk assessment of this substance group
+([EFSA 2011](#_ENREF_36); [EMA 2014](#_ENREF_38),
+[2016](#_ENREF_39)). Several
+investigations already showed that the mutagenic potential of PAs is
+different, and largely depends on the structure.
+
+Since only very few of over 600 known PAs are available for *in vitro*
+or *in vivo* experiments, the mutagenicity of PAs in this study was
+estimated using four different machine learning techniques: LAZAR,
+Random Forest, Support Vector Machines and Deep Learning. However, all
+models were not optimal for predicting the genotoxic potential of PAs
+either due to problems with the applicability domain or due to low
+performance. Therefore, no estimation regarding the genotoxic potential
+of single PAs could be made. An analysis of the genotoxic potential of
+different structural groups showed promising results. For necine base
+and necic acid, the results fitted well with literature for three
+models. However, the prediction of the toxic principle of PAs,
+dehydropyrrolizidine, was only within expectation in one model
+(TensorFlow-generated Deep Learning model), but not in the other four
+models. This study shows convincingly the need to critically review and
+assess the predictions obtained from machine learning approaches by
+internal cross-validation, but also by external validation through
+comparison with literature.
+
+Introduction
+============
+
+Pyrrolizidine alkaloids (PAs) are secondary plant ingredients found in
+many plant species as protection against predators ([Hartmann & Witte
+1995](#_ENREF_59); [Langel et al. 2011](#_ENREF_76)). PAs are ester
+alkaloids, which are composed of a necine base (two fused five-membered
+rings joined by a nitrogen atom) and one or two necic acids (carboxylic
+ester arms). The necine base can have different structures and thereby
+divides PAs into several structural groups, e.g. otonecine, platynecine,
+and retronecine. The structural groups of the necic acid are macrocyclic
+diester, open-ring diester and monoester ([Langel et al.
+2011](#_ENREF_76)).
+
+PAs are mainly metabolised in the liver, which is at the same time the
+main target organ of toxicity ([Bull & Dick 1959](#_ENREF_17); [Bull et
+al. 1958](#_ENREF_18); [Butler et al. 1970](#_ENREF_20); [DeLeve et al.
+1996](#_ENREF_33); [Jago 1971](#_ENREF_65); [Li et al.
+2011](#_ENREF_78); [Neumann et al. 2015](#_ENREF_99)). There are three
+principal metabolic pathways for 1,2-unsaturated PAs ([Chen et al.
+2010](#_ENREF_26)): (i) Detoxification by hydrolysis: the ester bonds
+on positions C7 and C9 are hydrolysed by non-specific esterases to
+release necine base and necic acid, which are then subjected to further
+phase II-conjugation and excretion. (ii) Detoxification by *N*-oxidation
+of the necine base (only possible for retronecine-type PAs): the
+nitrogen is oxidised to form PA *N*-oxides, which can be conjugated by
+phase II enzymes, e.g. with glutathione, and then excreted. PA
+*N*-oxides can be converted back into the corresponding parent PA
+([Wang et al. 2005](#_ENREF_134)). (iii) Metabolic activation or
+toxification: PAs are metabolically activated/toxified by oxidation
+(for retronecine-type PAs) or oxidative *N*-demethylation (for
+otonecine-type PAs; [Lin 1998](#_ENREF_82)). This pathway is mainly
+catalysed by cytochrome P450 isoforms CYP2B and 3A ([Ruan et al.
+2014b](#_ENREF_115)), and
+results in the formation of dehydropyrrolizidines (DHP, also known as
+pyrrolic ester or reactive pyrroles). DHPs are highly reactive and cause
+damage in the cells where they are formed, usually hepatocytes. However,
+they can also pass from the hepatocytes into the adjacent sinusoids and
+damage the endothelial lining cells ([Gao et al. 2015](#_ENREF_48)),
+predominantly by reaction with proteins, lipids and DNA. There is even
+evidence that conjugation of DHP to glutathione, which would generally
+be considered a detoxification step, could result in reactive
+metabolites, which might also lead to DNA adduct formation ([Xia et al.
+2015](#_ENREF_138)). Due to the ability to form DNA adducts, DNA
+crosslinks and DNA breaks, 1,2-unsaturated PAs are generally considered
+genotoxic and carcinogenic ([Chen et al. 2010](#_ENREF_26); [EFSA
+2011](#_ENREF_36); [Fu et al. 2004](#_ENREF_45); [Li et al.
+2011](#_ENREF_78); [Takanashi et al. 1980](#_ENREF_126); [Yan et al.
+2008](#_ENREF_140); [Zhao et al. 2012](#_ENREF_148)). Still,
+there is no evidence yet that PAs are carcinogenic in humans ([ANZFA
+2001](#_ENREF_4); [EMA 2016](#_ENREF_39)). One general limitation of
+studies with PAs is the number of different PAs investigated. Around 30
+PAs are currently commercially available, therefore all studies focus on
+these PAs. This is also true for *in vitro* and *in vivo* tests on
+mutagenicity and genotoxicity. To gain a wider perspective, in this
+study over 600 different PAs were assessed on their mutagenic potential
+using four different machine learning techniques.
+
+Materials and Methods
+=====================
+
+Training dataset
+----------------
+
+For all methods, the same validated training dataset was used. The
+training dataset was compiled from the following sources:
+
+- Kazius/Bursi Dataset (4337 compounds, [Kazius et al.
+ 2005](#_ENREF_71)):
+
+> <http://cheminformatics.org/datasets/bursi/cas_4337.zip>
+
+- Hansen Dataset (6513 compounds, [Hansen et al. 2009](#_ENREF_57)):
+
+> <http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv>
+
+- EFSA Dataset (695 compounds, [EFSA 2011](#_ENREF_36)):
+
+> <https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls>
+
+Mutagenicity classifications from Kazius and Hansen datasets were used
+without further processing. To achieve consistency between these
+datasets, EFSA compounds were classified as mutagenic, if at least one
+positive result was found for TA98 or TA100 Salmonella strains.
+
+Dataset merges were based on unique SMILES (*Simplified Molecular Input
+Line Entry Specification*) strings of the compound structures.
+Duplicated experimental data with the same outcome was merged into a
+single value, because it is likely that it originated from the same
+experiment. Contradictory results were kept as multiple measurements in
+the database. The combined training dataset contains 8281 unique
+structures.
+
+Source code for all data download, extraction and merge operations is
+publicly available from the git repository
+<https://git.in-silico.ch/pyrrolizidine> under a GPL3 License.
+
+Testing dataset
+---------------
+
+The testing dataset consisted of 602 different PAs. The compilation of
+the PA dataset is described in detail in [Schöning et al.
+(2017)](#_ENREF_119). The PAs were assigned to groups according to
+structural features of the necine base and necic acid.
+
+For the necine base, the following groups were assigned:
+
+- Retronecine-type (1,2-unsaturated necine base)
+
+- Otonecine-type (1,2-unsaturated necine base)
+
+- Platynecine-type (1,2-saturated necine base)
+
+For the modification of necine base, following groups were assigned:
+
+- *N*-oxide-type
+
+- Tertiary-type (PAs which were neither of the *N*-oxide- nor of the
+ DHP-type)
+
+- DHP-type (dehydropyrrolizidine, pyrrolic ester)
+
+For the necic acid, following groups were assigned:
+
+- Monoester-type
+
+- Open-ring diester-type
+
+- Macrocyclic diester-type
+
+For the Random Forest (RF), Support Vector Machines (SVM), and Deep
+Learning (DL) models, molecular descriptors of the PAs were calculated
+using the program PaDEL-Descriptors (version 2.21) ([Yap
+2011](#_ENREF_142), [2014](#_ENREF_143)). From these, the descriptors
+were chosen that were actually used for the generation of the DL model.
+
+LAZAR
+-----
+
+LAZAR (*lazy structure activity relationships*) is a modular framework
+for read-across model development and validation. It follows the
+following basic workflow: For a given chemical structure LAZAR:
+
+- searches in a database for similar structures (neighbours) with
+ experimental data,
+
+- builds a local QSAR model with these neighbours and
+
+- uses this model to predict the unknown activity of the query
+ compound.
+
+This procedure resembles an automated version of read across predictions
+in toxicology; in machine learning terms it would be classified as a
+k-nearest-neighbour algorithm.
+
+Apart from this basic workflow, LAZAR is completely modular and allows
+the researcher to use any algorithm for similarity searches and local
+QSAR (*Quantitative structure--activity relationship*) modelling.
+Algorithms used within this study are described in the following
+sections.
+
+### Neighbour identification
+
+Similarity calculations were based on MolPrint2D fingerprints ([Bender et
+al. 2004](#_ENREF_8)) from the OpenBabel cheminformatics library
+([O'Boyle et al. 2011](#_ENREF_104)). The MolPrint2D fingerprint uses
+atom environments as molecular representation, which resembles basically
+the chemical concept of functional groups. For each atom in a molecule,
+it represents the chemical environment using the atom types of connected
+atoms.
+
+MolPrint2D fingerprints are generated dynamically from chemical
+structures and do not rely on predefined lists of fragments (such as
+OpenBabel FP3, FP4 or MACCs fingerprints or lists of
+toxicophores/toxicophobes). This has the advantage that they may capture
+substructures of toxicological relevance that are not included in other
+fingerprints.
+
+From MolPrint2D fingerprints a feature vector with all atom environments
+of a compound can be constructed that can be used to calculate chemical
+similarities.
+
+The chemical similarity between two compounds A and B is expressed as
+the proportion between the number of atom environments common to both
+structures |A ∩ B| and the total number of atom environments |A ∪ B|
+(Jaccard/Tanimoto index).
+
+$$sim = \frac{\left| A\ \cap B \right|}{\left| A\ \cup B \right|}$$
+
+Threshold selection is a trade-off between prediction accuracy (high
+threshold) and the number of predictable compounds (low threshold). As
+it is in many practical cases desirable to make predictions even in the
+absence of closely related neighbours, we follow a tiered approach:
+
+- First a similarity threshold of 0.5 is used to collect neighbours,
+ to create a local QSAR model and to make a prediction for the query
+ compound.
+
+- If any of these steps fails, the procedure is repeated with a
+ similarity threshold of 0.2 and the prediction is flagged with a
+ warning that it might be out of the applicability domain of the
+ training data.
+
+- Similarity thresholds of 0.5 and 0.2 are the default values chosen
+ by the software developers and remained unchanged during the
+ course of these experiments.
+
+Compounds with the same structure as the query structure are
+automatically eliminated from neighbours to obtain unbiased predictions
+in the presence of duplicates.
+
+### Local QSAR models and predictions
+
+Only similar compounds (neighbours) above the threshold are used for
+local QSAR models. In this investigation, we are using a weighted
+majority vote from the neighbours' experimental data for mutagenicity
+classifications. Probabilities for both classes
+(mutagenic/non-mutagenic) are calculated according to the following
+formula and the class with the higher probability is used as prediction
+outcome.
+
+$$p_{c} = \ \frac{\sum_{}^{}\text{sim}_{n,c}}{\sum_{}^{}\text{sim}_{n}}$$
+
+$p_{c}$ Probability of class c (e.g. mutagenic or non-mutagenic)\
+$\sum_{}^{}\text{sim}_{n,c}$ Sum of similarities of neighbours with
+class c\
+$\sum_{}^{}\text{sim}_{n}$ Sum of similarities of all neighbours
+
+### Applicability domain
+
+The applicability domain (AD) of LAZAR models is determined by the
+structural diversity of the training data. If no similar compounds are
+found in the training data no predictions will be generated. Warnings
+are issued if the similarity threshold had to be lowered from 0.5 to 0.2
+in order to enable predictions. Predictions without warnings can be
+considered as close to the applicability domain and predictions with
+warnings as more distant from the applicability domain. Quantitative
+applicability domain information can be obtained from the similarities
+of individual neighbours.
+
+### Availability
+
+- LAZAR experiments for this manuscript:
+ <https://git.in-silico.ch/pyrrolizidine>
+ (source code, GPL3)
+
+- LAZAR framework:
+ <https://git.in-silico.ch/lazar>
+ (source code, GPL3)
+
+- LAZAR GUI:
+ <https://git.in-silico.ch/lazar-gui>
+ (source code, GPL3)
+
+- Public web interface:
+ <https://lazar.in-silico.ch>
+
+Random Forest, Support Vector Machines, and Deep Learning in R-project
+----------------------------------------------------------------------
+
+In comparison to LAZAR, three other models (Random Forest (RF), Support
+Vector Machines (SVM), and Deep Learning (DL)) were evaluated.
+
+For the generation of these models, molecular 1D and 2D descriptors of
+the training dataset were calculated using PaDEL-Descriptors (version
+2.21) ([Yap 2011](#_ENREF_142), [2014](#_ENREF_143)).
+
+As the training dataset contained over 8280 instances, it was decided to
+delete instances with missing values during data pre-processing.
+Furthermore, substances with equivocal outcome were removed. The final
+training dataset contained 8080 instances with known mutagenic
+potential. The RF, SVM, and DL models were generated using the R
+software (R-project for Statistical Computing,
+<https://www.r-project.org/>*;* version 3.3.1), specific R packages used
+are identified for each step in the description below. During feature
+selection, descriptors with near zero variance were removed using the
+'*nearZeroVar*'-function (package 'caret'). If the percentage of the
+most common value was more than 90% or when the frequency ratio of the
+most common value to the second most common value was greater than 95:5
+(e.g. 95 instances of the most common value and only 5 or less instances
+of the second most common value), a descriptor was classified as having
+a near zero variance. After that, highly correlated descriptors were
+removed using the '*findCorrelation*'-function (package 'caret') with a
+cut-off of 0.9. This resulted in a training dataset with 516
+descriptors. These descriptors were scaled to be in the range between 0
+and 1 using the '*preProcess*'-function (package 'caret'). The scaling
+routine was saved in order to apply the same scaling on the testing
+dataset. As these three steps did not consider the outcome, it was
+decided that they do not need to be included in the cross-validation of
+the model. To further reduce the number of features, a LASSO (*least
+absolute shrinkage and selection operator*) regression was performed
+using the '*glmnet*'-function (package '*glmnet*'). The reduced dataset
+was used for the generation of the pre-trained models.
+
+For the RF model, the '*randomForest*'-function (package
+'*randomForest*') was used. A forest with 1000 trees with maximal
+terminal nodes of 200 was grown for the prediction.
+
+The '*svm*'-function (package 'e1071') with a *radial basis function
+kernel* was used for the SVM model.
+
+The DL model was generated using the '*h2o.deeplearning*'-function
+(package '*h2o*'). The DL model contained four hidden layers with 70,
+50, 50, and 10 neurons, respectively. Other hyperparameters were set as
+follows: l1=1.0E-7, l2=1.0E-11, epsilon = 1.0E-10, rho = 0.8, and
+quantile\_alpha = 0.5. For all other hyperparameters, the default values
+were used.
+Weights and biases were in a first step determined with an unsupervised
+DL model. These values were then used for the actual, supervised DL
+model.
+
+To validate these models, an internal cross-validation approach was
+chosen. The training dataset was randomly split into training data, which
+contained 95% of the data, and validation data, which contained 5% of the
+data. A feature selection with LASSO on the training data was performed,
+reducing the number of descriptors to approximately 100. This step was
+repeated five times. Based on each of the five different training data,
+the predictive models were trained and the performance tested with the
+validation data. This step was repeated 10 times. Furthermore, a
+y-randomisation using the RF model was performed. During
+y-randomisation, the outcome (y-variable) is randomly permuted. The
+theory is that after randomisation of the outcome, the model should not
+be able to correlate the outcome to the properties (descriptor values)
+of the substances. The performance of the model should therefore
+indicate a by-chance prediction with an accuracy of about 50%. If this
+is true, it can be concluded that the correlation between actual outcome
+and properties of the substances is real and not by chance ([Rücker et
+al. 2007](#_ENREF_117)).
+
+![](./media/image1.png){width="6.26875in"
+height="5.486111111111111in"}
+
+Figure 1: Flowchart of the generation and validation of the models
+generated in R-project
+
+Deep Learning in TensorFlow
+---------------------------
+
+Alternatively, a DL model was established with Python-based TensorFlow
+program (<https://www.tensorflow.org/>) using the high-level API Keras
+(<https://www.tensorflow.org/guide/keras>) to build the models.
+
+Data pre-processing was done by rank transformation using the
+'*QuantileTransformer*' procedure. A sequential model has been used.
+Four layers have been used: input layer, two hidden layers (with 12, 8
+and 8 nodes, respectively) and one output layer. For the output layer, a
+sigmoidal activation function and for all other layers the ReLU
+('*Rectified Linear Unit*') activation function was used. Additionally,
+a L^2^-penalty of 0.001 was used for the input layer. For training of
+the model, the ADAM algorithm was used to minimise the cross-entropy
+loss using the default parameters of Keras. Training was performed for
+100 epochs with a batch size of 64. The model was implemented with
+Python 3.6 and Keras. For training of the model, a 6-fold
+cross-validation was used. Accuracy was estimated by ROC-AUC and
+confusion matrix.
+
+Results
+=======
+
+LAZAR
+-----
+
+For 46 PAs, no prediction could be made. 26 PAs had no neighbours and 20
+PAs had only one neighbour. For additional 396 PAs, the similarity
+threshold had to be reduced from 0.5 to 0.2 to obtain enough neighbours
+for a prediction. This means that these substances might not be within
+the applicability domain (AD). Therefore, only 160 of 602 PAs were well
+within the stricter AD with the similarity threshold of 0.5 and 556 PAs
+in the AD with the similarity threshold of 0.2.
+
+![](./media/image2.png){width="5.905511811023622in"
+height="3.868241469816273in"}
+
+Figure 2: Genotoxic potential of the different PA groups as predicted by
+LAZAR, using the **similarity threshold** **of 0.5**.
+
+*Genotoxic*: percentage number of compounds per group, which were
+predicted to be genotoxic.\
+*Not genotoxic*: percentage number of compounds per group, which were
+predicted to be not genotoxic\
+*Outside AD*: percentage number of compounds per group, which were
+outside the applicability domain (AD).
+
+![](./media/image3.png){width="5.905511811023622in"
+height="3.868241469816273in"}
+
+Figure 3: Genotoxic potential of the different PA groups as predicted by
+LAZAR, using the **similarity threshold of 0.2**
+
+*Genotoxic*: percentage number of compounds per group, which were
+predicted to be genotoxic.\
+*Not genotoxic*: percentage number of compounds per group, which were
+predicted to be not genotoxic\
+*Outside AD*: percentage number of compounds per group, which were
+outside the applicability domain (AD).
+
+Interestingly, using both similarity thresholds (i.e. 0.2 and 0.5), the
+majority of PAs in all groups, except otonecine, were predicted to be not
+genotoxic.
+
+The following rank order for genotoxicity probability can be deduced
+from the results of both similarity thresholds:
+
+- Necine base: platynecine ≤ retronecine \<\< otonecine
+
+- Necic acid: monoester \< diester \< macrocyclic diester
+
+- Modification of necine base: *N*-oxide \< DHP \< tertiary PA
+
+Random Forest, Support Vector Machines, and Deep Learning
+---------------------------------------------------------
+
+Applicability domain
+
+The AD of the training dataset and the PA dataset was evaluated using
+the Jaccard distance. A Jaccard distance of '0' indicates that the
+substances are similar, whereas a value of '1' shows that the substances
+are different. The Jaccard distance was below 0.2 for all PAs relative
+to the training dataset. Therefore, PA dataset is within the AD of the
+training dataset and the models can be used to predict the genotoxic
+potential of the PA dataset.
+
+y-randomisation
+
+After y-randomisation of the outcome, the accuracy and CCR (correct
+classification rate) are around 50%, indicating a chance distribution of
+the results. This shows that the relationship between the outcome and
+the predictors is real and not due to chance.
+
+Random Forest
+
+The validation showed that the RF model has an accuracy of 64%, a
+sensitivity of 66% and a specificity of 63%. The confusion matrix of the
+model, calculated for 8080 instances, is provided in Table 1.
+
+Table 1: Confusion matrix of the RF model
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2274 1163 3437
+ ***TN*** 1736 2907 4643
+ ***Total*** 4010 4070 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+In general, the majority of PAs were considered to be not genotoxic by
+the RF model (Figure 4).
+
+![](./media/image4.png){width="6.063194444444444in"
+height="3.8756944444444446in"}
+
+Figure 4: Genotoxic potential of the different PA groups as predicted by
+**RF model**
+
+*Genotoxic*: percentage number of compounds per group, which was
+predicted to be genotoxic.\
+*Not genotoxic*: percentage number of compounds per group, which was
+predicted to be not genotoxic.
+
+From the results, the following rank orders of genotoxic potential could
+be deduced:
+
+- Necine base: platynecine \< retronecine \< otonecine
+
+- Necic acid: monoester (= 0%) \< diester \< macrocyclic diester
+
+- Modification of necine base: *N*-oxide = dehydropyrrolizidine (0%)
+ \< tertiary PA
+
+Support Vector Machines
+
+The validation showed that the SVM model has an accuracy of 62%, a
+sensitivity of 65% and a specificity of 60%. The confusion matrix of the
+SVM model, calculated for 8080 instances, is provided in Table 2.
+
+Table 2: Confusion matrix of the SVM model
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2057 1107 3164
+ ***TN*** 1953 2963 4916
+ ***Total*** 4010 4070 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+In the SVM model, the majority of PAs were also considered to be not
+genotoxic (Figure 5).
+
+![](./media/media/image5.png){width="6.063194444444444in"
+height="3.9694444444444446in"}
+
+Figure 5: Genotoxic potential of the different PA groups as predicted by
+**SVM model**
+
+*Genotoxic*: percentage number of compounds per group, which was
+predicted to be genotoxic.\
+*Not genotoxic*: percentage number of compounds per group, which was
+predicted to be not genotoxic
+
+From the results, the following rank orders of genotoxic potential could
+be deduced:
+
+- Necine base: otonecine \< platynecine = retronecine
+
+- Necic acid: macrocyclic diester \< monoester = diester
+
+- Modification of necine base: dehydropyrrolizidine \< tertiary
+ PA \< *N*-oxide 
+
+Deep Learning (R-project)
+
+The validation showed that the DL model generated in R has an accuracy
+of 59%, a sensitivity of 89% and a specificity of 30%. The confusion
+matrix of the model, normalised to 8080 instances, is provided in Table
+3.
+
+Table 3: Confusion matrix of the DL model (R-project)
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 3575 435 4010
+ ***TN*** 2853 1217 4070
+ ***Total*** 6428 1652 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+In contrast, the majority of PAs were considered to be genotoxic by the
+DL model in R (Figure 6).
+
+![](./media/media/image6.png){width="6.063194444444444in"
+height="3.982638888888889in"}
+
+Figure 6: Genotoxic potential of the different PA groups as predicted by
+**DL model (R-project)**
+
+*Genotoxic*: percentage number of compounds per group, which was
+predicted to be genotoxic.\
+*Not genotoxic*: percentage number of compounds per group, which was
+predicted to be not genotoxic
+
+From the results, the following rank orders of genotoxic potential could
+be proposed:
+
+- Necine base: platynecine \< retronecine \< otonecine
+
+- Necic acid: monoester \< diester \< macrocyclic diester
+
+- Modification of necine base: tertiary PA = dehydropyrrolizidine \<
+ *N*-oxide.
+
+DL model (TensorFlow)
+
+The validation showed that the DL model generated in TensorFlow has an
+accuracy of 68%, a sensitivity of 70% and a specificity of 46%. The
+confusion matrix of the model, normalised to 8080 instances, is provided
+in Table 4.
+
+Table 4: Confusion matrix of the DL model (TensorFlow)
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2851 1227 4078
+ ***TN*** 1825 2177 4002
+ ***Total*** 4676 3404 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+The ROC curves from the 6-fold validation are shown in Figure 7.
+
+![](./media/media/image7.png){width="3.825in"
+height="2.7327045056867894in"}
+
+Figure 7: Six-fold cross-validation of the TensorFlow DL model shows an
+average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68%.
+
+In contrast to the DL model generated in R, the DL model generated in
+TensorFlow predicted the majority of PAs as not genotoxic.
+
+![](./media/media/image8.png){width="6.26875in"
+height="3.6993055555555556in"}
+
+Figure 8: Genotoxic potential of the different PA groups as predicted by
+**DL model (TensorFlow)**
+
+*Genotoxic*: percentage number of compounds per group, which was
+predicted to be genotoxic.\
+*Not genotoxic*: percentage number of compounds per group, which was
+predicted to be not genotoxic
+
+The following rank orders of genotoxic potential could be proposed based
+on the results:
+
+- Necine base: platynecine \< otonecine \< retronecine 
+
+- Necic acid: monoester \< diester \< macrocyclic diester
+
+- Modification of necine base: tertiary PA \< *N*-oxide \<\<
+ dehydropyrrolizidine.
+
+In summary, the validation results of the four methods are presented in
+the following table.
+
+Table 5: Results of the cross-validation of the four models and after
+y-randomisation
+
+ ----------------------------------------------------------------------
+ Accuracy CCR Sensitivity Specificity
+ ----------------------- ---------- ------- ------------- -------------
+ RF model 64.1% 64.4% 66.2% 62.6%
+
+ SVM model 62.1% 62.6% 65.0% 60.3%
+
+ DL model\ 59.3% 59.5% 89.2% 29.9%
+ (R-project)
+
+ DL model (TensorFlow) 68% 62.2% 69.9% 45.6%
+
+ y-randomisation 50.5% 50.4% 50.3% 50.6%
+ ----------------------------------------------------------------------
+
+CCR (correct classification rate)
+
+Discussion
+==========
+
+General model performance
+
+Based on the results of the cross-validation for all models (LAZAR, RF,
+SVM, DL (R-project) and DL (TensorFlow)), it can be stated that the
+prediction results are not optimal for different reasons. The
+accuracy as measured during cross-validation of the four models (RF,
+SVM, DL (R-project and TensorFlow)) was partly low with CCR values
+between 59.3 and 68%, with the R-generated DL model and the
+TensorFlow-generated DL model showing the worst and the best
+performance, respectively. The validation of the R-generated DL model
+revealed a high sensitivity (89.2%) but an unacceptably low specificity
+of 29.9% indicating a high number of false positive estimates. The
+TensorFlow-generated DL model, however, showed an acceptable but not
+optimal accuracy of 68%, a sensitivity of 69.9% and a specificity of
+45.6%. The low specificity indicates that both DL models tend to
+predict too many instances as positive (genotoxic), and therefore have a
+high false positive rate. This allows group statements to be made, at
+least with the TensorFlow-generated DL model, but the confidence for
+estimations of single PAs appears to be insufficient.
+
+Several factors have likely contributed to the low to moderate
+performance of the used methods as shown during the cross-validation:
+
+1. The outcome in the training dataset was based on the results of AMES
+ tests for genotoxicity ([ICH 2011](#_ENREF_63)), an *in vitro* test
+ in different strains of the bacteria *Salmonella typhimurium*. In
+ this test, mutagenicity is evaluated with and without prior
+ metabolic activation of the test substance. Metabolic activation
+ could result in the formation of genotoxic metabolites from
+ non-genotoxic parent compounds. However, no distinction was made in
+ the training dataset between substances that needed metabolic
+ activation before being mutagenic and those that were mutagenic
+ without metabolic activation. LAZAR is able to handle this
+ 'inaccuracy' in the training dataset well due to the way the
+ algorithm works: LAZAR predicts the genotoxic potential based on the
+ neighbours of substances with comparable structural features,
+ considering mutagenic and not mutagenic neighbours. Based on the
+ structural similarity, a probability for mutagenicity and no
+ mutagenicity is calculated independently from each other (meaning
+ that the sum of probabilities does not necessarily add up to 100%).
+ The class with the higher outcome is then the overall outcome for
+ the substance.
+
+> In contrast, the other models need to be trained first to recognise
+> the structural features that are responsible for genotoxicity.
+> Therefore, the mixture of substances being mutagenic with and without
+> metabolic activation in the training dataset may have adversely
+> affected the ability to separate the dataset in two distinct classes
+> and thus explains the relatively low performance of these models.
+
+2. Machine learning algorithms try to find an optimized solution in a
+ high-dimensional (one dimension per predictor) space. Sometimes
+ these methods do not find the global optimum of estimates but only
+ local (not optimal) solutions. A strategy to find the global
+ solution is systematic variation (grid search) of the
+ hyperparameters of the methods, which may be very time-consuming, in
+ particular for large datasets.
+
+Mutagenicity of PAs
+
+Due to the low to moderate predictivity of all models, quantitative
+statements on the genotoxicity of single PAs cannot be made with
+sufficient confidence.
+
+The predictions of the SVM model did not fit with the other models or
+literature, and are therefore not further considered in the discussion.
+
+Necic acid
+
+The rank order of the necic acid is comparable in the four models
+considered (LAZAR, RF and DL (R-project and TensorFlow)). PAs from the
+monoester type had the lowest genotoxic potential, followed by PAs from
+the open-ring diester type. PAs with macrocyclic diesters had the
+highest genotoxic potential. The result fits well with the current state
+of knowledge: in general, PAs which have a macrocyclic diester as necic
+acid are considered more toxic than those with an open-ring diester or
+monoester ([EFSA 2011](#_ENREF_36); [Fu et al. 2004](#_ENREF_45); [Ruan
+et al. 2014b](#_ENREF_115)).
+
+Necine base
+
+The rank order of necine base is comparable in LAZAR, RF, and DL
+(R-project) models: with platynecine being less or as genotoxic as
+retronecine, and otonecine being the most genotoxic. In the
+TensorFlow-generated DL model, platynecine also has the lowest genotoxic
+probability, but is then followed by otonecine and lastly by
+retronecine. These results partly correspond to earlier published
+studies. Saturated PAs of the platynecine-type are generally accepted to
+be less or non-toxic and have been shown in *in vitro* experiments to
+form no DNA-adducts ([Xia et al. 2013](#_ENREF_139)). Therefore, it is
+striking that 1,2-unsaturated PAs of the retronecine-type should have
+an almost comparable genotoxic potential in the LAZAR and DL (R-project)
+model. In literature, otonecine-type PAs were shown to be more toxic
+than those of the retronecine-type ([Li et al. 2013](#_ENREF_80)).
+
+Modifications of necine base
+
+The group-specific results of the TensorFlow-generated DL model appear
+to reflect the expected relationship between the groups: the low
+genotoxic potential of *N*-oxides and the highest potential of
+dehydropyrrolizidines ([Chen et al. 2010](#_ENREF_26)).
+
+In the LAZAR model, the genotoxic potential of dehydropyrrolizidines
+(DHP) (using the extended AD) is comparable to that of tertiary PAs.
+Since DHP is regarded as the toxic principle in the metabolism of PAs,
+and known to produce protein- and DNA-adducts ([Chen et al.
+2010](#_ENREF_26)), the LAZAR model did not meet this expectation: it
+predicted the majority of DHP as being not genotoxic. However, the
+following issues need to be considered. On the one hand, all DHP were
+outside of the stricter AD of 0.5. This indicates that in general, there
+might be a problem with the AD. In addition, DHP has two unsaturated
+double bonds in its necine base, making it highly reactive. DHP and
+other comparable molecules have a very short lifespan, and usually
+cannot be used in *in vitro* experiments. This might explain the absence
+of suitable neighbours in LAZAR.
+
+Furthermore, the probabilities for this substance group need to be
+considered, and not only the consolidated prediction. In the LAZAR
+model, all DHPs had probabilities for both outcomes (genotoxic and not
+genotoxic) mainly below 30%. Additionally, the probabilities for both
+outcomes were close together, often within 10% of each other. The fact
+that for both outcomes, the probabilities were low and close together,
+indicates a lower confidence in the prediction of the model for DHPs.
+
+In the DL (R-project) and RF model, *N*-oxides have a by far higher
+genotoxic potential than tertiary PAs or dehydropyrrolizidines. As PA
+*N*-oxides are easily conjugated for excretion, they are generally
+considered as detoxification products, which are *in vivo* quickly
+renally eliminated ([Chen et al. 2010](#_ENREF_26)). On the other hand,
+*N*-oxides can be also back-transformed to the corresponding tertiary PA
+([Wang et al. 2005](#_ENREF_134)). Therefore, it may be questioned
+whether *N*-oxides themselves are generally less genotoxic than the
+corresponding tertiary PAs. However, in the groups of modification of
+the necine base, dehydropyrrolizidine, the toxic principle of PAs,
+should have had the highest genotoxic potential. Taken together, the
+predictions of the modifications of the necine base from the LAZAR, RF
+and R-generated DL model cannot -- in contrast to the TensorFlow DL
+model -- be considered as reliable.
+
+Overall, when comparing the prediction results of the PAs to current
+published knowledge, it can be concluded that the performance of most
+models was low to moderate. This might be attributed to the following
+issues:
+
+1. In the LAZAR model, only 26.6% PAs were within the stricter AD. With
+ the extended AD, 92.3% of the PAs could be included in the
+ prediction. Even though the Jaccard distance between the training
+ dataset and the PA dataset for the RF, SVM, and DL (R-project and
+ TensorFlow) models was small, suggesting a high similarity, the
+ LAZAR model indicated that PAs have only few local neighbours, which might
+ adversely affect the prediction of the mutagenic potential of PAs.
+
+2. All above-mentioned models were used to predict the mutagenicity of
+ PAs. PAs are generally considered to be genotoxic, and the mode of
+ action is also known. Therefore, the fact that some models predict
+ the majority of PAs as not genotoxic seems contradictory. To
+ understand this result, the basis, the training dataset, has to be
+ considered. The mutagenicity classifications in the training dataset are
+ based on data of mutagenicity in bacteria. There are some studies which show
+ mutagenicity of PAs in the AMES test ([Chen et al.
+ 2010](#_ENREF_26)). Also, [Rubiolo et al. (1992)](#_ENREF_116)
+ examined several different PAs and several different extracts of
+ PA-containing plants in the AMES test. They found that the AMES test
+ was indeed able to detect mutagenicity of PAs, but in general,
+ appeared to have a low sensitivity. The pre-incubation phase for
+ metabolic activation of PAs by microsomal enzymes was the
+ sensitivity-limiting step. This could very well mean that this is
+ also reflected in the QSAR models.
+
+Conclusions
+===========
+
+In this study, an attempt was made to predict the genotoxic potential of
+PAs using five different machine learning techniques (LAZAR, RF, SVM, DL
+(R-project and TensorFlow)). The results of all models fitted only partly
+to the findings in literature, with best results obtained with the
+TensorFlow DL model. Therefore, modelling allows statements on the
+relative risks of genotoxicity of the different PA groups. Individual
+predictions for specific PAs, however, do not appear reliable on the
+current basis of the used training dataset.
+
+This study emphasises the importance of critical assessment of
+predictions by QSAR models. This includes not only extensive literature
+research to assess the plausibility of the predictions, but also a good
+knowledge of the metabolism of the test substances and understanding of
+possible mechanisms of toxicity.
+
+In further studies, additional machine learning techniques or a modified
+(extended) training dataset should be used for an additional attempt to
+predict the genotoxic potential of PAs.
+
+References
+==========
+
+[]{#_ENREF_4 .anchor}
+
+[]{#_ENREF_8 .anchor}
+
+[]{#_ENREF_17 .anchor}
+
+[]{#_ENREF_18 .anchor}
+
+[]{#_ENREF_20 .anchor}
+
+[]{#_ENREF_26 .anchor}
+
+[]{#_ENREF_33 .anchor}
+
+[]{#_ENREF_36 .anchor}
+
+[]{#_ENREF_38 .anchor}
+
+[]{#_ENREF_39 .anchor}
+
+[]{#_ENREF_45 .anchor}
+
+[]{#_ENREF_48 .anchor}
+
+[]{#_ENREF_57 .anchor}
+
+[]{#_ENREF_59 .anchor}
+
+[]{#_ENREF_63 .anchor}
+
+[]{#_ENREF_65 .anchor}
+
+[]{#_ENREF_71 .anchor}
+
+[]{#_ENREF_76 .anchor}
+
+[]{#_ENREF_78 .anchor}
+
+[]{#_ENREF_80 .anchor}
+
+[]{#_ENREF_82 .anchor}
+
+[]{#_ENREF_99 .anchor}
+
+[]{#_ENREF_104 .anchor}
+
+<https://openbabel.org/docs/dev/Fingerprints/intro.html>
+
+[]{#_ENREF_115 .anchor}
+
+[]{#_ENREF_116 .anchor}
+
+[]{#_ENREF_117 .anchor}
+
+[]{#_ENREF_119 .anchor}
+
+[]{#_ENREF_126 .anchor}
+
+[]{#_ENREF_134 .anchor}
+
+[]{#_ENREF_138 .anchor}
+
+[]{#_ENREF_139 .anchor}
+
+[]{#_ENREF_140 .anchor}
+
+[]{#_ENREF_142 .anchor}
+
+[]{#_ENREF_143
+.anchor}<http://www.yapcwsoft.com/dd/padeldescriptor/Descriptors.xls>
+
+[]{#_ENREF_148 .anchor}
+
+Aguer C, Gambarotta D, Mailloux RJ, Moffat C, Dent R, et al. 2011.
+Galactose enhances oxidative metabolism and reveals mitochondrial
+dysfunction in human primary muscle cells. PLoS One 6:e28536Ahmed SN,
+Siddiqi ZA. 2006. Antiepileptic drugs and liver disease. Seizure
+15:156-64Aleo MD, Luo Y, Swiss R, Bonin PD, Potter DM, Will Y. 2014.
+Human drug-induced liver injury severity is highly associated with dual
+inhibition of liver mitochondrial function and bile salt export pump.
+Hepatology (Baltimore, Md) 60:1015-22ANZFA. 2001. Pyrrolizidine
+alkaloids in food. A Toxicological Review and Risk Assessment. ed.
+Authority, ANZF, pp. 1-16Armstrong SJ, Zuckerman AJ, Bird RG. 1972.
+Induction of morphological changes in human embryo liver cells by the
+pyrrolizidine alkaloid lasiocarpine. British journal of experimental
+pathology 53:145-9Barysz M, Jashari G, Lall RS, Srivastava AK,
+Trinajstic N. 1983. On the distance matrix of molecules containing
+heteroatoms. In *Chemical Applications of Topology and Graph Theory*,
+pp. 222-30. Amsterdam, The Netherlands: ElsevierBasak SC, Harriss DK,
+Magnuson VR. Comparative Study of Lipophilicity *versus*
+Topological Molecular Descriptors in Biological Correlations. Journal of
+Pharmaceutical Sciences 73:429-37Bender A, Mussa HY, Glen RC, Reiling S.
+2004. Molecular similarity searching using atom environments,
+information-based feature selection, and a naive Bayesian classifier. J
+Chem Inf Comput Sci 44:170-8Benichou C, Danan G, Flahault A. 1993.
+Causality assessment of adverse reactions to drugs\--II. An original
+model for validation of drug causality assessment methods: case reports
+with positive rechallenge. J Clin Epidemiol 46:1331-6Bergmeir C, Benítez
+JM. 2012. Neural Networks in R Using the Stuttgart Neural Network
+Simulator: RSNNS. Journal of Statistical Software 46:1-26Bishop-Bailey
+D, Thomson S, Askari A, Faulkner A, Wheeler-Jones C. 2014.
+Lipid-metabolizing CYPs in the regulation and dysregulation of
+metabolism. Annu Rev Nutr 34:261-79Blower PE, Cross KP. 2006. Decision
+Tree Methods in Pharmaceutical Research. Current topics in medicinal
+chemistry 6:31-9Boelsterli UA, Lee KK. 2014. Mechanisms of
+isoniazid-induced idiosyncratic liver injury: emerging role of
+mitochondrial stress. Journal of gastroenterology and hepatology
+29:678-87Bramer M. 2013. Principles of Data Mining. p. 444:
+Springer-VerlagBreimann L. 2001. Random Forests. Machine Learning
+45:5-32Breimann L. 2003. Manual-Setting Up, Using, And Understanding
+Random Forests V4.0.1-33Bull LB, Dick AT. 1959. The chronic pathological
+effects on the liver of the rat of the pyrrolizidine alkaloids
+heliotrine, lasiocarpine and their N-oxides. J Path Bact 78:483-502Bull
+LB, Dick AT, McKenzie JS. 1958. The acute toxic effects of heliotrine
+and lasiocarpine, and their N-oxides, on the rat. J Path Bact
+75:17-25Burden FR. 1989. Molecular identification number for
+substructure searches. Journal of Chemical Information and Computer
+Sciences 29:225-7Butler WH, Mattocks AR, Barnes JM. 1970. Lesions in the
+liver and lungs of rats given pyrrole derivates of pyrrolizidine
+alkaloids. J Path 100:169-75Chai J, He Y, Cai SY, Jiang Z, Wang H, et
+al. 2012. Elevated hepatic multidrug resistance-associated protein
+3/ATP-binding cassette subfamily C 3 expression in human obstructive
+cholestasis is mediated through tumor necrosis factor alpha and c-Jun
+NH2-terminal kinase/stress-activated protein kinase-signaling pathway.
+Hepatology 55:1485-94Chalhoub WM, Sliman KD, Arumuganathan M, Lewis JH.
+2014. Drug-induced liver injury: what was new in 2013? Expert Opin Drug
+Metab Toxicol 10:959-80Chawla NV, Bowyer KW, Hall LO. 2002. SMOTE:
+Synthetic Minority Over-sampling Technique. Journal of Artificial
+Intelligence Research 16:321--57Chen M, Borlak J, Tong W. 2013. High
+lipophilicity and high daily dose of oral medications are associated
+with significant risk for drug-induced liver injury. Hepatology
+(Baltimore, Md) 58:388-96Chen M, Suzuki A, Thakkar S, Yu K, Hu C, Tong
+W. 2016. DILIrank: the largest reference drug list ranked by the risk
+for developing drug-induced liver injury in humans. Drug Discov Today
+21:648-53Chen T, Mei N, Fu PP. 2010. Genotoxicity of pyrrolizidine
+alkaloids. J Appl Toxicol 30:183-96Crabtree HG. 1928. The carbohydrate
+metabolism of certain pathological overgrowths Biochem J 22:1289-98Daly
+AK, Donaldson PT, Bhatnagar P, Shen Y, Pe\'er I, et al. 2009.
+HLA-B\*5701 genotype is a major determinant of drug-induced liver injury
+due to flucloxacillin. Nature genetics 41:816-9Danan G, Benichou C.
+1993. Causality assessment of adverse reactions to drugs\--I. A novel
+method based on the conclusions of international consensus meetings:
+application to drug-induced liver injuries. J Clin Epidemiol
+46:1323-30Dar AC, Shokat KM. 2011. The evolution of protein kinase
+inhibitors from antagonists to agonists of cellular signaling. Annu Rev
+Biochem 80:769-95de Wildt SN, Kearns GL, Leeder JS, van den Anker JN.
+1999. Cytochrome P450 3A: ontogeny and drug disposition. Clin
+Pharmacokinet 37:485-505DeLeve LD, Ito Y, Bethea NW, McCuskey MK, Wang
+X, McCuskey RS. 2003. Embolization by sinusoidal lining cells obstructs
+the microcirculation in rat sinusoidal obstruction syndrome. Am J
+Physiol Gastrointest Liver Physiol 284:G1045--G52DeLeve LD, Wang X,
+Kuhlenkamp JF, Kaplowitz N. 1996. Toxicity of Azathioprine and
+Monocrotaline in Murine Sinusoidal Endothelial Cells and Hepatocytes:
+The Role of Glutathione and Relevance to Hepatic Venoocclusive Disease.
+Hepatology 23:589-99Dong H, Haining RL, Thummel KE, Rettie AE, Nelson
+SD. 2000. Involvement of human cytochrome P450 2D6 in the bioactivation
+of acetaminophen. Drug Metab Dispos 28:1397-400Doostdar H, Grant MH,
+Melvin WT, Wolf CR, Burke MD. 1993. The effects of inducing agents on
+cytochrome P450 and UDP-glucuronyltransferase activities in human HEPG2
+hepatoma cells. Biochemical pharmacology 46:629-35EFSA. 2011. Scientific
+Opinion on Pyrrolizidine alkaloids in food and feed. EFSA Journal
+9:1-134Ekins S, Williams AJ, Xu JJ. 2010. A predictive ligand-based
+Bayesian model for human drug-induced liver injury. Drug Metab. Dispos.
+38:2302-8EMA. 2014. EMA/HMPC/893108/2011: Public statement on the use of
+herbal medicinal products containing toxic, unsaturated pyrrolizidine
+alkaloids (PAs).1-24EMA. 2016. EMA/HMPC/328782/2016: Public statement on
+contamination of herbal medicinal products/traditional herbal medicinal
+products with pyrrolizidine alkaloids.1-11Fashe MM, Juvonen RO, Petsalo
+A, Vepsalainen J, Pasanen M, Rahnasto-Rilla M. 2015. In silico
+prediction of the site of oxidation by cytochrome P450 3A4 that leads to
+the formation of the toxic metabolites of pyrrolizidine alkaloids. Chem
+Res Toxicol 28:702-10Field RA, Stegelmeier BL, Colegate SM, Brown AW,
+Green BT. 2015. An in vitro comparison of the cytotoxic potential of
+selected dehydropyrrolizidine alkaloids and some N-oxides. Toxicon
+97:36-45Fleming I. 2014. The pharmacology of the cytochrome P450
+epoxygenase/soluble epoxide hydrolase axis in the vasculature and
+cardiovascular disease. Pharmacol Rev 66:1106-40Fonti V. 2017. *Feature
+Selection using LASSO*. Research paper. VU Amsterdam. 26 pp.Fu PP, Chou
+MW, Churchwell M, Wang Y, Zhao Y, et al. 2010. High-Performance Liquid
+Chromatography Electrospray Ionization Tandem Mass Spectrometry for the
+Detection and Quantitation of Pyrrolizidine Alkaloid-Derived DNA Adducts
+in Vitro and in Vivo. Chem Res Toxicol 23:637--52Fu PP, Xia Q, Lin G,
+Chou MW. 2004. Pyrrolizidine alkaloids\--genotoxicity, metabolism
+enzymes, metabolic activation, and mechanisms. Drug Metab Rev
+36:1-55Galeotti N, Vivoli E, Bilia AR, Vincieri FF, Ghelardini C. 2010.
+St. John\'s wort reduces neuropathic pain through a hypericin-mediated
+inhibition of the protein kinase Cgamma and epsilon activity. Biochem
+Pharmacol 79:1327-36Ganesan S, Tekwani BL, Sahu R, Tripathi LM, Walker
+LA. 2009. Cytochrome P(450)-dependent toxic effects of primaquine on
+human erythrocytes. Toxicol Appl Pharmacol 241:14-22Gao H, Ruan JQ, Chen
+J, Li N, Ke CQ, et al. 2015. Blood pyrrole-protein adducts as a
+diagnostic and prognostic index in pyrrolizidine alkaloid-hepatic
+sinusoidal obstruction syndrome. Drug Des Devel Ther 9:4861-8Gitlin N.
+1980. Salicylate hepatotoxicity: the potential role of hypoalbuminemia.
+J Clin Gastroenterol 2:281-5Gordon GJ, Coleman WB, Grisham JW. 2000.
+Bax-mediated apoptosis in the livers of rats after partial hepatectomy
+in the retrorsine model of hepatocellular injury. Hepatology
+32:312-20Gradhand U, Lang T, Schaeffeler E, Glaeser H, Tegude H, et al.
+2008. Variability in human hepatic MRP4 expression: influence of
+cholestasis and genotype. Pharmacogenomics J 8:42-52Gramatica P, Corradi
+M, Consonni V. 2000. Modelling and prediction of soil sorption
+coefficients of non-ionic organic pesticides by molecular descriptors.
+Chemosphere 41:763-77Greene N, Fisk L, Naven RT, Note RR, Patel ML,
+Pelletier DJ. 2010. Developing structure-activity relationships for the
+prediction of hepatotoxicity. Chemical Research in Toxicology
+23:1215-22Guo YX, Xu XF, Zhang QZ, Li C, Deng Y, et al. 2015. The
+inhibition of hepatic bile acids transporters Ntcp and Bsep is involved
+in the pathogenesis of isoniazid/rifampicin-induced hepatotoxicity.
+Toxicology mechanisms and methods 25:382-7Hall LH, Kier LB. 1995.
+Electrotopological State Indices for Atom Types: A Novel Combination of
+Electronic, Topological, and Valence State Information. Journal of
+Chemical Information and Computer Sciences 35:1039-45Hammann F, Schoning
+V, Drewe J. 2018. Prediction of clinically relevant drug-induced liver
+injury from structure using machine learning. J Appl Toxicol Hansen K,
+Mika S, Schroeter T, Sutter A, ter Laak A, et al. 2009. Benchmark data
+set for in silico prediction of Ames mutagenicity. J Chem Inf Model
+49:2077-81Hartmann T, Ehmke A, Eilert U, von Borstel K, Theuring C.
+1989. Sites of synthesis, translocation and accumulation of
+pyrrolizidine alkaloid N-oxides in Senecio vulgaris L. Planta
+177:98-107Hartmann T, Witte L. 1995. Chemistry, Biology and Chemoecology
+of the Pyrrolizidine Alkaloids. In *Alkaloids: Chemical and Biological
+Perspectives*, ed. Pelletier, pp. 155-233. Pergamon, London, New
+YorkHessel S, Gottschalk C, Schumann D, These A, Preiss-Weigert A,
+Lampen A. 2014. Structure-activity relationship in the passage of
+different pyrrolizidine alkaloids through the gastrointestinal barrier:
+ABCB1 excretes heliotrine and echimidine. Mol Nutr Food Res
+58:995-1004Hunt CM, Westerkam WR, Stave GM. 1992. Effect of age and
+gender on the activity of human hepatic CYP3A. Biochemical pharmacology
+44:275-83Ibanez L, Perez E, Vidal X, Laporte JR, Grup d\'Estudi
+Multicenteric d\'Hepatotoxicitat Aguda de B. 2002. Prospective
+surveillance of acute serious liver disease unrelated to infectious,
+obstructive, or metabolic diseases: epidemiological and clinical
+features, and exposure to drugs. J Hepatol 37:592-600ICH. 2011.
+Guidance on genotoxicity testing and data interpretation for
+pharmaceuticals intended for human use S2(R1). p. 29Iyer VV, Yang H,
+Ierapetritou MG, Roth CM. 2010. Effects of glucose and insulin on
+HepG2-C3A cell metabolism. Biotechnol Bioeng 107:347-56Jago MV. 1971.
+Factors affecting the chronic hepatotoxicity of pyrrolizidine alkaloids.
+The Journal of Pathology 105:1-11Jeon JY, Sparreboom A, Baker SD. 2017.
+Kinase Inhibitors: The Reality Behind the Success. Clin Pharmacol Ther
+102:726-30Jeong W, Doroshow JH, Kummar S. 2013. United States Food and
+Drug Administration approved oral kinase inhibitors for the treatment of
+malignancies. Curr Probl Cancer 37:110-44Ji L, Chen Y, Liu T, Wang Z.
+2008. Involvement of Bcl-xL degradation and mitochondrial-mediated
+apoptotic pathway in pyrrolizidine alkaloids-induced apoptosis in
+hepatocytes. Toxicol Appl Pharmacol 231:393-400Jornil J, Nielsen TS,
+Rosendal I, Ahlner J, Zackrisson AL, et al. 2013. A poor metabolizer of
+both CYP2C19 and CYP2D6 identified by mechanistic pharmacokinetic
+simulation in a fatal drug poisoning case involving venlafaxine.
+Forensic Sci Int 226:e26-31Kalthoff S, Ehmer U, Freiberg N, Manns MP,
+Strassburg CP. 2010. Interaction between oxidative stress sensor Nrf2
+and xenobiotic-activated aryl hydrocarbon receptor in the regulation of
+the human phase II detoxifying UDP-glucuronosyltransferase 1A10. J Biol
+Chem 285:5993-6002Kazius J, McGuire R, Bursi R. 2005. Derivation and
+validation of toxicophores for mutagenicity prediction. J Med Chem
+48:312-20Khan D, Khan AU. 2016. Descriptors and their selection methods
+in QSAR analysis: paradigm for drug design. Drug Discov Today
+21:1291-302Kim HY, Stermitz FR, Molyneux RJ, Wilson DW, Taylor D,
+Coulombe RA, Jr. 1993. Structural influences on pyrrolizidine
+alkaloid-induced cytopathology. Toxicol Appl Pharmacol 122:61-9Kock K,
+Ferslew BC, Netterberg I, Yang K, Urban TJ, et al. 2014. Risk factors
+for development of cholestatic drug-induced liver injury: inhibition of
+hepatic basolateral bile acid transporters multidrug
+resistance-associated proteins 3 and 4. Drug Metab Dispos
+42:665-74Lammert C, Einarsson S, Saha C, Niklasson A, Bjornsson E,
+Chalasani N. 2008. Relationship between daily dose of oral medications
+and idiosyncratic drug-induced liver injury: search for signals.
+Hepatology 47:2003-9Langel D, Ober D, Pelser PB. 2011. The evolution of
+pyrrolizidine alkaloid biosynthesis and diversity in the Senecioneae.
+Phytochemistry Reviews 10:3-74Lasser KE, Allen PD, Woolhandler SJ,
+Himmelstein DU, Wolfe SM, Bor DH. 2002. Timing of new black box warnings
+and withdrawals for prescription medications. JAMA 287:2215-20Li N, Xia
+Q, Ruan J, Fu PP, Lin G. 2011. Hepatotoxicity and Tumorigenicity Induced
+by Metabolic Activation of Pyrrolizidine Alkaloids in Herbs. Current
+Drug Metabolism 12Li X, Cameron MD. 2012. Potential role of a quetiapine
+metabolite in quetiapine-induced neutropenia and agranulocytosis. Chem
+Res Toxicol 25:1004-11Li YH, Kan WL, Li N, Lin G. 2013. Assessment of
+pyrrolizidine alkaloid-induced toxicity in an in vitro screening model.
+J Ethnopharmacol 150:560-7Lima A, Bernardes M, Azevedo R, Medeiros R,
+Seabra V. 2015. Pharmacogenomics of Methotrexate Membrane Transport
+Pathway: Can Clinical Response to Methotrexate in Rheumatoid Arthritis
+Be Predicted? Int J Mol Sci 16:13760-80Lin G. 1998. Microsomal Formation
+of a Pyrrolic Alcohol Glutathione Conjugate of Clivorine: Firm Evidence
+for the Formation of a Pyrrolic Metabolite of an Otonecine-Type
+Pyrrolizidine Alkaloid. Drug Metabolism and Disposition
+26:181-4Lindigkeit R, Biller A, Buch M, Schiebel H-M, Boppré M, Hartmann
+T. 1997. The two faces of pyrrolizidine alkaloids: the role of the
+tertiary amine and its N-oxide in chemical defense of insects with
+acquired plant alkaloids. Eur J Biochem 245Makhlouf HA, Helmy A, Fawzy
+E, El-Attar M, Rashed HA. 2008. A prospective study of antituberculous
+drug-induced hepatotoxicity in an area endemic for liver diseases.
+Hepatol Int 2:353-60Marin-Hernandez A, Rodriguez-Enriquez S,
+Vital-Gonzalez PA, Flores-Rodriguez FL, Macias-Silva M, et al. 2006.
+Determining and understanding the control of glycolysis in fast-growth
+tumor cells. Flux control by an over-expressed but strongly
+product-inhibited hexokinase. FEBS J 273:1975-88Marroquin LD, Hynes J,
+Dykens JA, Jamieson JD, Will Y. 2007. Circumventing the Crabtree effect:
+replacing media glucose with galactose increases susceptibility of HepG2
+cells to mitochondrial toxicants. Toxicol Sci 97:539-47Mattocks AR.
+1986. *Chemistry and Toxicology of Pyrrolizidine Alkaloids*: Academic
+PressMeharena HS, Chang P, Keshwani MM, Oruganty K, Nene AK, et al.
+2013. Deciphering the structural basis of eukaryotic protein kinase
+regulation. PLoS Biol 11:e1001680Merz KH, Schrenk D. 2016. Interim
+relative potency factors for the toxicological risk assessment of
+pyrrolizidine alkaloids in food and herbal medicines. Toxicol Lett
+263:44-57Miners JO, Birkett DJ. 1998. Cytochrome P4502C9: an enzyme of
+major importance in human drug metabolism. British Journal of Clinical
+Pharmacology 45:525-38Mingard C, Paech F, Bouitbir J, Krahenbuhl S.
+2018. Mechanisms of toxicity associated with six tyrosine kinase
+inhibitors in human hepatocyte cell lines. J Appl Toxicol
+38:418-31Mingatto FE, Dorta DJ, dos Santos AB, Carvalho I, da Silva CH,
+et al. 2007. Dehydromonocrotaline inhibits mitochondrial complex I. A
+potential mechanism accounting for hepatotoxicity of monocrotaline.
+Toxicon 50:724-30Mitchell JB. 2014. Machine learning methods in
+chemoinformatics. Wiley Interdiscip Rev Comput Mol Sci 4:468-81Morgan
+RE, Trauner M, van Staden CJ, Lee PH, Ramachandran B, et al. 2010.
+Interference with bile salt export pump function is a susceptibility
+factor for human liver injury in drug development. Toxicol Sci
+118:485-500Muegge I, Mukherjee P. 2016. An overview of molecular
+fingerprint similarity search in virtual screening. Expert Opin Drug
+Discov 11:137-48Najibi A, Heidari R, Zarifi J, Jamshidzadeh A,
+Firoozabadi N, Niknahad H. 2016. Evaluating the Role of Drug Metabolism
+and Reactive Intermediates in Trazodone-Induced Cytotoxicity toward
+Freshly-Isolated Rat Hepatocytes. Drug Res (Stuttg) 66:592-6Nantasenamat
+C, Isarankura-Na-Ayudhya C, Naenna T, Prachayasittikul V. 2009. A
+Practical Overview of Quantitative Structure-Activity Relationship.
+EXCLI Journal 8:74-88National Cancer Institute. 2006. Common Terminology
+Criteria for Adverse Events v3.0 (CTCAE). ed. Program, CTENeumann MG,
+Cohen LB, Opris M, Nanau R, Jeong H. 2015. Hepatotoxicity of
+Pyrrolizidine Alkaloids. J Pharm Pharm Sci 18:825-43Newby D, Freitas AA,
+Ghafourian T. 2015. Decision trees to characterise the roles of
+permeability and solubility on the prediction of oral absorption. Eur J
+Med Chem 90:751-65Niederer C, Behra R, Harder A, Schwarzenbach RP,
+Escher BI. 2004. Mechanistic approaches for evaluating the toxicity of
+reactive organochlorines and epoxides in green algae. Environmental
+Toxicology and Chemistry 23:697-704NTP. 1978. Bioassay of lasiocarpine
+for possible carcinogenicity. pp. 1-82NTP. 2003. Toxicology and
+Carcinogenesis Studies of Riddelliine (CAS No. 23246-96-0) in F344/N
+Rats And B6C3F~1~ Mice (Gavage Studies). ed. Health, NIoO\'Boyle NM,
+Banck M, James CA, Morley C, Vandermeersch T, Hutchison GR. 2011. Open
+Babel: An open chemical toolbox. J Cheminform 3:33Open Babel community.
+2011. *Molecular fingerprints and similarity searching --- Open Babel
+v2.3.1 documentation. Openbabel.org*. , December 31, 2018Paech F,
+Bouitbir J, Krahenbuhl S. 2017. Hepatocellular Toxicity Associated with
+Tyrosine Kinase Inhibitors: Mitochondrial Damage and Inhibition of
+Glycolysis. Front Pharmacol 8:367Parkinson A, Mudra DR, Johnson C, Dwyer
+A, Carroll KM. 2004. The effects of gender, age, ethnicity, and liver
+cirrhosis on cytochrome P450 enzyme activity in human liver microsomes
+and inducibility in cultured human hepatocytes. Toxicol Appl Pharmacol
+199:193-209Pellinen P, Honkakoski P, Stenback F, Niemitz M, Alhava E, et
+al. 1994. Cocaine N-demethylation and the metabolism-related
+hepatotoxicity can be prevented by cytochrome P450 3A inhibitors. Eur J
+Pharmacol 270:35-43Regev A, Seeff LB, Merz M, Ormarsdottir S, Aithal GP,
+et al. 2014. Causality assessment for suspected DILI during clinical
+phases of drug development. Drug Saf 37 Suppl 1:S47-56Rendic S. 2002.
+Summary of information on human CYP enzymes: human P450 metabolism data.
+Drug Metab Rev 34:83-448Reuben A, Koch DG, Lee WM, Acute Liver Failure
+Study G. 2010. Drug-induced acute liver failure: results of a U.S.
+multicenter, prospective study. Hepatology 52:2065-76Rodrigues AC. 2010.
+Efflux and uptake transporters as determinants of statin response.
+Expert Opin Drug Metab Toxicol 6:621-32Roskoski R, Jr. 2015. A
+historical overview of protein kinases and their targeted small molecule
+inhibitors. Pharmacol Res 100:1-23Ruan J, Liao C, Ye Y, Lin G. 2014a.
+Lack of metabolic activation and predominant formation of an excreted
+metabolite of nontoxic platynecine-type pyrrolizidine alkaloids. Chem
+Res Toxicol 27:7-16Ruan J, Yang M, Fu P, Ye Y, Lin G. 2014b. Metabolic
+activation of pyrrolizidine alkaloids: insights into the structural and
+enzymatic basis. Chem Res Toxicol 27:1030-9Rubiolo P, Pieters L, Calomme
+M, Bicchi C, Vlietinck A, Vanden Berghe D. 1992. Mutagenicity of
+pyrrolizidine alkaloids in the Salmonella typhimurium/mammalian
+microsome system. Mutat Res 281:143-7Rücker C, Rücker G, Meringer M.
+2007. y-Randomization and Its Variants in QSPR/QSAR. J. Chem. Inf.
+Model. 47:2345-57Schoental R, Head MA. 1957. Progression of liver
+lesions produced in rats by temporary treatment with pyrrolizidine
+(senecio) alkaloids, and the effects of betaine and high casein diet. Br
+J Cancer 11:535-44Schöning V, Hammann F, Peinl M, Drewe J. 2017.
+Editor\'s Highlight: Identification of Any Structure-Specific
+Hepatotoxic Potential of Different Pyrrolizidine Alkaloids Using Random
+Forests and Artificial Neural Networks. Toxicol Sci 160:361-70Shah RR,
+Morganroth J, Shah DR. 2013. Hepatotoxicity of tyrosine kinase
+inhibitors: clinical and regulatory perspectives. Drug Saf
+36:491-503Spjuth O, Alvarsson J, Berg A, Eklund M, Kuhn S, et al. 2009.
+Bioclipse 2: A scriptable integration platform for the life sciences.
+BMC Bioinformatics 10:1-5Spjuth O, Helmus T, Willighagen EL, Kuhn S,
+Eklund M, et al. 2007. Bioclipse: an open source workbench for chemo-
+and bioinformatics. BMC Bioinformatics 8:1-10Srinivas N, Sandeep KS,
+Anusha Y, Devendra BN. 2014. In Vitro Cytotoxic Evaluation and
+Detoxification of Monocrotaline (Mct) Alkaloid: An In Silico Approach.
+International Invention Journal of Biochemistry and Bioinformatics
+2:20-9Stine JG, Chalasani NP. 2017. Drug Hepatotoxicity: Environmental
+Factors. Clin Liver Dis 21:103-13Stine JG, Lewis JH. 2011. Drug-induced
+liver injury: a summary of recent advances. Expert Opin Drug Metab
+Toxicol 7:875-90Takanashi H, Umeda M, Hirono I. 1980. Chromosomal
+aberrations and mutations in cultured mammalian cells induced by
+pyrrolizidine alkaloids. Mutation Research 78:67-77Takeda M, Okamoto I,
+Nakagawa K. 2015. Pooled safety analysis of EGFR-TKI treatment for EGFR
+mutation-positive non-small cell lung cancer. Lung Cancer 88:74-9Tamta
+H, Pawar RS, Wamer WG, Grundel E, Krynitsky AJ, Rader JI. 2012.
+Comparison of metabolism-mediated effects of pyrrolizidine alkaloids in
+a HepG2/C3A cell-S9 co-incubation system and quantification of their
+glutathione conjugates. Xenobiotica 42:1038-48Teh LK, Bertilsson L.
+2012. Pharmacogenomics of CYP2D6: molecular genetics, interethnic
+differences and clinical importance. Drug Metab Pharmacokinet
+27:55-67Teo YL, Ho HK, Chan A. 2013. Risk of tyrosine kinase
+inhibitors-induced hepatotoxicity in cancer patients: a meta-analysis.
+Cancer Treat Rev 39:199-206Teo YL, Ho HK, Chan A. 2015. Formation of
+reactive metabolites and management of tyrosine kinase inhibitor-induced
+hepatotoxicity: a literature review. Expert Opin Drug Metab Toxicol
+11:231-42Thompson RA, Isin EM, Ogese MO, Mettetal JT, Williams DP. 2016.
+Reactive Metabolites: Current and Emerging Risk and Hazard Assessments.
+Chem Res Toxicol 29:505-33Walker K, Ginsberg G, Hattis D, Johns DO,
+Guyton KZ, Sonawane B. 2009. Genetic polymorphism in N-Acetyltransferase
+(NAT): Population distribution of NAT1 and NAT2 activity. Journal of
+toxicology and environmental health. Part B, Critical reviews
+12:440-72Wang YP, Yan J, Fu PP, Chou MW. 2005. Human liver microsomal
+reduction of pyrrolizidine alkaloid N-oxides to form the corresponding
+carcinogenic parent alkaloid. Toxicol Lett 155:411-20Weininger D. 1988.
+SMILES, a chemical language and information system. 1. Introduction to
+methodology and encoding rules. J Chem Inf Comput Sci 28:31-6Westerink
+WM, Schoonen WG. 2007. Phase II enzyme levels in HepG2 cells and
+cryopreserved primary human hepatocytes and their induction in HepG2
+cells. Toxicol In Vitro 21:1592-602Wu P, Nielsen TE, Clausen MH. 2015.
+FDA-approved small-molecule kinase inhibitors. Trends Pharmacol Sci
+36:422-39Xia Q, Ma L, He X, Cai L, Fu PP. 2015. 7-glutathione pyrrole
+adduct: a potential DNA reactive metabolite of pyrrolizidine alkaloids.
+Chem Res Toxicol 28:615-20Xia Q, Zhao Y, Von Tungeln LS, Doerge DR, Lin
+G, et al. 2013. Pyrrolizidine alkaloid-derived DNA adducts as a common
+biological biomarker of pyrrolizidine alkaloid-induced tumorigenicity.
+Chem Res Toxicol 26:1384-96Yan J, Xia Q, Chou MW, Fu P. 2008. Metabolic
+activation of retronecine and retronecine N-oxide -- formation of
+DHP-derived DNA adducts. Toxicology and Industrial Health 24Yang X, Li
+W, Sun Y, Guo X, Huang W, et al. 2017. Comparative Study of
+Hepatotoxicity of Pyrrolizidine Alkaloids Retrorsine and Monocrotaline.
+Chem Res Toxicol 30:532-9Yap CW. 2011. PaDEL-descriptor: an open source
+software to calculate molecular descriptors and fingerprints. Journal of
+computational chemistry 32:1466-74Yap CW. 2014. *Descriptors*. ,
+27.10.2016Yu K, Geng X, Chen M, Zhang J, Wang B, et al. 2014a. High
+daily dose and being a substrate of cytochrome P450 enzymes are two
+important predictors of drug-induced liver injury. Drug Metab Dispos
+42:744-50Yu K, Geng X, Chen M, Zhang J, Wang B, et al. 2014b. High daily
+dose and being a substrate of cytochrome P450 enzymes are two important
+predictors of drug-induced liver injury. Drug Metab. Dispos.
+42:744-50Zanger UM, Turpeinen M, Klein K, Schwab M. 2008. Functional
+pharmacogenetics/genomics of human cytochromes P450 involved in drug
+biotransformation. Anal Bioanal Chem 392:1093-108Zhang J, Sheng Y, Shi
+L, Zheng Z, Chen M, et al. 2017. Quercetin and baicalein suppress
+monocrotaline-induced hepatic sinusoidal obstruction syndrome in rats.
+Eur J Pharmacol 795:160-8Zhao Y, Xia Q, Gamboa da Costa G, Yu H, Cai L,
+Fu PP. 2012. Full structure assignments of pyrrolizidine alkaloid DNA
+adducts and mechanism of tumor initiation. Chem Res Toxicol
+25:1985-96Zheng Z, Shi L, Sheng Y, Zhang J, Lu B, Ji L. 2016.
+Chlorogenic acid suppresses monocrotaline-induced sinusoidal obstruction
+syndrome: The potential contribution of NFkappaB, Egr1, Nrf2, MAPKs and
+PI3K signals. Environ Toxicol Pharmacol 46:80-9Zhu XW, Xin YJ, Ge HL.
+2015. Recursive Random Forests Enable Better Predictive Performance and
+Model Interpretation than Variable Selection by LASSO. J Chem Inf Model
+55:736-46
diff --git a/paper/LICENSE b/paper/LICENSE
new file mode 100644
index 0000000..d159169
--- /dev/null
+++ b/paper/LICENSE
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/paper/Makefile b/paper/Makefile
new file mode 100644
index 0000000..a5c9bbc
--- /dev/null
+++ b/paper/Makefile
@@ -0,0 +1,102 @@
+## The path to the directory in which this file resides. This allows users to
+## include this Makefile into theirs and to reuse all rules, given that they set
+## this variable to the correct value.
+PANDOC_SCHOLAR_PATH ?= $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+
+# Include the optional local makefile (local.mk) first so that its variable
+# settings take precedence over the `?=` defaults below; the leading `-` makes
+# a missing local.mk a non-error.
+-include local.mk
+include $(PANDOC_SCHOLAR_PATH)/pandoc-options.inc.mk
+
+# Directory containing the bundled pandoc Lua filters.
+LUA_FILTERS_PATH ?= $(PANDOC_SCHOLAR_PATH)/lua-filters
+
+# Configuration. All values use `?=`, so they can be overridden in local.mk
+# (included above), in an including Makefile, or on the make command line.
+ARTICLE_FILE ?= mutagenicity.md
+OUTFILE_PREFIX ?= outfile
+DEFAULT_EXTENSIONS ?= latex pdf docx odt epub html
+JSON_FILE ?= $(OUTFILE_PREFIX).enriched.json
+# NOTE(review): FLATTENED_JSON_FILE is not referenced by any rule in this
+# Makefile; presumably it is used by an included makefile -- confirm.
+FLATTENED_JSON_FILE ?= $(OUTFILE_PREFIX).flattened.json
+# Lua filters passed to pandoc, in this order, when building $(JSON_FILE).
+LUA_FILTERS ?= $(LUA_FILTERS_PATH)/cito/cito.lua \
+ $(LUA_FILTERS_PATH)/abstract-to-meta/abstract-to-meta.lua \
+ $(LUA_FILTERS_PATH)/scholarly-metadata/scholarly-metadata.lua
+
+
+## Build one output file per extension listed in DEFAULT_EXTENSIONS, i.e.
+## $(OUTFILE_PREFIX).<ext>. This is the first rule in this file, so it is the
+## default goal unless an included makefile defines a target earlier.
+all: $(addprefix $(OUTFILE_PREFIX).,$(DEFAULT_EXTENSIONS))
+
+## Convert the article ($<) into pandoc's JSON format, running every filter in
+## LUA_FILTERS via a separate --lua-filter option. The resulting file is the
+## common input of most output rules below; it is rebuilt whenever the article
+## or one of the filters changes.
+$(JSON_FILE): $(ARTICLE_FILE) $(LUA_FILTERS)
+ pandoc $(PANDOC_READER_OPTIONS) \
+ $(foreach filter, $(LUA_FILTERS), --lua-filter=$(filter)) \
+ --to=json \
+ --output=$@ $<
+
+## Build the PDF and LaTeX outputs from the enriched JSON file ($<). Both
+## targets share this recipe; `$@` is whichever of the two files is being
+## built. The LaTeX template and the template-helper filter are prerequisites,
+## so changing either triggers a rebuild.
+$(OUTFILE_PREFIX).pdf $(OUTFILE_PREFIX).latex: \
+ $(JSON_FILE) \
+ $(TEMPLATE_FILE_LATEX) \
+ $(PANDOC_SCHOLAR_PATH)/scholar-filters/template-helper.lua
+ pandoc $(PANDOC_WRITER_OPTIONS) \
+ $(PANDOC_LATEX_OPTIONS) \
+ --lua-filter=$(PANDOC_SCHOLAR_PATH)/scholar-filters/template-helper.lua \
+ --output $@ $<
+
+## Build the DOCX output from the enriched JSON file ($<), applying the
+## author-info-blocks Lua filter. The DOCX reference file (DOCX_REFERENCE_FILE,
+## see README.md) is a prerequisite so that editing it triggers a rebuild; the
+## ODT reference file is irrelevant for this target.
+$(OUTFILE_PREFIX).docx: $(JSON_FILE) \
+ $(DOCX_REFERENCE_FILE) \
+ $(LUA_FILTERS_PATH)/author-info-blocks/author-info-blocks.lua
+ pandoc $(PANDOC_WRITER_OPTIONS) \
+ $(PANDOC_DOCX_OPTIONS) \
+ --lua-filter=$(LUA_FILTERS_PATH)/author-info-blocks/author-info-blocks.lua \
+ --output $@ $<
+
+## Build the ODT output from the enriched JSON file ($<), applying the
+## author-info-blocks Lua filter. Rebuilt when the JSON file, the ODT
+## reference file, or the filter changes.
+$(OUTFILE_PREFIX).odt: $(JSON_FILE) \
+ $(ODT_REFERENCE_FILE) \
+ $(LUA_FILTERS_PATH)/author-info-blocks/author-info-blocks.lua
+ pandoc $(PANDOC_WRITER_OPTIONS) \
+ $(PANDOC_ODT_OPTIONS) \
+ --lua-filter=$(LUA_FILTERS_PATH)/author-info-blocks/author-info-blocks.lua \
+ --output $@ $<
+
+## Build the EPUB output from the enriched JSON file ($<), applying the
+## author-info-blocks Lua filter. Rebuilt when the JSON file, the EPUB
+## template, or the filter changes.
+$(OUTFILE_PREFIX).epub: $(JSON_FILE) \
+ $(TEMPLATE_FILE_EPUB) \
+ $(LUA_FILTERS_PATH)/author-info-blocks/author-info-blocks.lua
+ pandoc $(PANDOC_WRITER_OPTIONS) \
+ $(PANDOC_EPUB_OPTIONS) \
+ --lua-filter=$(LUA_FILTERS_PATH)/author-info-blocks/author-info-blocks.lua \
+ --output $@ $<
+
+## Build the HTML output from the enriched JSON file ($<), applying the
+## template-helper Lua filter. The stylesheet TEMPLATE_STYLE_HTML is passed via
+## --css, and pandoc is invoked with --self-contained and --mathjax. Rebuilt
+## when the JSON file, the HTML template, the stylesheet, or the filter changes.
+$(OUTFILE_PREFIX).html: $(JSON_FILE) \
+ $(TEMPLATE_FILE_HTML) \
+ $(TEMPLATE_STYLE_HTML) \
+ $(PANDOC_SCHOLAR_PATH)/scholar-filters/template-helper.lua
+ pandoc $(PANDOC_WRITER_OPTIONS) \
+ $(PANDOC_HTML_OPTIONS) \
+ --lua-filter=$(PANDOC_SCHOLAR_PATH)/scholar-filters/template-helper.lua \
+ --css=$(TEMPLATE_STYLE_HTML) \
+ --self-contained \
+ --mathjax \
+ --output $@ $<
+
+## Export the article as JSON-LD from the enriched JSON file ($<), using the
+## custom Lua writer writers/jsonld.lua together with the json-ld Lua filter.
+## The bibliography file is handed to pandoc as the `bibliography` metadata
+## field. `jsonld` is not in the default DEFAULT_EXTENSIONS list, so this
+## target has to be requested explicitly unless that variable is changed.
+$(OUTFILE_PREFIX).jsonld: $(JSON_FILE) \
+ $(BIBLIOGRAPHY_FILE) \
+ $(PANDOC_SCHOLAR_PATH)/scholar-filters/json-ld.lua \
+ $(PANDOC_SCHOLAR_PATH)/writers/jsonld.lua
+ pandoc --to $(PANDOC_SCHOLAR_PATH)/writers/jsonld.lua \
+ --metadata "bibliography:$(BIBLIOGRAPHY_FILE)" \
+ --lua-filter=$(PANDOC_SCHOLAR_PATH)/scholar-filters/json-ld.lua \
+ --output=$@ $<
+
+## Build a .txt output directly from the article source ($<) rather than from
+## the enriched JSON file, so none of the filters in LUA_FILTERS are applied
+## and PANDOC_READER_OPTIONS is not passed.
+$(OUTFILE_PREFIX).txt: $(ARTICLE_FILE)
+ pandoc $(PANDOC_WRITER_OPTIONS) \
+ --output $@ $<
+
+## Advanced JATS support is temporarily disabled.
+## What remains is a plain conversion of the enriched JSON file ($<) using
+## only the writer and JATS option variables; no Lua filter or template is
+## attached to this rule. `jats` is not in the default DEFAULT_EXTENSIONS list.
+$(OUTFILE_PREFIX).jats: $(JSON_FILE)
+ pandoc $(PANDOC_WRITER_OPTIONS) \
+ $(PANDOC_JATS_OPTIONS) \
+ --output $@ $<
+
+## Remove every file whose name starts with `$(OUTFILE_PREFIX).` -- that is,
+## all generated outputs including the intermediate JSON file, but also any
+## unrelated file that happens to share the prefix.
+clean:
+ rm -f $(OUTFILE_PREFIX).*
+
+# `all` and `clean` do not name files; always run their recipes.
+.PHONY: all clean
+
+# Include archive-generating targets. This makefile is not included in the
+# distributed archives, hence the leading `-` to ignore it when missing.
+-include archives.inc.mk
diff --git a/paper/README.md b/paper/README.md
new file mode 100644
index 0000000..abff1e0
--- /dev/null
+++ b/paper/README.md
@@ -0,0 +1,269 @@
+Pandoc Scholar
+==============
+
+[![release shield]](https://github.com/pandoc-scholar/pandoc-scholar/releases)
+[![DOI]](https://zenodo.org/badge/latestdoi/82204858)
+[![license shield]](./LICENSE)
+[![build status]](https://travis-ci.org/pandoc-scholar/pandoc-scholar)
+
+Create beautiful, semantically enriched articles with pandoc. This
+package provides utilities to make publishing of scientific articles as
+simple and pleasant as possible. It simplifies setting authors' metadata
+in YAML blocks, allows adding semantic annotations to citations, and only
+requires the programs pandoc and make.
+
+[release shield]: https://img.shields.io/github/release/pandoc-scholar/pandoc-scholar.svg
+[license shield]: https://img.shields.io/github/license/pandoc-scholar/pandoc-scholar.svg
+[build status]: https://img.shields.io/travis/pandoc-scholar/pandoc-scholar/master.svg
+[DOI]: https://zenodo.org/badge/82204858.svg
+[paper]: https://peerj.com/articles/cs-112/
+
+Overview
+--------
+
+Plain pandoc is already excellent at document conversion, but it lacks
+in metadata handling. Pandoc scholar offers simple ways to include
+metadata on authors, affiliations, contact details, and citations. The
+data is included into the final output as document headers. Additionally
+all entries can be exported as [JSON-LD], a standardized format for the
+semantic web.
+
+The background leading to the development of pandoc scholar is described
+in the [paper] published in PeerJ Computer Science.
+
+Note that since version 2.0, most of the functionality of pandoc Scholar
+is now provided via [pandoc Lua filters]. If you prefer to mix-and-match
+selected functionalities provided by pandoc scholar, you can now use the
+respective Lua filters directly. Integration with tools like RMarkdown
+is possible this way.
+
+[JSON-LD]: https://en.wikipedia.org/wiki/JSON-LD
+[pandoc Lua filters]: https://github.com/pandoc/lua-filters
+
+### Demo
+
+An example document plus bibliography is provided in the *example*
+folder. Running `make` in the project's root folder will process the
+example article, generating output like below:
+
+![example article screenshot](https://pandoc-scholar.github.io/example/header.png)
+
+Get the full output as [pdf], [docx], or [epub], or take a look at the
+metadata in [JSON-LD] format.
+
+[pdf]: https://pandoc-scholar.github.io/example/example.pdf
+[docx]: https://pandoc-scholar.github.io/example/example.docx
+[epub]: https://pandoc-scholar.github.io/example/example.epub
+[JSON-LD]: https://pandoc-scholar.github.io/example/example.jsonld
+
+
+Prerequisites
+-------------
+
+This package builds on [pandoc](http://pandoc.org/), the universal
+document converter, version 2.1 or later. See the pandoc website for
+[installation instructions](http://pandoc.org/installing.html) and
+suggestions for LaTeX packages, which we use for PDF generation.
+
+
+Installation
+------------
+
+Archives containing all required files are provided for each release.
+Use the *release* button above (or directly go to the [latest release])
+and download a `pandoc-scholar` archive; both archive files, `.zip` and
+`.tar.gz`, contain the same files. Choose the filetype that is the
+easiest to unpack on your system.
+
+A `pandoc-scholar` folder will be created on unpacking. The folder
+contains all required scripts and templates.
+
+[latest release]: https://github.com/pandoc-scholar/pandoc-scholar/releases/latest
+
+
+Usage
+-----
+
+### Quickstart
+
+Run `make` to convert the example article into all supported output formats. The
+markdown file used to create the output files can be configured via the
+`ARTICLE_FILE` variable, either directly in the Makefile or by specifying the
+value on the command line.
+
+ make ARTICLE_FILE=your-file.md
+
+### Includable Makefile
+
+The *Makefile*, which does most of the work, is written in a style that makes it
+simple to include it from within other Makefiles. This method allows to keep
+`pandoc-scholar` installed in a central location and to use the same instance
+for multiple projects. The `ARTICLE_FILE` and `PANDOC_SCHOLAR_PATH` variables
+must be defined in the including Makefile:
+
+``` Makefile
+ARTICLE_FILE = your-file.md
+PANDOC_SCHOLAR_PATH = ../path-to-pandoc-scholar-folder
+include $(PANDOC_SCHOLAR_PATH)/Makefile
+```
+
+Calling `make` as usual will create all configured output formats. Per default,
+this creates *pdf*, *latex*, *docx*, *odt*, *epub*, and *html* output; *jats*
+output is supported as well, but is not part of the default set.
+The set of output files can be changed by setting the `DEFAULT_EXTENSIONS`
+variable, e.g., to a subset of the aforementioned formats.
+
+Alternative template files can be set using `TEMPLATE_FILE_<FORMAT>` variables,
+where `<FORMAT>` is one of *HTML*, *EPUB*, *JATS*, or *LATEX*. The reference
+files for ODT and DOCX output can be changed using `ODT_REFERENCE_FILE` and
+`DOCX_REFERENCE_FILE`, respectively.
+
+Additional pandoc options can be given on a per-format basis using
+`PANDOC_<FORMAT>_OPTIONS` variables. The following uses an actual Makefile as an
+example to demonstrate usage of those options.
+
+``` Makefile
+ARTICLE_FILE = open-science-formatting.md
+
+PANDOC_LATEX_OPTIONS = --latex-engine=xelatex
+PANDOC_LATEX_OPTIONS += --csl=peerj.csl
+PANDOC_LATEX_OPTIONS += --filter=pandoc-citeproc
+PANDOC_LATEX_OPTIONS += -M fontsize=10pt
+PANDOC_LATEX_OPTIONS += -M classoption=fleqn
+
+PANDOC_HTML_OPTIONS = --toc
+PANDOC_EPUB_OPTIONS = --toc
+
+DOCX_REFERENCE_FILE = pandoc-manuscript.docx
+ODT_REFERENCE_FILE = pandoc-manuscript.odt
+TEMPLATE_FILE_LATEX = pandoc-peerj.latex
+
+PANDOC_SCHOLAR_PATH = pandoc-scholar
+include $(PANDOC_SCHOLAR_PATH)/Makefile
+```
+
+
+Metadata Features
+-----------------
+
+Pandoc scholar supports additional functionality via metadata fields. Most
+notably, the augmentation of articles with author and affiliation data, which is
+essential for academic publishing, is greatly simplified when using pandoc
+scholar.
+
+### Authors and affiliations
+
+Most metadata should be specified in the YAML block at the top of the article.
+Author data and affiliations are taken from the *author* and *institute* field,
+respectively. Institutes can be given via user-defined abbreviations, saving
+unnecessary repetitions while preserving readability.
+
+Example:
+
+``` yaml
+author:
+ - James Dewey Watson:
+ institute: cavendish
+ - Francis Harry Compton Crick:
+ institute: cavendish
+institute:
+ - cavendish: Cavendish Laboratory, Cambridge
+```
+
+Authors are given in the order in which they are listed, while institute order
+follows from author order.
+
+The separate institute field may add unwanted complexity in some cases. It is
+hence possible to omit it and to give the affiliation's name directly in the
+author entry:
+
+``` yaml
+author:
+ - John MacFarlane:
+ institute: University of California, Berkeley
+```
+
+### Institute address
+
+Often it is not enough to give just a name for institutes. It is hence possible
+to add arbitrary fields. The name must then explicitly be set via the *name*
+field of the institute entry:
+
+``` yaml
+author:
+ - Robert Winkler:
+ institute: cinvestav
+institute:
+ - cinvestav:
+ name: 'CINVESTAV Unidad Irapuato, Department of Biochemistry and Biotechnology'
+ address: 'Km. 9.6 Libramiento Norte Carr. Irapuato-León, 36821 Irapuato Gto. México'
+ phone: +52 (462) 623 9635
+```
+
+Currently only the institute's address is used in the default template, but
+future extensions will be based on this convention.
+
+### Semantic citations
+
+Understanding the reason a citation is included in scholarly articles usually
+requires natural language processing of the article. However, navigating the
+current literature landscape can be improved by having that information
+accessible and in a machine-readable form. Pandoc scholar supports the CiTO
+ontology, allowing authors to specify important meta-information on the citation
+directly while writing the text. The property is simply prepended to the
+citation key, separated by a colon: `@<property>:citationKey`.
+
+The following table contains all supported keywords and the respective
+CiTO properties. Authors are free to use the short-form, the full-length
+property, or any of the alternatives listed below (i.e., all words in a
+row denote the property and have the same effect).
+
+CiTO property | Keyword | alternatives
+------------------------------ | ------------------- | ---------------------
+agrees\_with | agrees\_with | agree\_with
+citation | |
+cites | |
+cites\_as\_authority | authority | as\_authority
+cites\_as\_data\_source | data\_source | as\_data_source
+cites\_as\_evidence | evidence | as\_evidence
+cites\_as\_metadata\_document | metadata | as\_metadata_document
+cites\_as\_recommended_reading | recommended_reading | as\_recommended\_reading
+disputes | |
+documents | |
+extends | |
+includes\_excerpt\_from | excerpt | excerpt\_from
+includes\_quotation\_from | quotation | quotation\_from
+obtains\_background\_from | background | background\_from
+refutes | |
+replies\_to | |
+updates | |
+uses\_data\_from | data\_from | data
+uses\_method\_in | method | method\_in
+
+Example:
+
+ DNA strands form a double-helix [@evidence:watson_crick_1953].
+
+
+License
+-------
+
+Copyright © 2016–2018 Albert Krewinkel and Robert Winkler except for the
+following components:
+
+- HTML template: © 2016 Andrew G. York and Diana Mounter
+- dkjson: © 2010-2013 David Heiko Kolf
+- lua-filters: © 2017-2018 John MacFarlane, Albert Krewinkel, Jesse Rosenthal,
+ and Greg Tucker-Kellogg
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
+Street, Fifth Floor, Boston, MA 02110-1301 USA.
diff --git a/paper/bibliography.bib b/paper/bibliography.bib
new file mode 100644
index 0000000..7c660ea
--- /dev/null
+++ b/paper/bibliography.bib
@@ -0,0 +1,366 @@
+@Article{Kazius2005,
+ author = "Kazius, J. and McGuire, R. and Bursi, R.",
+ year = 2005,
+ title = "Derivation and validation of toxicophores for mutagenicity prediction",
+ journal = "J Med Chem",
+ number = 48,
+ pages = "312-20",
+}
+
+@article{Hansen2009,
+ author = {Hansen, Katja and Mika, Sebastian and Schroeter, Timon and Sutter, Andreas and ter Laak, Antonius and Steger-Hartmann, Thomas and Heinrich, Nikolaus and Müller, Klaus-Robert},
+ title = {Benchmark Data Set for in Silico Prediction of Ames Mutagenicity},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {9},
+ pages = {2077-2081},
+ year = {2009},
+ doi = {10.1021/ci900161g},
+ note ={PMID: 19702240},
+ URL = { https://doi.org/10.1021/ci900161g },
+ eprint = { https://doi.org/10.1021/ci900161g }
+}
+
+@article{Yap2011,
+ author = "Yap, CW.",
+ year = 2011,
+ title = "PaDEL-descriptor: an open source software to calculate molecular descriptors and fingerprints",
+ journal = "Journal of computational chemistry",
+ number = 32,
+ pages = "1466-74"
+}
+
+@Article{Bender2004,
+ author = "Andreas Bender and Hamse Y. Mussa and Robert C.
+ Glen and Stephan Reiling",
+ title = "Molecular Similarity Searching Using Atom
+ Environments, Information-Based Feature Selection, and
+ a Naïve Bayesian Classifier",
+ journal = "Journal of Chemical Information and Computer
+ Sciences",
+ volume = "44",
+ number = "1",
+ pages = "170--178",
+ year = "2004",
+ DOI = "10.1021/ci034207y",
+ note = "PMID: 14741025",
+ URL = "http://dx.doi.org/10.1021/ci034207y",
+ eprint = "http://dx.doi.org/10.1021/ci034207y",
+}
+
+@article{OBoyle2011a,
+ abstract = {{BACKGROUND: A frequent problem in computational modeling is the interconversion of chemical structures between different formats. While standard interchange formats exist (for example, Chemical Markup Language) and de facto standards have arisen (for example, SMILES format), the need to interconvert formats is a continuing problem due to the multitude of different application areas for chemistry data, differences in the data stored by different formats (0D versus 3D, for example), and competition between software along with a lack of vendor-neutral formats.RESULTS: We discuss, for the first time, Open Babel, an open-source chemical toolbox that speaks the many languages of chemical data. Open Babel version 2.3 interconverts over 110 formats. The need to represent such a wide variety of chemical and molecular data requires a library that implements a wide range of cheminformatics algorithms, from partial charge assignment and aromaticity detection, to bond order perception and canonicalization. We detail the implementation of Open Babel, describe key advances in the 2.3 release, and outline a variety of uses both in terms of software products and scientific research, including applications far beyond simple format interconversion.CONCLUSIONS: Open Babel presents a solution to the proliferation of multiple chemical file formats. In addition, it provides a variety of useful utilities from conformer searching and 2D depiction, to filtering, batch conversion, and substructure and similarity searching. For developers, it can be used as a programming library to handle chemical data in areas such as organic chemistry, drug design, materials science, and computational chemistry. It is freely available under an open-source license from http://openbabel.org.}},
+ added-at = {2019-03-11T21:00:05.000+0100},
+ author = {O'Boyle, Noel and Banck, Michael and James, Craig and Morley, Chris and Vandermeersch, Tim and Hutchison, Geoffrey},
+ biburl = {https://www.bibsonomy.org/bibtex/27ab2699fef73132efcfa6853c3031bf0/fairybasslet},
+ booktitle = {Journal of Cheminformatics},
+ citeulike-article-id = {9866193},
+ citeulike-linkout-0 = {http://dx.doi.org/doi:10.1186/1758-2946-3-33},
+ citeulike-linkout-1 = {http://www.jcheminf.com/content/3/1/33},
+ citeulike-linkout-2 = {http://dx.doi.org/10.1186/1758-2946-3-33},
+ citeulike-linkout-3 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3198950/},
+ citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/21982300},
+ citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=21982300},
+ citeulike-linkout-6 = {http://link.springer.com/article/10.1186/1758-2946-3-33},
+ day = 07,
+ doi = {10.1186/1758-2946-3-33},
+ interhash = {c20842ab14c8a3bbd2dcf3e8072b82d1},
+ intrahash = {7ab2699fef73132efcfa6853c3031bf0},
+ issn = {1758-2946},
+ journal = {J. Cheminf.},
+ keywords = {chemical-file-formats computer-program cpsst open-babel open-source software-library toolkit},
+ month = oct,
+ number = 1,
+ pages = 33,
+ pdf = {file:///H:/publications/OBoyle2011a.pdf},
+ pmcid = {PMC3198950},
+ pmid = {21982300},
+ posted-at = {2011-11-08 10:20:24},
+ priority = {2},
+ publisher = {Chemistry Central Ltd},
+ timestamp = {2019-03-11T21:06:37.000+0100},
+ title = {{Open Babel: An open chemical toolbox}},
+ url = {http://www.jcheminf.com/content/3/1/33},
+ volume = 3,
+ year = 2011
+}
+
+@article{Rücker2007,
+ author = "Rücker, C and Rücker, G and Meringer, M.",
+ year = 2007,
+ title = "y-Randomization and Its Variants in QSPR/QSAR",
+ journal = "J. Chem. Inf. Model.",
+ number = 47,
+ pages = "2345-57"
+}
+
+@Article{Maunz2013,
+ DOI = "10.3389/fphar.2013.00038",
+ URL = "http://dx.doi.org/10.3389/fphar.2013.00038",
+ year = "2013",
+ publisher = "Frontiers Media {SA}",
+ volume = "4",
+ author = "Andreas Maunz and Martin G{\"{u}}tlein and Micha
+ Rautenberg and David Vorgrimmler and Denis Gebele and
+ Christoph Helma",
+ title = "lazar: a modular predictive toxicology framework",
+ journal = "Frontiers in Pharmacology",
+}
+
+@Article{doi:10.1021/ci00057a005,
+ author = "David Weininger",
+ title = "SMILES, a chemical language and information system. 1.
+ Introduction to methodology and encoding rules",
+ journal = "Journal of Chemical Information and Computer
+ Sciences",
+ volume = "28",
+ number = "1",
+ pages = "31--36",
+ year = "1988",
+ DOI = "10.1021/ci00057a005",
+ URL = "http://dx.doi.org/10.1021/ci00057a005",
+ eprint = "http://dx.doi.org/10.1021/ci00057a005",
+}
+
+@Article{OBoyle2011,
+ DOI = "10.1186/1758-2946-3-33",
+ URL = "http://dx.doi.org/10.1186/1758-2946-3-33",
+ year = "2011",
+ publisher = "Springer Science and Business Media",
+ volume = "3",
+ number = "1",
+ pages = "33",
+ author = "Noel M O'Boyle and Michael Banck and Craig A James and
+ Chris Morley and Tim Vandermeersch and Geoffrey R
+ Hutchison",
+ title = "Open Babel: An open chemical toolbox",
+ journal = "Journal of Cheminformatics",
+}
+
+@Article{mazzatorta08,
+ author = "Paolo Mazzatorta and Manuel Dominguez Estevez and
+ Myriam Coulet and Benoit Schilter",
+ title = "Modeling Oral Rat Chronic Toxicity",
+ journal = "Journal of Chemical Information and Modeling",
+ volume = "48",
+ number = "10",
+ pages = "1949--1954",
+ year = "2008",
+ DOI = "10.1021/ci8001974",
+ note = "PMID: 18803370",
+ URL = "http://dx.doi.org/10.1021/ci8001974",
+ eprint = "http://dx.doi.org/10.1021/ci8001974",
+}
+
+@Manual{pls,
+ title = "pls: Partial Least Squares and Principal Component
+ Regression",
+ author = "Bjørn-Helge Mevik and Ron Wehrens and Kristian Hovde
+ Liland",
+ year = "2015",
+ note = "R package version 2.5-0",
+ URL = "https://CRAN.R-project.org/package=pls",
+}
+
+@Article{Kuhn08,
+ author = "Max Kuhn",
+ title = "Building predictive models in R using the caret
+ package",
+ journal = "J. of Stat. Soft",
+ year = "2008",
+}
+
+@Article{Jeliazkova15,
+ author = "Nina Jeliazkova and Charalampos Chomenidis and Philip
+ Doganis and Bengt Fadeel and Roland Grafström and
+ Barry Hardy and Janna Hastings and Markus Hegi and
+ Vedrin Jeliazkov and Nikolay Kochev and Pekka Kohonen
+ and Cristian R. Munteanu and Haralambos Sarimveis and
+ Bart Smeets and Pantelis Sopasakis and Georgia Tsiliki
+ and David Vorgrimmler and Egon Willighagen",
+ title = "The eNanoMapper database for nanomaterial safety
+ information",
+ journal = "Beilstein J. Nanotechnol.",
+ pages = "1609–1634",
+ number = "6",
+ year = "2015",
+ DOI = "10.3762/bjnano.6.165",
+}
+
+@TechReport{Fowler2011,
+ author = "B. Fowler and S. Savage and B. Mendez",
+ year = "2011",
+ title = "White paper: Protecting public health in the 21st
+ century: the case for computational toxicology",
+ institution = "ICF International, Inc. (icfi.com)",
+}
+
+@Article{Cotterill2008,
+ author = "Cotterill, J.V. and Chaudhry, M.Q. and Matthews, W. and R.
+ W. Watkins",
+ year = "2008",
+ title = "In silico assessment of toxicity of heat-generated
+ food contaminants",
+ journal = "Food and Chemical Toxicology",
+ number = "46(6)",
+ pages = "1905--1918",
+}
+
+@Article{Grob2006,
+ author = "Grob, K. and Biedermann, M. and Scherbaum, E. and Roth, M.
+ and K. Rieger",
+ year = "2006",
+ title = "Food contamination with organic materials in
+ perspective: packaging materials as the largest and
+ least controlled source? A view focusing on the
+ European situation",
+ journal = "Crit. Rev. Food. Sci. Nutr.",
+ number = "46",
+ pages = "529--35",
+ DOI = "10.1080/10408390500295490",
+}
+
+@Article{LoPiparo2011,
+ author = "Lo Piparo, E. and Worth, A. and Manibusan, A. and Yang, C. and
+ Schilter, B. and Mazzatorta, P. and Jacobs, M.N. and
+ Steinkelner, H. and L. Mohimont",
+ year = "2011",
+ title = "Use of Computational tools in the field of food
+ safety",
+ journal = "Regulatory Toxicology and Pharmacology",
+ number = "60(3)",
+ pages = "354--362",
+}
+
+@Article{LoPiparo2014,
+ author = "Lo Piparo, E. and Maunz, A. and Helma, C. and Vorgrimmler, D. and Schilter, B.",
+ year = "2014",
+ title = "Automated and reproducible read-across like models for
+ predicting carcinogenic potency",
+ journal = "Regulatory Toxicology and Pharmacology",
+ number = "70",
+ pages = "370--378",
+}
+
+@Article{Schilter2014,
+ author = "Schilter, B. and Benigni, R. and Boobis, A. and Chiodini, A. and
+ Cockburn, A. and Cronin, M.T. and Lo Piparo, E. and Modi, S. and
+ Thiel A. and A. Worth",
+ title = "Establishing the level of safety concern for chemicals
+ in food without the need for toxicity testing",
+ year = "2014",
+ journal = "Regulatory Toxicology and Pharmacology",
+ number = "68",
+ pages = "275--298",
+}
+
+@Article{Stanton2016,
+ author = "Stanton, K. and F.H. Kruszewski",
+ year = "2016",
+ title = "Quantifying the benefits of using read-across and in
+ silico techniques to fulfill hazard data requirements
+ for chemical categories",
+ journal = "Regulatory Toxicology and Pharmacology",
+ number = "81",
+ pages = "250--259",
+ DOI = "10.1016/j.yrtph.2016.09.004",
+}
+
+@Article{Zarn2011,
+ author = "Zarn, J.A. and B.E. Engeli and J.R. Schlatter",
+ year = "2011",
+ title = "Study parameters influencing {NOAEL} and {LOAEL} in
+ toxicity feeding studies for pesticides: exposure
+ duration versus dose decrement, dose spacing, group
+ size and chemical class",
+ journal = "Regul. Toxicol. Pharmacol.",
+ number = "61",
+ pages = "243--250",
+}
+
+@Article{Zarn2013,
+ author = "J.A. Zarn and B.E. Engeli and J.R. Schlatter",
+ year = "2013",
+ title = "Characterization of the dose decrement in regulatory
+ rat pesticide toxicity feeding studies",
+ journal = "Regul. Toxicol. Pharmacol.",
+ number = "67",
+ pages = "215--220",
+}
+
+@Article{EFSA2016,
+ author = {EFSA},
+ year = "2016",
+ title = "Guidance on the establishment of the residue
+ definition for dietary assessment: {EFSA} panel on Plant
+ Protect Products and their Residues ({PPR})",
+ journal = "EFSA Journal",
+ number = "14",
+ pages = "1--12",
+}
+
+@TechReport{ECHA2008,
+ author = "{ECHA}",
+ year = "2008",
+ title = "Guidance on information requirements and chemical
+ safety assessment, Chapter R.6: {QSARs} and grouping of
+ chemicals",
+ institution = "ECHA",
+}
+
+@Misc{EFSA2014,
+ author = "{EFSA}",
+ year = "2014",
+ title = "Rapporteur Member State assessment reports submitted
+ for the {EU} peer review of active substances used in
+ plant protection products",
+ URL = "http://dar.efsa.europa.eu/dar-web/provision",
+ note = "accessed 8.1.2015",
+}
+
+@Misc{HealthCanada2016,
+ author = "{Health Canada}",
+ year = "2016",
+ URL = "https://www.canada.ca/en/health-canada/services/chemical-substances/chemicals-management-plan.html",
+}
+
+@InCollection{OECD2015,
+ author = "{OECD}",
+ year = "2015",
+ title = "Fundamental and guiding principles for {(Q)SAR} analysis
+ of chemicals carcinogens with mechanistic
+ considerations Monograph 229 {ENV/JM/MONO(2015)46}",
+ chapter = "229",
+ booktitle = "Series on Testing and Assessment No 229",
+}
+
+@InCollection{Schilter2013,
+ author = "Schilter, B. and Constable, A. and Perrin, I.",
+ title = "Naturally occurring toxicants of plant origin: risk
+ assessment and management considerations",
+ year = "2013",
+ booktitle = "Food Safety Management: a practical guide for
+ industry",
+ chapter = "3",
+ editor = "Y. Motarjemi",
+ publisher = "Elsevier",
+ pages = "45--57",
+}
+
+@Misc{EPA2011,
+ title = "Fact Sheets on New Active Ingredients",
+ author = "{US EPA}",
+ year = "2011",
+ note = "this database was not further maintained by US EPA.
+ Only data used until 27.11.2014, accessed 27.4.11
+ A.D.",
+}
+
+@TechReport{WHO2011,
+ author = "WHO",
+ year = "2011",
+ title = "Joint {FAO/WHO} Meeting on Pesticide Residues ({JMPR})
+ publications",
+ URL = "http://www.who.int/foodsafety/publications/jmpr-monographs/en/",
+ note = "accessed 20.3.15 A.D.",
+}
diff --git a/paper/example/article.md b/paper/example/article.md
new file mode 100644
index 0000000..06061c3
--- /dev/null
+++ b/paper/example/article.md
@@ -0,0 +1,136 @@
+---
+title: Example article written in pandoc-flavored Markdown
+author:
+ - Jane Doe:
+ institute:
+ - fosg
+ - fop
+ email: jane.doe@example.com
+ orcid: 0000-0000-0000-0000
+ equal_contributor: "yes"
+ correspondence: "yes"
+ - John Q. Doe:
+ institute: fosg
+ equal_contributor: "yes"
+ - Peder Ås:
+ institute: fosg
+ - Juan Pérez:
+ institute: acme
+ email: juan.perez@example.edu
+ correspondence: "yes"
+institute:
+ - fosg:
+ name: Formatting Open Science Group
+ address: 23 Science Street, Eureka, Mississippi, USA
+ phone: +1 (555) 423 1338
+ email: '{firstname}.{lastname}\@fosg.example.com'
+ - fop: Federation of Planets
+ - acme:
+ name: Acme Corporation
+bibliography: example/bibliography.bib
+project:
+ title: Pandoc Scholar Example
+ zip-url: https://github.com/pandoc-scholar/pandoc-scholar/releases
+ github-url: https://github.com/pandoc-scholar/pandoc-scholar/
+...
+
+# Abstract
+
+This is an example article. There is not much to see but filler text.
+
+
+# Further reading
+
+See the [pandoc manual](http://pandoc.org/MANUAL.html) for more information on
+pandoc.
+
+Authors struggling to fill this document with content are referred to
+@Upper_writers_1974.
+
+
+# Lorem Ipsum
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+
+
+## Dolor sit amet
+
+Deserunt excepturi commodi sit qui velit quis. Delectus sit omnis culpa
+accusamus repellat iusto vel. Quod deserunt quasi nisi dolor. Quo eum id
+reiciendis dolor. Est qui illum et.
+
+Quo dolore molestiae et laboriosam occaecati explicabo corrupti. Earum expedita
+ducimus quaerat est quam ut molestiae. Illum deleniti vel labore facilis et cum
+est. Est nemo est vel ad. Assumenda consequatur rerum officiis atque officia.
+Est nihil iste cumque ad qui.
+
+Eaque sed sit totam enim. Et explicabo illum rerum aut. Aspernatur sit dolor
+animi tempora cum. Maxime in soluta aut. Explicabo id maiores voluptates aut
+voluptas id. Dolore sed labore voluptatem omnis doloribus mollitia aliquid
+cupiditate.
+
+In rerum saepe placeat. Deleniti suscipit sed quam fugit assumenda sit et
+tempora. Veniam illum expedita quia error qui quibusdam rerum. Qui ut sunt est
+eos.
+
+Recusandae et sit ut. Impedit deserunt consequatur et dignissimos vel et.
+Eveniet voluptatem magni quis est dolore excepturi officia nihil. Debitis quae
+commodi error.
+
+Modi debitis et ut saepe saepe dolorem. Quis sed autem expedita est voluptate
+esse neque. Quod aspernatur quam velit placeat nihil omnis debitis. Corporis sit
+rerum consectetur possimus rerum consequuntur. Rerum quas ut repellendus
+tenetur. Consequuntur adipisci dolores eveniet qui est ipsum.
+
+Atque deserunt necessitatibus unde facere amet molestiae. Ipsam at quia placeat
+aliquam autem. Enim corporis accusamus consequatur.
+
+Et vitae unde perferendis tenetur cupiditate non exercitationem. Aut molestiae
+sed est. Deserunt repudiandae non quia esse ad vitae vel in. Et reprehenderit
+dolore et aut distinctio.
+
+Vel quia molestiae quod sint fuga omnis est fuga. Minus quaerat repellat quod.
+Rerum rerum enim repellendus rerum consequatur non perspiciatis. Illo sapiente
+sed natus ipsa quia temporibus. Est nostrum fugit odio non voluptatem odit
+rerum. Et consequatur aut nostrum accusamus earum.
+
+Sit explicabo iure eligendi consequatur. Consequatur atque praesentium
+consequatur dolores quam. Neque eius provident harum placeat. Quo aut pariatur
+illum laborum porro minima. Dolorem nobis esse laudantium. Perspiciatis
+voluptate deleniti voluptatem et.
+
+Quos assumenda magnam non inventore. Adipisci repellendus eligendi possimus
+voluptate numquam voluptatem natus. Deleniti cupiditate facilis commodi aliquid
+voluptatem laudantium autem similique. Vel sunt cupiditate consequatur. Dolorum
+voluptatem nihil culpa fugiat non itaque animi iusto. Unde incidunt numquam
+vitae.
+
+Eius provident voluptatem animi quidem quia. Velit omnis voluptas atque.
+Voluptatem accusamus atque blanditiis commodi aspernatur ullam ad. Nulla quidem
+fugiat explicabo quo dolor hic.
+
+Rerum dolore quo ratione sed aspernatur doloremque. Ut neque laudantium quae
+enim dolores et. Laudantium dolores id assumenda autem aspernatur. Accusamus
+doloribus nihil rerum et atque est aut delectus.
+
+Nulla itaque mollitia vitae accusamus. Eveniet soluta praesentium dolore harum
+culpa. Totam voluptatem non aspernatur.
+
+Eveniet in illo consequatur. Fugiat et totam unde nihil quis. Non et velit
+recusandae blanditiis unde. Eaque fugiat id pariatur. Non numquam minima aut.
+Iste eos et autem et exercitationem velit officiis vero.
+
+Ullam minima quisquam est ducimus iste. Commodi occaecati inventore provident
+voluptatem repudiandae. Quia est qui dolore sit nisi officia doloremque dolor.
+Perspiciatis tempore laudantium quia repellendus quia deleniti. Sed consequuntur
+autem quisquam aliquam.
+
+Ut dolores natus et sunt delectus nulla. Ipsum eum quia ex est ut quia. Ratione
+et eius consequatur veritatis hic expedita ea.
+
+# References
diff --git a/paper/example/bibliography.bib b/paper/example/bibliography.bib
new file mode 100644
index 0000000..876eea1
--- /dev/null
+++ b/paper/example/bibliography.bib
@@ -0,0 +1,13 @@
+@article {Upper_writers_1974,
+ author = {Upper, Dennis},
+ title = {The unsuccessful self-treatment of a case of “writer's block”},
+ journal = {Journal of Applied Behavior Analysis},
+ volume = {7},
+ number = {3},
+ publisher = {Blackwell Publishing Ltd},
+ issn = {1938-3703},
+ url = {http://dx.doi.org/10.1901/jaba.1974.7-497a},
+ doi = {10.1901/jaba.1974.7-497a},
+ pages = {497--497},
+ year = {1974},
+}
diff --git a/paper/lua-filters/.travis.yml b/paper/lua-filters/.travis.yml
new file mode 100644
index 0000000..196eb28
--- /dev/null
+++ b/paper/lua-filters/.travis.yml
@@ -0,0 +1,55 @@
+# Use new container infrastructure to enable caching
+sudo: false
+
+# Do not choose a language; we provide our own build tools.
+language: minimal
+
+addons:
+ apt:
+ packages:
+ - aspell
+ - aspell-fr
+ - aspell-en
+ - imagemagick
+ - default-jre
+ - graphviz
+ - inkscape
+ - python3
+ - python3-pip
+ - python3-tk
+ - python3-numpy
+ - python3-matplotlib
+ - latex-xcolor
+ - lmodern
+ - texlive-bibtex-extra
+ - texlive-fonts-recommended
+ - texlive-generic-recommended
+ - texlive-latex-recommended
+ - texlive-latex-extra
+ - pgf
+
+before_install:
+- unset CC
+# Download and unpack the pandoc binary
+- |
+ pushd $HOME
+ RELEASES_URL='https://github.com/jgm/pandoc/releases'
+ export PANDOCVERSION=$(curl -I "$RELEASES_URL/latest" | sed -ne 's#Location:.*tag/\(.*\)$#\1#p' | tr -d "\n\r")
+ echo $PANDOCVERSION
+ wget $RELEASES_URL/download/$PANDOCVERSION/pandoc-$PANDOCVERSION-linux.tar.gz
+ tar xvzf pandoc-$PANDOCVERSION-linux.tar.gz
+ popd
+# Download plantuml.jar for plantuml filter
+- |
+ pushd $HOME
+ wget http://sourceforge.net/projects/plantuml/files/plantuml.jar
+ popd
+- export PATH=$HOME/pandoc-$PANDOCVERSION/bin:$PATH
+- export PLANTUML=$HOME/plantuml.jar
+
+install: []
+
+script:
+- |
+ make test
+
diff --git a/paper/lua-filters/LICENSE b/paper/lua-filters/LICENSE
new file mode 100644
index 0000000..720865c
--- /dev/null
+++ b/paper/lua-filters/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017-2019 John MacFarlane and contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/paper/lua-filters/Makefile b/paper/lua-filters/Makefile
new file mode 100644
index 0000000..0de553d
--- /dev/null
+++ b/paper/lua-filters/Makefile
@@ -0,0 +1,5 @@
+FILTERS=$(wildcard $(shell find * -type d | grep -v '[/\\]'))
+.PHONY: test
+
+test:
+ bash runtests.sh $(FILTERS)
diff --git a/paper/lua-filters/README.md b/paper/lua-filters/README.md
new file mode 100644
index 0000000..736ed8f
--- /dev/null
+++ b/paper/lua-filters/README.md
@@ -0,0 +1,21 @@
+[![travis build
+status](https://img.shields.io/travis/pandoc/lua-filters/master.svg?label=travis+build)](https://travis-ci.org/pandoc/lua-filters)
+
+# Lua Filters
+
+This repository collects Lua filters for pandoc.
+
+To learn about Lua filters, see the
+[documentation](http://pandoc.org/lua-filters.html).
+
+Each filter goes in its own subdirectory. Each subdirectory contains:
+
+- the filter itself (e.g. `wordcount.lua`)
+- a `README.md` describing the use of the filter
+- a `Makefile` with a `test` target to test the filter
+- some data files used for the tests, which may also serve
+ as examples
+
+PRs for new filters are welcome, provided they conform to
+these guidelines.
+
diff --git a/paper/lua-filters/abstract-to-meta/Makefile b/paper/lua-filters/abstract-to-meta/Makefile
new file mode 100644
index 0000000..1ea3124
--- /dev/null
+++ b/paper/lua-filters/abstract-to-meta/Makefile
@@ -0,0 +1,8 @@
+test: sample.md abstract-to-meta.lua
+ @pandoc --lua-filter=abstract-to-meta.lua --standalone --to=markdown $< \
+ | diff -u expected.md -
+
+expected.md: sample.md abstract-to-meta.lua
+ pandoc --lua-filter=abstract-to-meta.lua --standalone --output $@ $<
+
+.PHONY: test
diff --git a/paper/lua-filters/abstract-to-meta/README.md b/paper/lua-filters/abstract-to-meta/README.md
new file mode 100644
index 0000000..42c0d29
--- /dev/null
+++ b/paper/lua-filters/abstract-to-meta/README.md
@@ -0,0 +1,37 @@
+# abstract-to-meta
+
+This moves a document's abstract from the main text into the
+metadata. Metadata elements usually allow for finer placement
+control in the final output, but writing body text is easier and
+more natural.
+
+## Defining an Abstract
+
+A document abstract can either be put directly in the document
+metadata, for example by inserting an *abstract* attribute into a
+YAML block.
+
+ ---
+ abstract: |
+ Place abstract here.
+
+ Multiple paragraphs are possible.
+ ---
+
+The additional indentation and formatting requirements in YAML
+headers can be confusing or annoying for authors. It is hence
+preferable to allow abstracts to be written as normal sections.
+
+ # Abstract
+
+ Place abstract here.
+
+ Multiple paragraphs are possible.
+
+This filter turns the latter into the former by looking for a
+top-level header whose ID is `abstract`. Pandoc auto-creates IDs
+based on header contents, so a header titled *Abstract* will
+satisfy this condition.[^1]
+
+[^1]: This requires the `auto_identifiers` extension. It is
+    enabled by default.
diff --git a/paper/lua-filters/abstract-to-meta/abstract-to-meta.lua b/paper/lua-filters/abstract-to-meta/abstract-to-meta.lua
new file mode 100644
index 0000000..dbf9b03
--- /dev/null
+++ b/paper/lua-filters/abstract-to-meta/abstract-to-meta.lua
@@ -0,0 +1,23 @@
+-- True while the traversal is inside the section opened by the level-1
+-- header whose identifier is `abstract`.
+local looking_at_abstract = false
+-- Blocks collected from the abstract section, in the order they are visited.
+local abstract = {}
+
+--- Divert blocks into the abstract while inside the abstract section.
+-- While `looking_at_abstract` is set, the block is appended to `abstract`
+-- and an empty list is returned for it; otherwise nothing is returned.
+function Block (elem)
+  if not looking_at_abstract then
+    return
+  end
+  table.insert(abstract, elem)
+  return {}
+end
+
+--- Track entry into and exit from the abstract section.
+-- A level-1 header with identifier `abstract` starts collecting and is
+-- itself replaced by an empty list. Any other level-1 header stops
+-- collecting; headers of other levels leave the state unchanged.
+function Header (elem)
+  local is_top_level = elem.level == 1
+  if is_top_level and elem.identifier == 'abstract' then
+    looking_at_abstract = true
+    return {}
+  end
+  if is_top_level then
+    -- the next top-level section ends the abstract
+    looking_at_abstract = false
+  end
+end
+
+--- Store the collected abstract in the document metadata.
+-- An `abstract` value already present in the metadata takes precedence.
+-- Nothing is added when no abstract blocks were collected, so documents
+-- without an abstract section do not gain an empty `abstract` field.
+function Meta (meta)
+  if not meta.abstract and #abstract > 0 then
+    meta.abstract = pandoc.MetaBlocks(abstract)
+  end
+  return meta
+end
diff --git a/paper/lua-filters/abstract-to-meta/expected.md b/paper/lua-filters/abstract-to-meta/expected.md
new file mode 100644
index 0000000..638e268
--- /dev/null
+++ b/paper/lua-filters/abstract-to-meta/expected.md
@@ -0,0 +1,19 @@
+---
+abstract: |
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
+ tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim
+ veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea
+ commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
+ velit esse cillum dolore eu fugiat nulla pariatur.
+
+ Excepteur sint occaecat cupidatat non proident, sunt in culpa qui
+ officia deserunt mollit anim id est laborum.
+---
+
+Lorem Ipsum
+===========
+
+Quo dolore molestiae et laboriosam occaecati explicabo corrupti. Earum
+expedita ducimus quaerat est quam ut molestiae. Illum deleniti vel
+labore facilis et cum est. Est nemo est vel ad. Assumenda consequatur
+rerum officiis atque officia. Est nihil iste cumque ad qui.
diff --git a/paper/lua-filters/abstract-to-meta/sample.md b/paper/lua-filters/abstract-to-meta/sample.md
new file mode 100644
index 0000000..27aabcd
--- /dev/null
+++ b/paper/lua-filters/abstract-to-meta/sample.md
@@ -0,0 +1,17 @@
+# Abstract
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur.
+
+Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia
+deserunt mollit anim id est laborum.
+
+# Lorem Ipsum
+
+Quo dolore molestiae et laboriosam occaecati explicabo corrupti. Earum expedita
+ducimus quaerat est quam ut molestiae. Illum deleniti vel labore facilis et cum
+est. Est nemo est vel ad. Assumenda consequatur rerum officiis atque officia.
+Est nihil iste cumque ad qui.
diff --git a/paper/lua-filters/author-info-blocks/Makefile b/paper/lua-filters/author-info-blocks/Makefile
new file mode 100644
index 0000000..341c44d
--- /dev/null
+++ b/paper/lua-filters/author-info-blocks/Makefile
@@ -0,0 +1,8 @@
+test: sample.md author-info-blocks.lua
+ @pandoc --lua-filter=author-info-blocks.lua --standalone --to=native $< \
+ | diff -u expected.native -
+
+expected.native: sample.md author-info-blocks.lua
+ pandoc --lua-filter=author-info-blocks.lua --standalone --output $@ $<
+
+.PHONY: test
diff --git a/paper/lua-filters/author-info-blocks/README.md b/paper/lua-filters/author-info-blocks/README.md
new file mode 100644
index 0000000..3386bac
--- /dev/null
+++ b/paper/lua-filters/author-info-blocks/README.md
@@ -0,0 +1,59 @@
+# author-info-blocks
+
+This filter adds author-related header blocks usually included in
+scholarly articles, such as a list of author affiliations,
+correspondence information, and notes on equal contributors.
+
+
+## Dependencies
+
+This filter assumes metadata in the canonical format generated by
+the [scholarly-metadata filter](../scholarly-metadata).
+
+## Usage
+
+The filter should be run after *scholarly-metadata.lua*:
+
+ pandoc --lua-filter=scholarly-metadata/scholarly-metadata.lua \
+ --lua-filter=author-info-blocks/author-info-blocks.lua \
+ --output=outfile.pdf --pdf-engine=xelatex \
+ article.md
+
+The ways in which affiliation data should be given are described
+in the docs for *scholarly-metadata.lua*. Additionally, authors
+who contributed equally to an article can be marked by adding
+`equal_contributor: yes` to the respective YAML objects.
+Similarly, corresponding authors should be marked with
+`correspondence: yes` and have an `email` listed.
+
+### Example
+
+Take the following example YAML block:
+
+``` yaml
+---
+title: Affiliation Blocks Example
+author:
+ - Jane Doe:
+ institute:
+ - federation
+ equal_contributor: "yes"
+ correspondence: "yes"
+ email: jane.doe@example.com
+ - John Q. Doe:
+ institute: [federation, acme]
+ equal_contributor: "yes"
+ - Juan Pérez:
+ institute: acme
+institute:
+ - federation: Federation of Planets
+ - acme:
+ name: Acme Corporation
+---
+```
+
+This will mark Jane Doe and John Q. Doe as equal contributors and
+Jane Doe as the sole corresponding author. Below is a screenshot
+of a document header created from this metadata.
+
+![example document screenshot](document-screenshot.jpg)
diff --git a/paper/lua-filters/author-info-blocks/author-info-blocks.lua b/paper/lua-filters/author-info-blocks/author-info-blocks.lua
new file mode 100644
index 0000000..27e32bc
--- /dev/null
+++ b/paper/lua-filters/author-info-blocks/author-info-blocks.lua
@@ -0,0 +1,176 @@
+--[[
+author-info-blocks – generate title components
+
+Copyright © 2017–2019 Albert Krewinkel
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+]]
+local List = require 'pandoc.List'
+local utils = require 'pandoc.utils'
+local stringify = utils.stringify
+
+--- Default author marks, keyed by mark name. Each value is a list of
+-- inlines: raw LaTeX when FORMAT is exactly 'latex', plain characters
+-- for every other format.
+local default_marks = {
+  corresponding_author = FORMAT == 'latex'
+    and {pandoc.RawInline('latex', '*')}
+    or {pandoc.Str '✉'},
+  equal_contributor = FORMAT == 'latex'
+    and {pandoc.RawInline('latex', '$\\dagger{}$')}
+    or {pandoc.Str '*'},
+}
+
+--- Concatenate a list of lists into a single List, placing the items of
+-- `elem` between each pair of consecutive lists.
+-- @param lists sequence of lists to join
+-- @param elem  list of separator items
+-- @return a new List; empty when `lists` is empty
+local function intercalate(lists, elem)
+  local joined = List:new{}
+  for pos = 1, #lists do
+    if pos > 1 then
+      joined:extend(elem)
+    end
+    joined:extend(lists[pos])
+  end
+  return joined
+end
+
+--- Tell whether an author should be listed as a corresponding author.
+-- The result is truthy only if both `correspondence` and `email` are set;
+-- in that case the `email` value itself is returned.
+local function is_corresponding_author(author)
+  if not author.correspondence then
+    return author.correspondence
+  end
+  return author.email
+end
+
+--- Create a function that renders a single author (with all author
+-- notes) as a list of inlines.
+-- The returned function copies the author's name and appends one
+-- superscript containing, in order and separated by commas: the
+-- equal-contributor mark, the institute indices, and the
+-- corresponding-author mark.
+-- @param get_mark function mapping a mark name to a list of inlines
+-- @return function taking an author and returning a List of inlines
+local function author_inline_generator (get_mark)
+  return function (author)
+    local author_marks = List:new{}
+    if author.equal_contributor then
+      author_marks[#author_marks + 1] = get_mark 'equal_contributor'
+    end
+    -- An author without an `institute` field gets no affiliation index
+    -- instead of raising an error in `ipairs`.
+    for _, idx in ipairs(author.institute or {}) do
+      local idx_str
+      if type(idx) ~= 'table' then
+        idx_str = tostring(idx)
+      else
+        -- table values are converted to text with stringify
+        idx_str = stringify(idx)
+      end
+      author_marks[#author_marks + 1] = {pandoc.Str(idx_str)}
+    end
+    if is_corresponding_author(author) then
+      author_marks[#author_marks + 1] = get_mark 'corresponding_author'
+    end
+    local res = List.clone(author.name)
+    res[#res + 1] = pandoc.Superscript(intercalate(author_marks, {pandoc.Str ','}))
+    return res
+  end
+end
+
+--- Check whether the given author is marked as an equal contributor.
+-- Returns the author's `equal_contributor` field unchanged.
+local function is_equal_contributor (author)
+ return author.equal_contributor
+end
+
+--- Build the note that explains the equal-contributor mark.
+-- @param authors list of authors
+-- @param mark    function mapping a mark name to a list of inlines
+-- @return nil when no author has `equal_contributor` set; otherwise a
+--         List holding one paragraph (superscript mark, space, note text)
+local function create_equal_contributors_block(authors, mark)
+  local author_list = List:new(authors)
+  if not author_list:find_if(is_equal_contributor) then
+    return nil
+  end
+  local note = pandoc.Para{
+    pandoc.Superscript(mark'equal_contributor'),
+    pandoc.Space(),
+    pandoc.Str 'These authors contributed equally to this work.'
+  }
+  return List:new{note}
+end
+
+--- Generate a list of blocks listing all affiliations, marked with
+-- arabic numbers.
+-- @param affiliations list of affiliations, each with a `name` field
+--        holding a list of inlines
+-- @return nil when there are no affiliations (so that no empty paragraph
+--         is emitted); otherwise a list with one paragraph in which each
+--         affiliation is prefixed by its superscript position and the
+--         entries are separated by line breaks
+local function create_affiliations_blocks(affiliations)
+  if not affiliations or #affiliations == 0 then
+    return nil
+  end
+  local affil_lines = List:new(affiliations):map(
+    function (affil, i)
+      local num_inlines = List:new{
+        pandoc.Superscript{pandoc.Str(tostring(i))},
+        pandoc.Space()
+      }
+      return num_inlines .. affil.name
+    end
+  )
+  return {pandoc.Para(intercalate(affil_lines, {pandoc.LineBreak()}))}
+end
+
+--- Generate a list of blocks containing the correspondence information.
+-- Every corresponding author becomes a `mailto:` link whose text is the
+-- author's name followed by the email address in angle brackets.
+-- @param authors list of authors; a missing list is treated as empty
+-- @param mark    function mapping a mark name to a list of inlines
+-- @return nil when there is no corresponding author; otherwise a list
+--         with one paragraph: superscript mark, "Correspondence:", and
+--         the comma-separated links
+local function create_correspondence_blocks(authors, mark)
+  local corresponding_authors = List:new{}
+  -- `authors or {}` keeps documents without author metadata from
+  -- raising an error in `ipairs`.
+  for _, author in ipairs(authors or {}) do
+    if is_corresponding_author(author) then
+      local mailto = 'mailto:' .. stringify(author.email)
+      local author_with_mail = List:new(
+        author.name .. List:new{pandoc.Space(), pandoc.Str '<'} ..
+        author.email .. List:new{pandoc.Str '>'}
+      )
+      local link = pandoc.Link(author_with_mail, mailto)
+      table.insert(corresponding_authors, {link})
+    end
+  end
+  if #corresponding_authors == 0 then
+    return nil
+  end
+  local correspondence = List:new{
+    pandoc.Superscript(mark'corresponding_author'),
+    pandoc.Space(),
+    pandoc.Str'Correspondence:',
+    pandoc.Space()
+  }
+  local sep = List:new{pandoc.Str',', pandoc.Space()}
+  return {
+    pandoc.Para(correspondence .. intercalate(corresponding_authors, sep))
+  }
+end
+
+--- Generate a list of inlines containing all authors.
+-- Authors are separated by ", "; when there is more than one author the
+-- last one is introduced by ", and ".
+-- @param authors list of authors; a missing or empty list yields an
+--        empty result instead of an error
+-- @param mark    function mapping a mark name to a list of inlines
+-- @return List of inlines
+local function create_authors_inlines(authors, mark)
+  local inlines_generator = author_inline_generator(mark)
+  local inlines = List:new(authors):map(inlines_generator)
+  if #inlines == 0 then
+    -- nothing to render; avoids `#authors` on nil and extending with nil
+    return List:new{}
+  end
+  local and_str = List:new{pandoc.Space(), pandoc.Str'and', pandoc.Space()}
+
+  -- the last author is held back so it can be joined with "and"
+  local last_author = inlines[#inlines]
+  inlines[#inlines] = nil
+  local result = intercalate(inlines, {pandoc.Str ',', pandoc.Space()})
+  if #authors > 1 then
+    result:extend(List:new{pandoc.Str ","} .. and_str)
+  end
+  result:extend(last_author)
+  return result
+end
+
+--- Prepend the author-related blocks to the document body and replace
+-- the author metadata with formatted values.
+local function add_author_info_blocks (doc)
+  local meta = doc.meta
+  local authors = doc.meta.author
+
+  local function mark (mark_name)
+    return default_marks[mark_name]
+  end
+
+  local body = List:new{}
+  body:extend(create_equal_contributors_block(authors, mark) or {})
+  body:extend(create_affiliations_blocks(doc.meta.institute) or {})
+  body:extend(create_correspondence_blocks(authors, mark) or {})
+  body:extend(doc.blocks)
+
+  -- Most formats get the authors as one formatted run of inlines.
+  -- LaTeX output looks nicer when the authors are provided as a list.
+  if FORMAT:match 'latex' then
+    meta.author = pandoc.MetaList(authors):map(author_inline_generator(mark))
+  else
+    meta.author = pandoc.MetaInlines(create_authors_inlines(authors, mark))
+  end
+  -- The institute information now lives in the affiliations block.
+  meta.institute = nil
+
+  return pandoc.Pandoc(body, meta)
+end
+
+return {
+  {
+    Pandoc = add_author_info_blocks
+  }
+}
diff --git a/paper/lua-filters/author-info-blocks/document-screenshot.jpg b/paper/lua-filters/author-info-blocks/document-screenshot.jpg
new file mode 100644
index 0000000..9e30e9f
--- /dev/null
+++ b/paper/lua-filters/author-info-blocks/document-screenshot.jpg
Binary files differ
diff --git a/paper/lua-filters/author-info-blocks/expected.native b/paper/lua-filters/author-info-blocks/expected.native
new file mode 100644
index 0000000..6608de7
--- /dev/null
+++ b/paper/lua-filters/author-info-blocks/expected.native
@@ -0,0 +1,5 @@
+Pandoc (Meta {unMeta = fromList [("author",MetaInlines [Str "Jane",Space,Str "Doe",Superscript [Str "*",Str ",",Str "1",Str ",",Str "\9993"],Str ",",Space,Str "John",Space,Str "Q.",Space,Str "Doe",Superscript [Str "*",Str ",",Str "1",Str ",",Str "2"],Str ",",Space,Str "and",Space,Str "Juan",Space,Str "P\233rez",Superscript [Str "2"]]),("title",MetaInlines [Str "Affiliation",Space,Str "Blocks",Space,Str "Example"])]})
+[Para [Superscript [Str "*"],Space,Str "These authors contributed equally to this work."]
+,Para [Superscript [Str "1"],Space,Str "Federation",Space,Str "of",Space,Str "Planets",LineBreak,Superscript [Str "2"],Space,Str "Acme",Space,Str "Corporation"]
+,Para [Superscript [Str "\9993"],Space,Str "Correspondence:",Space,Link ("",[],[]) [Str "Jane",Space,Str "Doe",Space,Str "<",Str "jane.doe@example.com",Str ">"] ("mailto:jane.doe@example.com","")]
+,Para [Str "Lorem",Space,Str "ipsum",Space,Str "dolor",Space,Str "sit",Space,Str "amet."]]
diff --git a/paper/lua-filters/author-info-blocks/sample.md b/paper/lua-filters/author-info-blocks/sample.md
new file mode 100644
index 0000000..541aa26
--- /dev/null
+++ b/paper/lua-filters/author-info-blocks/sample.md
@@ -0,0 +1,28 @@
+---
+author:
+- correspondence: yes
+ email: 'jane.doe\@example.com'
+ equal_contributor: yes
+ id: Jane Doe
+ institute:
+ - 1
+ name: Jane Doe
+- equal_contributor: yes
+ id: 'John Q. Doe'
+ institute:
+ - 1
+ - 2
+ name: 'John Q. Doe'
+- id: Juan Pérez
+ institute:
+ - 2
+ name: Juan Pérez
+institute:
+- id: federation
+ name: Federation of Planets
+- id: acme
+ name: Acme Corporation
+title: Affiliation Blocks Example
+---
+
+Lorem ipsum dolor sit amet.
diff --git a/paper/lua-filters/bibexport/Makefile b/paper/lua-filters/bibexport/Makefile
new file mode 100644
index 0000000..67081b0
--- /dev/null
+++ b/paper/lua-filters/bibexport/Makefile
@@ -0,0 +1,5 @@
+test:
+ @pandoc --lua-filter=bibexport.lua sample.md > /dev/null
+ @diff --strip-trailing-cr -u bibexport.aux expected.bibexport.aux
+ @diff --strip-trailing-cr -u bibexport.bib expected.bibexport.bib
+ @rm -f bibexport.aux bibexport.bib
diff --git a/paper/lua-filters/bibexport/README.md b/paper/lua-filters/bibexport/README.md
new file mode 100644
index 0000000..ea1b54d
--- /dev/null
+++ b/paper/lua-filters/bibexport/README.md
@@ -0,0 +1,31 @@
+# bibexport
+
+Export all cited references into a single bibtex file. This is
+most useful when writing collaboratively while using a large,
+private bibtex collection. Using the bibexport filter allows one to
+create a reduced bibtex file suitable for sharing with
+collaborators.
+
+## Prerequisites
+
+This filter expects the `bibexport` executable to be installed
+and in the user's PATH.
+
+## Usage
+
+The filter runs `bibexport` on a temporary *aux* file, creating
+the file *bibexport.bib* on success. The name of the temporary
+*.aux* file can be set via the `auxfile` meta value; if no value
+is specified, *bibexport.aux* will be used as filename.
+
+Please note that `bibexport` prints messages to stdout. Pandoc
+should be called with the `-o` or `--output` option instead of
+redirecting stdout to a file. E.g.
+
+ pandoc --lua-filter=bibexport.lua article.md -o article.html
+
+or, when the filter is called in a one-off fashion
+
+ pandoc --lua-filter=bibexport.lua article.md -o /dev/null
+
+
diff --git a/paper/lua-filters/bibexport/bibexport.lua b/paper/lua-filters/bibexport/bibexport.lua
new file mode 100644
index 0000000..9174e69
--- /dev/null
+++ b/paper/lua-filters/bibexport/bibexport.lua
@@ -0,0 +1,82 @@
+local utils = require 'pandoc.utils'
+local List = require 'pandoc.List'
+
+local citation_id_set = {}
+
+--- Record the ID of every citation contained in a Cite element.
+-- The `id` field is used when present, `citationId` otherwise.
+function Cite (c)
+  for _, citation in ipairs(c.citations) do
+    local key = citation.id or citation.citationId
+    citation_id_set[key] = true
+  end
+end
+
+--- Return the keys of `citation_id_set` as a list.
+-- The list is built with `pairs`, so its order is unspecified.
+function citation_ids ()
+  local ids = {}
+  for id in pairs(citation_id_set) do
+    table.insert(ids, id)
+  end
+  return ids
+end
+
+--- Build the comma-separated argument of the `\bibdata` command.
+-- A trailing `.bib` is removed from every bibliography name.
+-- @param bibliography a single bibliography value, or a MetaList of them
+-- @return string of bibliography names joined by commas
+function bibdata (bibliography)
+  -- Declared local so the helper does not leak into the global environment.
+  local function bibname (bibitem)
+    local name
+    if type(bibitem) == 'string' then
+      name = bibitem
+    else
+      -- bibitem is assumed to be a list of inlines
+      name = utils.stringify(pandoc.Span(bibitem))
+    end
+    -- The parentheses keep only the first result of gsub. Without them
+    -- the substitution count would end up in the table constructor
+    -- below and produce output such as `\bibdata{refs,1}`.
+    return (name:gsub('%.bib$', ''))
+  end
+
+  local bibs = bibliography.t == 'MetaList'
+    and List.map(bibliography, bibname)
+    or {bibname(bibliography)}
+  return table.concat(bibs, ',')
+end
+
+--- Produce the text of the dummy aux file.
+-- The result has three lines, each terminated by a newline: the
+-- `alpha` bibliography style, the bibliography names, and the sorted,
+-- comma-separated citation IDs.
+function aux_content(bibliography)
+  local sorted_ids = citation_ids()
+  table.sort(sorted_ids)
+
+  local lines = {}
+  lines[#lines + 1] = '\\bibstyle{alpha}'
+  lines[#lines + 1] = '\\bibdata{' .. bibdata(bibliography) .. '}'
+  lines[#lines + 1] = '\\citation{' .. table.concat(sorted_ids, ',') .. '}'
+  -- the empty last entry yields the trailing newline
+  lines[#lines + 1] = ''
+  return table.concat(lines, '\n')
+end
+
+--- Write the dummy aux file and report its name on stdout.
+-- @param bibliography bibliography metadata, passed on to `aux_content`
+-- @param auxfile      file name as a string or a list of inlines; any
+--        other value selects the default name `bibexport.aux`
+-- @return the name of the file that was written
+-- Raises an error carrying the system's message when the file cannot be
+-- opened for writing.
+function write_dummy_aux (bibliography, auxfile)
+  local filename
+  if type(auxfile) == 'string' then
+    filename = auxfile
+  elseif type(auxfile) == 'table' then
+    -- assume list of inlines
+    filename = utils.stringify(pandoc.Span(auxfile))
+  else
+    filename = 'bibexport.aux'
+  end
+  -- Build the content first so a failure there cannot leave behind an
+  -- open handle or a truncated file.
+  local content = aux_content(bibliography)
+  -- io.open returns nil plus a message on failure; assert turns that
+  -- into a readable error instead of "attempt to index a nil value".
+  local fh = assert(io.open(filename, 'w'))
+  fh:write(content)
+  fh:close()
+  io.stdout:write('Aux written to ' .. filename .. '\n')
+  return filename
+end
+
+--- Run `bibexport` for documents that declare a bibliography.
+-- Writes the dummy aux file, runs `bibexport` on it, and reports the
+-- outcome. The document itself is never modified (nil is returned).
+function Pandoc (doc)
+  local meta = doc.meta
+  if not meta.bibliography then
+    return nil
+  end
+
+  -- create a dummy .aux file
+  local auxfile_name = write_dummy_aux(meta.bibliography, meta.auxfile)
+  -- NOTE(review): the file name comes from document metadata and is
+  -- passed to the shell unquoted; names with spaces or shell
+  -- metacharacters will not work as intended.
+  local ok = os.execute('bibexport ' .. auxfile_name)
+  -- Lua 5.2+ returns true on success, Lua 5.1 returns the exit status.
+  if ok == true or ok == 0 then
+    io.stdout:write('Output written to bibexport.bib\n')
+  else
+    io.stderr:write('Running bibexport on ' .. auxfile_name .. ' failed\n')
+  end
+  return nil
+end
diff --git a/paper/lua-filters/bibexport/coffee.bib b/paper/lua-filters/bibexport/coffee.bib
new file mode 100644
index 0000000..80b723c
--- /dev/null
+++ b/paper/lua-filters/bibexport/coffee.bib
@@ -0,0 +1,48 @@
+@article{BrSm02,
+ author = {C. F. Brice and A. P. Smith},
+ title = {Effects of caffeine on mood and performance: a study on
+ realistic consumption},
+ journal = {Psychopharmacology (Berlin)},
+ year = 2002,
+ volume = 164,
+ pages = {188--192}
+}
+
+@article{LoSnMuKo95,
+ author = {M. M. Lorist and J. Snel and G. Mulder and A. Kok},
+ title = {Aging, caffeine, and information processing: an
+ event-related potential analysis},
+ journal = {Electroencephalogr Clin Neurophysiol},
+ year = 1995,
+ volume = 96,
+ pages = {453--467}
+}
+
+@article{Li95,
+ author = {L. Linde},
+ title = {Mental effects of caffeine in fatigued and non-fatigued
+ female and male subjects},
+ journal = {Ergonomics},
+ year = 1995,
+ volume = 38,
+ pages = {864--885}
+}
+
+@article{KjOs07,
+ author = {Dannie Kjeldgaard and Jacob Ostberg},
+ title = {Coffee grounds and the global cup: glocal consumer culture
+ in scandinavia},
+ journal = {Consumption, Markets and Culture},
+ year = 2007,
+ pages = {175--187},
+ volume = 10,
+ issue = 2
+}
+
+@book{De92,
+ author = {G Debry},
+ title = {Coffee and Health: Composition, Consumption and Effect upon Health},
+ publisher = {John Libbey Eurotext},
+ year = 1992,
+ ISBN = {2-7420-0037-2}
+}
diff --git a/paper/lua-filters/bibexport/expected.bibexport.aux b/paper/lua-filters/bibexport/expected.bibexport.aux
new file mode 100644
index 0000000..d5f3144
--- /dev/null
+++ b/paper/lua-filters/bibexport/expected.bibexport.aux
@@ -0,0 +1,3 @@
+\bibstyle{alpha}
+\bibdata{sample,coffee}
+\citation{Li95,Upper_writers_1974}
diff --git a/paper/lua-filters/bibexport/expected.bibexport.bib b/paper/lua-filters/bibexport/expected.bibexport.bib
new file mode 100644
index 0000000..57448ed
--- /dev/null
+++ b/paper/lua-filters/bibexport/expected.bibexport.bib
@@ -0,0 +1,27 @@
+
+
+@article{Li95,
+ author = {L. Linde},
+ journal = {Ergonomics},
+ pages = {864--885},
+ title = {Mental effects of caffeine in fatigued and
+ non-fatigued female and male subjects},
+ volume = {38},
+ year = {1995},
+}
+
+@article{Upper_writers_1974,
+ author = {Upper, Dennis},
+ journal = {Journal of Applied Behavior Analysis},
+ number = {3},
+ pages = {497--497},
+ publisher = {Blackwell Publishing Ltd},
+ title = {The unsuccessful self-treatment of a case of
+ “writer's block”},
+ volume = {7},
+ year = {1974},
+ doi = {10.1901/jaba.1974.7-497a},
+ issn = {1938-3703},
+ url = {http://dx.doi.org/10.1901/jaba.1974.7-497a},
+}
+
diff --git a/paper/lua-filters/bibexport/sample.bib b/paper/lua-filters/bibexport/sample.bib
new file mode 100644
index 0000000..876eea1
--- /dev/null
+++ b/paper/lua-filters/bibexport/sample.bib
@@ -0,0 +1,13 @@
+@article {Upper_writers_1974,
+ author = {Upper, Dennis},
+ title = {The unsuccessful self-treatment of a case of “writer's block”},
+ journal = {Journal of Applied Behavior Analysis},
+ volume = {7},
+ number = {3},
+ publisher = {Blackwell Publishing Ltd},
+ issn = {1938-3703},
+ url = {http://dx.doi.org/10.1901/jaba.1974.7-497a},
+ doi = {10.1901/jaba.1974.7-497a},
+ pages = {497--497},
+ year = {1974},
+}
diff --git a/paper/lua-filters/bibexport/sample.md b/paper/lua-filters/bibexport/sample.md
new file mode 100644
index 0000000..d458b37
--- /dev/null
+++ b/paper/lua-filters/bibexport/sample.md
@@ -0,0 +1,18 @@
+---
+bibliography:
+- sample.bib
+- coffee.bib
+...
+
+# Abstract
+
+This is an example article. It was written under the influence of
+coffee, which acts to counter fatigue [@Li95].
+
+
+# Further reading
+
+Authors struggling to fill their document with content are referred to
+@Upper_writers_1974.
+
+# References
diff --git a/paper/lua-filters/cito/Makefile b/paper/lua-filters/cito/Makefile
new file mode 100644
index 0000000..1997818
--- /dev/null
+++ b/paper/lua-filters/cito/Makefile
@@ -0,0 +1,6 @@
+test:
+ @pandoc --lua-filter=cito.lua --output=output.md --standalone sample.md
+ @diff -u expected.md output.md
+ @rm -f output.md
+
+.PHONY: test
diff --git a/paper/lua-filters/cito/README.md b/paper/lua-filters/cito/README.md
new file mode 100644
index 0000000..b0e2cd3
--- /dev/null
+++ b/paper/lua-filters/cito/README.md
@@ -0,0 +1,76 @@
+# cito
+
+This filter extracts optional CiTO (Citation Typing Ontology)
+information from citations and stores the information in the
+document's metadata. The extracted info is intended to be used in
+combination with other filters, templates, or custom writers. It
+is mandatory to run pandoc-citeproc *after* this filter if CiTO
+data is embedded in the document; otherwise pandoc-citeproc will
+interpret CiTO properties as part of the citation ID.
+
+## Using the Citation Typing Ontology
+
+The [citation typing ontology] (CiTO) allows authors to specify the
+reason a citation is given. This is helpful for the authors and
+their co-authors, and furthermore adds data that can be used by
+readers to search and navigate relevant publications.
+
+A CiTO annotation must come before the citation key and be
+followed by a colon. E.g., `@method_in:towbin_1979` signifies
+that the citation with ID *towbin_1979* is cited because the
+method described in that paper has been used in the paper at
+hand.
+
+[citation typing ontology]: http://purl.org/spar/cito
+
+## Recognized CiTO properties
+
+Below is the list of CiTO properties recognized by the filter,
+together with the aliases that can be used as shorthands.
+
+- agrees_with
+ - agree_with
+- citation
+- cites
+- cites_as_authority
+ - as_authority
+ - authority
+- cites_as_data_source
+ - as_data_source
+ - data_source
+- cites_as_evidence
+ - as_evidence
+ - evidence
+- cites_as_metadata_document
+ - as_metadata_document
+ - metadata_document
+ - metadata
+- cites_as_recommended_reading
+ - as_recommended_reading
+ - recommended_reading
+- disagrees_with
+ - disagree
+ - disagrees
+- disputes
+- documents
+- extends
+- includes_excerpt_from
+ - excerpt
+ - excerpt_from
+- includes_quotation_from
+ - quotation
+ - quotation_from
+- obtains_background_from
+ - background
+ - background_from
+- refutes
+- replies_to
+- updates
+- uses_data_from
+ - data
+ - data_from
+- uses_method_in
+ - method
+ - method_in
+
+## References
+
+This approach was described in <https://doi.org/10.7717/peerj-cs.112>.
diff --git a/paper/lua-filters/cito/cito.lua b/paper/lua-filters/cito/cito.lua
new file mode 100644
index 0000000..a1a3421
--- /dev/null
+++ b/paper/lua-filters/cito/cito.lua
@@ -0,0 +1,138 @@
+-- Copyright © 2017–2019 Albert Krewinkel, Robert Winkler
+--
+-- This library is free software; you can redistribute it and/or modify it
+-- under the terms of the MIT license. See LICENSE for details.
+
+local _version = '1.0.0'
+local properties_and_aliases = {
+ agrees_with = {
+ 'agree_with'
+ },
+ citation = {
+ },
+ cites = {
+ },
+ cites_as_authority = {
+ 'as_authority',
+ 'authority'
+ },
+ cites_as_data_source = {
+ "as_data_source",
+ "data_source"
+ },
+ cites_as_evidence = {
+ 'as_evidence',
+ 'evidence'
+ },
+ cites_as_metadata_document = {
+ 'as_metadata_document',
+ 'metadata_document',
+ 'metadata'
+ },
+ cites_as_recommended_reading = {
+ 'as_recommended_reading',
+ 'recommended_reading'
+ },
+ disagrees_with = {
+ 'disagree',
+ 'disagrees'
+ },
+ disputes = {
+ },
+ documents = {
+ },
+ extends = {
+ },
+ includes_excerpt_from = {
+ 'excerpt',
+ 'excerpt_from'
+ },
+ includes_quotation_from = {
+ 'quotation',
+ 'quotation_from'
+ },
+ obtains_background_from = {
+ 'background',
+ 'background_from'
+ },
+ refutes = {
+ },
+ replies_to = {
+ },
+ updates = {
+ },
+ uses_data_from = {
+ 'data',
+ 'data_from'
+ },
+ uses_method_in = {
+ 'method',
+ 'method_in'
+ },
+}
+
+local default_cito_property = 'citation'
+
+--- Lookup table resolving every accepted name (a property itself or one
+--- of its aliases) to the actual CiTO property.
+local properties_by_alias = {}
+for canonical, alias_list in pairs(properties_and_aliases) do
+  -- a property name resolves to itself
+  properties_by_alias[canonical] = canonical
+  for i = 1, #alias_list do
+    properties_by_alias[alias_list[i]] = canonical
+  end
+end
+
+--- Separate a CiTO property prefix from a citation ID.
+--- The ID is split at its last colon. If the part before it is a known
+--- property or alias, the resolved property and the remaining ID are
+--- returned. Otherwise the result is `default_cito_property` together
+--- with the unchanged input ID.
+local function split_cito_from_id (citation_id)
+  local alias, bare_id = citation_id:match('^(.+):(.+)$')
+  local property = properties_by_alias[alias]
+  if not property then
+    return default_cito_property, citation_id
+  end
+  return property, bare_id
+end
+
+--- Append a citation ID to the list kept for a CiTO property.
+--- The list for `prop` is created in `cito_cites` on first use. Nothing
+--- happens when `prop` is nil or false.
+local function store_cito (cito_cites, prop, cite_id)
+  if prop then
+    local ids = cito_cites[prop] or {}
+    cito_cites[prop] = ids
+    ids[#ids + 1] = cite_id
+  end
+end
+
+--- Returns a Cite filter function which extracts CiTO information and
+--- adds it to the given collection table.
+--- For every citation the CiTO prefix (if any) is removed from the ID and
+--- the bare ID is stored under its property in `cito_cites`.
+local function extract_cito (cito_cites)
+  return function (cite)
+    -- ipairs visits the citations in list order; the traversal order of
+    -- pairs is unspecified, which could shuffle the stored IDs.
+    for _, citation in ipairs(cite.citations) do
+      local cito_prop, cite_id = split_cito_from_id(citation.id)
+      store_cito(cito_cites, cito_prop, cite_id)
+      citation.id = cite_id
+    end
+    return cite
+  end
+end
+
+--- Citation IDs collected from the document, grouped by CiTO property.
+local citations_by_property = {}
+
+--- First filter: collect CiTO data from all Cite elements.
+local collect_filter = {
+  Cite = extract_cito(citations_by_property)
+}
+
+--- Second filter: expose the collected data as `cito_cites` metadata.
+local metadata_filter = {
+  Meta = function (meta)
+    meta.cito_cites = citations_by_property
+    return meta
+  end
+}
+
+return {collect_filter, metadata_filter}
diff --git a/paper/lua-filters/cito/expected.md b/paper/lua-filters/cito/expected.md
new file mode 100644
index 0000000..707ff74
--- /dev/null
+++ b/paper/lua-filters/cito/expected.md
@@ -0,0 +1,19 @@
+---
+cito_cites:
+ cites_as_evidence:
+ - Li95
+ cites_as_recommended_reading:
+ - 'Upper\_writers\_1974'
+---
+
+Abstract
+========
+
+This is an example article. It was written under the influence of
+coffee, which acts to counter fatigue [@Li95].
+
+Further reading
+===============
+
+Authors struggling to fill their document with content are referred to
+@Upper_writers_1974.
diff --git a/paper/lua-filters/cito/sample.bib b/paper/lua-filters/cito/sample.bib
new file mode 100644
index 0000000..4a4ff62
--- /dev/null
+++ b/paper/lua-filters/cito/sample.bib
@@ -0,0 +1,24 @@
+@article{Li95,
+ author = {L. Linde},
+ journal = {Ergonomics},
+ pages = {864--885},
+ title = {Mental effects of caffeine in fatigued and
+ non-fatigued female and male subjects},
+ volume = {38},
+ year = {1995},
+}
+
+@article{Upper_writers_1974,
+ author = {Upper, Dennis},
+ journal = {Journal of Applied Behavior Analysis},
+ number = {3},
+ pages = {497--497},
+ publisher = {Blackwell Publishing Ltd},
+ title = {The unsuccessful self-treatment of a case of
+ “writer's block”},
+ volume = {7},
+ year = {1974},
+ doi = {10.1901/jaba.1974.7-497a},
+ issn = {1938-3703},
+ url = {http://dx.doi.org/10.1901/jaba.1974.7-497a},
+}
diff --git a/paper/lua-filters/cito/sample.md b/paper/lua-filters/cito/sample.md
new file mode 100644
index 0000000..a9989cb
--- /dev/null
+++ b/paper/lua-filters/cito/sample.md
@@ -0,0 +1,10 @@
+# Abstract
+
+This is an example article. It was written under the influence of
+coffee, which acts to counter fatigue [@cites_as_evidence:Li95].
+
+
+# Further reading
+
+Authors struggling to fill their document with content are referred to
+@recommended_reading:Upper_writers_1974.
diff --git a/paper/lua-filters/diagram-generator/.gitignore b/paper/lua-filters/diagram-generator/.gitignore
new file mode 100644
index 0000000..01e67c0
--- /dev/null
+++ b/paper/lua-filters/diagram-generator/.gitignore
@@ -0,0 +1,2 @@
+sample.html
+tmp-latex
diff --git a/paper/lua-filters/diagram-generator/Makefile b/paper/lua-filters/diagram-generator/Makefile
new file mode 100644
index 0000000..ccd79c8
--- /dev/null
+++ b/paper/lua-filters/diagram-generator/Makefile
@@ -0,0 +1,13 @@
+.PHONY: test
+test: sample.html
+
+sample.html: sample.md
+ @pandoc --self-contained \
+ --lua-filter=diagram-generator.lua \
+ --metadata=pythonPath:"python3" \
+ --metadata=title:"README" \
+ --output=$@ $<
+
+clean:
+ rm -f sample.html
+ rm -rf tmp-latex
diff --git a/paper/lua-filters/diagram-generator/README.md b/paper/lua-filters/diagram-generator/README.md
new file mode 100644
index 0000000..d04e204
--- /dev/null
+++ b/paper/lua-filters/diagram-generator/README.md
@@ -0,0 +1,252 @@
+# Diagram Generator Lua Filter
+
+## Introduction
+This Lua filter is used to create images with or without captions from code
+blocks. Currently PlantUML, Graphviz, Ti*k*Z and Python can be processed.
+This document also serves as a test document, which is why the subsequent
+test diagrams are integrated in every supported language.
+
+## Prerequisites
+To be able to use this Lua filter, the respective external tools must be
+installed. However, it is sufficient if the tools to be used are installed.
+If you only want to use PlantUML, you don't need LaTeX or Python, etc.
+
+### PlantUML
+To use PlantUML, you must install PlantUML itself. See the
+[PlantUML website](http://plantuml.com/) for more details. It should be
+noted that PlantUML is a Java program and therefore Java must also
+be installed.
+
+By default, this filter expects the plantuml.jar file to be in the
+working directory. Alternatively, the environment variable
+`PLANTUML` can be set with a path. If, for example, a specific
+PlantUML version is to be used per pandoc document, the
+`plantumlPath` meta variable can be set.
+
+Furthermore, this filter assumes that Java is located in the
+system or user path. This means that from any place of the system
+the `java` command is understood. Alternatively, the `JAVA_HOME`
+environment variable gets used. To use a specific Java version per
+pandoc document, use the `javaPath` meta variable. Please notice
+that `JAVA_HOME` must be set to the java's home directory e.g.
+`c:\Program Files\Java\jre1.8.0_201\` whereas `javaPath` must be
+set to the absolute path of `java.exe` e.g.
+`c:\Program Files\Java\jre1.8.0_201\bin\java.exe`.
+
+Example usage:
+
+~~~~~~~~~~~~~~~~
+```{.plantuml caption="This is an image, created by **PlantUML**."}
+@startuml
+Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response
+Alice -> Bob: Another authentication Request Alice <-- Bob: another Response
+@enduml
+```
+~~~~~~~~~~~~~~~~
+
+### Graphviz
+To use Graphviz you only need to install Graphviz, as you can read
+on its [website](http://www.graphviz.org/). There are no other
+dependencies.
+
+This filter assumes that the `dot` command is located in the path
+and therefore can be used from any location. Alternatively, you can
+set the environment variable `DOT` or use the pandoc's meta variable
+`dotPath`.
+
+Example usage from [the Graphviz
+gallery](https://graphviz.gitlab.io/_pages/Gallery/directed/fsm.html):
+
+~~~~~~~~~~~~~~~~
+```{.graphviz caption="This is an image, created by **Graphviz**'s dot."}
+digraph finite_state_machine {
+ rankdir=LR;
+ size="8,5"
+ node [shape = doublecircle]; LR_0 LR_3 LR_4 LR_8;
+ node [shape = circle];
+ LR_0 -> LR_2 [ label = "SS(B)" ];
+ LR_0 -> LR_1 [ label = "SS(S)" ];
+ LR_1 -> LR_3 [ label = "S($end)" ];
+ LR_2 -> LR_6 [ label = "SS(b)" ];
+ LR_2 -> LR_5 [ label = "SS(a)" ];
+ LR_2 -> LR_4 [ label = "S(A)" ];
+ LR_5 -> LR_7 [ label = "S(b)" ];
+ LR_5 -> LR_5 [ label = "S(a)" ];
+ LR_6 -> LR_6 [ label = "S(b)" ];
+ LR_6 -> LR_5 [ label = "S(a)" ];
+ LR_7 -> LR_8 [ label = "S(b)" ];
+ LR_7 -> LR_5 [ label = "S(a)" ];
+ LR_8 -> LR_6 [ label = "S(b)" ];
+ LR_8 -> LR_5 [ label = "S(a)" ];
+}
+```
+~~~~~~~~~~~~~~~~
+
+### Ti*k*Z
+Ti*k*Z (cf. [Wikipedia](https://en.wikipedia.org/wiki/PGF/TikZ)) is a
+description language for graphics of any kind that can be used within
+LaTeX (cf. [Wikipedia](https://en.wikipedia.org/wiki/LaTeX)).
+
+Therefore a LaTeX system must be installed on the system. The Ti*k*Z code is
+embedded into a dynamic LaTeX document. This temporary document gets
+translated into a PDF document using LaTeX (`pdflatex`). Finally,
+Inkscape is used to convert the PDF file to the desired format.
+
+Note: We are using Inkscape here to use a stable solution for the
+conversion. Formerly ImageMagick was used instead. ImageMagick is
+not able to convert PDF files. Hence, it uses Ghostscript to do
+so, cf. [1](https://stackoverflow.com/a/6599718/2258393).
+Unfortunately, Ghostscript behaves unpredictably in Windows and
+Linux test cases, cf. [2](https://stackoverflow.com/questions/21774561/some-pdfs-are-converted-improperly-using-imagemagick),
+[3](https://stackoverflow.com/questions/9064706/imagemagic-convert-command-pdf-convertion-with-bad-size-orientation), [4](https://stackoverflow.com/questions/18837093/imagemagic-renders-image-with-black-background),
+[5](https://stackoverflow.com/questions/37392798/pdf-to-svg-is-not-perfect),
+[6](https://stackoverflow.com/q/10288065/2258393), etc. By using Inkscape,
+we need one dependency less and get rid of unexpected Ghostscript issues.
+
+Due to this more complicated process, the use of Ti*k*Z is also more
+complicated overall. The process is error-prone: An insufficiently
+configured LaTeX installation or an insufficiently configured
+Inkscape installation can lead to errors. Overall, this results in
+the following dependencies:
+
+- Any LaTeX installation. This should be configured so that
+missing packages are installed automatically. This filter uses the
+`pdflatex` command which is available by the system's path. Alternatively,
+you can set the `PDFLATEX` environment variable. In case you have to use
+a specific LaTeX version on a pandoc document basis, you might set the
+`pdflatexPath` meta variable.
+
+- An installation of [Inkscape](https://inkscape.org/).
+It is assumed that the `inkscape` command is in the path and can be
+executed from any location. Alternatively, the environment
+variable `INKSCAPE` can be set with a path. If a specific
+version per pandoc document is to be used, the `inkscapePath`
+meta-variable can be set.
+
+In order to use additional LaTeX packages, use the optional
+`additionalPackages` attribute in your document, as in the
+example below.
+
+Example usage from [TikZ
+examples](http://www.texample.net/tikz/examples/parallelepiped/) by
+[Kjell Magne Fauske](http://www.texample.net/tikz/examples/nav1d/):
+
+~~~~~~~~~~~~~~~~
+```{.tikz caption="This is an image, created by **TikZ i.e. LaTeX**."
+ additionalPackages="\usepackage{adjustbox}"}
+\usetikzlibrary{arrows}
+\tikzstyle{int}=[draw, fill=blue!20, minimum size=2em]
+\tikzstyle{init} = [pin edge={to-,thin,black}]
+
+\resizebox{16cm}{!}{%
+ \trimbox{3.5cm 0cm 0cm 0cm}{
+ \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+ \node [int, pin={[init]above:$v_0$}] (a) {$\frac{1}{s}$};
+ \node (b) [left of=a,node distance=2cm, coordinate] {a};
+ \node [int, pin={[init]above:$p_0$}] at (0,0) (c)
+ [right of=a] {$\frac{1}{s}$};
+ \node [coordinate] (end) [right of=c, node distance=2cm]{};
+ \path[->] (b) edge node {$a$} (a);
+ \path[->] (a) edge node {$v$} (c);
+ \draw[->] (c) edge node {$p$} (end) ;
+ \end{tikzpicture}
+ }
+}
+```
+~~~~~~~~~~~~~~~~
+
+### Python
+In order to use Python to generate a diagram, your Python code must store the
+final image data in a temporary file with the correct format. In case you use
+matplotlib for a diagram, add the following line to do so:
+
+```python
+plt.savefig("$DESTINATION$", dpi=300, format="$FORMAT$")
+```
+
+The placeholder `$FORMAT$` gets replaced by the necessary format. Most of the
+time, this will be `png` or `svg`. The second placeholder, `$DESTINATION$`
+gets replaced by the path and file name of the destination. Both placeholders
+can be used as many times as you want. Example usage from the [Matplotlib
+examples](https://matplotlib.org/gallery/lines_bars_and_markers/cohere.html#sphx-glr-gallery-lines-bars-and-markers-cohere-py):
+
+~~~~~~~~~~~~~~~~
+```{.py2image caption="This is an image, created by **Python**."}
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Fixing random state for reproducibility
+np.random.seed(19680801)
+
+dt = 0.01
+t = np.arange(0, 30, dt)
+nse1 = np.random.randn(len(t)) # white noise 1
+nse2 = np.random.randn(len(t)) # white noise 2
+
+# Two signals with a coherent part at 10Hz and a random part
+s1 = np.sin(2 * np.pi * 10 * t) + nse1
+s2 = np.sin(2 * np.pi * 10 * t) + nse2
+
+fig, axs = plt.subplots(2, 1)
+axs[0].plot(t, s1, t, s2)
+axs[0].set_xlim(0, 2)
+axs[0].set_xlabel('time')
+axs[0].set_ylabel('s1 and s2')
+axs[0].grid(True)
+
+cxy, f = axs[1].cohere(s1, s2, 256, 1. / dt)
+axs[1].set_ylabel('coherence')
+
+fig.tight_layout()
+plt.savefig("$DESTINATION$", dpi=300, format="$FORMAT$")
+```
+~~~~~~~~~~~~~~~~
+
+Precondition to use Python is a Python environment which contains all
+necessary libraries you want to use. To use, for example, the standard
+[Anaconda Python](https://www.anaconda.com/distribution/) environment
+on a Microsoft Windows system ...
+
+- set the environment variable `PYTHON` or the meta key `pythonPath`
+to `c:\ProgramData\Anaconda3\python.exe`
+
+- set the environment variable `PYTHON_ACTIVATE` or the meta
+key `activatePythonPath` to `c:\ProgramData\Anaconda3\Scripts\activate.bat`.
+
+Pandoc will activate this Python environment and start Python with your code.
+
+## How to run pandoc
+This section shows how to call pandoc in order to use this filter with
+meta keys. The following command assumes that the filters are stored in the
+subdirectory `filters`. Further, this is an example for a Microsoft Windows
+system.
+
+Command to use PlantUML (a single line):
+
+```
+pandoc.exe README.md -f markdown -t docx --self-contained --standalone --lua-filter=filters\diagram-generator.lua --metadata=plantumlPath:"c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar" --metadata=javaPath:"c:\Program Files\Java\jre1.8.0_201\bin\java.exe" -o README.docx
+```
+
+All available environment variables:
+
+- `PLANTUML` e.g. `c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar`; Default: `plantuml.jar`
+- `INKSCAPE` e.g. `c:\Program Files\Inkscape\inkscape.exe`; Default: `inkscape`
+- `PYTHON` e.g. `c:\ProgramData\Anaconda3\python.exe`; Default: n/a
+- `PYTHON_ACTIVATE` e.g. `c:\ProgramData\Anaconda3\Scripts\activate.bat`; Default: n/a
+- `JAVA_HOME` e.g. `c:\Program Files\Java\jre1.8.0_201`; Default: n/a
+- `DOT` e.g. `c:\ProgramData\chocolatey\bin\dot.exe`; Default: `dot`
+- `PDFLATEX` e.g. `c:\Program Files\MiKTeX 2.9\miktex\bin\x64\pdflatex.exe`; Default: `pdflatex`
+
+All available meta keys:
+
+- `plantumlPath`
+- `inkscapePath`
+- `pythonPath`
+- `activatePythonPath`
+- `javaPath`
+- `dotPath`
+- `pdflatexPath`
diff --git a/paper/lua-filters/diagram-generator/diagram-generator.lua b/paper/lua-filters/diagram-generator/diagram-generator.lua
new file mode 100644
index 0000000..aef2ea4
--- /dev/null
+++ b/paper/lua-filters/diagram-generator/diagram-generator.lua
@@ -0,0 +1,295 @@
+--[[
+ This Lua filter is used to create images with or without captions from
+ code blocks. Currently PlantUML, GraphViz, Tikz, and Python can be
+ processed. For further details, see README.md.
+
+ Thanks to @floriandd2ba and @jgm for the initial implementation of
+ the PlantUML filter, which I used as a template. Thanks also @muxueqz
+ for the code to generate a GraphViz image.
+]]
+
+-- The PlantUML path. Uses the environment variable PLANTUML if it is set,
+-- otherwise the value "plantuml.jar" (local PlantUML version). In order to
+-- define a PlantUML version per pandoc document, use the meta data to define
+-- the key "plantumlPath".
+local plantumlPath = os.getenv("PLANTUML") or "plantuml.jar"
+
+-- The Inkscape path. In order to define an Inkscape version per pandoc
+-- document, use the meta data to define the key "inkscapePath".
+local inkscapePath = os.getenv("INKSCAPE") or "inkscape"
+
+-- The Python path. In order to define a Python version per pandoc document,
+-- use the meta data to define the key "pythonPath".
+local pythonPath = os.getenv("PYTHON")
+
+-- The Python environment's activate script. Can be set on a per document
+-- basis by using the meta data key "activatePythonPath".
+local pythonActivatePath = os.getenv("PYTHON_ACTIVATE")
+
+-- The Java path: <JAVA_HOME>/bin/java when the environment variable
+-- JAVA_HOME is set, otherwise the plain "java" command. In order to
+-- define a Java version per pandoc document, use the meta data to define
+-- the key "javaPath".
+local javaHome = os.getenv("JAVA_HOME")
+local javaPath = "java"
+if javaHome then
+  -- the first character of package.config is the directory separator
+  local sep = package.config:sub(1,1)
+  javaPath = javaHome .. sep .. "bin" .. sep .. "java"
+end
+
+-- The dot (Graphviz) path. In order to define a dot version per pandoc
+-- document, use the meta data to define the key "dotPath".
+local dotPath = os.getenv("DOT") or "dot"
+
+-- The pdflatex path. In order to define a pdflatex version per pandoc
+-- document, use the meta data to define the key "pdflatexPath".
+local pdflatexPath = os.getenv("PDFLATEX") or "pdflatex"
+
+-- Output formats that potentially cannot use SVG vector graphics. For
+-- these a different format, PNG, is generated:
+local pngOnlyFormats = {
+  docx = true,
+  pptx = true,
+  rtf = true,
+}
+
+-- The default format is SVG i.e. vector graphics:
+local filetype = "svg"
+local mimetype = "image/svg+xml"
+
+if pngOnlyFormats[FORMAT] then
+  filetype = "png"
+  mimetype = "image/png"
+end
+
+-- Read the tool paths from the document's meta data. This function must
+-- be called first to get the desired paths. If one of these meta options
+-- was set, it gets used instead of the corresponding environment
+-- variable. Meta values written in a YAML meta data block arrive as
+-- pandoc inline lists rather than Lua strings, so every value is
+-- converted to a plain string before it is used as a command or file
+-- name.
+function Meta(meta)
+  -- Return meta[key] as a string, or `current` when the key is unset.
+  local function pathFromMeta(key, current)
+    local value = meta[key]
+    if not value then
+      return current
+    end
+    if type(value) == "string" then
+      return value
+    end
+    return pandoc.utils.stringify(value)
+  end
+
+  plantumlPath = pathFromMeta("plantumlPath", plantumlPath)
+  inkscapePath = pathFromMeta("inkscapePath", inkscapePath)
+  pythonPath = pathFromMeta("pythonPath", pythonPath)
+  pythonActivatePath = pathFromMeta("activatePythonPath", pythonActivatePath)
+  javaPath = pathFromMeta("javaPath", javaPath)
+  dotPath = pathFromMeta("dotPath", dotPath)
+  pdflatexPath = pathFromMeta("pdflatexPath", pdflatexPath)
+end
+
+-- Render PlantUML source `puml` to an image of type `filetype` by piping
+-- it through plantuml.jar (for the options cf. PlantUML help). Returns
+-- the output of the command:
+local function plantuml(puml, filetype)
+  local args = {"-jar", plantumlPath, "-t" .. filetype, "-pipe", "-charset", "UTF8"}
+  -- the parentheses keep the result to a single value
+  return (pandoc.pipe(javaPath, args, puml))
+end
+
+-- Render Graphviz source `code` to an image of type `filetype` by piping
+-- it through dot (thanks @muxueqz for this code). Returns the output of
+-- the command:
+local function graphviz(code, filetype)
+  local args = {"-T" .. filetype}
+  -- the parentheses keep the result to a single value
+  return (pandoc.pipe(dotPath, args, code))
+end
+
+-- Compile LaTeX with TikZ code to an image.
+--
+--   src                 the TikZ code placed inside the document body
+--   filetype            "png" or "svg"; any other value yields no image
+--   additionalPackages  optional LaTeX code for the preamble, e.g.
+--                       \usepackage lines
+--
+-- Returns the image data as a string, or nil when the format is unknown
+-- or the converted image could not be read. Raises an error when the
+-- LaTeX source cannot be written or when pdflatex fails.
+local function tikz2image(src, filetype, additionalPackages)
+
+  -- Define file names:
+  local tmpDir = "./tmp-latex/"
+  local tmp = tmpDir .. "file"
+
+  -- The Inkscape export options for every format we can convert to:
+  local exportOptions = {
+    png = " --export-png=" .. tmp .. ".png --export-dpi=300",
+    svg = " --export-plain-svg=" .. tmp .. ".svg",
+  }
+
+  -- Remove the temporary LaTeX files:
+  local function removeLatexFiles()
+    for _, ext in ipairs({".tex", ".pdf", ".log", ".aux"}) do
+      os.remove(tmp .. ext)
+    end
+  end
+
+  -- Ensure, that the tmp directory exists:
+  os.execute("mkdir -p tmp-latex")
+
+  -- Build and write the LaTeX document:
+  local f, openErr = io.open(tmp .. ".tex", 'w')
+  if not f then
+    error("Could not write the LaTeX source: " .. tostring(openErr), 0)
+  end
+  f:write("\\documentclass{standalone}\n\\usepackage{tikz}\n")
+
+  -- Any additional package(s) are desired? The newline keeps a trailing
+  -- "%" comment in them from swallowing \begin{document}:
+  if additionalPackages then
+    f:write(additionalPackages)
+    f:write("\n")
+  end
+
+  f:write("\\begin{document}\n")
+  f:write(src)
+  f:write("\n\\end{document}\n")
+  f:close()
+
+  -- Execute the LaTeX compiler. If it fails, remove the temporary files
+  -- before passing the error on to the caller:
+  local ok, latexErr = pcall(pandoc.pipe, pdflatexPath,
+    {'-output-directory', tmpDir, tmp}, '')
+  if not ok then
+    removeLatexFiles()
+    error(latexErr, 0)
+  end
+
+  -- Unfortunately, continuation is only possible, if we know the actual
+  -- format:
+  local imgData = nil
+  local exportOption = exportOptions[filetype]
+  if exportOption then
+    local outfile = tmp .. "." .. filetype
+
+    -- We know the desired format. Thus, execute Inkscape:
+    os.execute("\"" .. inkscapePath .. "\""
+      .. " --without-gui --file=" .. tmp .. ".pdf" .. exportOption)
+
+    -- Read the image, if available:
+    local r = io.open(outfile, 'rb')
+    if r then
+      imgData = r:read("*all")
+      r:close()
+    end
+
+    -- Delete the image tmp file:
+    os.remove(outfile)
+  end
+
+  removeLatexFiles()
+
+  return imgData
+end
+
+-- Run Python to generate an image.
+--
+--   code      Python source; every "$FORMAT$" in it is replaced by
+--             `filetype` and every "$DESTINATION$" by the path of the
+--             file the script has to write the image to
+--   filetype  the image format, also used as file extension
+--
+-- Returns the image data as a string, or nil when the destination file
+-- could not be opened after the script ran. Raises an error when no
+-- Python path is configured or the script file cannot be written.
+local function py2image(code, filetype)
+
+  -- Without an interpreter there is nothing to run. Fail with a clear
+  -- message before any temporary file gets created:
+  if not pythonPath then
+    error("Python path is not set; define the environment variable "
+      .. "PYTHON or the meta data key 'pythonPath'", 0)
+  end
+
+  -- Define the temp files. On POSIX systems os.tmpname also creates a
+  -- file with the returned name, so the base name is kept in order to
+  -- remove that file again:
+  local outbase = os.tmpname()
+  local outfile = string.format('%s.%s', outbase, filetype)
+  local pyfile = os.tmpname()
+
+  -- Replace the placeholders in the Python code. Functions are used as
+  -- replacement values so that a "%" in the file type or in the path is
+  -- inserted literally:
+  local extendedCode = string.gsub(code, "%$FORMAT%$",
+    function () return filetype end)
+  extendedCode = string.gsub(extendedCode, "%$DESTINATION%$",
+    function () return outfile end)
+
+  -- Write the Python code:
+  local f, openErr = io.open(pyfile, 'w')
+  if not f then
+    os.remove(outbase)
+    os.remove(pyfile)
+    error("Could not write the Python script: " .. tostring(openErr), 0)
+  end
+  f:write(extendedCode)
+  f:close()
+
+  -- Execute Python in the desired environment:
+  local pycmd = pythonPath .. ' ' .. pyfile
+  local command = pythonActivatePath
+    and pythonActivatePath .. ' && ' .. pycmd
+    or pycmd
+  os.execute(command)
+
+  -- Try to open the written image:
+  local r = io.open(outfile, 'rb')
+  local imgData = nil
+
+  -- When the image exists, read it:
+  if r then
+    imgData = r:read("*all")
+    r:close()
+  else
+    io.stderr:write(string.format("File '%s' could not be opened\n", outfile))
+  end
+
+  -- Delete the tmp files:
+  os.remove(pyfile)
+  os.remove(outfile)
+  os.remove(outbase)
+
+  return imgData
+end
+
+-- Replace a code block whose first class names a known diagram generator
+-- by a paragraph that contains the generated image.
+--
+-- Returns nil, i.e. leaves the block unchanged, when the first class has
+-- no converter or when no image could be generated. In the latter case
+-- the reason is written to stderr.
+function CodeBlock(block)
+
+  -- Using a table with all known generators i.e. converters:
+  local converters = {
+    plantuml = plantuml,
+    graphviz = graphviz,
+    tikz = tikz2image,
+    py2image = py2image,
+  }
+
+  -- Check if a converter exists for this block. If not, return the block
+  -- unchanged.
+  local generator = block.classes[1]
+  local img_converter = converters[generator]
+  if not img_converter then
+    return nil
+  end
+
+  -- Call the correct converter which belongs to the used class:
+  local success, img = pcall(img_converter, block.text,
+    filetype, block.attributes["additionalPackages"])
+
+  if not success then
+    -- an error occurred; img contains the error message
+    io.stderr:write(tostring(img))
+    io.stderr:write('\n')
+    return nil
+  end
+
+  if not img then
+    -- the converter ran, but returned no image data
+    io.stderr:write(string.format(
+      "The '%s' converter did not return an image\n", tostring(generator)))
+    return nil
+  end
+
+  -- Hash the image content to get the figure name:
+  local fname = pandoc.sha1(img) .. "." .. filetype
+
+  -- Store the data in the media bag:
+  pandoc.mediabag.insert(fname, mimetype, img)
+
+  -- Define the default caption:
+  local caption = {}
+  local enableCaption = nil
+
+  -- If the user defines a caption, use the content of its first block.
+  -- An empty caption, or one whose first block has no inline content,
+  -- is treated like a missing caption:
+  local captionSource = block.attributes["caption"]
+  if captionSource then
+    local firstBlock = pandoc.read(captionSource).blocks[1]
+    if firstBlock and firstBlock.content then
+      caption = firstBlock.content
+
+      -- This is pandoc's current hack to enforce a caption:
+      enableCaption = "fig:"
+    end
+  end
+
+  -- Create a new image for the document's structure. Attach the user's
+  -- caption. Also use a hack (fig:) to make pandoc create a figure,
+  -- i.e. attach a caption to the image.
+  local imgObj = pandoc.Image(caption, fname, enableCaption)
+
+  -- Now, transfer the attribute "name" from the code block to the new
+  -- image. It might be used by the figure numbering lua filter. If
+  -- figure numbering is not used, this additional attribute is ignored
+  -- as well.
+  if block.attributes["name"] then
+    imgObj.attributes["name"] = block.attributes["name"]
+  end
+
+  -- Finally, put the image inside an empty paragraph. By returning the
+  -- resulting paragraph object, the source code block gets replaced by
+  -- the image:
+  return pandoc.Para{ imgObj }
+end
+
+-- Normally, pandoc will run the function in the built-in order Inlines ->
+-- Blocks -> Meta -> Pandoc. We instead want Meta -> Blocks. Thus, we must
+-- define our custom order:
+return {
+ {Meta = Meta},
+ {CodeBlock = CodeBlock},
+}
diff --git a/paper/lua-filters/diagram-generator/sample.md b/paper/lua-filters/diagram-generator/sample.md
new file mode 100644
index 0000000..231ccdc
--- /dev/null
+++ b/paper/lua-filters/diagram-generator/sample.md
@@ -0,0 +1,244 @@
+# Diagram Generator Lua Filter
+
+## Introduction
+This Lua filter is used to create images with or without captions from code
+blocks. Currently PlantUML, Graphviz, Ti*k*Z and Python can be processed.
+This document also serves as a test document, which is why the subsequent
+test diagrams are integrated in every supported language.
+
+## Prerequisites
+To be able to use this Lua filter, the respective external tools must be
+installed. However, it is sufficient if the tools to be used are installed.
+If you only want to use PlantUML, you don't need LaTeX or Python, etc.
+
+### PlantUML
+To use PlantUML, you must install PlantUML itself. See the
+[PlantUML website](http://plantuml.com/) for more details. It should be
+noted that PlantUML is a Java program and therefore Java must also
+be installed.
+
+By default, this filter expects the plantuml.jar file to be in the
+working directory. Alternatively, the environment variable
+`PLANTUML` can be set with a path. If, for example, a specific
+PlantUML version is to be used per pandoc document, the
+`plantumlPath` meta variable can be set.
+
+Furthermore, this filter assumes that Java is located in the
+system or user path. This means that from any place of the system
+the `java` command is understood. Alternatively, the `JAVA_HOME`
+environment variable gets used. To use a specific Java version per
+pandoc document, use the `javaPath` meta variable. Please notice
+that `JAVA_HOME` must be set to the java's home directory e.g.
+`c:\Program Files\Java\jre1.8.0_201\` whereas `javaPath` must be
+set to the absolute path of `java.exe` e.g.
+`c:\Program Files\Java\jre1.8.0_201\bin\java.exe`.
+
+Example usage:
+
+```{.plantuml caption="This is an image, created by **PlantUML**."}
+@startuml
+Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response
+Alice -> Bob: Another authentication Request Alice <-- Bob: another Response
+@enduml
+```
+
+### Graphviz
+To use Graphviz you only need to install Graphviz, as you can read
+on its [website](http://www.graphviz.org/). There are no other
+dependencies.
+
+This filter assumes that the `dot` command is located in the path
+and therefore can be used from any location. Alternatively, you can
+set the environment variable `DOT` or use the pandoc's meta variable
+`dotPath`.
+
+Example usage from [the Graphviz
+gallery](https://graphviz.gitlab.io/_pages/Gallery/directed/fsm.html):
+
+```{.graphviz caption="This is an image, created by **Graphviz**'s dot."}
+digraph finite_state_machine {
+ rankdir=LR;
+ size="8,5"
+ node [shape = doublecircle]; LR_0 LR_3 LR_4 LR_8;
+ node [shape = circle];
+ LR_0 -> LR_2 [ label = "SS(B)" ];
+ LR_0 -> LR_1 [ label = "SS(S)" ];
+ LR_1 -> LR_3 [ label = "S($end)" ];
+ LR_2 -> LR_6 [ label = "SS(b)" ];
+ LR_2 -> LR_5 [ label = "SS(a)" ];
+ LR_2 -> LR_4 [ label = "S(A)" ];
+ LR_5 -> LR_7 [ label = "S(b)" ];
+ LR_5 -> LR_5 [ label = "S(a)" ];
+ LR_6 -> LR_6 [ label = "S(b)" ];
+ LR_6 -> LR_5 [ label = "S(a)" ];
+ LR_7 -> LR_8 [ label = "S(b)" ];
+ LR_7 -> LR_5 [ label = "S(a)" ];
+ LR_8 -> LR_6 [ label = "S(b)" ];
+ LR_8 -> LR_5 [ label = "S(a)" ];
+}
+```
+
+### Ti*k*Z
+Ti*k*Z (cf. [Wikipedia](https://en.wikipedia.org/wiki/PGF/TikZ)) is a
+description language for graphics of any kind that can be used within
+LaTeX (cf. [Wikipedia](https://en.wikipedia.org/wiki/LaTeX)).
+
+Therefore a LaTeX system must be installed on the system. The Ti*k*Z code is
+embedded into a dynamic LaTeX document. This temporary document gets
+translated into a PDF document using LaTeX (`pdflatex`). Finally,
+Inkscape is used to convert the PDF file to the desired format.
+
+Note: We are using Inkscape here to use a stable solution for the
+conversion. Formerly ImageMagick was used instead. ImageMagick is
+not able to convert PDF files. Hence, it uses Ghostscript to do
+so, cf. [1](https://stackoverflow.com/a/6599718/2258393).
+Unfortunately, Ghostscript behaves unpredictably in Windows and
+Linux test cases, cf. [2](https://stackoverflow.com/questions/21774561/some-pdfs-are-converted-improperly-using-imagemagick),
+[3](https://stackoverflow.com/questions/9064706/imagemagic-convert-command-pdf-convertion-with-bad-size-orientation), [4](https://stackoverflow.com/questions/18837093/imagemagic-renders-image-with-black-background),
+[5](https://stackoverflow.com/questions/37392798/pdf-to-svg-is-not-perfect),
+[6](https://stackoverflow.com/q/10288065/2258393), etc. By using Inkscape,
+we need one dependency less and get rid of unexpected Ghostscript issues.
+
+Due to this more complicated process, the use of Ti*k*Z is also more
+complicated overall. The process is error-prone: An insufficiently
+configured LaTeX installation or an insufficiently configured
+Inkscape installation can lead to errors. Overall, this results in
+the following dependencies:
+
+- Any LaTeX installation. This should be configured so that
+missing packages are installed automatically. This filter uses the
+`pdflatex` command which is available by the system's path. Alternatively,
+you can set the `PDFLATEX` environment variable. In case you have to use
+a specific LaTeX version on a pandoc document basis, you might set the
+`pdflatexPath` meta variable.
+
+- An installation of [Inkscape](https://inkscape.org/).
+It is assumed that the `inkscape` command is in the path and can be
+executed from any location. Alternatively, the environment
+variable `INKSCAPE` can be set with a path. If a specific
+version per pandoc document is to be used, the `inkscapePath`
+meta-variable can be set.
+
+In order to use additional LaTeX packages, use the optional
+`additionalPackages` attribute in your document, as in the
+example below.
+
+Example usage from [TikZ
+examples](http://www.texample.net/tikz/examples/parallelepiped/) by
+[Kjell Magne Fauske](http://www.texample.net/tikz/examples/nav1d/):
+
+```{.tikz caption="This is an image, created by **TikZ i.e. LaTeX**."
+ additionalPackages="\usepackage{adjustbox}"}
+\usetikzlibrary{arrows}
+\tikzstyle{int}=[draw, fill=blue!20, minimum size=2em]
+\tikzstyle{init} = [pin edge={to-,thin,black}]
+
+\resizebox{16cm}{!}{%
+ \trimbox{3.5cm 0cm 0cm 0cm}{
+ \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+ \node [int, pin={[init]above:$v_0$}] (a) {$\frac{1}{s}$};
+ \node (b) [left of=a,node distance=2cm, coordinate] {a};
+ \node [int, pin={[init]above:$p_0$}] at (0,0) (c)
+ [right of=a] {$\frac{1}{s}$};
+ \node [coordinate] (end) [right of=c, node distance=2cm]{};
+ \path[->] (b) edge node {$a$} (a);
+ \path[->] (a) edge node {$v$} (c);
+ \draw[->] (c) edge node {$p$} (end) ;
+ \end{tikzpicture}
+ }
+}
+```
+
+### Python
+In order to use Python to generate a diagram, your Python code must store the
+final image data in a temporary file with the correct format. In case you use
+matplotlib for a diagram, add the following line to do so:
+
+```python
+plt.savefig("$DESTINATION$", dpi=300, format="$FORMAT$")
+```
+
+The placeholder `$FORMAT$` gets replaced by the necessary format. Most of the
+time, this will be `png` or `svg`. The second placeholder, `$DESTINATION$`
+gets replaced by the path and file name of the destination. Both placeholders
+can be used as many times as you want. Example usage from the [Matplotlib
+examples](https://matplotlib.org/gallery/lines_bars_and_markers/cohere.html#sphx-glr-gallery-lines-bars-and-markers-cohere-py):
+
+```{.py2image caption="This is an image, created by **Python**."}
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Fixing random state for reproducibility
+np.random.seed(19680801)
+
+dt = 0.01
+t = np.arange(0, 30, dt)
+nse1 = np.random.randn(len(t)) # white noise 1
+nse2 = np.random.randn(len(t)) # white noise 2
+
+# Two signals with a coherent part at 10Hz and a random part
+s1 = np.sin(2 * np.pi * 10 * t) + nse1
+s2 = np.sin(2 * np.pi * 10 * t) + nse2
+
+fig, axs = plt.subplots(2, 1)
+axs[0].plot(t, s1, t, s2)
+axs[0].set_xlim(0, 2)
+axs[0].set_xlabel('time')
+axs[0].set_ylabel('s1 and s2')
+axs[0].grid(True)
+
+cxy, f = axs[1].cohere(s1, s2, 256, 1. / dt)
+axs[1].set_ylabel('coherence')
+
+fig.tight_layout()
+plt.savefig("$DESTINATION$", dpi=300, format="$FORMAT$")
+```
+
+Precondition to use Python is a Python environment which contains all
+necessary libraries you want to use. To use, for example, the standard
+[Anaconda Python](https://www.anaconda.com/distribution/) environment
+on a Microsoft Windows system ...
+
+- set the environment variable `PYTHON` or the meta key `pythonPath`
+to `c:\ProgramData\Anaconda3\python.exe`
+
+- set the environment variable `PYTHON_ACTIVATE` or the meta
+key `activatePythonPath` to `c:\ProgramData\Anaconda3\Scripts\activate.bat`.
+
+Pandoc will activate this Python environment and start Python with your code.
+
+## How to run pandoc
+This section shows how to call pandoc in order to use this filter with
+meta keys. The following command assumes that the filters are stored in the
+subdirectory `filters`. Further, this is an example for a Microsoft Windows
+system.
+
+Command to use PlantUML (a single line):
+
+```
+pandoc.exe README.md -f markdown -t docx --self-contained --standalone --lua-filter=filters\diagram-generator.lua --metadata=plantumlPath:"c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar" --metadata=javaPath:"c:\Program Files\Java\jre1.8.0_201\bin\java.exe" -o README.docx
+```
+
+All available environment variables:
+
+- `PLANTUML` e.g. `c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar`; Default: `plantuml.jar`
+- `INKSCAPE` e.g. `c:\Program Files\Inkscape\inkscape.exe`; Default: `inkscape`
+- `PYTHON` e.g. `c:\ProgramData\Anaconda3\python.exe`; Default: n/a
+- `PYTHON_ACTIVATE` e.g. `c:\ProgramData\Anaconda3\Scripts\activate.bat`; Default: n/a
+- `JAVA_HOME` e.g. `c:\Program Files\Java\jre1.8.0_201`; Default: n/a
+- `DOT` e.g. `c:\ProgramData\chocolatey\bin\dot.exe`; Default: `dot`
+- `PDFLATEX` e.g. `c:\Program Files\MiKTeX 2.9\miktex\bin\x64\pdflatex.exe`; Default: `pdflatex`
+
+All available meta keys:
+
+- `plantumlPath`
+- `inkscapePath`
+- `pythonPath`
+- `activatePythonPath`
+- `javaPath`
+- `dotPath`
+- `pdflatexPath` \ No newline at end of file
diff --git a/paper/lua-filters/minted/Makefile b/paper/lua-filters/minted/Makefile
new file mode 100644
index 0000000..5d509f9
--- /dev/null
+++ b/paper/lua-filters/minted/Makefile
@@ -0,0 +1,65 @@
+.PHONY: all
+all: sample_beamer.pdf sample_latex.pdf sample.html
+
+# NOTE: `pandoc_inputs` can have multiple filenames if you want to send `pandoc`
+# more than one input file at once. In the commands for the targets that depend
+# on `pandoc_inputs` you will see a pattern `$^ > $@`. It's less magic than it
+# seems, but useful to point out if you have not seen these before. They are
+# called "Automatic Variables", and more documentation can be found here:
+#
+# https://www.gnu.org/software/make/manual/html_node/Automatic-Variables.html
+#
+# So by depending on $(pandoc_inputs) and using $^ as the input files to
+# `pandoc`, $^ will expand to all filenames in `pandoc_inputs` and the target
+# will re-run when the timestamp of _any_ file listed in `pandoc_inputs` is
+# updated. By redirecting the output to $@, it will send the `pandoc` output to
+# the target name. In the examples below, $@ expands to either
+# `sample_beamer.tex`, `sample_latex.tex`, or `sample.html` (depending on the
+# target name).
+#
+# TL;DR: You should be able to copy-paste the commands below and just rename the
+# target names to match whatever output filenames you want.
+pandoc_inputs := sample.md
+
+# Sample beamer presentation.
+sample_beamer.tex: $(pandoc_inputs)
+	pandoc -s -t beamer --no-highlight --lua-filter=minted.lua $^ > $@
+
+sample_beamer.pdf: sample_beamer.tex
+	latexmk -pdf -shell-escape -jobname=sample_beamer sample_beamer
+
+# Sample latex document.
+sample_latex.tex: $(pandoc_inputs)
+	pandoc -s -t latex --no-highlight --lua-filter=minted.lua $^ > $@
+
+sample_latex.pdf: sample_latex.tex
+	latexmk -pdf -shell-escape -jobname=sample_latex sample_latex
+
+# Sample html5 document.
+sample.html: $(pandoc_inputs)
+	pandoc -s -t html5 --lua-filter=minted.lua $^ > $@
+
+# ---
+
+# NOTE: the `rm` commands below list every file explicitly instead of using
+# brace expansion (`sample_beamer.{nav,snm,vrb}`). `make` runs recipes with
+# `/bin/sh` by default, and brace expansion is not part of POSIX sh: with a
+# shell such as `dash` the braces are taken literally and nothing is removed.
+.PHONY: clean realclean
+clean:
+	@# latexmk errors if no auxiliary files exist to cleanup. Using `|| true`
+	@# just makes it so that the subsequent commands will also execute.
+	latexmk -c sample_beamer >/dev/null 2>&1 || true
+	@# latexmk does not clean all beamer files
+	rm -f sample_beamer.nav sample_beamer.snm sample_beamer.vrb
+	rm -rf _minted-sample_beamer/
+	latexmk -c sample_latex >/dev/null 2>&1 || true
+	rm -rf _minted-sample_latex/
+
+realclean: clean
+	rm -f sample_beamer.tex sample_beamer.pdf
+	rm -f sample_latex.tex sample_latex.pdf
+	rm -f sample.html
+
+.PHONY: test lint
+lint:
+	flake8 --max-line-length=80 run_minted_tests.py background_color.py
+
+test:
+	@./run_minted_tests.py
diff --git a/paper/lua-filters/minted/README.md b/paper/lua-filters/minted/README.md
new file mode 100644
index 0000000..b26491d
--- /dev/null
+++ b/paper/lua-filters/minted/README.md
@@ -0,0 +1,316 @@
+# minted
+
+This filter enables users to use the [`minted`][minted] package with the
+`beamer` and `latex` writers. Users may attach any desired `minted` specific
+styling / attributes to their code-blocks (or via document metadata). These
+`minted` specific attributes will be _removed_ for any writers that are not
+`beamer` or `latex`, since many of the `minted` options require using `latex`
+specific syntax that can cause problems in other output formats. For example,
+if the `fontsize=\footnotesize` attribute were applied to a code block, an
+`html` export would include `data-fontsize="\footnotesize"`, which may produce
+errors or more commonly be entirely meaningless for non-latex writers.
+
+The `minted` package will be used as a _replacement_ for the existing `pandoc`
+inline code and code block elements. Behind the scenes, `minted` builds on top
+of the `fancyvrb` latex package, using [pygments][pygments] to perform the
+highlighting. The `minted` package contains _many_ options for customizing
+output; users are encouraged to read / review section 5.3 of the
+[minted documentation][minted_docs]. **This filter does not make any attempts
+to validate arguments supplied to the `minted` package**. Invalid / conflicting
+arguments are a usage error.
+
+**Contents**
+
+- [Setup](#setup)
+ - [LaTeX Preamble Configuration](#latex-preamble-configuration)
+ - [PDF Compilation](#pdf-compilation)
+- [Minted Filter Settings](#minted-filter-settings)
+ - [Default Settings](#default-settings)
+ - [All Metadata Settings](#all-metadata-settings)
+ - [`no_default_autogobble`](#no_default_autogobble-boolean)
+ - [`no_mintinline`](#no_mintinline-boolean)
+ - [`default_block_language`](#default_block_language-string)
+ - [`default_inline_language`](#default_inline_language-string)
+ - [`block_attributes`](#block_attributes-list-of-strings)
+ - [`inline_attributes`](#inline_attributes-list-of-strings)
+- [Important Usage Notes](#important-usage-notes)
+- [Bonus](#bonus)
+
+# Setup
+
+## LaTeX Preamble Configuration
+
+Since this filter will emit `\mintinline` commands for inline code, and
+`\begin{minted} ... \end{minted}` environments for code blocks, you must ensure
+that your document includes the `minted` package in the preamble of your
+`beamer` or `latex` document. The filter cannot accomplish this for you.
+
+**Option 1**
+
+Use the `header-includes` feature of `pandoc` (`-H` / `--include-in-header`).
+This will be injected into the preamble section of your `beamer` or `latex`
+document. The bare minimum you need in this file is
+
+```latex
+\usepackage{minted}
+```
+
+However, there are many other things you can set here (related or unrelated to
+this filter), and this is a good opportunity to perform some global setup on the
+`minted` package. Some examples:
+
+```latex
+\usepackage{minted}
+
+% Set the `style=tango` attribute for all minted blocks. Can still be overridden
+% per block (e.g., you want to change just one). Run `pygmentize -L` to see
+% all available options.
+\usemintedstyle{tango}
+
+% Depending on which pygments style you choose, comments and preprocessor
+% directives may be italic. The `tango` style is one of these. This disables
+% all italics in the `minted` environment.
+\AtBeginEnvironment{minted}{\let\itshape\relax}
+
+% This disables italics for the `\mintinline` commands.
+% Credit: https://tex.stackexchange.com/a/469702/113687
+\usepackage{xpatch}
+\xpatchcmd{\mintinline}{\begingroup}{\begingroup\let\itshape\relax}{}{}
+```
+
+The `minted` package has many options, see the
+[minted documentation][minted_docs] for more information. For example, see the
+`bgcolor` option for the `minted` package. This "header-include" file would be
+an excellent location for any `\definecolor`s that you want to use with `bgcolor`.
+
+**Option 1.5**
+
+You can also set `header-includes` in the metadata of your document. The above
+example could be set as (noting the escaped backslashes):
+
+```yaml
+colorlinks: true
+header-includes:
+ # Include the minted package, set global style, define colors, etc.
+ - "\\usepackage{minted}"
+ - "\\usemintedstyle{tango}"
+ # Prevent italics in the `minted` environment.
+ - "\\AtBeginEnvironment{minted}{\\let\\itshape\\relax}"
+ # Prevent italics in the `\mintinline` command.
+ - "\\usepackage{xpatch}"
+ - "`\\xpatchcmd{\\mintinline}{\\begingroup}{\\begingroup\\let\\itshape\\relax}{}{}`{=latex}"
+```
+
+Note on the last line calling `\xpatchcmd`, we escape the backslashes and
+additionally force `pandoc` to treat this as `latex` code by making it an inline
+`latex` code element. See [pandoc issue 2139 (comment)][pandoc_issue_2139] for
+more information.
+
+Formally, you may want to apply the ``-"`\\raw_tex`{=latex}"`` trick to all
+metadata to indicate it is `latex` specific code. However, since `pandoc`
+strips out any raw `latex` when converting to other writers, it isn't necessary.
+
+**Option 2**
+
+You can also create your own custom `beamer` or `latex` template to have much
+finer control over what is / is not included in your document. You may obtain
+a copy of the template that `pandoc` uses by default by running
+`pandoc -D beamer` or `pandoc -D latex` depending on your document type.
+
+After you have modified the template to suit your needs (including at the very
+least a `\usepackage{minted}`), specify your template file to `pandoc` using
+the `--template <path/to/template/file>` command line argument.
+
+## PDF Compilation
+
+To compile a PDF, there are two things that the `minted` package requires be
+available: an escaped shell to be able to run external commands (the
+`-shell-escape` command line flag), and the ability to create and later read
+auxiliary files (`minted` runs `pygmentize` for the highlighting).
+
+At the time of writing this, only one of these is accessible using `pandoc`
+directly. One may pass `--pdf-engine-opt=-shell-escape` to forward the
+`-shell-escape` flag to the latex engine being used. Unfortunately, though,
+the second component (related to temporary files being created) is not supported
+by `pandoc`. See [pandoc issue 4271][pandoc_issue_4271].
+
+**However**, in reality this is a minor issue that can easily be worked around.
+Instead of generating `md => pdf`, you just use `pandoc` to generate `md => tex`
+and then compile `tex => pdf` yourself. See the [sample Makefile](Makefile) for
+examples of how to execute both stages. **Furthermore**, you will notice a
+significant advantage of managing the `pdf` compilation yourself: the generated
+`minted` files are cached and unless you `make clean` (or remove them manually),
+unchanged code listings will be reused. That is, you will have faster
+compilation times :slightly_smiling_face:
+
+# Minted Filter Settings
+
+Direct control over the settings of this filter is performed by setting
+sub-keys of a `minted` metadata key for your document.
+
+## Default Settings
+
+By default, this filter
+
+1. Transforms all inline `Code` elements to `\mintinline`. This can be disabled
+ globally by setting `no_mintinline: true`.
+
+2. Transforms all `CodeBlock` elements to `\begin{minted} ... \end{minted}` raw
+ latex code. This cannot be disabled.
+
+3. Both (1) and (2) default to the `"text"` pygments lexer, meaning that inline
+ code or code blocks without a specific code class applied will receive no
+ syntax highlighting. This can be changed globally by setting
+ `default_block_language: "lexer"` or `default_inline_language: "lexer"`.
+
+4. All `CodeBlock` elements have the `autogobble` attribute applied to them,
+ which informs `minted` to trim all common preceding whitespace. This can be
+ disabled globally by setting `no_default_autogobble: true`. However, doing
+ this is **strongly discouraged**. Consider a code block nested underneath
+ a list item. Pandoc will (correctly) generate indented code, meaning you
+ will need to manually inform `minted` to `gobble=indent` where `indent` is
+ the number of spaces to trim. Note that `pandoc` may not reproduce the same
+ indentation level of the original document.
+
+## All Metadata Settings
+
+Each of the following are nested under the `minted` metadata key.
+
+### `no_default_autogobble` (boolean)
+
+By default this filter will always use `autogobble` with minted, which will
+automatically trim common preceding whitespace. This is important because
+code blocks nested under a list or other block elements _will_ have common
+preceding whitespace that you _will_ want trimmed.
+
+### `no_mintinline` (boolean)
+
+Globally prevent this filter from emitting `\mintinline` calls for inline
+Code elements, emitting `\texttt` instead. Possibly useful in saving
+compile time for large documents that do not seek to have syntax
+highlighting on inline code elements.
+
+### `default_block_language` (string)
+
+The default pygments lexer class to use for code blocks. By default this
+is `"text"`, meaning no syntax highlighting. This is a fallback value, code
+blocks that explicitly specify a lexer will not use it.
+
+### `default_inline_language` (string)
+
+Same as `default_block_language`, only for inline code (typed in single
+backticks). The default is also `"text"`, and changing is discouraged.
+
+### `block_attributes` (list of strings)
+
+Any default attributes to apply to _all_ code blocks. These may be
+overridden on a per-code-block basis. See section 5.3 of the
+[minted documentation][minted_docs] for available options.
+
+### `inline_attributes` (list of strings)
+
+Any default attributes to apply to _all_ inline code. These may be
+overridden on a per-code basis. See section 5.3 of the
+[minted documentation][minted_docs] for available options.
+
+[minted_docs]: http://mirrors.ctan.org/macros/latex/contrib/minted/minted.pdf
+[minted]: https://ctan.org/pkg/minted?lang=en
+[pygments]: http://pygments.org/
+[pandoc_issue_2139]: https://github.com/jgm/pandoc/issues/2139#issuecomment-310522113
+[pandoc_issue_4271]: https://github.com/jgm/pandoc/issues/4721
+
+# Important Usage Notes
+
+Refer to the [`sample.md`](sample.md) file for some live examples of how to use
+this filter. If you execute `make` in this directory, `sample_beamer.pdf`,
+`sample_latex.pdf`, and `sample.html` will all be generated to demonstrate the
+filter in action.
+
+`pandoc` allows you to specify additional attributes on either the closing
+backtick of an inline code element, or after the third backtick of a fenced
+code block. This is done using `{curly braces}`, an example:
+
+```md
+`#include <type_traits>`{.cpp .showspaces style=bw}
+```
+
+or
+
+ ```{.cpp .showspaces style=bw}
+ #include <type_traits>
+ ```
+
+In order, these are
+
+- `.cpp`: specify the language lexer class.
+- `.showspaces`: a `minted` boolean attribute.
+- `style=bw`: a `minted` attribute that takes an argument (`bw` is a pygments
+ style, black-white, just an example).
+
+There are two rules that must not be violated:
+
+1. Any time you want to supply extra arguments to `minted` to a specific inline
+ code or code block element, **the lexer class must always be first, and
+ always be present**.
+
+ This is a limitation of the implementation of this filter.
+
+2. Observe the difference between specifying boolean attributes vs attributes
+ that take an argument. Boolean `minted` attributes **must** have a leading
+ `.`, and `minted` attributes that take an argument **may not** have a leading
+ `.`.
+
+ - **Yes**: `{.cpp .showspaces}`, **No**: `{.cpp showspaces}`
+ - **Yes**: `{.cpp style=bw}`, **No**: `{.cpp .style=bw}`
+
+ If you violate this, then `pandoc` will likely not produce an actual inline
+ `Code` or `CodeBlock` element, but instead something else (undefined).
+
+Last, but not least, you will see that the `--no-highlight` flag is used in the
+`Makefile` for the latex targets. This is added in the spirit of the filter
+being a "full replacement" for `pandoc` highlighting with `minted`. This only
+affects inline code elements that meet the following criteria:
+
+1. The inline code element has a lexer, e.g., `{.cpp}`.
+2. The inline code element can actually be parsed for that language by `pandoc`.
+
+If these two conditions are met, and you do **not** specify `--no-highlight`,
+the `pandoc` highlighting engine will take over. Users are encouraged to build
+the samples (`make` in this directory) and look at the end of the
+`Special Characters are Supported` section. If you remove `--no-highlight`,
+`make realclean`, and then `make` again, you will see that the pandoc
+highlighting engine will colorize the `auto foo = [](){};`.
+
+Simply put: if you do not want any pandoc highlighting in your LaTeX, **make
+sure you add `--no-highlight`** and it will not happen.
+
+It is advantageous for this filter to rely on this behavior, because it means
+that the filter does not need to worry about escaping special characters for
+LaTeX -- `pandoc` will do that for us. Inspect the generated `sample_*.tex`
+files (near the end) to see the difference. `--no-highlight` will produce
+`\texttt` commands, but omitting this flag will result in some `\VERB` commands
+from `pandoc`.
+
+# Bonus
+
+Included here is a simple python script to help you get the right color
+definitions for `bgcolor` with minted. Just run
+[`background_color.py`](background_color.py) with a single argument that is the
+name of the pygments style you want the `latex` background color definition for:
+
+```console
+$ ./background_color.py monokai
+Options for monokai (choose *one*):
+
+ (*) \definecolor{monokai_bg}{HTML}{272822}
+ (*) \definecolor{monokai_bg}{RGB}{39,40,34}
+ (*) \definecolor{monokai_bg}{rgb}{0.1529,0.1569,0.1333}
+ |--------/
+ |
+ +--> You can rename this too :)
+```
+
+See the contents of [`sample.md`](sample.md) (click on "View Raw" to see the
+comments in the metadata section). Notably, in order to use `\definecolor` you
+should make sure that the `xcolor` package is actually included. Comments in
+the file explain the options.
diff --git a/paper/lua-filters/minted/background_color.py b/paper/lua-filters/minted/background_color.py
new file mode 100755
index 0000000..e830db4
--- /dev/null
+++ b/paper/lua-filters/minted/background_color.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+r"""
+A simple script to print out ``\definecolor`` commands (in the HTML, RGB and
+rgb color models) for the background color of a specified pygments style name.
+
+Usage: pass exactly one command line argument, the pygments style name.
+Errors are written to stderr and the script exits with status 1.
+"""
+
+import sys
+try:
+ from pygments.styles import get_style_by_name
+except ImportError as ie:
+ sys.stderr.write("Please install the Pygments package:\n{0}\n".format(ie))
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ # Make sure we have a style name provided.
+ if len(sys.argv) != 2:
+ sys.stderr.write("Usage: {0} <style_name>\n\n".format(sys.argv[0]))
+ sys.stderr.write(" Tip: run `pygmentize -L` to see style names.\n")
+ sys.exit(1)
+
+ # Grab the style object, error out if invalid name.
+ style_name = sys.argv[1]
+ try:
+ style = get_style_by_name(style_name)
+ except Exception as e:
+ sys.stderr.write("Unable to find {0}:\n{1}\n".format(style_name, e))
+ sys.exit(1)
+
+ # Convert the hexadecimal string into rgb.
+ # Only a six digit hex value (after dropping any `#`) is accepted; anything
+ # else, including a three digit shorthand, is reported as unknown.
+ background_hex = style.background_color.replace("#", "")
+ if len(background_hex) != 6:
+ sys.stderr.write("Unknown hex color: {0}\n".format(background_hex))
+ sys.exit(1)
+
+ # Each channel is two hex digits; `int(..., 16)` raises on non-hex input.
+ try:
+ r = int(background_hex[0:2], 16)
+ g = int(background_hex[2:4], 16)
+ b = int(background_hex[4:6], 16)
+ except Exception as e:
+ sys.stderr.write("Unable to convert to integers:\n{0}\n".format(e))
+ sys.exit(1)
+
+ # Build out the various options for \definecolor
+ # All should be equivalent, but users may have a preference of one format
+ # over another :p
+ # The doubled braces `{{` / `}}` are literal braces for `str.format`.
+ tex_color_name = "{0}_bg".format(style_name)
+ def_HTML = r"\definecolor{{{0}}}{{HTML}}{{{1}}}".format(
+ tex_color_name, background_hex.upper()
+ )
+ def_RGB = r"\definecolor{{{0}}}{{RGB}}{{{1}}}".format(
+ tex_color_name, "{0},{1},{2}".format(r, g, b)
+ )
+ # The `rgb` model uses each channel divided by 255, formatted with `.4`.
+ def_rgb = r"\definecolor{{{0}}}{{rgb}}{{{1}}}".format(
+ tex_color_name,
+ ",".join(["{0:.4}".format(float(c) / 255.0) for c in [r, g, b]])
+ )
+
+ # Enumerate the options
+ print("Options for {0} (choose *one*):\n".format(style_name))
+ print(" (*) {0}".format(def_HTML))
+ print(" (*) {0}".format(def_RGB))
+ print(" (*) {0}".format(def_rgb))
+
+ # Make sure they know that `{style_name}_bg` can be changed to whatever
+ # they want to be using in their document.
+ # `notice` is padding as wide as the text printed before the color name,
+ # then `|`, a run of dashes and `/`, drawn underneath the color name.
+ notice = "{0}|{1}/".format(
+ len(r" (*) \definecolor{") * " ",
+ (len(tex_color_name) - 2) * "-"
+ )
+ # `vline` keeps only the padding and the `|`; `can_change` swaps that `|`
+ # for the arrow and message so all three printed lines share one column.
+ vline = notice[0:notice.find("|")+1]
+ can_change = vline.replace("|", "+--> You can rename this too :)")
+ print(notice)
+ print(vline)
+ print(can_change)
diff --git a/paper/lua-filters/minted/minted.lua b/paper/lua-filters/minted/minted.lua
new file mode 100644
index 0000000..19f608e
--- /dev/null
+++ b/paper/lua-filters/minted/minted.lua
@@ -0,0 +1,456 @@
+--[[
+minted -- enable the minted environment for code listings in beamer and latex.
+
+MIT License
+
+Copyright (c) 2019 Stephen McDowell
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+]]
+
+--------------------------------------------------------------------------------
+-- Quick documentation. See full documentation here: --
+-- https://github.com/pandoc/lua-filters/blob/master/minted --
+--------------------------------------------------------------------------------
+--[[
+Brief overview of metadata keys that you can use in your document:
+
+minted:
+ no_default_autogobble: <boolean>, *DISCOURAGED*
+ no_mintinline: <boolean>
+ default_block_language: <string>
+ default_inline_language: <string>
+ block_attributes: <list of strings>
+ - attr_1
+ - attr_2
+ - ...
+ inline_attributes: <list of strings>
+ - attr_1
+ - attr_2
+ - ...
+
+In words, underneath the `minted` metadata key, you have the following options:
+
+### `no_default_autogobble` (boolean)
+
+By default this filter will always use `autogobble` with minted, which will
+automatically trim common preceding whitespace. This is important because
+code blocks nested under a list or other block elements _will_ have common
+preceding whitespace that you _will_ want trimmed.
+
+### `no_mintinline` (boolean)
+
+Globally prevent this filter from emitting `\mintinline` calls for inline
+Code elements, emitting `\texttt` instead. Possibly useful in saving
+compile time for large documents that do not seek to have syntax
+highlighting on inline code elements.
+
+### `default_block_language` (string)
+
+The default pygments lexer class to use for code blocks. By default this
+is `"text"`, meaning no syntax highlighting. This is a fallback value, code
+blocks that explicitly specify a lexer will not use it.
+
+### `default_inline_language` (string)
+
+Same as `default_block_language`, only for inline code (typed in single
+backticks). The default is also `"text"`, and changing is discouraged.
+
+### `block_attributes` (list of strings)
+
+Any default attributes to apply to _all_ code blocks. These may be
+overridden on a per-code-block basis. See section 5.3 of the
+[minted documentation][minted_docs] for available options.
+
+### `inline_attributes` (list of strings)
+
+Any default attributes to apply to _all_ inline code. These may be
+overridden on a per-code basis. See section 5.3 of the
+[minted documentation][minted_docs] for available options.
+
+[minted_docs]: http://mirrors.ctan.org/macros/latex/contrib/minted/minted.pdf
+]]
+
+local List = require('pandoc.List')
+
+--------------------------------------------------------------------------------
+-- Potential metadata elements to override. --
+--------------------------------------------------------------------------------
+-- Overwritten in `Meta` from the `minted.no_mintinline` metadata key.
+local minted_no_mintinline = false
+-- Fallback lexer for code blocks without classes; overwritten in `Meta` from
+-- `minted.default_block_language`.
+local minted_default_block_language = "text"
+-- Fallback lexer for inline code without classes; overwritten in `Meta` from
+-- `minted.default_inline_language`.
+local minted_default_inline_language = "text"
+-- Global default attributes for code blocks; `Meta` appends the entries of
+-- `minted.block_attributes` (and `autogobble` unless disabled).
+local minted_block_attributes = {}
+-- Global default attributes for inline code; `Meta` appends the entries of
+-- `minted.inline_attributes`.
+local minted_inline_attributes = {}
+
+--------------------------------------------------------------------------------
+-- Constants used to differentiate Code and CodeBlock elements. --
+--------------------------------------------------------------------------------
+local MintedInline = 0
+local MintedBlock = 1
+
+--------------------------------------------------------------------------------
+-- Utility functions. --
+--------------------------------------------------------------------------------
+-- Pick the pygments lexer name to hand to minted for `elem`, which should be
+-- a Code or CodeBlock element.
+--
+-- An explicit class on the element wins: the first entry of `elem.classes` is
+-- taken to be the lexer. Without classes, `kind` (`MintedInline` or
+-- `MintedBlock`) selects the matching document-level default language.
+local function minted_language(elem, kind)
+  local classes = elem.classes
+  if 0 < #classes then
+    -- The first class is assumed to name the lexer.
+    return classes[1]
+  end
+
+  if kind == MintedInline then
+    -- Inline default, possibly overridden through user-level metadata.
+    return minted_default_inline_language
+  elseif kind == MintedBlock then
+    -- Block default, possibly overridden through user-level metadata.
+    return minted_default_block_language
+  end
+
+  -- Only reachable when `kind` is neither of the two known constants.
+  return "text"
+end
+
+-- Set of every option name supported by the minted package, keyed by name.
+--
+-- Section 5.3 Available Options of Minted documentation. Note that many of
+-- these do not apply to \mintinline (inline Code). Users are responsible
+-- for supplying valid arguments to minted. For example, specifying
+-- `autogobble` and `gobble` at the same time is a usage error.
+--
+-- http://mirrors.ctan.org/macros/latex/contrib/minted/minted.pdf
+--
+-- Built once when the filter is loaded, so each membership test is a single
+-- table lookup instead of rebuilding and scanning a list on every call.
+local minted_option_set = {}
+for _, option in ipairs({
+  "autogobble", "baselinestretch", "beameroverlays", "breakafter",
+  "breakaftergroup", "breakaftersymbolpre", "breakaftersymbolpost",
+  "breakanywhere", "breakanywheresymbolpre", "breakanywheresymbolpost",
+  "breakautoindent", "breakbefore", "breakbeforegroup",
+  "breakbeforesymbolpre", "breakbeforesymbolpost", "breakbytoken",
+  "breakbytokenanywhere", "breakindent", "breakindentnchars", "breaklines",
+  "breaksymbol", "breaksymbolleft", "breaksymbolright", "breaksymbolindent",
+  "breaksymbolindentnchars", "breaksymbolindentleft",
+  "breaksymbolindentleftnchars", "breaksymbolindentright",
+  "breaksymbolindentrightnchars", "breaksymbolsep", "breaksymbolsepnchars",
+  "breaksymbolsepleft", "breaksymbolsepleftnchars", "breaksymbolsepright",
+  "breaksymbolseprightnchars", "bgcolor", "codetagify", "curlyquotes",
+  "encoding", "escapeinside", "firstline", "firstnumber", "fontfamily",
+  "fontseries", "fontsize", "fontshape", "formatcom", "frame", "framerule",
+  "framesep", "funcnamehighlighting", "gobble", "highlightcolor",
+  "highlightlines", "keywordcase", "label", "labelposition", "lastline",
+  "linenos", "numberfirstline", "numbers", "mathescape", "numberblanklines",
+  "numbersep", "obeytabs", "outencoding", "python3", "resetmargins",
+  "rulecolor", "samepage", "showspaces", "showtabs", "space", "spacecolor",
+  "startinline", "style", "stepnumber", "stepnumberfromfirst",
+  "stepnumberoffsetvalues", "stripall", "stripnl", "tab", "tabcolor",
+  "tabsize", "texcl", "texcomments", "xleftmargin", "xrightmargin"
+}) do
+  minted_option_set[option] = true
+end
+
+-- Returns a boolean specifying whether or not the specified string `cls` is an
+-- option that is supported by the minted package. Any value that is not one of
+-- the known option names (including `nil`) yields `false`.
+local function is_minted_class(cls)
+  return minted_option_set[cls] == true
+end
+
+-- Return a string for the minted attributes `\begin{minted}[attributes]` or
+-- `\mintinline[attributes]`. Attributes are acquired by inspecting the
+-- specified element's `classes` and `attributes` fields. Any global attributes
+-- provided in the document metadata will be included _only_ if they do not
+-- override the element-level attributes.
+--
+-- `elem` should either be a Code or CodeBlock element, and `kind` is assumed to
+-- be either `MintedInline` or `MintedBlock`. The `kind` determines which
+-- global default attribute list to use; any other `kind` uses no global
+-- defaults.
+--
+-- Returns a comma delimited string (empty when there are no attributes).
+local function minted_attributes(elem, kind)
+  -- The full listing of attributes that will be joined and returned.
+  local attrs = {}
+
+  -- Book-keeping: set of `xxx` keys (from `xxx` or `xxx=yyy`) contributed by
+  -- the element itself, used to decide whether a global default from
+  -- `block_attributes` or `inline_attributes` would override it.
+  local element_keys = {}
+
+  -- Boolean style options for minted (e.g., ```{.bash .autogobble}) will appear
+  -- in the list of classes.
+  for _, cls in ipairs(elem.classes) do
+    if is_minted_class(cls) then
+      attrs[#attrs + 1] = cls
+      element_keys[cls] = true
+    end
+  end
+
+  -- Value options using key=value (e.g., ```{.bash fontsize=\scriptsize}) show
+  -- up in the list of attributes. `key` and `value` are declared local so that
+  -- they do not leak into the global environment shared by the filter.
+  for _, attr in ipairs(elem.attributes) do
+    local key, value = attr[1], attr[2]
+    if is_minted_class(key) then
+      attrs[#attrs + 1] = key .. "=" .. value
+      element_keys[key] = true
+    end
+  end
+
+  -- Add any global defaults _only_ if they do not conflict. Note that conflict
+  -- is only in the literal sense. If a user has `autogobble` and `gobble=2`
+  -- specified, these do conflict in the minted sense, but this filter makes no
+  -- checks on validity ;)
+  local global_defaults
+  if kind == MintedInline then
+    global_defaults = minted_inline_attributes
+  elseif kind == MintedBlock then
+    global_defaults = minted_block_attributes
+  else
+    -- Unknown kind: iterate over nothing instead of failing in `ipairs(nil)`.
+    global_defaults = {}
+  end
+  for _, global_attr in ipairs(global_defaults) do
+    -- Either use the index of `=` minus one, or -1 if no `=` present. Fallback
+    -- on -1 means that the substring is the original string.
+    local end_idx = (string.find(global_attr, "=", 1, true) or 0) - 1
+    local global_key = string.sub(global_attr, 1, end_idx)
+    if not element_keys[global_key] then
+      attrs[#attrs + 1] = global_attr
+    end
+  end
+
+  -- Return a comma delimited string for specifying the attributes to minted.
+  return table.concat(attrs, ",")
+end
+
+-- Return the specified `elem` with any minted data removed from the `classes`
+-- and `attributes`. Otherwise writers such as the HTML writer might produce
+-- invalid code since latex makes heavy use of the \backslash.
+--
+-- `elem` is modified in place (its `classes` and `attributes` fields are
+-- replaced) and also returned. The `no_minted` class is dropped as well.
+local function remove_minted_attibutes(elem)
+  -- Keep only the classes that are neither minted options nor `no_minted`.
+  -- All temporaries are local so nothing leaks into the global environment.
+  local kept_classes = {}
+  for _, cls in ipairs(elem.classes) do
+    if not is_minted_class(cls) and cls ~= "no_minted" then
+      kept_classes[#kept_classes + 1] = cls
+    end
+  end
+  elem.classes = kept_classes
+
+  -- Keep only the key=value attributes whose key is not a minted option.
+  local kept_attrs = {}
+  for _, attr in ipairs(elem.attributes) do
+    local key, value = attr[1], attr[2]
+    if not is_minted_class(key) then
+      kept_attrs[#kept_attrs + 1] = {key, value}
+    end
+  end
+  elem.attributes = kept_attrs
+
+  -- Return the (potentially modified) element for pandoc to take over.
+  return elem
+end
+
+-- Return a `start_delim` and `end_delim` that can safely wrap around the
+-- specified `text` when used inline. If `text` contains no brace, a pair of
+-- braces is returned. Otherwise the first character of `possible_delims` that
+-- does not occur in `text` is returned as both delimiters. If no delimiter
+-- could be found, an error is raised.
+local function minted_inline_delims(text)
+  -- Braces are the preferred delimiters whenever `text` has none of its own.
+  if not text:find('[{}]') then
+    return '{', '}'
+  end
+
+  -- Try some other delimiter (the alphanumeric digits are in Python's
+  -- string.digits + string.ascii_letters order). Declared local so the
+  -- candidate list does not leak into the global environment.
+  local possible_delims = ('|!@#^&*-=+' .. '0123456789' ..
+                           'abcdefghijklmnopqrstuvwxyz' ..
+                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
+  for char in possible_delims:gmatch('.') do
+    -- Plain search (4th argument) so that `^`, `*`, `-`, `+` match literally.
+    if not text:find(char, 1, true) then
+      return char, char
+    end
+  end
+
+  local msg = 'Unable to determine delimiter to use around inline code %q'
+  error(msg:format(text))
+end
+
+--------------------------------------------------------------------------------
+-- Pandoc overrides. --
+--------------------------------------------------------------------------------
+-- Override the pandoc Meta function so that we can parse the metadata for the
+-- document and store the necessary variables locally to use in other functions
+-- such as Code and CodeBlock (helper methods).
+--
+-- Keys read from the top-level `minted` metadata entry (all optional):
+--   no_mintinline           stored in `minted_no_mintinline`
+--   default_block_language  stringified into `minted_default_block_language`
+--   default_inline_language stringified into `minted_default_inline_language`
+--   block_attributes        list, appended to `minted_block_attributes`
+--   inline_attributes       list, appended to `minted_inline_attributes`
+--   no_default_autogobble   when truthy, `autogobble` is not added implicitly
+--
+-- Returns the metadata `m` unchanged.
+function Meta(m)
+  -- Grab the `minted` metadata; everything inside the `if` is skipped when
+  -- the document does not provide it.
+  local minted = m["minted"]
+  local found_autogobble = false
+  local always_autogobble = true
+  if minted ~= nil then
+    -- Parse and set the global bypass to turn off all \mintinline calls.
+    local no_mintinline = minted["no_mintinline"]
+    if no_mintinline ~= nil then
+      minted_no_mintinline = no_mintinline
+    end
+
+    -- Parse and set the default block language.
+    local default_block_language = minted.default_block_language
+      and pandoc.utils.stringify(minted.default_block_language)
+    if default_block_language ~= nil then
+      minted_default_block_language = default_block_language
+    end
+
+    -- Parse and set the default inline language.
+    local default_inline_language = minted.default_inline_language
+      and pandoc.utils.stringify(minted.default_inline_language)
+    if default_inline_language ~= nil then
+      minted_default_inline_language = default_inline_language
+    end
+
+    -- Parse the global default minted attributes to use on every block.
+    local block_attributes = minted["block_attributes"]
+    if block_attributes ~= nil then
+      for _, attr in ipairs(block_attributes) do
+        -- Each list item is a metadata value, not a plain string, so it has
+        -- to be stringified before it can be compared or stored.
+        local attr_text = pandoc.utils.stringify(attr)
+        if attr_text == "autogobble" then
+          found_autogobble = true
+        end
+        table.insert(minted_block_attributes, attr_text)
+      end
+    end
+
+    -- Allow users to turn off autogobble for blocks, but really they should not
+    -- ever seek to do this (indented code blocks under list for example).
+    local no_default_autogobble = minted["no_default_autogobble"]
+    if no_default_autogobble ~= nil then
+      always_autogobble = not no_default_autogobble
+    end
+
+    -- Parse the global default minted attributes to use on every inline.
+    local inline_attributes = minted["inline_attributes"]
+    if inline_attributes ~= nil then
+      for _, attr in ipairs(inline_attributes) do
+        table.insert(minted_inline_attributes, pandoc.utils.stringify(attr))
+      end
+    end
+  end
+
+  -- Make sure autogobble is turned on by default, unless the user disabled it
+  -- or already listed it explicitly in `block_attributes`.
+  if always_autogobble and not found_autogobble then
+    table.insert(minted_block_attributes, "autogobble")
+  end
+
+  -- Return the metadata to pandoc (unchanged).
+  return m
+end
+
+-- Replace inline code with a raw `\mintinline` call for the beamer / latex
+-- writers. For every other writer the element is passed through
+-- `remove_minted_attibutes` instead.
+--
+-- Two bypasses keep the element untouched for the tex writers: the global
+-- `minted_no_mintinline` flag and a `no_minted` class on the element itself.
+function Code(elem)
+  local is_tex_writer = FORMAT == "beamer" or FORMAT == "latex"
+  if not is_tex_writer then
+    return remove_minted_attibutes(elem)
+  end
+
+  -- Global bypass. Returning `nil` tells pandoc the element is unchanged.
+  if minted_no_mintinline then
+    return nil
+  end
+
+  -- Local bypass: the `.no_minted` class on this element.
+  for _, class_name in ipairs(elem.classes) do
+    if class_name == "no_minted" then
+      return nil
+    end
+  end
+
+  local open_delim, close_delim = minted_inline_delims(elem.text)
+  local lexer = minted_language(elem, MintedInline)
+  local options = minted_attributes(elem, MintedInline)
+  local template = "\\mintinline[%s]{%s}%s%s%s"
+  local latex = template:format(
+    options, lexer, open_delim, elem.text, close_delim
+  )
+
+  -- NOTE: prior to pandoc commit 24a0d61, `beamer` cannot be used as the
+  -- RawBlock format. Using `latex` should not cause any problems.
+  return pandoc.RawInline("latex", latex)
+end
+
+-- Replace code blocks with a raw `\begin{minted}...\end{minted}` environment
+-- for the beamer / latex writers. For every other writer the block is passed
+-- through `remove_minted_attibutes` instead.
+function CodeBlock(block)
+  if FORMAT ~= "beamer" and FORMAT ~= "latex" then
+    return remove_minted_attibutes(block)
+  end
+
+  local lexer = minted_language(block, MintedBlock)
+  local options = minted_attributes(block, MintedBlock)
+  local template = "\\begin{minted}[%s]{%s}\n%s\n\\end{minted}"
+  local latex = template:format(options, lexer, block.text)
+
+  -- NOTE: prior to pandoc commit 24a0d61, `beamer` cannot be used as the
+  -- RawBlock format. Using `latex` should not cause any problems.
+  return pandoc.RawBlock("latex", latex)
+end
+
+-- Mark every beamer frame as fragile: any minted environment or \mintinline
+-- invocation halts compilation when its frame is not fragile. Headers are
+-- left alone (nothing is returned) for every writer other than beamer.
+function Header(elem)
+  if FORMAT ~= 'beamer' then
+    return
+  end
+
+  -- Add 'fragile' only when it is not among the classes already.
+  local already_fragile = false
+  for _, class_name in ipairs(elem.classes) do
+    already_fragile = class_name == 'fragile'
+    if already_fragile then
+      break
+    end
+  end
+  if not already_fragile then
+    table.insert(elem.classes, 'fragile')
+  end
+
+  -- NOTE: pass the remaining work to pandoc, noting that 2.5 and below
+  -- may duplicate the 'fragile' specifier. Duplicated fragile does *not*
+  -- cause compile errors.
+  return elem
+end
+
+-- NOTE: the order of the entries matters. Meta has to come first, otherwise
+-- the metadata from the document will not be loaded before the Code,
+-- CodeBlock and Header overrides run.
+local filter_passes = {
+  {Meta = Meta},
+  {Code = Code},
+  {CodeBlock = CodeBlock},
+  {Header = Header},
+}
+return filter_passes
diff --git a/paper/lua-filters/minted/run_minted_tests.py b/paper/lua-filters/minted/run_minted_tests.py
new file mode 100755
index 0000000..15803da
--- /dev/null
+++ b/paper/lua-filters/minted/run_minted_tests.py
@@ -0,0 +1,522 @@
+#!/usr/bin/env python
+
+"""
+Unit tests for the pandoc minted.lua filter.
+"""
+
+# Lint this file with: flake8 --max-line-length=80
+import os
+import string
+import subprocess
+import sys
+import textwrap
+
+# Markdown fixture for the CodeBlock tests: a header followed by one fenced
+# C++ block. Tests replace the "{.cpp}" marker with other attribute lists.
+code_block = textwrap.dedent('''
+ ## A Code Block
+
+ ```{.cpp}
+ auto mult = []<typename T, typename U>(T const & x, U const & y) {
+ return x * y;
+ };
+ ```
+''')
+"""
+The base CodeBlock code. {.cpp} is used as a replacement marker in most tests!
+"""
+
+# Delimiter candidates, in the same order as `possible_delims` in minted.lua.
+inline_delims = '|!@#^&*-=+' + string.digits + string.ascii_letters
+# Markdown fixture for the inline Code tests. The generated tail adds one
+# inline code span per delimiter candidate; span `i` starts with `{` and
+# contains the first `i` candidates.
+inline_code = textwrap.dedent('''
+ ## Inline Code
+
+ `#include <type_traits>`{.cpp}
+ C and C++ use `{` and `}` to delimit scopes.
+ Some other special characters:
+ These check bypass: `~!@#$%^&*()-=_+[]\\{}|;\':",./<>?`
+ These check regular inline: ''' + ' '.join(
+ '`{' + inline_delims[:i] + '`' for i in range(len(inline_delims))
+))
+"""
+The base Code code. {.cpp} is used as a replacement marker in most tests!
+"""
+
+
+def run_pandoc(pandoc_args, stdin):
+ """Run pandoc with the specified arguments, returning the output."""
+ # The input / output should be small enough for these tests that buffer
+ # overflows should not happen.
+ pandoc_proc = subprocess.Popen(
+ ["pandoc"] + pandoc_args,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE
+ )
+
+ # Python 3.x and later require communicating with bytes.
+ if sys.version_info[0] >= 3:
+ stdin = bytes(stdin, "utf-8")
+
+ stdout, stderr = pandoc_proc.communicate(input=stdin)
+ if pandoc_proc.returncode != 0:
+ sys.stderr.write("Non-zero exit code of {ret} from pandoc!\n".format(
+ ret=pandoc_proc.returncode
+ ))
+ sys.stderr.write("pandoc stderr: {stderr}".format(
+ stderr=stderr.decode("utf-8")
+ ))
+ sys.exit(1)
+
+ return stdout.decode("utf-8")
+
+
+def fail_test(test_name, messages, ansi_color_code="31"):
+ """
+ Print failure message and ``sys.exit(1)``.
+
+ ``test_name`` (str)
+ The name of the test (to make finding in code easier).
+
+ ``messages`` (list of str -- or -- str)
+ A single string, or list of strings, to print out to ``stderr`` that
+ explain the reason for the test failure.
+
+ ``ansi_color_code`` (str)
+ A an ANSI color code to use to colorize the failure message :) Default
+ is ``"31"``, which is red.
+ """
+ sys.stderr.write(
+ "\033[0;{ansi_color_code}mTest {test_name} FAILED\033[0m\n".format(
+ ansi_color_code=ansi_color_code, test_name=test_name
+ )
+ )
+ if isinstance(messages, list):
+ for m in messages:
+ sys.stderr.write("--> {m}\n".format(m=m))
+ else:
+ sys.stderr.write("--> {messages}\n".format(messages=messages))
+ sys.exit(1)
+
+
+def ensure_fragile(test_name, pandoc_output):
+ r"""
+ Ensure that every \begin{frame} has (at least one) fragile.
+
+ ``test_name`` (str)
+ The name of the test (forwards to ``fail_test``).
+
+ ``pandoc_output`` (str)
+ The pandoc output for the test case.
+ """
+ for line in pandoc_output.splitlines():
+ if r"\begin{frame}" in line:
+ if "fragile" not in line:
+ fail_test(
+ test_name,
+ r"\begin{frame} without 'fragile': {line}".format(line=line)
+ )
+
+
+def ensure_present(test_name, string, pandoc_output):
+ """
+ Assert that ``string`` is found in ``pandoc_output``.
+
+ ``test_name`` (str)
+ The name of the test (forwards to ``fail_test``).
+
+ ``string`` (str)
+ The string to check verbatim ``string in pandoc_output``.
+
+ ``pandoc_output`` (str)
+ The pandoc output for the test case.
+ """
+ if string not in pandoc_output:
+ fail_test(
+ test_name,
+ "The requested string '{string}' was not found in:\n{pout}".format(
+ string=string, pout=pandoc_output
+ )
+ )
+
+
+def ensure_not_present(test_name, string, pandoc_output):
+ """
+ Assert that ``string`` is **not** found in ``pandoc_output``.
+
+ ``test_name`` (str)
+ The name of the test (forwards to ``fail_test``).
+
+ ``string`` (str)
+ The string to check verbatim ``string not in pandoc_output``.
+
+ ``pandoc_output`` (str)
+ The pandoc output for the test case.
+ """
+ if string in pandoc_output:
+ fail_test(
+ test_name,
+ "The forbidden string '{string}' was found in:\n{pout}".format(
+ string=string, pout=pandoc_output
+ )
+ )
+
+
+def run_tex_tests(pandoc_args, fmt):
+ """
+ Run the same CodeBlock and inline Code test cases against a tex writer.
+
+ ``pandoc_args`` (list of str)
+ The base list of arguments to forward to pandoc. Some tests may remove
+ the ``--no-highlight`` flag to validate whether or not pandoc
+ highlighting macros appear as expected (or not at all).
+
+ ``fmt`` (str)
+ The format is assumed to be either 'latex' or 'beamer'.
+ """
+ def verify(test_name, args, md, *strings):
+ """
+ Convert ``md`` with ``args`` plus ``-t fmt`` and check the output.
+
+ Every entry of ``strings`` must occur in the output. Any failed check
+ ends the run through ``fail_test``.
+ """
+ output = run_pandoc(args + ["-t", fmt], md)
+ # beamer frames must all be fragile; latex output must never mention it.
+ if fmt == "beamer":
+ ensure_fragile(test_name, output)
+ else: # latex writer
+ ensure_not_present(test_name, "fragile", output)
+ for s in strings:
+ ensure_present(test_name, s, output)
+ # Make sure the pandoc highlighting is not being used
+ if "--no-highlight" in args:
+ ensure_not_present(test_name, r"\VERB", output)
+ # if `nil` is present, that likely means a problem parsing the metadata
+ ensure_not_present(test_name, "nil", output)
+
+ ############################################################################
+ # CodeBlock tests. #
+ ############################################################################
+ begin_minted = r"\begin{{minted}}[{attrs}]{{{lang}}}"
+ verify(
+ "[code-block] default",
+ pandoc_args,
+ code_block,
+ begin_minted.format(attrs="autogobble", lang="cpp")
+ )
+ verify(
+ "[code-block] no_default_autogobble",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ no_default_autogobble: true
+ ---
+ {code_block}
+ ''').format(code_block=code_block),
+ begin_minted.format(attrs="", lang="cpp")
+ )
+ verify(
+ "[code-block] default block language is 'text'",
+ pandoc_args,
+ code_block.replace("{.cpp}", ""),
+ begin_minted.format(attrs="autogobble", lang="text")
+ )
+ verify(
+ "[code-block] user provided default_block_language",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ default_block_language: "haskell"
+ ---
+ {code_block}
+ ''').format(code_block=code_block.replace("{.cpp}", "")),
+ begin_minted.format(attrs="autogobble", lang="haskell")
+ )
+ verify(
+ "[code-block] user provided block_attributes",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ block_attributes:
+ - "showspaces"
+ - "space=."
+ ---
+ {code_block}
+ ''').format(code_block=code_block),
+ begin_minted.format(
+ attrs=",".join(["showspaces", "space=.", "autogobble"]),
+ lang="cpp"
+ )
+ )
+ verify(
+ "[code-block] user provided block_attributes and no_default_autogobble",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ no_default_autogobble: true
+ block_attributes:
+ - "style=monokai"
+ - "bgcolor=monokai_bg"
+ ---
+ {code_block}
+ ''').format(code_block=code_block),
+ begin_minted.format(
+ attrs=",".join(["style=monokai", "bgcolor=monokai_bg"]), lang="cpp"
+ )
+ )
+ verify(
+ "[code-block] attributes on code block",
+ pandoc_args,
+ code_block.replace(
+ "{.cpp}", "{.cpp .showspaces bgcolor=tango_bg style=tango}"
+ ),
+ begin_minted.format(
+ attrs=",".join([
+ "showspaces", "bgcolor=tango_bg", "style=tango", "autogobble"
+ ]),
+ lang="cpp"
+ )
+ )
+ verify(
+ "[code-block] attributes on code block + user block_attributes",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ block_attributes:
+ - "showspaces"
+ - "space=."
+ ---
+ {code_block}
+ ''').format(
+ code_block=code_block.replace(
+ "{.cpp}", "{.cpp bgcolor=tango_bg style=tango}"
+ )
+ ),
+ begin_minted.format(
+ attrs=",".join([
+ "bgcolor=tango_bg",
+ "style=tango",
+ "showspaces",
+ "space=.",
+ "autogobble"
+ ]),
+ lang="cpp"
+ )
+ )
+ verify(
+ "[code-block] traditional fenced code block",
+ pandoc_args,
+ code_block.replace("{.cpp}", "cpp"),
+ begin_minted.format(attrs="autogobble", lang="cpp")
+ )
+ verify(
+ "[code-block] non-minted attributes not forwarded",
+ pandoc_args,
+ code_block.replace("{.cpp}", "{.cpp .showspaces .hello}"),
+ begin_minted.format(
+ attrs=",".join(["showspaces", "autogobble"]), lang="cpp"
+ )
+ )
+
+ ############################################################################
+ # Inline Code tests. #
+ ############################################################################
+ mintinline = r"\mintinline[{attrs}]{{{lang}}}"
+ verify(
+ "[inline-code] default",
+ pandoc_args,
+ inline_code,
+ mintinline.format(attrs="", lang="cpp"),
+ "|{|",
+ "|}|",
+ *[
+ delim + '{' + inline_delims[:i] + delim
+ for i, delim in enumerate(inline_delims)
+ ]
+ )
+ verify(
+ "[inline-code] default language is text",
+ pandoc_args,
+ inline_code,
+ mintinline.format(attrs="", lang="text"),
+ "|{|",
+ "|}|"
+ )
+ # begin: global no_mintinline shared testing with / without --no-highlight
+ inline_no_mintinline_globally_md = textwrap.dedent('''
+ ---
+ minted:
+ no_mintinline: true
+ ---
+ {inline_code}
+ ''').format(inline_code=inline_code)
+ inline_no_mintinline_globally_strings = [
+ r"\texttt{\{}",
+ r"\texttt{\}}",
+ (r"\texttt{" +
+ r"\textasciitilde{}!@\#\$\%\^{}\&*()-=\_+{[}{]}\textbackslash{}\{\}" +
+ r"""\textbar{};\textquotesingle{}:",./\textless{}\textgreater{}?}""")
+ ]
+ verify(
+ "[inline-code] no_mintinline off globally",
+ pandoc_args,
+ inline_no_mintinline_globally_md,
+ r"\texttt{\#include\ \textless{}type\_traits\textgreater{}}",
+ *inline_no_mintinline_globally_strings
+ )
+ verify(
+ "[inline-code] no_mintinline off globally, remove --no-highlight",
+ [arg for arg in pandoc_args if arg != "--no-highlight"],
+ inline_no_mintinline_globally_md,
+ r"\VERB|\PreprocessorTok{#include }\ImportTok{<type_traits>}|",
+ *inline_no_mintinline_globally_strings
+ )
+ # end: global no_mintinline shared testing with / without --no-highlight
+ # begin: no_minted shared testing with / without --no-highlight
+ inline_no_minted_md = inline_code.replace("{.cpp}", "{.cpp .no_minted}")
+ inline_no_minted_strings = ["|{|", "|}|"]
+ verify(
+ "[inline-code] .no_minted on single inline Code",
+ pandoc_args,
+ inline_no_minted_md,
+ r"texttt{\#include\ \textless{}type\_traits\textgreater{}}",
+ *inline_no_minted_strings
+ )
+ verify(
+ "[inline-code] .no_minted on single inline Code, remove --no-highlight",
+ [arg for arg in pandoc_args if arg != "--no-highlight"],
+ inline_no_minted_md,
+ r"\VERB|\PreprocessorTok{#include }\ImportTok{<type_traits>}|",
+ *inline_no_minted_strings
+ )
+ # end: no_minted shared testing with / without --no-highlight
+ verify(
+ "[inline-code] user provided default_inline_language",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ default_inline_language: "haskell"
+ ---
+ {inline_code}
+ ''').format(inline_code=inline_code),
+ mintinline.format(attrs="", lang="haskell")
+ )
+ verify(
+ "[inline-code] user provided inline_attributes",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ inline_attributes:
+ - "showspaces"
+ - "space=."
+ ---
+ {inline_code}
+ ''').format(inline_code=inline_code),
+ mintinline.format(
+ attrs=",".join(["showspaces", "space=."]), lang="cpp"
+ ),
+ mintinline.format(
+ attrs=",".join(["showspaces", "space=."]), lang="text"
+ )
+ )
+ verify(
+ "[inline-code] attributes on inline code",
+ pandoc_args,
+ inline_code.replace(
+ "{.cpp}", "{.cpp .showspaces bgcolor=tango_bg style=tango}"
+ ),
+ mintinline.format(
+ attrs=",".join(["showspaces", "bgcolor=tango_bg", "style=tango"]),
+ lang="cpp"
+ )
+ )
+ verify(
+ "[inline-code] attributes on inline code + user inline_attributes",
+ pandoc_args,
+ textwrap.dedent('''
+ ---
+ minted:
+ inline_attributes:
+ - "showspaces"
+ - "space=."
+ ---
+ {inline_code}
+ ''').format(
+ inline_code=inline_code.replace(
+ "{.cpp}", "{.cpp bgcolor=tango_bg style=tango}"
+ )
+ ),
+ mintinline.format(
+ attrs=",".join([
+ "bgcolor=tango_bg",
+ "style=tango",
+ "showspaces",
+ "space=."
+ ]),
+ lang="cpp"
+ )
+ )
+ verify(
+ "[inline-code] non-minted attributes not forwarded",
+ pandoc_args,
+ inline_code.replace("{.cpp}", "{.cpp .showspaces .hello}"),
+ mintinline.format(attrs="showspaces", lang="cpp")
+ )
+
+
+def run_html_tests(args):
+ """
+ Run tests with an html5 writer to make sure minted commands are not used.
+ Also make sure minted specific attributes are indeed stripped.
+
+ ``args`` (list of str)
+ The base list of arguments to forward to pandoc.
+ """
+ def verify(test_name, md, attrs=[]):
+ """Verify minted and any strings in attrs not produced"""
+ output = run_pandoc(args + ["-t", "html5"], md)
+ ensure_not_present(test_name, "mint", output)
+ ensure_not_present(test_name, "fragile", output)
+ if attrs:
+ for a in attrs:
+ ensure_not_present(test_name, a, output)
+ # if `nil` is present, that likely means a problem parsing the metadata
+ ensure_not_present(test_name, "nil", output)
+
+ verify(r"[html] no \begin{minted}", code_block)
+ verify(r"[html] no \mintinline", inline_code)
+ verify(
+ r"[html] no \begin{minted} or \mintinline",
+ "{code_block}\n\n{inline_code}".format(
+ code_block=code_block, inline_code=inline_code
+ )
+ )
+ verify(
+ "[html] code block minted specific attributes stripped",
+ code_block.replace(
+ "{.cpp}",
+ "{.cpp .showspaces space=. bgcolor=minted_bg style=minted}"
+ ),
+ ["showspaces", "space", "bgcolor", "style"]
+ )
+ verify(
+ "[html] inline code minted specific attributes stripped",
+ inline_code.replace(
+ "{.cpp}",
+ "{.cpp .showspaces space=. bgcolor=minted_bg style=minted}"
+ ),
+ ["showspaces", "space", "bgcolor", "style"]
+ )
+
+
+if __name__ == "__main__":
+ # Initial path setup for input tests and lua filter
+ this_file_dir = os.path.abspath(os.path.dirname(__file__))
+ minted_lua = os.path.join(this_file_dir, "minted.lua")
+ if not os.path.isfile(minted_lua):
+ sys.stderr.write("Cannot find '{minted_lua}'...".format(
+ minted_lua=minted_lua
+ ))
+ sys.exit(1)
+
+ args = ["--fail-if-warnings", "--no-highlight", "--lua-filter", minted_lua]
+ run_tex_tests(args, "beamer")
+ run_tex_tests(args, "latex")
+ run_html_tests(args)
diff --git a/paper/lua-filters/minted/sample.md b/paper/lua-filters/minted/sample.md
new file mode 100644
index 0000000..7197047
--- /dev/null
+++ b/paper/lua-filters/minted/sample.md
@@ -0,0 +1,135 @@
+---
+title: Pandoc Minted Sample
+# NOTE: If you want to use `\definecolor` commands in your `header-includes`
+# section, setting `colorlinks: true` will `\usepackage{xcolor}` which is needed
+# for `\definecolor`. You can alternatively `\usepackage{xcolor}` explicitly in
+# the `header-includes` section if you do not want everything else that
+# `colorlinks: true` will bring in. See `pandoc -D latex` output to see
+# everything that `colorlinks: true` will do _in addition_ to including xcolor.
+colorlinks: true
+header-includes:
+ # Include the minted package, set global style, define colors, etc.
+ - "\\usepackage{minted}"
+ - "\\usemintedstyle{tango}"
+ - "\\definecolor{tango_bg}{rgb}{0.9725,0.9725,0.9725}"
+ - "\\definecolor{monokai_bg}{rgb}{0.1529,0.1569,0.1333}"
+ # NOTE: comment out these last three and recompile to see the italics used
+ # by default for the `tango` style.
+ # Prevent italics in the `minted` environment.
+ - "\\AtBeginEnvironment{minted}{\\let\\itshape\\relax}"
+ # Prevent italics in the `\mintinline` command.
+ - "\\usepackage{xpatch}"
+ - "`\\xpatchcmd{\\mintinline}{\\begingroup}{\\begingroup\\let\\itshape\\relax}{}{}`{=latex}"
+minted:
+ block_attributes:
+ - "bgcolor=tango_bg"
+---
+
+## Inline Code in Pandoc
+
+- Raw inline code:
+
+ ```md
+ `#include <type_traits>`
+ ```
+
+ \vspace*{-3ex} produces: `#include <type_traits>`
+
+- Apply just a lexer:
+
+ ```md
+ `#include <type_traits>`{.cpp}
+ ```
+
+ \vspace*{-3ex} produces: `#include <type_traits>`{.cpp}
+
+- Change the background color and highlighting style:
+
+ ```{.md fontsize=\scriptsize}
+ <!-- Note: we defined monokai_bg in the metadata! -->
+ `#include <type_traits>`{.cpp bgcolor=monokai_bg style=monokai}
+ ```
+
+ \vspace*{-3ex} produces:
+ `#include <type_traits>`{.cpp bgcolor=monokai_bg style=monokai}
+
+ - Must **always** include language (`.cpp` here) **first**, always!
+
+## Inline Code Bypasses
+
+- Want the regular teletype text? Specify **both** the lexer class name and one
+ additional class `.no_minted`.
+
+ ```{.md}
+ <!-- The "text lexer" -->
+ `no minted`{.text .no_minted}
+ ```
+
+ \vspace*{-3ex} produces: `no mintinline`{.text .no_minted} vs `with mintinline`
+
+ - Inspect generated code, the PDF output is indistinguishable.
+
+- Alternatively, you can set `no_mintinline: true`{.yaml style=paraiso-light} to prevent the filter
+ from emitting _any_ `\mintinline`{.latex} calls.
+ - If you don't need syntax highlighting on your inline code elements, this may
+ greatly improve compile times for large documents.
+
+
+## Code Blocks
+
+- Use the defaults, but still supply the lexer:
+
+ ```bash
+ echo "Hi there" # How are you?
+ ```
+
+ \vspace*{-3ex} produces
+
+ ```bash
+ echo "Hi there" # How are you?
+ ```
+
+ \vspace*{-3ex}
+
+- As with inline code, you can change whatever you want:
+
+ ```{.bash bgcolor=monokai_bg style=monokai}
+ echo "Hi there" # How are you?
+ ```
+
+ \vspace*{-3ex} produces
+
+ ```{.bash bgcolor=monokai_bg style=monokai}
+ echo "Hi there" # How are you?
+ ```
+
+ \vspace*{-3ex}
+
+ - Must **always** include language (`.bash` here) **first**, always!
+
+
+## Special Characters are Supported
+
+- Code blocks:
+
+ ```md
+ `~!@#$%^&*()-=_+[]}{|;':",.\/<>?
+ ```
+
+ \vspace*{-3ex}
+
+- Inline code
+
+ ``with mintinline `~!@#$%^&*()-=_+[]}{|;':",.\/<>?``
+
+ Note: If you use almost all special characters *and* all alphanumeric
+ characters in a single inline code fragment, minted may not be able to find a
+ suitable delimiter to place around the \LaTeX\ inline command.
+
+- Inline code with bypass
+
+ ``no mintinline `~!@#$%^&*()-=_+[]}{|;':",.\/<>?``{.text .no_minted}
+
+- Specific lexer with mintinline: `auto foo = [](){};`{.cpp}
+- Without mintinline: `auto foo = [](){};`{.cpp .no_minted}
+ - Output color depends on `--no-highlight` flag for `pandoc`.
diff --git a/paper/lua-filters/multiple-bibliographies/Makefile b/paper/lua-filters/multiple-bibliographies/Makefile
new file mode 100644
index 0000000..a42ce28
--- /dev/null
+++ b/paper/lua-filters/multiple-bibliographies/Makefile
@@ -0,0 +1,6 @@
+test: sample.md multiple-bibliographies.lua
+ @pandoc --lua-filter=multiple-bibliographies.lua \
+ --standalone --to=native $< 2>/dev/null \
+ | diff -u - expected.native
+
+.PHONY: test
diff --git a/paper/lua-filters/multiple-bibliographies/README.md b/paper/lua-filters/multiple-bibliographies/README.md
new file mode 100644
index 0000000..0111ee6
--- /dev/null
+++ b/paper/lua-filters/multiple-bibliographies/README.md
@@ -0,0 +1,33 @@
+# multiple-bibliographies
+
+This filter makes it possible to create multiple bibliographies using
+`pandoc-citeproc`. The content of each bibliography is controlled
+via YAML values and the file in which a bibliographic entry is
+specified.
+
+## Usage
+
+Instead of using the usual *bibliography* metadata field, all
+bibliographies must be defined via a separate field of the scheme
+*bibliographyX*, e.g.
+
+ ---
+ bibliography_main: main-bibliography.bib
+ bibliography_software: software.bib
+ ---
+
+The placement of bibliographies is controlled via special divs.
+
+ # References
+
+ ::: {#refs_main}
+ :::
+
+ # Software
+
+ ::: {#refs_software}
+ :::
+
+Each refsX div should have a matching bibliographyX entry in the
+header. These divs are filled with citations from the respective
+bib-file.
diff --git a/paper/lua-filters/multiple-bibliographies/expected.native b/paper/lua-filters/multiple-bibliographies/expected.native
new file mode 100644
index 0000000..a7ead12
--- /dev/null
+++ b/paper/lua-filters/multiple-bibliographies/expected.native
@@ -0,0 +1,14 @@
+Pandoc (Meta {unMeta = fromList [("bibliography_recommended_reading",MetaInlines [Str "secondary.bib"]),("bibliography_sources",MetaInlines [Str "primary.bib"]),("nocite",MetaInlines [Cite [Citation {citationId = "Knu86", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@Knu86"],Str ",",Space,Cite [Citation {citationId = "Bae", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@Bae"]]),("title",MetaInlines [Str "Multiple",Space,Str "Bibliographies",Space,Str "Demo"])]})
+[Para [Cite [Citation {citationId = "Nie72", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 1}] [Str "Nietzsche",Space,Str "(1872)"],Str ",",Space,Cite [Citation {citationId = "Bel", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 2}] [Str "Bellori",Space,Str "(1672)"]]
+,Header 1 ("references",[],[]) [Str "References"]
+,Div ("refs_sources",[],[])
+ [Div ("ref-Bel",[],[])
+ [Para [Str "Bellori.",Space,Str "1672.",Space,Emph [Str "Le",Space,Str "Vite",Space,Str "de\8217",Space,Str "Pittori,",Space,Str "Scultori",Space,Str "E",Space,Str "Architetti",Space,Str "Moderni"],Str "."]]
+ ,Div ("ref-Nie72",[],[])
+ [Para [Str "Nietzsche,",Space,Str "Friedrich.",Space,Str "1872.",Space,Emph [Str "Die",Space,Str "Geburt",Space,Str "Der",Space,Str "Trag\246die",Space,Str "Aus",Space,Str "Dem",Space,Str "Geiste",Space,Str "Der",Space,Str "Musik"],Str "."]]]
+,Header 1 ("recommended-reading",[],[]) [Str "Recommended",Space,Str "Reading"]
+,Div ("refs_recommended_reading",[],[])
+ [Div ("ref-Bae",[],[])
+ [Para [Str "B\228tschmann,",Space,Str "Oskar.",Space,Str "1985.",Space,Emph [Str "Pygmalion",Space,Str "Als",Space,Str "Betrachter"],Str "."]]
+ ,Div ("ref-Knu86",[],[])
+ [Para [Str "Knuth,",Space,Str "Donald",Space,Str "E.",Space,Str "1986.",Space,Emph [Str "The",Space,Str "Texbook"],Str "."]]]]
diff --git a/paper/lua-filters/multiple-bibliographies/multiple-bibliographies.lua b/paper/lua-filters/multiple-bibliographies/multiple-bibliographies.lua
new file mode 100644
index 0000000..934e6ea
--- /dev/null
+++ b/paper/lua-filters/multiple-bibliographies/multiple-bibliographies.lua
@@ -0,0 +1,110 @@
+--[[
+multiple-bibliographies – create multiple bibliographies
+
+Copyright © 2018-2019 Albert Krewinkel
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+]]
+local List = require 'pandoc.List'
+local utils = require 'pandoc.utils'
+local stringify = utils.stringify
+local run_json_filter = utils.run_json_filter
+
+--- Collection of all cites in the document
+local all_cites = {}
+--- Document meta value
+local doc_meta = pandoc.Meta{}
+
+--- Div used by pandoc-citeproc to insert the bibliography.
+local refs_div = pandoc.Div({}, pandoc.Attr('refs'))
+
+--- Whether the installed pandoc-citeproc reports a version of at least
+-- 0.14.5; `create_topic_bibliography` only passes `-q` when this is true.
+-- Evaluated once, when the filter is loaded.
+local supports_quiet_flag = (function ()
+  local version = pandoc.pipe('pandoc-citeproc', {'--version'}, '')
+  local major, minor, patch =
+    version:match 'pandoc%-citeproc (%d+)%.(%d+)%.?(%d*)'
+  major, minor = tonumber(major), tonumber(minor)
+  if not major or not minor then
+    -- Unrecognised version banner: be conservative and do not pass `-q`.
+    return false
+  end
+  -- The patch capture is empty for two-component versions such as "0.14";
+  -- `tonumber('')` is nil, so fall back to 0 instead of comparing nil.
+  patch = tonumber(patch) or 0
+  return major > 0
+    or minor > 14
+    or (minor == 14 and patch >= 5)
+end)()
+
+--- Resolve the citations of the whole document. All `bibliography_*`
+-- metadata values are temporarily merged into `bibliography`, then
+-- pandoc-citeproc is run once on the full document.
+local function resolve_doc_citations (doc)
+  local meta = doc.meta
+  local original_bibliography = meta.bibliography
+
+  -- combine all bibliographies
+  local combined = pandoc.MetaList{original_bibliography}
+  meta.bibliography = combined
+  for key, bib in pairs(meta) do
+    if key:match('^bibliography_') then
+      combined[#combined + 1] = bib
+    end
+  end
+
+  -- add dummy div to catch the created bibliography
+  doc.blocks[#doc.blocks + 1] = refs_div
+  -- resolve all citations
+  local resolved = run_json_filter(doc, 'pandoc-citeproc')
+  -- remove catch-all bibliography
+  table.remove(resolved.blocks)
+  -- restore bibliography to original value
+  resolved.meta.bibliography = original_bibliography
+  return resolved
+end
+
+--- Build a fresh meta object that carries only the fields relevant for
+--- pandoc-citeproc, with `bibliography` replaced by the given value.
+local function meta_for_pandoc_citeproc (bibliography)
+  -- We could just indiscriminately copy all meta fields, but let's be
+  -- explicit about what's important.
+  local citeproc_fields = {
+    'bibliography', 'references', 'csl', 'citation-style',
+    'link-citations', 'citation-abbreviations', 'lang',
+    'suppress-bibliography', 'reference-section-title',
+    'notes-after-punctuation', 'nocite'
+  }
+  local result = pandoc.Meta{}
+  for i = 1, #citeproc_fields do
+    local key = citeproc_fields[i]
+    result[key] = doc_meta[key]
+  end
+  -- The requested bibliography overrides whatever was copied above.
+  result.bibliography = bibliography
+  return result
+end
+
+--- Fill a topic bibliography div. Only divs whose ID is "refs" followed by
+-- nothing but underscores and alphanumeric characters are considered, and
+-- only when a matching `bibliography<suffix>` metadata field exists. Any
+-- other div is left unchanged (nil is returned).
+local function create_topic_bibliography (div)
+  local suffix = div.identifier:match('^refs([_%w]*)$')
+  if not suffix then
+    return nil
+  end
+  local bibfile = doc_meta['bibliography' .. suffix]
+  if not bibfile then
+    return nil
+  end
+
+  -- Temporary document: one paragraph holding every citation, followed by
+  -- the div that pandoc-citeproc fills with the bibliography.
+  local tmp_doc = pandoc.Pandoc(
+    {pandoc.Para(all_cites), refs_div},
+    meta_for_pandoc_citeproc(bibfile)
+  )
+  local filter_args = {FORMAT}
+  if supports_quiet_flag then
+    filter_args[2] = '-q'
+  end
+  local res = run_json_filter(tmp_doc, 'pandoc-citeproc', filter_args)
+  -- First block of the result contains the dummy paragraph, second is
+  -- the refs Div filled by pandoc-citeproc.
+  div.content = res.blocks[2].content
+  return div
+end
+
+--- Remember a citation so the topic bibliographies can be built from it.
+local function collect_cite (cite)
+  table.insert(all_cites, cite)
+end
+
+--- Remember the document's Meta value for the later filter passes.
+local function remember_meta (meta)
+  doc_meta = meta
+end
+
+return {
+  -- Collect all citations and the doc's Meta value for other filters.
+  {Cite = collect_cite, Meta = remember_meta},
+  {Pandoc = resolve_doc_citations},
+  {Div = create_topic_bibliography},
+}
diff --git a/paper/lua-filters/multiple-bibliographies/primary.bib b/paper/lua-filters/multiple-bibliographies/primary.bib
new file mode 100644
index 0000000..8c9decc
--- /dev/null
+++ b/paper/lua-filters/multiple-bibliographies/primary.bib
@@ -0,0 +1,10 @@
+@book{Bel,
+ author = {Bellori},
+ title = {Le vite de' pittori, scultori e architetti moderni},
+ year = {1672},
+}
+@book{Nie72,
+ author = {Nietzsche, Friedrich},
+ title = {Die Geburt der Tragödie aus dem Geiste der Musik},
+ year = {1872},
+}
diff --git a/paper/lua-filters/multiple-bibliographies/sample.md b/paper/lua-filters/multiple-bibliographies/sample.md
new file mode 100644
index 0000000..d4ab2eb
--- /dev/null
+++ b/paper/lua-filters/multiple-bibliographies/sample.md
@@ -0,0 +1,17 @@
+---
+title: Multiple Bibliographies Demo
+bibliography_sources: primary.bib
+bibliography_recommended_reading: secondary.bib
+nocite: '@Knu86, @Bae'
+---
+@Nie72, @Bel
+
+# References
+
+::: {#refs_sources}
+:::
+
+# Recommended Reading
+
+::: {#refs_recommended_reading}
+:::
diff --git a/paper/lua-filters/multiple-bibliographies/secondary.bib b/paper/lua-filters/multiple-bibliographies/secondary.bib
new file mode 100644
index 0000000..45e6306
--- /dev/null
+++ b/paper/lua-filters/multiple-bibliographies/secondary.bib
@@ -0,0 +1,10 @@
+@book{Bae,
+ author = {Bätschmann, Oskar},
+ title = {Pygmalion als Betrachter},
+ year = {1985}
+}
+@book{Knu86,
+ author = {Knuth, Donald E.},
+ year = {1986},
+ title = {The \TeX book},
+}
diff --git a/paper/lua-filters/pagebreak/Makefile b/paper/lua-filters/pagebreak/Makefile
new file mode 100644
index 0000000..c8786b0
--- /dev/null
+++ b/paper/lua-filters/pagebreak/Makefile
@@ -0,0 +1,4 @@
+test:
+ @pandoc --lua-filter=pagebreak.lua sample.md | diff -u expected.html -
+
+.PHONY: test
diff --git a/paper/lua-filters/pagebreak/README.md b/paper/lua-filters/pagebreak/README.md
new file mode 100644
index 0000000..b9a5e04
--- /dev/null
+++ b/paper/lua-filters/pagebreak/README.md
@@ -0,0 +1,68 @@
+pagebreak
+=========
+
+This filter converts paragraphs containing only the LaTeX
+`\newpage` or `\pagebreak` command into appropriate pagebreak
+markup for other formats. The command must be the only contents
+of a raw TeX block in order to be recognized. I.e., for Markdown
+the following is sufficient:
+
+ Paragraph before page break
+
+ \newpage
+
+ Paragraph after page break
+
+
+Usage
+-----
+
+Fully supported output formats are:
+
+- Docx,
+- LaTeX,
+- HTML, and
+- EPUB.
+
+ODT is supported, but requires additional settings in the
+reference document (see below).
+
+In all other formats, the page break is represented using the
+form feed character.
+
+
+### Usage with HTML
+If you want to use an HTML class rather than an inline style set
+the value of the metadata key `newpage_html_class` or the
+environment variable `PANDOC_NEWPAGE_HTML_CLASS` (the metadata
+'wins' if both are defined) to the name of the class and use CSS
+like this:
+
+ @media all {
+ .page-break { display: none; }
+ }
+ @media print {
+ .page-break { display: block; page-break-after: always; }
+ }
+
+
+### Usage with ODT
+
+To use with ODT you must create a reference ODT with a named
+paragraph style called `Pagebreak` (or whatever you set the
+metadata field `newpage_odt_style` or the environment variable
+`PANDOC_NEWPAGE_ODT_STYLE` to) and define it as having no extra
+space before or after but set it to have a pagebreak after it
+<https://help.libreoffice.org/Writer/Text_Flow>.
+
+(There will be an empty dummy paragraph, which means some extra
+vertical space, and you probably want that space to go at the
+bottom of the page before the break rather than at the top of
+the page after the break!)
+
+
+Alternative syntax
+------------------
+
+The form feed character as the only element in a paragraph is
+supported as an alternative to the LaTeX syntax described above.
diff --git a/paper/lua-filters/pagebreak/expected.html b/paper/lua-filters/pagebreak/expected.html
new file mode 100644
index 0000000..7998826
--- /dev/null
+++ b/paper/lua-filters/pagebreak/expected.html
@@ -0,0 +1,6 @@
+<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec hendrerit tempor tellus. Donec pretium posuere tellus.</p>
+<div style="page-break-after: always;"></div>
+<p>Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nulla posuere. Donec vitae dolor.</p>
+<div style="page-break-after: always;"></div>
+<p>Pellentesque dapibus suscipit ligula. Donec posuere augue in quam. Suspendisse potenti.</p>
+<p>Final paragraph without a preceding pagebreak.</p>
diff --git a/paper/lua-filters/pagebreak/pagebreak.lua b/paper/lua-filters/pagebreak/pagebreak.lua
new file mode 100644
index 0000000..4c00698
--- /dev/null
+++ b/paper/lua-filters/pagebreak/pagebreak.lua
@@ -0,0 +1,97 @@
+--[[
+pagebreak – convert raw LaTeX page breaks to other formats
+
+Copyright © 2017-2019 Benct Philip Jonsson, Albert Krewinkel
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+]]
+local stringify_orig = (require 'pandoc.utils').stringify
+
+--- Convert a value to a string. Strings are returned unchanged; every
+-- other value is passed to pandoc's `stringify`.
+local function stringify(x)
+  if type(x) == 'string' then
+    return x
+  end
+  local str = stringify_orig(x)
+  return str
+end
+
+--- Raw page break markup, keyed by the kind of output it is meant for.
+-- These are the defaults; the Meta filter (`pagebreaks_from_config`) may
+-- replace the `html` entry and add an `odt` entry, based on metadata
+-- fields or environment variables.
+local pagebreak = {
+ epub = '<p style="page-break-after: always;"> </p>',
+ html = '<div style="page-break-after: always;"></div>',
+ latex = '\\newpage{}',
+ ooxml = '<w:p><w:r><w:br w:type="page"/></w:r></w:p>',
+}
+
+--- Adjust the page break markup according to the user's configuration.
+-- For each setting the metadata field is consulted first; the environment
+-- variable is only read when the metadata field is unset. Empty values are
+-- ignored.
+-- @param meta the document's metadata
+local function pagebreaks_from_config (meta)
+  local html_class = meta.newpage_html_class
+  if html_class then
+    html_class = stringify(html_class)
+  else
+    html_class = os.getenv 'PANDOC_NEWPAGE_HTML_CLASS'
+  end
+  if html_class and html_class ~= '' then
+    pagebreak.html = string.format('<div class="%s"></div>', html_class)
+  end
+
+  local odt_style = meta.newpage_odt_style
+  if odt_style then
+    odt_style = stringify(odt_style)
+  else
+    odt_style = os.getenv 'PANDOC_NEWPAGE_ODT_STYLE'
+  end
+  if odt_style and odt_style ~= '' then
+    pagebreak.odt = string.format('<text:p text:style-name="%s"/>', odt_style)
+  end
+end
+
+--- Return a block element causing a page break in the given format.
+-- Docx, ODT, LaTeX, HTML and EPUB get format-specific raw markup; every
+-- other format gets a paragraph containing a form feed character.
+-- For ODT an empty paragraph with a dedicated paragraph style is emitted.
+-- The style name is the one configured via `newpage_odt_style` /
+-- `PANDOC_NEWPAGE_ODT_STYLE`, or `Pagebreak` if nothing was configured.
+-- @param format name of the output format (usually the global FORMAT)
+-- @return a RawBlock or Para element
+local function newpage(format)
+  if format == 'docx' then
+    return pandoc.RawBlock('openxml', pagebreak.ooxml)
+  elseif format:match 'odt' then
+    -- `pagebreak.odt` only exists after a style was configured, so fall
+    -- back to the documented default style name here.
+    local odt = pagebreak.odt or '<text:p text:style-name="Pagebreak"/>'
+    return pandoc.RawBlock('opendocument', odt)
+  elseif format:match 'latex' then
+    return pandoc.RawBlock('tex', pagebreak.latex)
+  elseif format:match 'html.*' then
+    return pandoc.RawBlock('html', pagebreak.html)
+  elseif format:match 'epub' then
+    return pandoc.RawBlock('html', pagebreak.epub)
+  else
+    -- fall back to insert a form feed character
+    return pandoc.Para{pandoc.Str '\f'}
+  end
+end
+
+--- Check whether a string consists solely of a `\newpage` or `\pagebreak`
+-- command, optionally followed by `{` and/or `}`.
+-- @param command raw TeX text
+-- @return the matched text if the string is such a command, nil otherwise
+local function is_newpage_command(command)
+  local patterns = {'^\\newpage%{?%}?$', '^\\pagebreak%{?%}?$'}
+  local matched
+  for _, pattern in ipairs(patterns) do
+    matched = command:match(pattern)
+    if matched then
+      return matched
+    end
+  end
+  return matched
+end
+
+-- Filter function called on each RawBlock element. A raw TeX block which
+-- contains only \newpage or \pagebreak is replaced by the page break
+-- marker of the output format; all other blocks are left unchanged.
+function RawBlock (el)
+  -- FORMAT is set by pandoc to the targeted output format. Nothing needs
+  -- to be done when the output is TeX itself.
+  local tex_output = FORMAT:match 'tex$'
+  local is_pagebreak = not tex_output
+    and el.format:match 'tex'
+    and is_newpage_command(el.text)
+  if is_pagebreak then
+    return newpage(FORMAT)
+  end
+  -- returning nil keeps the block as it is
+  return nil
+end
+
+-- Turn paragraphs which contain nothing but a single form feed character
+-- into page breaks. All other paragraphs are left unchanged.
+function Para (el)
+  local inlines = el.content
+  if #inlines ~= 1 or inlines[1].text ~= '\f' then
+    return
+  end
+  return newpage(FORMAT)
+end
+
+-- The configuration is read from the metadata first; the second filter
+-- then replaces the page break markers.
+return {
+  {
+    Meta = pagebreaks_from_config,
+  },
+  {
+    RawBlock = RawBlock,
+    Para = Para,
+  },
+}
diff --git a/paper/lua-filters/pagebreak/sample.md b/paper/lua-filters/pagebreak/sample.md
new file mode 100644
index 0000000..dc49ce1
--- /dev/null
+++ b/paper/lua-filters/pagebreak/sample.md
@@ -0,0 +1,14 @@
+Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec
+hendrerit tempor tellus. Donec pretium posuere tellus.
+
+\newpage
+
+Cum sociis natoque penatibus et magnis dis parturient montes,
+nascetur ridiculus mus. Nulla posuere. Donec vitae dolor.
+
+
+
+Pellentesque dapibus suscipit ligula. Donec posuere augue in
+quam. Suspendisse potenti.
+
+Final paragraph without a preceding pagebreak.
diff --git a/paper/lua-filters/plantuml/Makefile b/paper/lua-filters/plantuml/Makefile
new file mode 100644
index 0000000..feff6f4
--- /dev/null
+++ b/paper/lua-filters/plantuml/Makefile
@@ -0,0 +1,3 @@
+test:
+ @pandoc --self-contained --lua-filter=plantuml.lua --metadata title=README readme.md -o output.html
+
diff --git a/paper/lua-filters/plantuml/output.html b/paper/lua-filters/plantuml/output.html
new file mode 100644
index 0000000..67c4b58
--- /dev/null
+++ b/paper/lua-filters/plantuml/output.html
@@ -0,0 +1,45 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml" lang xml:lang>
+<head>
+ <meta charset="utf-8" />
+ <meta name="generator" content="pandoc" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
+ <title>readme</title>
+ <style type="text/css">
+ code{white-space: pre-wrap;}
+ span.smallcaps{font-variant: small-caps;}
+ span.underline{text-decoration: underline;}
+ div.column{display: inline-block; vertical-align: top; width: 50%;}
+ </style>
+ <!--[if lt IE 9]>
+ <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
+ <![endif]-->
+</head>
+<body>
+<h1 id="plantuml-pandoc-filter">PlantUML Pandoc filter</h1>
+<p>PlantUML Pandoc filter to process code blocks with class “plantuml” containing PlantUML notation into images.</p>
+<ul>
+<li>For textual output formats, use –extract-media=DIR</li>
+<li>For HTML formats, you may alternatively use –self-contained</li>
+</ul>
+<h2 id="example-in-markdown-file">Example in markdown-file</h2>
+<p><img src="" alt="PlantUML Diagramm" /></p>
+<h2 id="run-pandoc">Run pandoc</h2>
+<pre><code>pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm</code></pre>
+<h2 id="prerequisites">Prerequisites</h2>
+<ul>
+<li>download PlantUML from http://plantuml.com (needs JAVA)</li>
+<li>3 ways to set the environment
+<ol type="1">
+<li>plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder</li>
+<li>set a Environment Variable PLANTUML with the path to plantuml.jar
+<ul>
+<li>Windows - with powershell: Set-Item env:PLANTUML “c:.jar”</li>
+</ul></li>
+<li>change path to plantuml.jar in plantuml.lua</li>
+</ol></li>
+</ul>
+<p>This script based on the example “Converting ABC code to music notation” from https://pandoc.org/lua-filters.html</p>
+<p>This script was only tested with markdown to html on a windows environment!</p>
+</body>
+</html>
diff --git a/paper/lua-filters/plantuml/plantuml.lua b/paper/lua-filters/plantuml/plantuml.lua
new file mode 100644
index 0000000..7903e6a
--- /dev/null
+++ b/paper/lua-filters/plantuml/plantuml.lua
@@ -0,0 +1,56 @@
+--[[
+# PlantUML Pandoc filter
+PlantUML Pandoc filter to process code blocks with class "plantuml" containing PlantUML notation into images.
+
+* For textual output formats, use --extract-media=DIR
+* For HTML formats, you may alternatively use --self-contained
+
+## Example in markdown-file
+```plantuml
+@startuml
+Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response
+Alice -> Bob: Another authentication Request Alice <-- Bob: another authentication Response @enduml
+```
+## Run pandoc
+```
+pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm
+```
+
+## Prerequisites
+* download PlantUML from http://plantuml.com (needs JAVA)
+* 3 ways to set the environment
+ 1. plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder
+ 2. set a Environment Variable PLANTUML with the path to plantuml.jar
+ * Windows - with powershell: Set-Item env:PLANTUML "c:\bin\plantuml.jar"
+ 3. change path to plantuml.jar in plantuml.lua
+
+This script based on the example "Converting ABC code to music notation" from https://pandoc.org/lua-filters.html
+**This script was only tested with markdown to html on a windows environment!**
+]]
+
+-- Path to PlantUML.jar: taken from the environment variable PLANTUML if it
+-- is set, otherwise "plantuml.jar" without a directory part.
+-- If you use option 3, change the path to plantuml.jar like this:
+-- local plantumlPath = os.getenv("PLANTUML") or "c:\\bin\\plantuml.jar"
+local plantumlPath = os.getenv("PLANTUML") or "plantuml.jar"
+
+-- Output image type and the matching MIME type.
+-- SVG has a much better quality; to use PNG instead:
+-- local filetype = "png"
+-- local mimetype = "image/png"
+local filetype = "svg"
+local mimetype = "image/svg+xml"
+
+-- Call plantuml.jar with some parameters (see the PlantUML help): the
+-- PlantUML source is piped to `java -jar <plantumlPath>` and the program's
+-- output is returned.
+local function plantuml(puml, filetype, plantumlPath)
+  local java_args = {
+    "-jar", plantumlPath,
+    "-t" .. filetype,
+    "-pipe",
+    "-charset", "UTF8",
+  }
+  local image_data = pandoc.pipe("java", java_args, puml)
+  return image_data
+end
+
+-- Replace code blocks whose first class is "plantuml" with an image of the
+-- rendered diagram. The image is stored in the media bag under a name
+-- derived from the SHA1 hash of its contents.
+function CodeBlock(block)
+  if block.classes[1] ~= "plantuml" then
+    return
+  end
+  local img = plantuml(block.text, filetype, plantumlPath)
+  local fname = pandoc.sha1(img) .. "." .. filetype
+  pandoc.mediabag.insert(fname, mimetype, img)
+  local caption = {pandoc.Str("PlantUML Diagramm")}
+  return pandoc.Para{ pandoc.Image(caption, fname) }
+end
diff --git a/paper/lua-filters/plantuml/readme.md b/paper/lua-filters/plantuml/readme.md
new file mode 100644
index 0000000..de5ba74
--- /dev/null
+++ b/paper/lua-filters/plantuml/readme.md
@@ -0,0 +1,30 @@
+# PlantUML Pandoc filter
+PlantUML Pandoc filter to process code blocks with class "plantuml" containing PlantUML notation into images.
+
+* For textual output formats, use --extract-media=DIR
+* For HTML formats, you may alternatively use --self-contained
+
+## Example in markdown-file
+```plantuml
+@startuml
+Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response
+Alice -> Bob: Another authentication Request Alice <-- Bob: another authentication Response
+@enduml
+```
+## Run pandoc
+```
+pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm
+```
+
+## Prerequisites
+* download PlantUML from http://plantuml.com (needs JAVA)
+* 3 ways to set the environment
+ 1. plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder
+ 2. set a Environment Variable PLANTUML with the path to plantuml.jar
+ * Windows - with powershell: Set-Item env:PLANTUML "c:\bin\plantuml.jar"
+ 3. change path to plantuml.jar in plantuml.lua
+
+
+This script based on the example "Converting ABC code to music notation" from https://pandoc.org/lua-filters.html
+
+This script was only tested with markdown to html on a windows environment!
diff --git a/paper/lua-filters/runtests.sh b/paper/lua-filters/runtests.sh
new file mode 100755
index 0000000..bbcbbba
--- /dev/null
+++ b/paper/lua-filters/runtests.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Run `make test` in every filter directory and print PASS/FAIL for each.
+# Arguments: the list of filter directories.
+# Exit status: 1 if at least one filter failed, 0 otherwise.
+
+# Left unquoted on purpose: this keeps the original word splitting of the
+# argument list.
+FILTERS=$*
+# Plain assignment instead of `let err=0`, which returns a non-zero status
+# because the expression evaluates to 0.
+err=0
+for d in $FILTERS ; do
+  # Quote the directory so it is passed to make as a single, unexpanded word.
+  if make --no-print-directory -C "$d" test; then
+    echo "PASS $d"
+  else
+    echo "FAIL $d"
+    err=1
+  fi
+done
+exit $err
+
diff --git a/paper/lua-filters/scholarly-metadata/Makefile b/paper/lua-filters/scholarly-metadata/Makefile
new file mode 100644
index 0000000..fadf7dd
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/Makefile
@@ -0,0 +1,8 @@
+test: sample.md scholarly-metadata.lua
+ @pandoc --lua-filter=scholarly-metadata.lua --standalone --to=markdown $< \
+ | diff -u expected.md -
+
+expected.md: sample.md scholarly-metadata.lua
+ pandoc --lua-filter=scholarly-metadata.lua --standalone --output $@ $<
+
+.PHONY: test
diff --git a/paper/lua-filters/scholarly-metadata/README.md b/paper/lua-filters/scholarly-metadata/README.md
new file mode 100644
index 0000000..7fb1d2a
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/README.md
@@ -0,0 +1,91 @@
+# scholarly-metadata
+
+The filter turns metadata entries for authors and their
+affiliations into a canonical form. This allows users to
+conveniently declare document authors and their affiliations,
+while making it possible to rely on default object metadata
+structures when using the data in other filters or when accessing
+the data from custom templates.
+
+
+## Canonical format for authors and affiliations
+
+Authors and affiliations entries are treated as *named objects*.
+All named objects will have an ID and a name, i.e. they are
+metadata objects with *at least* those two keys:
+
+ - id: namedObjectExample
+ name: Example for a named object.
+
+The filter converts the *author* and *institute* metadata fields
+into lists of named objects.
+
+E.g., the following YAML data
+
+ author:
+ - Jane Doe:
+ email: 'jane.doe@example.edu'
+ - John Q. Doe
+
+
+will be transformed into
+
+ author:
+ - email: 'jane.doe\@example.edu'
+ id: Jane Doe
+ name: Jane Doe
+ - id: 'John Q. Doe'
+ name: 'John Q. Doe'
+
+Internally, `id` will be a simple string, while `name` is of type
+`MetaInlines`.
+
+
+## Referencing affiliations
+
+Author affiliations are a common feature of scholarly
+publications. It is possible to add institutes to each author
+object. Three methods of doing this are supported.
+
+1. **Referencing institutes by list index**: affiliations can be
+ listed in the *institute* metadata field and then referenced
+ by using the numerical index:
+
+ institute:
+ - Acme Corporation
+ - Federation of Planets
+ author:
+ - Jane Doe:
+ institute: [1, 2]
+ - John Q. Doe:
+ institute: [2]
+
+ This is also the canonical representation used to keep track
+ of author affiliations.
+
+2. **Referencing institutes by ID**: using numerical indices is
+ error prone and difficult to maintain when adding or removing
+ authors or affiliations. It is hence possible to use IDs
+ instead:
+
+ institute:
+ - acme: Acme Corporation
+ - federation: Federation of Planets
+ author:
+ - Jane Doe:
+ institute: [acme, federation]
+ - John Q. Doe:
+ institute: [federation]
+
+3. **Adding institute as an attribute**: sometimes it might be
+ more convenient to give an affiliation directly in the
+ author's YAML object. Those objects can still be referenced
+ by ID from authors listed below such entry.
+
+ author:
+ - Jane Doe:
+ institute:
+ - Acme Corporation
+ - federation: Federation of Planets
+ - John Q. Doe:
+ institute: [federation]
diff --git a/paper/lua-filters/scholarly-metadata/expected.md b/paper/lua-filters/scholarly-metadata/expected.md
new file mode 100644
index 0000000..353436f
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/expected.md
@@ -0,0 +1,41 @@
+---
+author:
+- id: Jane Doe
+ institute:
+ - 1
+ - 2
+ name: Jane Doe
+- id: 'John Q. Doe'
+ institute:
+ - 1
+ name: 'John Q. Doe'
+- id: Peder Ås
+ institute:
+ - 1
+ name: Peder Ås
+- id: Juan Pérez
+ institute:
+ - 3
+ name: Juan Pérez
+- id: Max Mustermann
+ name: Max Mustermann
+institute:
+- address: '23 Science Street, Eureka, Mississippi, USA'
+ id: fosg
+ name: Formatting Open Science Group
+- id: fop
+ name: Federation of Planets
+- id: Acme Corporation
+ name: Acme Corporation
+---
+
+Abstract
+========
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim
+veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea
+commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
+velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint
+occaecat cupidatat non proident, sunt in culpa qui officia deserunt
+mollit anim id est laborum.
diff --git a/paper/lua-filters/scholarly-metadata/sample.md b/paper/lua-filters/scholarly-metadata/sample.md
new file mode 100644
index 0000000..855272e
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/sample.md
@@ -0,0 +1,30 @@
+---
+author:
+ - Jane Doe:
+ institute:
+ - fosg
+ - fop
+ - John Q. Doe:
+ institute: fosg
+ - Peder Ås:
+ institute: fosg
+ - Juan Pérez:
+ institute:
+ - name: Acme Corporation
+ - Max Mustermann
+institute:
+ - fosg:
+ name: Formatting Open Science Group
+ address: 23 Science Street, Eureka, Mississippi, USA
+ - fop: Federation of Planets
+...
+
+# Abstract
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
+eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut
+enim ad minim veniam, quis nostrud exercitation ullamco laboris
+nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in
+reprehenderit in voluptate velit esse cillum dolore eu fugiat
+nulla pariatur. Excepteur sint occaecat cupidatat non proident,
+sunt in culpa qui officia deserunt mollit anim id est laborum.
diff --git a/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua b/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua
new file mode 100644
index 0000000..3ec529c
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua
@@ -0,0 +1,180 @@
+--[[
+ScholarlyMeta – normalize author/affiliation meta variables
+
+Copyright (c) 2017-2019 Albert Krewinkel, Robert Winkler
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+]]
+local List = require 'pandoc.List'
+
+-- Split a string at commas. Whitespace at the start and end of each part
+-- is removed.
+-- @returns List of strings
+local function comma_separated_values(str)
+  local fields = List:new{}
+  for field in str:gmatch('([^,]*)') do
+    -- gsub also returns a match count; the assignment keeps the string only.
+    local trimmed = field:gsub('^%s*', ''):gsub('%s*$', '')
+    fields[#fields + 1] = trimmed
+  end
+  return fields
+end
+
+--- Ensure the return value is a list.
+-- A MetaList is converted to a List. MetaInlines which contain at least one
+-- comma are split into a list of strings. Everything else (non-table
+-- values, MetaBlocks, MetaMap, MetaInlines without comma) becomes a
+-- singleton list, or an empty list if the value is nil.
+local function ensure_list (val)
+  if type(val) == 'table' then
+    if val.t == 'MetaList' then
+      return List:new(val)
+    end
+    if val.t == 'MetaInlines' then
+      -- check if this is really a comma-separated list
+      local csv = comma_separated_values(pandoc.utils.stringify(val))
+      if #csv >= 2 then
+        return csv
+      end
+    end
+  end
+  return List:new{val}
+end
+
+--- Build a predicate for objects with a given ID.
+-- @param id the ID to look for
+-- @returns function which takes an object and tells whether the object's
+--   `id` field equals the given ID
+local function has_id (id)
+  local function matches_id (candidate)
+    return candidate.id == id
+  end
+  return matches_id
+end
+
+--- Copy all key-value pairs of the first table into the second iff there is no
+-- such key yet in the second table. A key whose value is `false` counts as
+-- present and is not overwritten.
+-- @param a table to copy entries from
+-- @param b table to copy entries to (modified in place)
+-- @returns the second argument
+function add_missing_entries(a, b)
+  for k, v in pairs(a) do
+    -- Compare against nil explicitly: `b[k] or v` would replace an
+    -- existing `false` value.
+    if b[k] == nil then
+      b[k] = v
+    end
+  end
+  return b
+end
+
+--- Create an object with a name and an ID.
+-- Non-table values and MetaInlines are used as the name themselves. If the
+-- object has a `name` field, a copy of the object is returned. If the
+-- object is a dictionary with just one entry, the key is taken as the ID
+-- and the value holds either the name or the object's attributes. Any other
+-- object (e.g. an empty table or a nameless table with several entries)
+-- raises an error.
+-- @param obj the raw metadata value
+-- @returns table with at least the fields `id` and `name`
+function to_named_object (obj)
+  local named = {}
+  if type(obj) ~= 'table' then
+    -- if the object isn't a table, just use its value as a name.
+    named.name = pandoc.MetaInlines{pandoc.Str(tostring(obj))}
+    named.id = tostring(obj)
+  elseif obj.t == 'MetaInlines' then
+    -- Treat inlines as the name
+    named.name = obj
+    named.id = pandoc.utils.stringify(obj)
+  elseif obj.name ~= nil then
+    -- object has name attribute → just create a copy of the object
+    add_missing_entries(obj, named)
+    named.id = pandoc.utils.stringify(named.id or named.name)
+  elseif next(obj) and next(obj, next(obj)) == nil then
+    -- the entry's key is taken as the name, the value contains the
+    -- attributes. Declared local so that they don't leak into the global
+    -- environment.
+    local key, attribs = next(obj)
+    if type(attribs) == "string" or attribs.t == 'MetaInlines' then
+      named.name = attribs
+    else
+      add_missing_entries(attribs, named)
+      named.name = named.name or pandoc.MetaInlines{pandoc.Str(tostring(key))}
+    end
+    named.id = named.id and pandoc.utils.stringify(named.id) or key
+  else
+    -- this is not a named object adhering to the usual conventions.
+    error('not a named object: ' .. tostring(obj))
+  end
+  return named
+end
+
+--- Resolve institute placeholders to full named objects.
+-- Each placeholder is tried, in this order, as a list index into
+-- `known_institutes`, as the ID of a known institute, and finally as the
+-- definition of a new institute.
+-- @param institute a single placeholder (string or number), a list of
+--   placeholders, or nil
+-- @param known_institutes List of named institute objects
+-- @returns List of named institute objects, one per placeholder
+local function resolve_institutes (institute, known_institutes)
+  local unresolved_institutes
+  if institute == nil then
+    unresolved_institutes = {}
+  elseif type(institute) == "string" or type(institute) == "number" then
+    unresolved_institutes = {institute}
+  else
+    unresolved_institutes = institute
+  end
+
+  local result = List:new{}
+  for i, inst in ipairs(unresolved_institutes) do
+    -- tonumber() returns nil for tables. A numeric placeholder which is
+    -- given as MetaInlines (e.g. the `1` in `institute: [1, 2]`) must
+    -- hence be converted to a string first to be usable as an index.
+    local index = tonumber(inst)
+    if index == nil and type(inst) == 'table' and inst.t == 'MetaInlines' then
+      index = tonumber(pandoc.utils.stringify(inst))
+    end
+    result[i] =
+      known_institutes[index] or
+      known_institutes:find_if(has_id(pandoc.utils.stringify(inst))) or
+      to_named_object(inst)
+  end
+  return result
+end
+
+--- Insert a named object into a list. If the list already has an object
+-- with the same ID, the properties which are only present in the new
+-- object are added to the existing item instead.
+-- @param list List of named objects (modified in place)
+-- @param namedObj the object to insert
+-- @returns the stored object and the length of the list after the operation
+function merge_on_id (list, namedObj)
+  local existing, pos = list:find_if(has_id(namedObj.id))
+  local merged
+  if existing then
+    merged = add_missing_entries(namedObj, existing)
+  else
+    merged = namedObj
+  end
+  -- overwrite the existing slot, or append if there was none
+  list[pos or (#list + 1)] = merged
+  return merged, #list
+end
+
+--- Concatenate all lists of a list into a single new List.
+-- @param lists list of lists
+-- @returns List containing the items of all given lists, in order
+local function flatten (lists)
+  local flat = List:new{}
+  for _, items in ipairs(lists) do
+    List.extend(flat, items)
+  end
+  return flat
+end
+
+--- Canonicalize authors and institutes.
+-- Both raw metadata values are turned into lists of named objects. The
+-- institutes of each author are resolved against the known institutes, and
+-- the institutes found in author objects are merged into the institute
+-- list. Finally, each author's institutes are replaced by their positions
+-- in that list, given as strings.
+-- @returns the list of authors and the list of institutes
+local function canonicalize(raw_author, raw_institute)
+  local institutes = ensure_list(raw_institute):map(to_named_object)
+  local authors = ensure_list(raw_author):map(to_named_object)
+
+  for _, author in ipairs(authors) do
+    local placeholders = ensure_list(author.institute)
+    author.institute = resolve_institutes(placeholders, institutes)
+  end
+
+  -- Merge institutes defined in author objects with those defined in the
+  -- top-level list.
+  local function institutes_of (author)
+    return author.institute
+  end
+  for _, inst in ipairs(flatten(authors:map(institutes_of))) do
+    merge_on_id(institutes, inst)
+  end
+
+  -- replace institutes with their indices
+  local function to_index (inst)
+    return tostring(select(2, institutes:find_if(has_id(inst.id))))
+  end
+  for _, author in ipairs(authors) do
+    author.institute = pandoc.MetaList(author.institute:map(to_index))
+  end
+
+  return authors, institutes
+end
+
+
+--- Replace the `author` and `institute` metadata fields with their
+-- canonical forms.
+local function normalize_meta (meta)
+  meta.author, meta.institute = canonicalize(meta.author, meta.institute)
+  return meta
+end
+
+return {
+  {Meta = normalize_meta}
+}
diff --git a/paper/lua-filters/scrlttr2/Makefile b/paper/lua-filters/scrlttr2/Makefile
new file mode 100644
index 0000000..acd4c7e
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/Makefile
@@ -0,0 +1,9 @@
+test: sample.md scrlttr2.lua sample.pdf
+ @pandoc --to=latex --lua-filter=scrlttr2.lua -s sample.md | \
+ sh expected-strings.sh
+ @rm sample.pdf
+
+%.pdf: %.md scrlttr2.lua
+ @pandoc --lua-filter=scrlttr2.lua --output=$@ $<
+
+.PHONY: test
diff --git a/paper/lua-filters/scrlttr2/README.md b/paper/lua-filters/scrlttr2/README.md
new file mode 100644
index 0000000..2a4e440
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/README.md
@@ -0,0 +1,60 @@
+# scrlttr2
+
+This filter allows writing DIN 5008 letters using the [scrlttr2]
+LaTeX document class from KOMA script. It converts metadata to
+the appropriate KOMA variables and allows using the default LaTeX
+template shipped with pandoc.
+
+[scrlttr2]: https://www.ctan.org/pkg/scrlttr2
+
+## Base variables
+
+ - `opening`: phrase used as an opening;
+ defaults to "Dear Sir/Madam,"
+ - `closing`: closing phrase; defaults to "Sincerely,"
+ - `address`: recipient's street address;
+ defaults to "no address given"
+ - `date`: the date of the letter; defaults to the current day.
+
+## KOMA Variables
+
+Currently, the following metadata fields are translated to KOMA
+variables:
+
+- `fromaddress` (alias: `return-address`): address of the sender
+- `fromfax` (alias: `fax`): sender's fax number
+- `fromemail` (alias: `email`): sender's email
+- `fromlogo` (alias: `logo`): image to be used as the sender's logo
+- `fromname` (alias: `author`): sender name
+- `fromphone` (alias: `phone`): sender's phone number
+- `fromurl` (alias: `url`): sender's URL
+- `customer`: customer number
+- `invoice`: invoice number
+- `myref`: sender's reference
+- `place`: sender's place used near date
+- `signature`: sender's signature
+- `subject`: letter's subject
+- `title`: letter title
+- `yourref`: addressee's reference
+
+The values of these variables are converted to MetaInlines. If a
+list is given, then each list item is used as a line, e.g.,
+
+ fromaddress:
+ - 35 Industry Way
+ - Springfield
+
+The `KOMAoptions` value is inferred from the given variables, but
+can be overwritten by specifying it explicitly.
+
+See the scrlttr2 documentation for details.
+
+## Intended Usage
+
+Many sender variables don't change, so it is sensible to provide
+default values for these. Authors using Markdown to draft letters
+can use a separate YAML file for this. E.g., if there is a file
+`default.yml` which contains the sender's details, then only the
+addressee's data must be specified.
+
+ pandoc --lua-filter=scrlttr2 letter.md default.yml -o out.pdf
diff --git a/paper/lua-filters/scrlttr2/expected-strings.sh b/paper/lua-filters/scrlttr2/expected-strings.sh
new file mode 100644
index 0000000..f2b54c2
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/expected-strings.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+latex_result="$(cat -)"
+
+assert_contains ()
+{
+ printf '%s' "$latex_result" | grep -qF "$1" -
+ if [ $? -ne 0 ]; then
+ printf 'Output does not contain `%s`.\n' "$1" >&2
+ exit 1
+ fi
+}
+
+# whether we are using the scrlttr2 class
+assert_contains '{scrlttr2}'
+
+assert_contains '\setkomavar{fromname}{Jane Doe}'
+assert_contains '\setkomavar{fromaddress}{35 Industry Way\\ Springfield}'
+assert_contains '\setkomavar{subject}{Letter of Reference}'
+assert_contains '\setkomavar{date}{February 29, 2020}'
+
+# Custom opening and default closing
+assert_contains '\opening{To Whom It May Concern,}'
+assert_contains '\closing{Sincerely,}'
+
+# Author and date
+assert_contains '\author{Jane Doe}'
+assert_contains '\date{February 29, 2020}'
+
+# Recipient address
+assert_contains '\begin{letter}{Fireworks Inc.\\ 123 Fake St\\ 58008 Springfield}'
diff --git a/paper/lua-filters/scrlttr2/sample.md b/paper/lua-filters/scrlttr2/sample.md
new file mode 100644
index 0000000..5f13554
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/sample.md
@@ -0,0 +1,16 @@
+---
+author: Jane Doe
+fromaddress:
+ - 35 Industry Way
+ - Springfield
+opening: To Whom It May Concern,
+subject: Letter of Reference
+date: February 29, 2020
+address:
+ - Fireworks Inc.
+ - 123 Fake St
+ - 58008 Springfield
+...
+
+I strongly recommend to embiggen your team by giving John Doe the position of a
+yak shaver. He has shown cromulent performance as a bike shedder.
diff --git a/paper/lua-filters/scrlttr2/scrlttr2.lua b/paper/lua-filters/scrlttr2/scrlttr2.lua
new file mode 100644
index 0000000..78f38fd
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/scrlttr2.lua
@@ -0,0 +1,161 @@
+-- Ensure unpack also works if pandoc was compiled against Lua 5.1
+local unpack = unpack or table.unpack
+local List = require 'pandoc.List'
+local stringify = (require 'pandoc.utils')['stringify']
+
+--- Fallback values for letter parts that the document metadata leaves unset.
+-- `opening` and `closing` become the arguments of `\opening` and `\closing`;
+-- `address` becomes the recipient argument of `\begin{letter}`.
+local default = {
+ opening = 'Dear Sir/Madam,',
+ closing = 'Sincerely,',
+ address = 'no address given'
+}
+
+--- Build the inlines for a LaTeX command call such as `\name{arg1}{arg2}`.
+-- @param command  command name without the leading backslash
+-- @param ...      arguments; a table is spliced in as a list of inlines,
+--                 anything else is converted with `tostring` and emitted as
+--                 raw LaTeX
+-- @return plain Lua table of inlines
+local function latex_command (command, ...)
+  local function raw (text)
+    return pandoc.RawInline('latex', text)
+  end
+
+  local inlines = {raw('\\' .. command)}
+  for _, argument in ipairs{...} do
+    -- every argument is wrapped in its own pair of braces
+    inlines[#inlines + 1] = raw('{')
+    if type(argument) == 'table' then
+      List.extend(inlines, argument)
+    else
+      inlines[#inlines + 1] = raw(tostring(argument))
+    end
+    inlines[#inlines + 1] = raw('}')
+  end
+  return inlines
+end
+
+--- Convert the given meta-value to a list of inlines.
+-- * nil, strings and booleans are passed through `tostring` and wrapped in a
+--   single `Str` (so a missing value is rendered literally as `nil`);
+-- * a `MetaInlines` value is returned unchanged;
+-- * any other table is treated as a list whose items are joined with a raw
+--   LaTeX line break (`\\ `);
+-- * everything else is flattened to text with `stringify`.
+-- @param val  meta value
+-- @return a `pandoc.MetaInlines` value
+local function ensure_inlines (val)
+  if not val or type(val) == 'string' or type(val) == 'boolean' then
+    return pandoc.MetaInlines{pandoc.Str(tostring(val))}
+  elseif type(val) == 'table' and val.t == 'MetaInlines' then
+    return val
+  elseif type(val) == 'table' then
+    local res = List:new{}
+    for i = 1, #val do
+      if i > 1 then
+        -- separator goes between items only, never after the last one
+        res[#res + 1] = pandoc.RawInline('latex', '\\\\ ')
+      end
+      local item = val[i]
+      local item_type = type(item)
+      if item_type == 'string' or item_type == 'number'
+         or item_type == 'boolean' then
+        -- scalar items have no inlines to splice in; extending the list with
+        -- them would silently drop their content, so wrap them in a Str
+        res[#res + 1] = pandoc.Str(tostring(item))
+      else
+        res:extend(item)
+      end
+    end
+    return pandoc.MetaInlines(res)
+  else
+    -- use the module-level `stringify` like the rest of this file
+    return pandoc.MetaInlines{pandoc.Str(stringify(val))}
+  end
+end
+
+--- Convert the given value to a MetaList.
+-- * nil/false yields a new, empty list;
+-- * a value tagged as `MetaList` is returned unchanged (callers extend it in
+--   place);
+-- * any other value is wrapped in a one-element list so that a single
+--   user-supplied entry is kept instead of being discarded.
+-- NOTE(review): list detection relies on the `t == 'MetaList'` tag; confirm
+-- against the pandoc versions this filter has to support.
+-- @param val  meta value or nil
+-- @return a `pandoc.MetaList`
+local function ensure_meta_list (val)
+  if not val then
+    return pandoc.MetaList{}
+  end
+  if type(val) == 'table' and val.t == 'MetaList' then
+    return val
+  end
+  return pandoc.MetaList{val}
+end
+
+--- Set supported variables as KOMA variables.
+-- Builds one `\setkomavar{name}{value}` command for every supported metadata
+-- field that is set, followed by a `\KOMAoptions{...}` command when options
+-- were given or when optional sender fields need to be switched on.
+-- @param meta  document metadata
+-- @return list in which each entry is the list of inlines of one command
+function setkomavar_commands (meta)
+  local set_vars = {}
+  local res = {}
+
+  --- Queue a `\setkomavar` call unless `value` is nil. If `enable` is true,
+  -- `name` is also remembered so it gets enabled through `\KOMAoptions`.
+  local function set_koma_var (name, value, enable)
+    if value ~= nil then
+      res[#res + 1] = latex_command('setkomavar', name, ensure_inlines(value))
+      if enable then
+        set_vars[#set_vars + 1] = name
+      end
+    end
+  end
+
+  set_koma_var('fromname', meta.fromname or meta.author)
+  set_koma_var('fromaddress', meta.fromaddress or meta['return-address'])
+  set_koma_var('subject', meta.subject)
+  set_koma_var('title', meta.title)
+  set_koma_var('signature', meta.signature)
+  set_koma_var('customer', meta.customer)
+  set_koma_var('yourref', meta.yourref)
+  set_koma_var('myref', meta.myref)
+  set_koma_var('invoice', meta.invoice)
+  set_koma_var('place', meta.place)
+
+  -- these variables are additionally enabled via \KOMAoptions{<name>=true}
+  set_koma_var('fromfax', meta.fromfax or meta.fax, true)
+  set_koma_var('fromurl', meta.fromurl or meta.url, true)
+  set_koma_var('fromlogo', meta.fromlogo or meta.logo, true)
+  set_koma_var('fromemail', meta.fromemail or meta.email, true)
+  set_koma_var('fromphone', meta.fromphone or meta.phone, true)
+
+  -- don't set date if date is set to `false`
+  if meta.date == nil or meta.date == true then
+    local date_format = meta['date-format']
+    if date_format then
+      -- format the current time with the user-supplied os.date pattern
+      set_koma_var('date', os.date(stringify(date_format)))
+    else
+      set_koma_var('date', pandoc.MetaInlines{pandoc.RawInline('latex', '\\today')})
+    end
+  elseif meta.date then
+    set_koma_var('date', meta.date)
+  end
+
+  if meta['KOMAoptions'] or #set_vars >= 1 then
+    -- explicit KOMAoptions take precedence over the generated `<name>=true`
+    -- list
+    res[#res + 1] = latex_command(
+      'KOMAoptions',
+      meta['KOMAoptions']
+        or table.concat(set_vars, '=true,') .. '=true'
+    )
+  end
+
+  return res
+end
+
+--- Bring Metadata in a form suitable for the scrlttr KOMA class.
+-- Appends the KOMA variable commands to `header-includes`, opens the letter
+-- (`\begin{letter}` and `\opening`) in `include-before`, and closes it
+-- (`\closing` and `\end{letter}`) in `include-after`.
+-- @param meta  document metadata; modified in place
+-- @return the modified metadata
+local function make_koma_metadata(meta)
+  --- Wrap a LaTeX command call in a MetaInlines value.
+  local function command_meta (...)
+    return pandoc.MetaInlines(latex_command(...))
+  end
+
+  local header_includes = ensure_meta_list(meta['header-includes'])
+  List.extend(header_includes, setkomavar_commands(meta))
+
+  local include_before = ensure_meta_list(meta['include-before'])
+  local recipient = ensure_inlines(meta.address or default.address)
+  List.extend(include_before, {
+    command_meta('begin', 'letter', recipient),
+    command_meta('opening', meta.opening or default.opening),
+  })
+
+  local include_after = ensure_meta_list(meta['include-after'])
+  List.extend(include_after, {
+    command_meta('closing', meta.closing or default.closing),
+    command_meta('end', 'letter'),
+  })
+
+  -- unset or reset some unwanted vars
+  -- NOTE(review): `data` looks like a typo for `date`; kept as is because
+  -- clearing `date` would change the generated output -- confirm the intent.
+  meta.data = nil -- set via komavar 'date'
+  meta.title = nil -- set via komavar 'subject'
+  meta.indent = true -- disable parskip
+  -- set documentclass to scrlttr2 if it's unset
+  meta.documentclass = meta.documentclass or pandoc.MetaString'scrlttr2'
+
+  meta['header-includes'] = header_includes
+  meta['include-before'] = include_before
+  meta['include-after'] = include_after
+
+  return meta
+end
+
+-- Filter definition: a single filter whose only handler rewrites the
+-- document metadata.
+return {
+ {Meta = make_koma_metadata}
+}
diff --git a/paper/lua-filters/section-refs/Makefile b/paper/lua-filters/section-refs/Makefile
new file mode 100644
index 0000000..dcceb70
--- /dev/null
+++ b/paper/lua-filters/section-refs/Makefile
@@ -0,0 +1,26 @@
+# Pandoc options per test target. The pattern rule at the bottom looks them
+# up by target name through $(OPTIONS_$@).
+
+# Filter run after pandoc-citeproc, no extra metadata.
+OPTIONS_test_default := -t native \
+ -M bibliography=bibliography.bib \
+ -F pandoc-citeproc \
+ --lua-filter=section-refs.lua
+
+# Filter run without pandoc-citeproc.
+OPTIONS_test_no_citeproc := -t native \
+ --lua-filter=section-refs.lua
+
+# Custom title for the generated reference sections.
+OPTIONS_test_refs_name := -t native \
+ -M bibliography=bibliography.bib \
+ -M reference-section-title="Works Cited" \
+ -F pandoc-citeproc \
+ --lua-filter=section-refs.lua
+
+# Custom title plus section-refs-level set to 2.
+OPTIONS_test_section_level := -t native \
+ -M bibliography=bibliography.bib \
+ -M reference-section-title="Works Cited" \
+ -M section-refs-level=2\
+ -F pandoc-citeproc \
+ --lua-filter=section-refs.lua
+
+.PHONY: test
+test: test_default test_no_citeproc test_refs_name test_section_level
+
+# Convert sample.md with the options of the requested target and diff the
+# result against the first prerequisite (expected_<name>.native).
+test_%: expected_%.native sample.md bibliography.bib
+ @pandoc sample.md $(OPTIONS_$@) | diff --strip-trailing-cr -u $< -
diff --git a/paper/lua-filters/section-refs/README.md b/paper/lua-filters/section-refs/README.md
new file mode 100644
index 0000000..941bc36
--- /dev/null
+++ b/paper/lua-filters/section-refs/README.md
@@ -0,0 +1,19 @@
+# section-refs
+
+This filter allows the user to put bibliographies at the end of each
+section, containing only those references in the section. It works on
+the output of `pandoc-citeproc`, and so must be run after
+`pandoc-citeproc`. For example:
+
+~~~
+pandoc input.md -F pandoc-citeproc --lua-filter section-refs.lua
+~~~
+
+It allows customization through two metadata fields:
+`reference-section-title` and `section-refs-level` (default 1). The
+`section-refs-level` variable controls the level of the sections at whose
+end a bibliography is placed. The header of the generated references section
+will be one level higher than `section-refs-level` (so if it occurs at the
+end of a level-1 section, it will receive a level-2 header, and so on).
+
+This filter requires pandoc version >= 2.1.
diff --git a/paper/lua-filters/section-refs/bibliography.bib b/paper/lua-filters/section-refs/bibliography.bib
new file mode 100644
index 0000000..7ce54d5
--- /dev/null
+++ b/paper/lua-filters/section-refs/bibliography.bib
@@ -0,0 +1,70 @@
+@BOOK{ainsworth:sheppard,
+ title = {Jack Sheppard: A Romance},
+ author = {William Harrison Ainsworth},
+ address = {London},
+ publisher = {George Routledge \& Sons},
+ year = {1900},
+ shorttitle = {Jack Sheppard},
+}
+
+@Article{altick:aldine,
+ author = {Richard D. Altick},
+ title = {From Aldine to Everyman: Cheap Reprint Series of the
+ English Classics 1830--1906},
+ journal = {Studies in Bibliography},
+ year = 1958,
+ volume = 11,
+ pages = {3--24}
+}
+
+@BOOK{cohen:jokes,
+ title = {Jokes: Philosophical Thoughts on Joking Matters},
+ publisher = {University of Chicago Press},
+ year = 1999,
+ author = {Ted Cohen},
+ address = {Chicago},
+ shorttitle = {Jokes}
+}
+
+@Book{dames:physiology,
+ author = {Nicholas Dames},
+ title = {The Physiology of the Novel: Reading, Neural
+ Science, and the Form of Victorian Fiction},
+ publisher = {Oxford University Press},
+ year = 2007,
+ address = {Oxford},
+ shorttitle = {Physiology}
+}
+
+@Book{kant:critique2,
+ author = {Immanuel Kant},
+ editor = {Mary Gregor},
+ translator = {Mary Gregor},
+ title = {Critique of Practical Reason},
+ publisher = {Cambridge University Press},
+ year = 2001,
+ address = {Cambridge, UK},
+ shorttitle = {Practical}
+}
+
+@Book{lukacs:european,
+ author = {Georg Luk{\'a}cs},
+ title = {Studies in European Realism: A Sociological Survey
+ of the Writings of Balzac, Stendhal, Zola, Tolstoy,
+ Gorki, and Others},
+ publisher = {The Merlin Press},
+ year = 1989,
+ translator = {Edith Bone},
+ address = {London},
+ shorttitle = {Studies}
+}
+
+@Book{trollope:autobiography,
+ author = {Anthony Trollope},
+ editor = {Michael Sadleir and Frederick Page},
+ title = {An Autobiography},
+ publisher = {Oxford University Press},
+ year = 1999,
+ address = {Oxford},
+ origdate = 1883
+}
diff --git a/paper/lua-filters/section-refs/expected_default.native b/paper/lua-filters/section-refs/expected_default.native
new file mode 100644
index 0000000..b1c6945
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_default.native
@@ -0,0 +1,25 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."]
+,Div ("refs-1",["references"],[])
+ [Div ("ref-ainsworth:sheppard",[],[])
+ [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]]
+ ,Div ("ref-altick:aldine",[],[])
+ [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-kant:critique2",[],[])
+ [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."]
+,Div ("refs-2",["references"],[])
+ [Div ("ref-cohen:jokes",[],[])
+ [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]]
+ ,Div ("ref-lukacs:european",[],[])
+ [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]]
+ ,Div ("ref-trollope:autobiography",[],[])
+ [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]]
diff --git a/paper/lua-filters/section-refs/expected_no_citeproc.native b/paper/lua-filters/section-refs/expected_no_citeproc.native
new file mode 100644
index 0000000..2219b6e
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_no_citeproc.native
@@ -0,0 +1,7 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@ainsworth:sheppard",Space,Str "27]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@dames:physiology]"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@kant:critique2",Space,Str "29]"],Str "."]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@altick:aldine",Space,Str "20]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@dames:physiology]"],Str "."]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@lukacs:european",Space,Str "125]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@cohen:jokes",Space,Str "3]"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@trollope:autobiography",Space,Str "392]"],Str "."]]
diff --git a/paper/lua-filters/section-refs/expected_refs_name.native b/paper/lua-filters/section-refs/expected_refs_name.native
new file mode 100644
index 0000000..0d026ed
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_refs_name.native
@@ -0,0 +1,27 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."]
+,Div ("refs-1",["references"],[])
+ [Header 2 ("bibliography-1",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-ainsworth:sheppard",[],[])
+ [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]]
+ ,Div ("ref-altick:aldine",[],[])
+ [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-kant:critique2",[],[])
+ [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."]
+,Div ("refs-2",["references"],[])
+ [Header 2 ("bibliography-2",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-cohen:jokes",[],[])
+ [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]]
+ ,Div ("ref-lukacs:european",[],[])
+ [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]]
+ ,Div ("ref-trollope:autobiography",[],[])
+ [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]]
diff --git a/paper/lua-filters/section-refs/expected_section_level.native b/paper/lua-filters/section-refs/expected_section_level.native
new file mode 100644
index 0000000..1d3c89e
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_section_level.native
@@ -0,0 +1,31 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Div ("refs-1",["references"],[])
+ [Header 3 ("bibliography-1",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."]
+,Div ("refs-2",["references"],[])
+ [Header 3 ("bibliography-2",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-ainsworth:sheppard",[],[])
+ [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-kant:critique2",[],[])
+ [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."]
+,Div ("refs-3",["references"],[])
+ [Header 3 ("bibliography-3",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-altick:aldine",[],[])
+ [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."]
+,Div ("refs-4",["references"],[])
+ [Header 3 ("bibliography-4",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-cohen:jokes",[],[])
+ [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]]
+ ,Div ("ref-lukacs:european",[],[])
+ [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]]
+ ,Div ("ref-trollope:autobiography",[],[])
+ [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]]
diff --git a/paper/lua-filters/section-refs/sample.md b/paper/lua-filters/section-refs/sample.md
new file mode 100644
index 0000000..00ddb86
--- /dev/null
+++ b/paper/lua-filters/section-refs/sample.md
@@ -0,0 +1,18 @@
+# Here is one section
+
+## A subsection
+
+Here is something [@ainsworth:sheppard 27]. And here is something else
+[@dames:physiology]. Finally, we want to make sure that we have one last
+citation here [@kant:critique2 29].
+
+## Another subsection.
+
+Here is something [@altick:aldine 20]. And here is something repeated
+[@dames:physiology].
+
+# Here is another section
+
+Here is something [@lukacs:european 125]. And here is something else
+[@cohen:jokes 3]. Finally, we want to make sure that we have one last
+citation here [@trollope:autobiography 392].
diff --git a/paper/lua-filters/section-refs/section-refs.lua b/paper/lua-filters/section-refs/section-refs.lua
new file mode 100644
index 0000000..68e61d0
--- /dev/null
+++ b/paper/lua-filters/section-refs/section-refs.lua
@@ -0,0 +1,138 @@
+--- Tell whether a block is the Div whose identifier is "refs".
+-- @param blk a block element exposing `t` and `identifier`
+-- @return true or false
+function is_ref_div (blk)
+  if blk.t ~= "Div" then
+    return false
+  end
+  return blk.identifier == "refs"
+end
+
+--- Tell whether a block is the Header whose identifier is "bibliography".
+-- @param blk a block element exposing `t` and `identifier`
+-- @return true or false
+function is_ref_header (blk)
+  if blk.t ~= "Header" then
+    return false
+  end
+  return blk.identifier == "bibliography"
+end
+
+--- Return the content of the first block recognised by `is_ref_div`.
+-- Nothing is returned when no such block is present.
+-- @param blks list of blocks to search
+function get_all_refs (blks)
+  for _, candidate in pairs(blks) do
+    local found = is_ref_div(candidate)
+    if found then
+      return candidate.content
+    end
+  end
+end
+
+--- Build a new list holding every block of `blks` except those matched by
+-- `is_ref_div` or `is_ref_header`. The input list is not modified.
+-- @param blks list of blocks
+-- @return a fresh list with the remaining blocks
+function remove_all_refs (blks)
+  local kept = {}
+  for _, blk in pairs(blks) do
+    local is_bibliography = is_ref_div(blk) or is_ref_header(blk)
+    if not is_bibliography then
+      kept[#kept + 1] = blk
+    end
+  end
+  return kept
+end
+
+--- Look up the bibliography entry belonging to a citation.
+-- The entry is the Div in `all_refs` whose identifier is "ref-" followed
+-- by the citation id. The result is a {position, entry} pair, where
+-- position is the index within `all_refs`, so that callers can sort the
+-- entries of an individual bibliography. Nothing is returned when no
+-- entry matches.
+function citation_to_numbered_ref (citation, all_refs)
+  local wanted = "ref-" .. citation.id
+  for position, entry in ipairs(all_refs) do
+    local matches = entry.t == "Div" and entry.identifier == wanted
+    if matches then
+      return {position, entry}
+    end
+  end
+end
+
+
+--- Collect the bibliography entries cited inside `blocks`.
+-- Every Cite element found in the blocks contributes its citations; each
+-- citation is resolved against `all_refs` with `citation_to_numbered_ref`.
+-- A reference cited several times is listed only once, and the result
+-- keeps the relative order the entries have in `all_refs`.
+-- @param blocks   list of blocks making up one section
+-- @param all_refs list of all bibliography entries of the document
+-- @return list of the entries cited in `blocks`
+function get_partial_refs (blocks, all_refs)
+  local cites = {}
+  local citegetter = {
+    Cite = function (el)
+      for _, c in pairs(el.citations) do
+        table.insert(cites, c)
+      end
+    end
+  }
+
+  for _, b in pairs(blocks) do
+    pandoc.walk_block(b, citegetter)
+  end
+
+  -- First we make a list of {number, ref} pairs so we can sort them;
+  -- after sorting we keep only the second element of each pair.
+  -- `seen` prevents a reference that is cited repeatedly from showing up
+  -- more than once in the same bibliography.
+  local numbered_refs = {}
+  local seen = {}
+  for _, c in pairs(cites) do
+    if not seen[c.id] then
+      seen[c.id] = true
+      local r = citation_to_numbered_ref(c, all_refs)
+      if r then
+        table.insert(numbered_refs, r)
+      end
+    end
+  end
+
+  table.sort(numbered_refs, function(x, y) return x[1] < y[1] end)
+
+  local refs = {}
+  for _, nr in ipairs(numbered_refs) do
+    table.insert(refs, nr[2])
+  end
+
+  return refs
+end
+
+-- Split `blks` into sections and append a bibliography Div to each one.
+-- A new section starts at every Header whose level is <= `lvl`, except
+-- the first such Header, which stays with the blocks preceding it.
+--   blks       list of blocks (the reference Div/Header already removed
+--              by the caller in this file)
+--   lvl        header level that delimits sections
+--   refs_title inlines used as bibliography heading, or nil for none
+--   all_refs   all bibliography entries, passed on to get_partial_refs
+-- Returns the new list of blocks.
+function add_section_refs (blks, lvl, refs_title, all_refs)
+ local output_blks = {}
+ local section = {}
+ local refs_num = 0
+
+ -- Close the section collected so far: build its bibliography, append
+ -- it to the section and move everything to the output list.
+ local go = function ()
+ -- running counter, used to give every bibliography a distinct id
+ refs_num = refs_num + 1
+ local section_refs = get_partial_refs(section, all_refs)
+ if refs_title then
+ -- the heading sits one level below the sections it belongs to
+ local hdr = pandoc.Header(lvl + 1,
+ refs_title,
+ pandoc.Attr("bibliography-" .. tostring(refs_num),
+ {"unnumbered"}))
+ table.insert(section_refs, 1, hdr)
+ end
+ local refs_div = pandoc.Div(section_refs,
+ pandoc.Attr("refs-" .. tostring(refs_num),
+ {"references"}))
+ table.insert(section, refs_div)
+ for _, x in pairs(section) do
+ table.insert(output_blks, x)
+ end
+ end
+
+ -- to avoid putting a bib after an intro paragraph.
+ local seen_hdr_before = false
+ for _, b in pairs(blks) do
+ if b.t == "Header" and b.level <= lvl then
+ if seen_hdr_before then
+ -- a further section header: flush the previous section first
+ go()
+ section = {b}
+ else
+ seen_hdr_before = true
+ table.insert(section, b)
+ end
+ else
+ table.insert(section, b)
+ end
+ end
+ -- flush the last section (runs even if `blks` held no header at all)
+ go()
+ return output_blks
+end
+
+--- Filter entry point: replace the single document bibliography by one
+-- bibliography per section.
+-- Reads two metadata fields:
+--   reference-section-title  heading for each bibliography (optional)
+--   section-refs-level       header level delimiting sections (default 1)
+-- Returns a new document, or nothing (document left unchanged) when the
+-- input contains no reference Div.
+function Pandoc(doc)
+  if PANDOC_VERSION == nil then -- if pandoc_version < 2.1
+    io.stderr:write("WARNING: pandoc >= 2.1 required for section-refs filter\n")
+    return doc
+  end
+  local refs_title = doc.meta["reference-section-title"]
+  -- if we get it from a command-line field, read it in as md.
+  if type(refs_title) == "string" then
+    -- an empty title parses to zero blocks; treat that as "no title"
+    local first_blk = pandoc.read(refs_title, "markdown").blocks[1]
+    refs_title = first_blk and first_blk.content
+  end
+  -- A level given on the command line arrives as a string, but one given
+  -- in a metadata block arrives as a list of inlines, for which tonumber
+  -- returns nil; flatten such values to text first.
+  local lvl_meta = doc.meta["section-refs-level"]
+  local lvl_kind = type(lvl_meta)
+  if lvl_kind == "table" or lvl_kind == "userdata" then
+    lvl_meta = pandoc.utils.stringify(lvl_meta)
+  end
+  local lvl = tonumber(lvl_meta) or 1
+  local all_refs = get_all_refs(doc.blocks)
+  -- we only want to do something if there are refs to work
+  -- with. This way, if this is run without pandoc-citeproc, it will
+  -- just return the same document.
+  if all_refs then
+    local unreffed = remove_all_refs(doc.blocks)
+    local output = add_section_refs(unreffed, lvl, refs_title, all_refs)
+    return pandoc.Pandoc(output, doc.meta)
+  end
+end
diff --git a/paper/lua-filters/short-captions/Makefile b/paper/lua-filters/short-captions/Makefile
new file mode 100644
index 0000000..756cf2b
--- /dev/null
+++ b/paper/lua-filters/short-captions/Makefile
@@ -0,0 +1,18 @@
+LF = --lua-filter=short-captions.lua
+F = -F pandoc-crossref
+
+test:
+ @pandoc $(LF) README.md -w latex | diff --strip-trailing-cr expected-1.tex -
+
+crossref:
+ @echo testing short-captions.lua after pandoc-crossref
+ @pandoc $(F) $(LF) README.md -w latex | diff --strip-trailing-cr expected-2.tex -
+ @echo testing short-captions.lua before pandoc-crossref
+ @pandoc $(LF) $(F) README.md -w latex | diff --strip-trailing-cr expected-2.tex -
+
+readme:
+ @pandoc -s $(F) $(LF) README.md -o README.pdf
+
+latex:
+ @pandoc -s $(F) $(LF) README.md -o README.tex
+
diff --git a/paper/lua-filters/short-captions/README.md b/paper/lua-filters/short-captions/README.md
new file mode 100644
index 0000000..2721004
--- /dev/null
+++ b/paper/lua-filters/short-captions/README.md
@@ -0,0 +1,52 @@
+---
+title: "short-captions.lua"
+lof: true
+---
+
+# Short captions in \LaTeX\ output
+
+For latex output, this filter uses the attribute `short-caption` for
+figures so that the attribute value appears in the List of Figures, if
+one is desired.
+
+# Usage
+
+Where you would have a figure in, say, markdown as
+
+ ![The caption](foo.png )
+
+You can now specify the figure as
+
+ ![The long caption](foo.png){short-caption="a short caption"}
+
+If the document metadata includes `lof:true`, then the List of Figures
+will use the short caption. This is particularly useful for students
+writing dissertations, who often have to include a List of Figures in
+the front matter, but where figure captions themselves can be quite
+lengthy.
+
+ pandoc --lua-filter=short-captions.lua article.md -o article.tex
+
+ pandoc --lua-filter=short-captions.lua article.md -o article.pdf
+
+
+
+# Example
+
+@Fig:shortcap is an interesting figure with a long caption, but a short
+caption in the List of Figures.
+
+![This is an *extremely* interesting figure that has a lot of detail I
+will need to describe in a few sentences. This figure has a short
+caption that will appear in the list of figures. Other attributes are
+preserved](fig.pdf){#fig:shortcap short-caption="A short caption with
+math $x^n + y^n = z^n$" width="50%"}
+
+
+# Limitations
+
+- The filter will process the `short-caption` attribute value as pandoc
+ markdown, regardless of the input format.
+- It does not work for tables and listings yet.
+- But it works with pandoc-crossref, regardless of the order of
+ application.
diff --git a/paper/lua-filters/short-captions/expected-1.tex b/paper/lua-filters/short-captions/expected-1.tex
new file mode 100644
index 0000000..a28bb7e
--- /dev/null
+++ b/paper/lua-filters/short-captions/expected-1.tex
@@ -0,0 +1,67 @@
+\hypertarget{short-captions-in-output}{%
+\section{\texorpdfstring{Short captions in
+\LaTeX~output}{Short captions in ~output}}\label{short-captions-in-output}}
+
+For latex output, this filter uses the attribute \texttt{short-caption}
+for figures so that the attribute value appears in the List of Figures,
+if one is desired.
+
+\hypertarget{usage}{%
+\section{Usage}\label{usage}}
+
+Where you would have a figure in, say, markdown as
+
+\begin{verbatim}
+![The caption](foo.png )
+\end{verbatim}
+
+You can now specify the figure as
+
+\begin{verbatim}
+![The long caption](foo.png){short-caption="a short caption"}
+\end{verbatim}
+
+If the document metadata includes \texttt{lof:true}, then the List of
+Figures will use the short caption. This is particularly useful for
+students writing dissertations, who often have to include a List of
+Figures in the front matter, but where figure captions themselves can be
+quite lengthy.
+
+\begin{verbatim}
+pandoc --lua-filter=short-captions.lua article.md -o article.tex
+
+pandoc --lua-filter=short-captions.lua article.md -o article.pdf
+\end{verbatim}
+
+\hypertarget{example}{%
+\section{Example}\label{example}}
+
+@Fig:shortcap is an interesting figure with a long caption, but a short
+caption in the List of Figures.
+
+\hypertarget{fig:shortcap}{%
+\begin{figure}
+\centering
+\includegraphics[width=0.5\textwidth,height=\textheight]{fig.pdf}
+\caption[{A short caption with math \(x^n + y^n = z^n\)}]{This is an
+\emph{extremely} interesting figure that has a lot of detail I will need
+to describe in a few sentences. This figure has a short caption that
+will appear in the list of figures. Other attributes are preserved}
+\label{fig:shortcap}
+\end{figure}
+}
+
+\hypertarget{limitations}{%
+\section{Limitations}\label{limitations}}
+
+\begin{itemize}
+\tightlist
+\item
+ The filter will process the \texttt{short-caption} attribute value as
+ pandoc markdown, regardless of the input format.
+\item
+ It does not work for tables and listings yet.
+\item
+ But it works with pandoc-crossref, regardless of the order of
+ application.
+\end{itemize}
diff --git a/paper/lua-filters/short-captions/expected-2.tex b/paper/lua-filters/short-captions/expected-2.tex
new file mode 100644
index 0000000..95444b3
--- /dev/null
+++ b/paper/lua-filters/short-captions/expected-2.tex
@@ -0,0 +1,67 @@
+\hypertarget{short-captions-in-output}{%
+\section{\texorpdfstring{Short captions in
+\LaTeX~output}{Short captions in ~output}}\label{short-captions-in-output}}
+
+For latex output, this filter uses the attribute \texttt{short-caption}
+for figures so that the attribute value appears in the List of Figures,
+if one is desired.
+
+\hypertarget{usage}{%
+\section{Usage}\label{usage}}
+
+Where you would have a figure in, say, markdown as
+
+\begin{verbatim}
+![The caption](foo.png )
+\end{verbatim}
+
+You can now specify the figure as
+
+\begin{verbatim}
+![The long caption](foo.png){short-caption="a short caption"}
+\end{verbatim}
+
+If the document metadata includes \texttt{lof:true}, then the List of
+Figures will use the short caption. This is particularly useful for
+students writing dissertations, who often have to include a List of
+Figures in the front matter, but where figure captions themselves can be
+quite lengthy.
+
+\begin{verbatim}
+pandoc --lua-filter=short-captions.lua article.md -o article.tex
+
+pandoc --lua-filter=short-captions.lua article.md -o article.pdf
+\end{verbatim}
+
+\hypertarget{example}{%
+\section{Example}\label{example}}
+
+Fig.~\ref{fig:shortcap} is an interesting figure with a long caption,
+but a short caption in the List of Figures.
+
+\hypertarget{fig:shortcap}{%
+\begin{figure}
+\centering
+\includegraphics[width=0.5\textwidth,height=\textheight]{fig.pdf}
+\caption[{A short caption with math \(x^n + y^n = z^n\)}]{This is an
+\emph{extremely} interesting figure that has a lot of detail I will need
+to describe in a few sentences. This figure has a short caption that
+will appear in the list of figures. Other attributes are preserved}
+\label{fig:shortcap}
+\end{figure}
+}
+
+\hypertarget{limitations}{%
+\section{Limitations}\label{limitations}}
+
+\begin{itemize}
+\tightlist
+\item
+ The filter will process the \texttt{short-caption} attribute value as
+ pandoc markdown, regardless of the input format.
+\item
+ It does not work for tables and listings yet.
+\item
+ But it works with pandoc-crossref, regardless of the order of
+ application.
+\end{itemize}
diff --git a/paper/lua-filters/short-captions/fig.pdf b/paper/lua-filters/short-captions/fig.pdf
new file mode 100644
index 0000000..cac7f39
--- /dev/null
+++ b/paper/lua-filters/short-captions/fig.pdf
Binary files differ
diff --git a/paper/lua-filters/short-captions/short-captions.lua b/paper/lua-filters/short-captions/short-captions.lua
new file mode 100644
index 0000000..9aaf309
--- /dev/null
+++ b/paper/lua-filters/short-captions/short-captions.lua
@@ -0,0 +1,37 @@
+-- The code below only produces raw LaTeX, so for every other output
+-- format the script stops here, before any filter function is defined.
+if FORMAT ~= "latex" then
+ return
+end
+
+-- Wrap `str` in a pandoc RawInline element tagged with the 'latex' format.
+local function latex(str)
+ return pandoc.RawInline('latex', str)
+end
+
+--- Return the image of a figure paragraph, or nil.
+-- An element qualifies when the first item of its `content` is an Image
+-- whose title is exactly 'fig:'. Elements without content, or with empty
+-- content, yield nil instead of raising an error.
+-- @param elem element to inspect
+-- @return the Image element, or nil
+function figure_image (elem)
+  local image = elem.content and elem.content[1]
+  if image and image.t == 'Image' and image.title == 'fig:' then
+    return image
+  end
+  return nil
+end
+
+--- Rewrite a figure paragraph so that LaTeX receives a short caption.
+-- Applies only to paragraphs accepted by `figure_image` whose image has a
+-- non-empty `short-caption` attribute; the attribute value is parsed with
+-- `pandoc.read` and placed in the optional argument of \caption. Any other
+-- paragraph is left untouched (nil is returned).
+-- @param para a Para element
+-- @return a replacement Para made of raw LaTeX and the image, or nil
+function Para (para)
+  local img = figure_image(para)
+  if not img or not img.caption then
+    return nil
+  end
+  local short_src = img.attributes['short-caption']
+  if not short_src or short_src == '' then
+    return nil
+  end
+  -- a value that parses to no block at all is treated like a missing one
+  local first_blk = pandoc.read(short_src).blocks[1]
+  if not first_blk then
+    return nil
+  end
+
+  local short_caption = pandoc.Span(first_blk.content)
+  -- Without an identifier we still open a group, so that the closing
+  -- brace emitted at the end stays balanced, but we emit neither
+  -- \hypertarget nor \label (both would otherwise get an empty name).
+  local hypertarget = "{%\n"
+  local label = "\n"
+  if img.identifier ~= "" and img.identifier ~= img.title then
+    hypertarget = string.format("\\hypertarget{%s}{%%\n",img.identifier)
+    label = string.format("\n\\label{%s}",img.identifier)
+  end
+  return pandoc.Para {
+    latex(hypertarget .. "\\begin{figure}\n\\centering\n"),
+    img,
+    latex("\n\\caption["), short_caption, latex("]"), pandoc.Span(img.caption),
+    latex(label .."\n\\end{figure}\n}\n")
+  }
+end
diff --git a/paper/lua-filters/spellcheck/Makefile b/paper/lua-filters/spellcheck/Makefile
new file mode 100644
index 0000000..9d51bff
--- /dev/null
+++ b/paper/lua-filters/spellcheck/Makefile
@@ -0,0 +1,2 @@
+test:
+ @pandoc --lua-filter=spellcheck.lua sample.md | sort | diff --strip-trailing-cr -u expected.txt -
diff --git a/paper/lua-filters/spellcheck/README.md b/paper/lua-filters/spellcheck/README.md
new file mode 100644
index 0000000..5f5d6a3
--- /dev/null
+++ b/paper/lua-filters/spellcheck/README.md
@@ -0,0 +1,42 @@
+# spellcheck
+
+This filter checks the spelling of words in the body of the
+document (omitting metadata). The external program `aspell` is
+used for the checking, and must be present in the path.
+
+Why use this instead of just running `aspell` on the
+document's source? Because this filter is sensitive to
+the semantics of the document in ways that `aspell` is
+not:
+
+- Material in code spans, raw HTML, URLs in links,
+ and math is not spell-checked, eliminating a big
+ class of false positives.
+
+- The filter is sensitive to the `lang` specified in
+ the document's metadata; this will be treated as the
+ default language for the document.
+
+- It is also sensitive to `lang` attributes on native
+ divs and spans. Thus, for example, in an English
+ document, `[chevaux]{lang=fr}` will not be registered
+ as a spelling error.
+
+To run it,
+
+ pandoc --lua-filter spellcheck.lua sample.md
+
+A list of misspelled words (or at any rate, words not
+in the appropriate dictionary) will be printed to stdout.
+If the word is in a div or span with a non-default `lang`
+attribute, the relevant language will be indicated in
+brackets after the word, separated by a tab.
+
+To add words to the list for a language, you can add files
+with names `.aspell.LANG.pws` in your home directory. Example:
+
+```
+% cat ~/.aspell.en.pws
+personal_ws-1.1 en 0
+goopy
+```
diff --git a/paper/lua-filters/spellcheck/expected.txt b/paper/lua-filters/spellcheck/expected.txt
new file mode 100644
index 0000000..dd973c8
--- /dev/null
+++ b/paper/lua-filters/spellcheck/expected.txt
@@ -0,0 +1,2 @@
+missspeling [en]
+summer
diff --git a/paper/lua-filters/spellcheck/sample.md b/paper/lua-filters/spellcheck/sample.md
new file mode 100644
index 0000000..31f7834
--- /dev/null
+++ b/paper/lua-filters/spellcheck/sample.md
@@ -0,0 +1,15 @@
+---
+lang: fr-FR
+...
+
+Ces sont des mots français.
+Mais pas summer.
+
+[This is a sentence in English,
+with one missspeling.]{lang=en}
+
+::: {lang=en}
+Here's a div in English.
+Code is ignored: `baoeuthasoe`{.nolang}.
+So are [URLs](http://example.com/notaword).
+:::
diff --git a/paper/lua-filters/spellcheck/spellcheck.lua b/paper/lua-filters/spellcheck/spellcheck.lua
new file mode 100644
index 0000000..85ae281
--- /dev/null
+++ b/paper/lua-filters/spellcheck/spellcheck.lua
@@ -0,0 +1,70 @@
+-- lua filter for spell checking: requires 'aspell'.
+-- Copyright (C) 2017-2019 John MacFarlane, released under MIT license
+
+local text = require('text')
+local words = {}
+local deflang
+
+--- Register word `t` in the word table of language `lang`.
+-- The per-language table is created on first use. A word that is not yet
+-- present is stored with the value 1; a word already present is left
+-- unchanged.
+local function add_to_dict(lang, t)
+  local bucket = words[lang]
+  if not bucket then
+    bucket = {}
+    words[lang] = bucket
+  end
+  if not bucket[t] then
+    bucket[t] = 1
+  end
+end
+
+--- Meta filter: remember the document's default language in `deflang`.
+-- Falls back to 'en' when no usable `lang` field is present.
+-- @param meta the document metadata
+-- @return an empty table, replacing the metadata
+local function get_deflang(meta)
+  local lang = meta.lang
+  if type(lang) == 'string' then
+    -- a plain string value (e.g. a field set on the command line) has no
+    -- inline elements to look into; use it directly
+    if lang ~= '' then
+      deflang = lang
+    else
+      deflang = 'en'
+    end
+  else
+    deflang = (lang and lang[1] and lang[1].c) or 'en'
+  end
+  -- the following is better but won't work in pandoc 2.0.6.
+  -- it requires pandoc commit ecc46e229fde934f163d1f646383d24bfe2039e1:
+  -- deflang = (meta.lang and pandoc.utils.stringify(meta.lang)) or 'en'
+  return {} -- eliminate meta so it doesn't get spellchecked
+end
+
+--- Spell-check all words collected for `lang` and print the misspelt ones.
+-- The words are sent, one per line, to `aspell list -l <lang>`; every
+-- non-empty line of its output is written to stdout. For languages other
+-- than the default one, a tab and the language in brackets are appended.
+-- @param lang language code used as key into `words`
+local function run_spellcheck(lang)
+  local keys = {}
+  local wordlist = words[lang] or {}
+  for k,_ in pairs(wordlist) do
+    keys[#keys + 1] = k
+  end
+  local inp = table.concat(keys, '\n')
+  local outp = pandoc.pipe('aspell', {'list','-l',lang}, inp)
+  -- Take whole lines: the class %a only matches ASCII letters in the
+  -- default locale, which cut words such as "français" or "doesn't" down
+  -- to their last ASCII run. An optional CR before the newline is dropped.
+  for w in string.gmatch(outp, "([^\r\n]+)\r?\n") do
+    io.write(w)
+    if lang ~= deflang then
+      io.write("\t[" .. lang .. "]")
+    end
+    io.write("\n")
+  end
+end
+
+--- Pandoc filter: gather the remaining words, report, and terminate.
+-- Every Str in the document body is registered under the default
+-- language, `run_spellcheck` is called for each language present in
+-- `words`, and the process then exits with status 0.
+local function results(el)
+  local collect_default = {
+    Str = function(e) add_to_dict(deflang, e.text) end
+  }
+  pandoc.walk_block(pandoc.Div(el.blocks), collect_default)
+  for lang, _ in pairs(words) do
+    run_spellcheck(lang)
+  end
+  os.exit(0)
+end
+
+-- Register the text of a Str element under the default language.
+-- NOTE(review): not referenced by the filter list returned at the end of
+-- this file, which uses an equivalent inline function instead.
+local function checkstr(el)
+ add_to_dict(deflang, el.text)
+end
+
+--- Span filter: handle spans that carry their own `lang` attribute.
+-- The words inside such a span are registered under that language and the
+-- span is replaced by an empty list, so it doesn't get checked again.
+-- Spans without `lang` are left alone (nil is returned).
+local function checkspan(el)
+  local lang = el.attributes.lang
+  if lang then
+    local collect = {Str = function(e) add_to_dict(lang, e.text) end}
+    pandoc.walk_inline(el, collect)
+    return {}
+  end
+  return nil
+end
+
+--- Div filter: handle divs that carry their own `lang` attribute.
+-- The words inside such a div are registered under that language and the
+-- div is replaced by an empty list, so it doesn't get checked again.
+-- Divs without `lang` are left alone (nil is returned).
+local function checkdiv(el)
+  local lang = el.attributes.lang
+  if lang then
+    local collect = {Str = function(e) add_to_dict(lang, e.text) end}
+    pandoc.walk_block(el, collect)
+    return {}
+  end
+  return nil
+end
+
+-- The script returns a list of three filter tables: the first handles
+-- Meta (records the default language), the second handles Div and Span
+-- (words inside containers with a `lang` attribute), the third handles
+-- Str and Pandoc (words in the default language, then the report).
+return {{Meta = get_deflang},
+ {Div = checkdiv, Span = checkspan},
+ {Str = function(e) add_to_dict(deflang, e.text) end, Pandoc = results}}
diff --git a/paper/lua-filters/table-short-captions/Makefile b/paper/lua-filters/table-short-captions/Makefile
new file mode 100644
index 0000000..3df47b1
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/Makefile
@@ -0,0 +1,24 @@
+LF = --lua-filter=table-short-captions.lua
+F = -F pandoc-crossref
+
+test: sample.md
+ @pandoc -s $(LF) -t native $< | \
+ diff -u expected-sample.native -
+
+test-with-crossref: sample.md
+ @pandoc -s $(LF) $(F) -t latex $< | \
+ diff -u expected-sample.tex -
+
+README.pdf: README.md
+ @pandoc $(LF) $(F) $< -o $@
+
+sample.tex: sample.md
+ @pandoc -s $(LF) $(F) -t latex $< -o $@
+
+sample.pdf: sample.md
+ @pandoc -s $(LF) $(F) -t latex $< -o $@
+
+clean:
+ rm -v *.aux *.dvi *.fdb_latexmk *.fls *.log *.lot *.ps *.pdf sample.tex | true
+
+.PHONY: test test-with-crossref clean
diff --git a/paper/lua-filters/table-short-captions/README.md b/paper/lua-filters/table-short-captions/README.md
new file mode 100644
index 0000000..853b809
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/README.md
@@ -0,0 +1,66 @@
+---
+title: "table-short-captions.lua"
+---
+
+# Short captions in \LaTeX\ tables output
+
+For LaTeX output, this filter enables use of the attribute
+`short-caption` for tables. The attribute value will appear in the List
+of Tables.
+
+This filter also enables the class `.unlisted` for tables. This will
+prevent the table caption from appearing in the List of Tables.
+
+# Usage
+
+In Pandoc Markdown, you can add a caption to a table with
+
+ Table: This is the *italicised long caption* of my table, which has
+ a very long caption.
+
+If the document metadata includes `lot: true`, then the List of Tables
+will be inserted at the beginning of the document.
+
+The [pandoc-crossref](http://lierdakil.github.io/pandoc-crossref/)
+filter extends this, and enables you to specify a custom label for the
+table.
+
+ Table: This is the *italicised long caption* of my table, which has
+ a very long caption. {#tbl:full-of-juicy-data}
+
+This filter, when run _before_ pandoc-crossref, allows you to add short
+captions to the table as a `short-caption` attribute. What is between
+the quotes will be parsed as Markdown.
+
+**Important!:** You _must_ use empty square brackets before the
+attributes tag.
+
+ Table: This is the *italicised long caption* of my table, which has
+ a very long caption.
+ []{#tbl:full-of-juicy-data short-caption="Short caption for *juicy* data table."}
+
+Alternatively, if you wish to create a table which is unlisted in the
+List of Tables, you can use the `.unlisted` class in the attributes tag.
+
+ Table: This is the *italicised long caption* of my table, which will
+ not appear in the List of Tables. []{#tbl:full-of-juicy-data .unlisted}
+
+This filter should prove useful for students writing dissertations, who
+often have to include a List of Tables in the front matter, but where
+table captions themselves can be quite lengthy.
+
+ pandoc --lua-filter=table-short-captions.lua \
+ --filter pandoc-crossref \
+ article.md -o article.tex
+
+ pandoc --lua-filter=table-short-captions.lua \
+ --filter pandoc-crossref \
+ article.md -o article.pdf
+
+
+# Limitations
+
+- The filter will process the `short-caption` attribute value as pandoc
+ markdown, regardless of the input format.
+- pandoc-crossref should be run after it.
+- I have only tested this from a Markdown source.
diff --git a/paper/lua-filters/table-short-captions/expected-sample.native b/paper/lua-filters/table-short-captions/expected-sample.native
new file mode 100644
index 0000000..c387ebe
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/expected-sample.native
@@ -0,0 +1,68 @@
+Pandoc (Meta {unMeta = fromList [("lot",MetaBool True),("title",MetaInlines [Str "Tests",Space,Str "for",Space,Str "table-short-captions.lua"])]})
+[Para [Str "These",Space,Str "tests",Space,Str "are",Space,Str "written",Space,Str "so",Space,Str "that",Space,Str "if",Space,Strong [Str "bold",Space,Str "font"],Space,Str "appears",Space,Str "in",Space,Str "the",Space,Str "LOT,",Space,Str "something",Space,Str "is",Space,Str "wrong."]
+,Para [Str "The",Space,Str "tests",Space,Str "are",Space,Str "split",Space,Str "into",Space,Str "two:",Space,Str "expected",Space,Str "uses,",Space,Str "and",Space,Str "non-standard",Space,Str "uses/errors.",LineBreak,Str "The",Space,Str "non-standard",Space,Str "uses",Space,Str "are",Space,Str "presented",Space,Str "in",Space,Str "this",Space,Str "document",Space,Str "for",Space,Str "troubleshooting",Space,Str "purposes,",Space,Str "and",Space,Str "to",Space,Str "ensure",Space,Str "the",Space,Str "filter",Space,Str "doesn\8217t",Space,Str "crash",Space,Str "in",Space,Str "corner",Space,Str "cases."]
+,Header 1 ("standard-usage",[],[]) [Str "Standard",Space,Str "usage"]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl1,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label."] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl2,",Space,Str "in",Space,Str "standard",Space,Code ("",[],[]) "pandoc-crossref",Space,Str "form.",Space,Str "{#tbl:tbl-label2}"] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl3,",Space,Str "which",Space,Str "is",Space,Strong [Str "unlisted"],Str ".",Space,Span ("tbl:tbl-label3",["unlisted"],[]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl4,",Space,Str "which",Space,Str "has",Space,Str "an",Space,Strong [Str "overriding"],Space,Str "short-caption.",Space,Str "This",Space,Str "is",Space,Str "the",Space,Str "expected",Space,Str "usage.",Space,Span ("tbl:tbl-label4",[],[("short-caption","Table 4 *short* capt.")]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Header 1 ("non-standard-usageerrors",[],[]) [Str "Non-standard",Space,Str "usage/errors"]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl5,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label,",Space,Str "but",Space,Str "does",Space,Str "have",Space,Str "empty",Space,Str "braces",Space,Str "at",Space,Str "the",Space,Str "end.",Space,Str "{}"] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl6,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label,",Space,Str "but",Space,Str "does",Space,Str "have",Space,Str "an",Space,Str "empty",Space,Str "span",Space,Str "at",Space,Str "the",Space,Str "end.",Space,Span ("",[],[]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl7,",Space,Str "which",Space,Str "is",Space,Str "improperly",Space,Str "formatted,",Space,Str "and",Space,Str "will",Space,Str "appear",Space,Str "in",Space,Str "the",Space,Str "list",Space,Str "of",Space,Str "tables.",Space,Str "This",Space,Str "filter",Space,Str "requires",Space,Str "that",Space,Code ("",[],[]) ".unlisted",Space,Str "is",Space,Str "placed",Space,Str "in",Space,Str "a",Space,Str "span.",Space,Str "{#tbl:tbl-label7",Space,Str ".unlisted}"] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl8,",Space,Str "which",Space,Str "has",Space,Str "an",Space,Str "empty",Space,Str "short-caption.",Space,Str "An",Space,Str "empty",Space,Str "short-caption",Space,Str "does",Space,Str "nothing.",Space,Str "The",Space,Str "long",Space,Str "caption",Space,Str "will",Space,Str "still",Space,Str "be",Space,Str "used.",Space,Span ("tbl:tbl-label8",[],[("short-caption","")]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl9,",Space,Str "which",Space,Str "is",Space,Strong [Str "unlisted"],Str ",",Space,Str "yet",Space,Str "has",Space,Str "a",Space,Str "short-caption.",Space,Span ("tbl:tbl-label9",["unlisted"],[("short-caption","Table 9 **unlisted** *short* capt.")]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]]
diff --git a/paper/lua-filters/table-short-captions/expected-sample.tex b/paper/lua-filters/table-short-captions/expected-sample.tex
new file mode 100644
index 0000000..65a2073
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/expected-sample.tex
@@ -0,0 +1,291 @@
+% Options for packages loaded elsewhere
+\PassOptionsToPackage{unicode=true}{hyperref}
+\PassOptionsToPackage{hyphens}{url}
+%
+\documentclass[
+]{article}
+\usepackage{lmodern}
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[T1]{fontenc}
+ \usepackage[utf8]{inputenc}
+ \usepackage{textcomp} % provides euro and other symbols
+\else % if luatex or xelatex
+ \usepackage{unicode-math}
+ \defaultfontfeatures{Scale=MatchLowercase}
+ \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
+\fi
+% Use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+\IfFileExists{microtype.sty}{% use microtype if available
+ \usepackage[]{microtype}
+ \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+\makeatletter
+\@ifundefined{KOMAClassName}{% if non-KOMA class
+ \IfFileExists{parskip.sty}{%
+ \usepackage{parskip}
+ }{% else
+ \setlength{\parindent}{0pt}
+ \setlength{\parskip}{6pt plus 2pt minus 1pt}}
+}{% if KOMA class
+ \KOMAoptions{parskip=half}}
+\makeatother
+\usepackage{xcolor}
+\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
+\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
+\hypersetup{
+ pdftitle={Tests for table-short-captions.lua},
+ hidelinks,
+}
+\urlstyle{same} % disable monospaced font for URLs
+\usepackage{longtable,booktabs}
+% Allow footnotes in longtable head/foot
+\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
+\makesavenoteenv{longtable}
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+ \let\oldparagraph\paragraph
+ \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+ \let\oldsubparagraph\subparagraph
+ \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+
+% Set default figure placement to htbp
+\makeatletter
+\def\fps@figure{htbp}
+\makeatother
+
+% -- begin:latex-table-short-captions --
+\makeatletter\AtBeginDocument{%
+\def\LT@c@ption#1[#2]#3{% % Overwrite the workhorse macro used in formatting a longtable caption.
+ \LT@makecaption#1\fnum@table{#3}%
+ \ifdefined\pandoctableshortcapt % If pandoctableshortcapt is defined (even if blank), we should override default behaviour.
+ \let\@tempa\pandoctableshortcapt% % (Use let, we don't want to expand pandoctableshortcapt!)
+ \else % If not, fall back to default behaviour
+ \def\@tempa{#2}% % (Use the argument in square brackets)
+ \fi
+ \ifx\@tempa\@empty\else % If @tempa is blank, no lot entry! Otherwise, @tempa becomes the lot title.
+ {\let\\\space
+ \addcontentsline{lot}{table}{\protect\numberline{\thetable}{\@tempa}}}%
+ \fi}
+}\makeatother
+% -- end:latex-table-short-captions --
+\makeatletter
+\@ifpackageloaded{subfig}{}{\usepackage{subfig}}
+\@ifpackageloaded{caption}{}{\usepackage{caption}}
+\captionsetup[subfloat]{margin=0.5em}
+\AtBeginDocument{%
+\renewcommand*\figurename{Figure}
+\renewcommand*\tablename{Table}
+}
+\AtBeginDocument{%
+\renewcommand*\listfigurename{List of Figures}
+\renewcommand*\listtablename{List of Tables}
+}
+\@ifpackageloaded{float}{}{\usepackage{float}}
+\floatstyle{ruled}
+\@ifundefined{c@chapter}{\newfloat{codelisting}{h}{lop}}{\newfloat{codelisting}{h}{lop}[chapter]}
+\floatname{codelisting}{Listing}
+\newcommand*\listoflistings{\listof{codelisting}{List of Listings}}
+\makeatother
+
+\title{Tests for table-short-captions.lua}
+\date{}
+
+\begin{document}
+\maketitle
+
+\listoftables
+These tests are written so that if \textbf{bold font} appears in the
+LOT, something is wrong.
+
+The tests are split into two: expected uses, and non-standard
+uses/errors.\\
+The non-standard uses are presented in this document for troubleshooting
+purposes, and to ensure the filter doesn't crash in corner cases.
+
+\hypertarget{standard-usage}{%
+\section{Standard usage}\label{standard-usage}}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl1, which does
+not have a label.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\hypertarget{tbl:tbl-label2}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label2}This is the \emph{italicised long
+caption} of tbl2, in standard \texttt{pandoc-crossref}
+form.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\def\pandoctableshortcapt{} % .unlisted
+
+\hypertarget{tbl:tbl-label3}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label3}This is the \emph{italicised long
+caption} of tbl3, which is \textbf{unlisted}.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\undef\pandoctableshortcapt
+
+\def\pandoctableshortcapt{Table 4 \emph{short} capt.}
+
+\hypertarget{tbl:tbl-label4}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label4}This is the \emph{italicised long
+caption} of tbl4, which has an \textbf{overriding} short-caption. This
+is the expected usage.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\undef\pandoctableshortcapt
+
+\hypertarget{non-standard-usageerrors}{%
+\section{Non-standard usage/errors}\label{non-standard-usageerrors}}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl5, which does
+not have a label, but does have empty braces at the end.
+\{\}}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl6, which does
+not have a label, but does have an empty span at the end.
+}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl7, which is
+improperly formatted, and will appear in the list of tables. This filter
+requires that \texttt{.unlisted} is placed in a span. \{\#tbl:tbl-label7
+.unlisted\}}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\hypertarget{tbl:tbl-label8}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label8}This is the \emph{italicised long
+caption} of tbl8, which has an empty short-caption. An empty
+short-caption does nothing. The long caption will still be
+used.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\def\pandoctableshortcapt{} % .unlisted
+
+\hypertarget{tbl:tbl-label9}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label9}This is the \emph{italicised long
+caption} of tbl9, which is \textbf{unlisted}, yet has a
+short-caption.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\undef\pandoctableshortcapt
+
+\end{document}
diff --git a/paper/lua-filters/table-short-captions/sample.md b/paper/lua-filters/table-short-captions/sample.md
new file mode 100644
index 0000000..74c27fb
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/sample.md
@@ -0,0 +1,84 @@
+---
+title: "Tests for table-short-captions.lua"
+lot: true
+---
+
+These tests are written so that if **bold font** appears in the LOT, something is wrong.
+
+The tests are split into two: expected uses, and non-standard uses/errors.
+The non-standard uses are presented in this document for troubleshooting purposes, and to ensure the filter doesn't crash in corner cases.
+
+# Standard usage
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl1, which does not have a label.
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl2, in standard `pandoc-crossref` form. {#tbl:tbl-label2}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl3, which is **unlisted**. []{#tbl:tbl-label3 .unlisted}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl4, which has an **overriding** short-caption. This is the expected usage. []{#tbl:tbl-label4 short-caption="Table 4 *short* capt."}
+
+
+# Non-standard usage/errors
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl5, which does not have a label, but does have empty braces at the end. {}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl6, which does not have a label, but does have an empty span at the end. []{}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl7, which is improperly formatted, and will appear in the list of tables. This filter requires that `.unlisted` is placed in a span. {#tbl:tbl-label7 .unlisted}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl8, which has an empty short-caption. An empty short-caption does nothing. The long caption will still be used. []{#tbl:tbl-label8 short-caption=""}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl9, which is **unlisted**, yet has a short-caption. []{#tbl:tbl-label9 .unlisted short-caption="Table 9 **unlisted** *short* capt."}
diff --git a/paper/lua-filters/table-short-captions/table-short-captions.lua b/paper/lua-filters/table-short-captions/table-short-captions.lua
new file mode 100644
index 0000000..6f4970b
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/table-short-captions.lua
@@ -0,0 +1,160 @@
+---LaTeXTableShortCapts – enable `.unlisted` and `short-caption=""` properties
+-- for Pandoc conversion to LaTeX
+
+--[[
+Copyright (c) 2019 Blake Riley
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+]]
+local List = require 'pandoc.List'
+
+-- don't do anything unless we target latex
+if FORMAT ~= "latex" then
+ return {}
+end
+
+--- Code for injection into the LaTeX header,
+-- to overwrite a macro in longtable captions.
+-- The snippet redefines `\LT@c@ption` at `\begin{document}` so that the
+-- list-of-tables entry is taken from `\pandoctableshortcapt` whenever that
+-- macro is defined, and from the optional `[...]` caption argument otherwise.
+-- When the chosen text is empty, no list-of-tables entry is written at all.
+-- The string is appended to `header-includes` by `add_longtable_caption_mod`.
+longtable_caption_mod = [[
+% -- begin:latex-table-short-captions --
+\makeatletter\AtBeginDocument{%
+\def\LT@c@ption#1[#2]#3{% % Overwrite the workhorse macro used in formatting a longtable caption.
+ \LT@makecaption#1\fnum@table{#3}%
+ \@ifundefined{pandoctableshortcapt}
+ {\def\@tempa{#2}} % Use default behaviour: argument in square brackets
+ {\let\@tempa\pandoctableshortcapt} % If defined (even if blank), use to override
+ \ifx\@tempa\@empty\else % If @tempa is blank, no lot entry! Otherwise, @tempa becomes the lot title.
+ {\let\\\space
+ \addcontentsline{lot}{table}{\protect\numberline{\thetable}{\@tempa}}}%
+ \fi}
+}\makeatother
+% -- end:latex-table-short-captions --
+]]
+
+--- Creates a def shortcaption block to be placed before the table.
+-- The block expands to `\def\pandoctableshortcapt{...}`. With a short caption
+-- the braces hold the caption's inlines; without one the braces stay empty,
+-- which marks the table as unlisted.
+-- @tparam ?string sc : The short-caption property value (markdown source),
+--                      or nil for an unlisted table
+-- @treturn Plain : The def shortcaption block
+local function defshortcapt(sc)
+  local scblock = List:new{}
+  scblock:extend {pandoc.RawInline('tex', "\\def\\pandoctableshortcapt{")}
+  if sc then
+    -- The attribute value is parsed so that inline markup (emphasis etc.)
+    -- survives into the list of tables. Only the first block is used, and
+    -- only if it actually carries inlines: a value that parses to no blocks
+    -- (e.g. whitespace only) must not raise an error by indexing nil.
+    local first = pandoc.read(sc).blocks[1]
+    if first and (first.t == "Para" or first.t == "Plain") then
+      scblock:extend(first.content)
+    end
+  end
+  scblock:extend {pandoc.RawInline('tex', "}")}
+  if not sc then
+    -- Trailing TeX comment makes the generated source self-explanatory.
+    scblock:extend {pandoc.RawInline('tex', " % .unlisted")}
+  end
+  return pandoc.Plain(scblock)
+end
+
+--- The undef shortcaption block to be placed after the table.
+-- It resets `\pandoctableshortcapt` to `\relax` so that the override set for
+-- one table does not carry over to the tables that follow.
+-- NOTE(review): this relies on LaTeX's `\@ifundefined` treating a macro that
+-- equals `\relax` as undefined -- confirm against the LaTeX kernel docs.
+local undefshortcapt = pandoc.RawBlock('tex', "\\let\\pandoctableshortcapt\\relax")
+
+--- Reads the pieces of a mock "Table Attr" that matter for short captions.
+-- The Attr comes from an empty Span inside the table caption and is treated
+-- as if it belonged to the table itself.
+-- @tparam Attr attr : The Attr of the property Span in the table caption
+-- @treturn ?string : The identifier, or nil when it is missing or empty
+-- @treturn ?string : The non-empty "short-caption" property; always nil for
+--                    unlisted tables
+-- @treturn bool : Whether ".unlisted" appeared in the classes
+local function parse_table_attrs(attr)
+  -- An empty identifier counts as "no label".
+  local ident = attr.identifier
+  local label = (ident and #ident > 0) and ident or nil
+
+  -- Normalise the class lookup to a plain boolean.
+  local unlisted = attr.classes:includes("unlisted") and true or false
+
+  -- ".unlisted" wins over "short-caption": the property is not even read then.
+  local short_caption = nil
+  if not unlisted then
+    local value = attr.attributes["short-caption"]
+    if value and #value > 0 then
+      short_caption = value
+    end
+  end
+
+  return label, short_caption, unlisted
+end
+
+--- Wraps a table with shortcaption code.
+-- The first empty Span found in the caption is treated as the table's
+-- property block. It is removed from the caption, its identifier is written
+-- back as literal `{#label}` text, and the table is surrounded by the
+-- def/undef blocks when a short caption or `.unlisted` was requested.
+-- @tparam Table tbl : The table with a property Span in the caption
+-- @treturn ?List[Blocks] : nil (table left untouched) when there is no
+--   caption or no property Span; otherwise the table with {label} in the
+--   caption, optionally wrapped in shortcaption code
+function rewrite_longtable_caption(tbl)
+  -- Escape if there is no caption present.
+  if not tbl.caption then
+    return nil
+  end
+
+  -- Try find the properties block
+  local is_properties_span = function (inl)
+    return (inl.t) and (inl.t == "Span")                      -- is span
+      and (inl.content) and (#inl.content == 0)               -- is empty span
+  end
+  local propspan, idx = tbl.caption:find_if(is_properties_span)
+
+  -- If we couldn't find properties, escape.
+  if not propspan then
+    return nil
+  end
+
+  -- Otherwise, parse it all
+  local label, short_caption, unlisted = parse_table_attrs(propspan.attr)
+
+  -- Excise the span from the caption. table.remove shifts the following
+  -- inlines down; assigning nil would leave a hole in the list whenever the
+  -- span is not the last inline, which breaks `#` and the `extend` below.
+  table.remove(tbl.caption, idx)
+
+  -- Put label back into caption for pandoc-crossref
+  if label then
+    tbl.caption:extend {pandoc.Str("{#"..label.."}")}
+  end
+
+  -- Place new table, bracketed by the def/undef blocks when needed
+  local wrap = short_caption or unlisted
+  local result = List:new{}
+  if wrap then
+    result:extend {defshortcapt(short_caption)}
+  end
+  result:extend {tbl}
+  if wrap then
+    result:extend {undefshortcapt}
+  end
+  return result
+end
+
+--- Appends longtable_caption_mod to the document's header-includes.
+-- An existing MetaList is reused; any other value (or none) is first wrapped
+-- in a new MetaList so that the LaTeX snippet can be added as an extra entry.
+-- @tparam Meta meta : The document metadata
+-- @treturn Meta : The document metadata, with replacement LaTeX macro
+--                 in header_includes
+function add_longtable_caption_mod(meta)
+  local includes = meta['header-includes']
+  if not (includes and includes.t == 'MetaList') then
+    includes = pandoc.MetaList{includes}
+  end
+
+  local snippet = pandoc.MetaBlocks{pandoc.RawBlock('tex', longtable_caption_mod)}
+  includes[#includes + 1] = snippet
+
+  meta['header-includes'] = includes
+  return meta
+end
+
+-- The filter list handed back to pandoc: a single filter that registers
+-- the metadata handler and the table handler together.
+return {
+ {
+ Meta = add_longtable_caption_mod,
+ Table = rewrite_longtable_caption,
+ }
+}
diff --git a/paper/lua-filters/track-changes/.gitignore b/paper/lua-filters/track-changes/.gitignore
new file mode 100644
index 0000000..5fdf006
--- /dev/null
+++ b/paper/lua-filters/track-changes/.gitignore
@@ -0,0 +1,2 @@
+/sample.docx
+/sample.pdf
diff --git a/paper/lua-filters/track-changes/Makefile b/paper/lua-filters/track-changes/Makefile
new file mode 100644
index 0000000..990450e
--- /dev/null
+++ b/paper/lua-filters/track-changes/Makefile
@@ -0,0 +1,26 @@
+# Neither target produces a file named after itself.
+.PHONY: test clean
+
+## PENDING: ensure that LaTeX output can be compiled to PDF.
+# Runs the filter four times on sample.md: three runs are diffed against the
+# expected_accept.markdown, expected_reject.markdown and expected_draft.html
+# files, and the standalone LaTeX output is piped into test-track-changes.sh.
+# sample.pdf is a prerequisite and is deleted again once the checks pass.
+test: sample.md test-track-changes.sh sample.pdf
+ @pandoc -t markdown --wrap=preserve \
+ --lua-filter=track-changes.lua sample.md | \
+ diff --strip-trailing-cr -u - expected_accept.markdown
+ @pandoc -t markdown --wrap=preserve --track-changes=reject \
+ -M trackChanges:reject --lua-filter=track-changes.lua sample.md | \
+ diff --strip-trailing-cr -u - expected_reject.markdown
+ @pandoc -s -t html --wrap=preserve --track-changes=all \
+ -M trackChanges:all --lua-filter=track-changes.lua sample.md | \
+ diff --strip-trailing-cr -u - expected_draft.html
+ @pandoc -M trackChanges:all --track-changes=all --wrap=preserve \
+ --to=latex --lua-filter=track-changes.lua \
+ --standalone sample.md | \
+ sh test-track-changes.sh
+ @rm sample.pdf
+
+# Builds the PDF from sample.md with all tracked changes kept.
+sample.pdf: sample.md track-changes.lua
+ @pandoc -M trackChanges:all --track-changes=all \
+ --lua-filter=track-changes.lua \
+ --output $@ $<
+
+# Removes the generated PDF; a missing file is not treated as an error.
+clean:
+ rm sample.pdf || true
diff --git a/paper/lua-filters/track-changes/README.md b/paper/lua-filters/track-changes/README.md
new file mode 100644
index 0000000..379b4ff
--- /dev/null
+++ b/paper/lua-filters/track-changes/README.md
@@ -0,0 +1,18 @@
+# Tracks changes in LaTeX and HTML or removes them in other output formats
+
+The pandoc docx reader and writer support the track changes feature of
+MS Word (command line parameter `--track-changes=accept|reject|all`).
+
+If `--track-changes=all` was used to read a docx file, tracked changes
+and/or comments are included in the AST as spans, are written to every
+output format other than docx, and clutter the output.
+
+This Lua filter addresses this problem by interpreting the parameter
+`--track-changes` (pandoc version >= 2.1.1) or the metadata variable
+`trackChanges: accept|reject|all` (set either in a YAML block or with
+`-M`) and accepts/rejects changes and removes comments for all output
+formats including docx. In case of `--track-changes=all` and for HTML
+and LaTeX, it converts tracked changes and comments to appropriate
+commands (for LaTeX provided by the [changes
+package](https://ctan.org/pkg/changes)) and tries to mimic the
+visualization used in MS Word.
diff --git a/paper/lua-filters/track-changes/TODO.md b/paper/lua-filters/track-changes/TODO.md
new file mode 100644
index 0000000..86290ec
--- /dev/null
+++ b/paper/lua-filters/track-changes/TODO.md
@@ -0,0 +1,18 @@
+# Ideas and ToDos
+
+- [ ] nested comments
+- [x] comments across paragraphs
+- [ ] implement `paragraph-insertion`, `paragraph-deletion`
+- [/] implement multiple classes (see https://github.com/jgm/pandoc/issues/4270#issuecomment-358996343)
+- [x] treat comments with multiple paragraphs (see [#4270](https://github.com/jgm/pandoc/issues/4270))
+- [x] track changes in chapter titles
+- [ ] combine Strs after acceptions/rejections
+- [ ] remove track changes from automatic section identifiers
+- [ ] track changes in captions (figure, table, etc.)
+- [ ] color comments with authors color
+- [x] docx sample
+- [x] `PANDOC_READER_OPTIONS.trackChanges`
+- [x] HTML support with `<ins>`, `<del>` (requested with jgm/pandoc#1560) and `<mark>` with title attribute or spans with CSS
+- [ ] HTML track changes decorations as explained at [Comparing and contrasting ins, del, and s](http://html5doctor.com/ins-del-s) or https://github.com/jgm/pandoc/issues/2884#issuecomment-240263921
+- [ ] auto identifiers, be careful on nested/overlapping comments
+- [ ] citations in comment text \ No newline at end of file
diff --git a/paper/lua-filters/track-changes/expected_accept.markdown b/paper/lua-filters/track-changes/expected_accept.markdown
new file mode 100644
index 0000000..9207571
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_accept.markdown
@@ -0,0 +1,29 @@
+Track changes in LaTeX and HTML
+===============================
+
+A **simple** comment from me.
+
+This is a text with an *exciting* insertion.
+
+This is/was a text with a deletion.
+
+Here is the text to be moved.
+
+Here is a comment with nested changes.
+
+Here is a multi-line paragraph containing some text and a long deletion wrapping over two lines.
+
+This is a new paragraph.
+
+And so is this.
+
+One more.
+
+A *header* with a comment {#a-header-width-a-notecomment}
+=========================
+
+Some unmodified text ...
+
+\newpage
+
+... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
diff --git a/paper/lua-filters/track-changes/expected_draft.html b/paper/lua-filters/track-changes/expected_draft.html
new file mode 100644
index 0000000..d5c3cc5
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_draft.html
@@ -0,0 +1,41 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
+<head>
+ <meta charset="utf-8" />
+ <meta name="generator" content="pandoc" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
+ <title>Track changes in LaTeX and HTML</title>
+ <style>
+ code{white-space: pre-wrap;}
+ span.smallcaps{font-variant: small-caps;}
+ span.underline{text-decoration: underline;}
+ div.column{display: inline-block; vertical-align: top; width: 50%;}
+ </style>
+ <!--[if lt IE 9]>
+ <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
+ <![endif]-->
+</head>
+<body>
+<header id="title-block-header">
+<h1 class="title">Track changes in LaTeX and HTML</h1>
+</header>
+<nav id="TOC" role="doc-toc">
+
+</nav>
+<h1 id="track-changes-in-latex-and-html">Track changes in LaTeX and HTML</h1>
+<p>A <mark data-author="Mathias C. Walter" date="2016-05-21T22:14:00Z" data-id="1" title="I agree!"><strong>simple</strong></mark> comment from me.</p>
+<p>This is a text with <ins data-author="MCW" date="2014-06-25T10:40:00Z">an exciting</ins> insertion.</p>
+<p>This is/was a text with a <del data-author="SWS" date="2014-06-25T10:42:00Z">short</del> deletion.</p>
+<p><ins data-author="FKA" date="2016-04-16T08:20:00Z">Here is the text to be moved.</ins></p>
+<p><del data-author="John F. Kennedy" date="2016-04-16T08:20:00Z">Here is the text to be moved.</del></p>
+<p>Here is a <mark data-author="JFK" date="2016-07-29T16:50:00Z" data-id="2" title="Why?">com<ins data-author="SWS" date="2016-07-29T16:50:00Z">m</ins>ent with nest<del data-author="FKA" date="2016-04-16T08:20:00Z">t</del>ed changes</mark>.</p>
+<p>Here is a multi-line paragraph containing some text and a long deletion <del data-author="MCW" date="2016-04-16T08:20:00Z">short insertion</del> wrapping over two lines.</p>
+<p>This is <mark data-author="MCW" date="2016-05-09T16:13:00Z" data-id="4" title="A comment across paragraphs.">a new paragraph.</p>
+<p>And so</mark> is this.</p>
+<p>One <mark data-author="Jesse Rosenthal" date="2016-05-09T16:14:00Z" data-id="5" title="This one has multiple paragraphs. &#10; &#10; See?">more</mark>.</p>
+<h1 id="a-header-width-a-notecomment">A <em>header</em> wi<del data-author="FKA" date="2018-03-02T23:07:00Z">d</del>th <ins data-author="JFK" date="2018-03-02T23:07:00Z">a</ins> <mark data-author="FKA" date="2017-08-24T22:14:00Z" data-id="3" title="Note">comment</mark></h1>
+<p>Some unmodified text …</p>
+
+<p>… continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).</p>
+</body>
+</html>
diff --git a/paper/lua-filters/track-changes/expected_draft.tex b/paper/lua-filters/track-changes/expected_draft.tex
new file mode 100644
index 0000000..87ea1ac
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_draft.tex
@@ -0,0 +1,159 @@
+\PassOptionsToPackage{unicode=true}{hyperref} % options for packages loaded elsewhere
+\PassOptionsToPackage{hyphens}{url}
+%
+\documentclass[
+]{article}
+\usepackage{lmodern}
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[T1]{fontenc}
+ \usepackage[utf8]{inputenc}
+ \usepackage{textcomp} % provides euro and other symbols
+\else % if luatex or xelatex
+ \usepackage{unicode-math}
+ \defaultfontfeatures{Scale=MatchLowercase}
+ \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
+\fi
+% use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+\IfFileExists{microtype.sty}{% use microtype if available
+ \usepackage[]{microtype}
+ \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+\makeatletter
+\@ifundefined{KOMAClassName}{% if non-KOMA class
+ \IfFileExists{parskip.sty}{%
+ \usepackage{parskip}
+ }{% else
+ \setlength{\parindent}{0pt}
+ \setlength{\parskip}{6pt plus 2pt minus 1pt}}
+}{% if KOMA class
+ \KOMAoptions{parskip=half}}
+\makeatother
+\usepackage{xcolor}
+\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
+\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
+\hypersetup{
+ pdftitle={Track changes in LaTeX and HTML},
+ pdfborder={0 0 0},
+ breaklinks=true}
+\urlstyle{same} % don't use monospace font for urls
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+\setcounter{secnumdepth}{-2}
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+ \let\oldparagraph\paragraph
+ \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+ \let\oldsubparagraph\subparagraph
+ \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+
+% set default figure placement to htbp
+\makeatletter
+\def\fps@figure{htbp}
+\makeatother
+
+\RequirePackage[debrief]{silence}
+\ErrorsOff
+\usepackage{fancyhdr}
+\pagestyle{fancy}
+\fancyhf{}
+\fancyhead[C]{\leftmark}
+\usepackage[markup=underlined,authormarkup=none]{changes}
+\definecolor{auth1}{HTML}{4477AA}
+\definecolor{auth2}{HTML}{117733}
+\definecolor{auth3}{HTML}{999933}
+\definecolor{auth4}{HTML}{CC6677}
+\definecolor{auth5}{HTML}{AA4499}
+\definecolor{auth6}{HTML}{332288}
+\usepackage[textsize=scriptsize]{todonotes}
+\setlength{\marginparwidth}{3cm}
+\makeatletter
+\setremarkmarkup{\todo[color=Changes@Color#1!20]{\sffamily\textbf{#1:}~#2}}
+\makeatother
+\newcommand{\note}[2][]{\added[#1,remark={#2}]{}}
+\newcommand\hlnotesingle{%
+ \bgroup
+ \expandafter\def\csname sout\space\endcsname{\bgroup \ULdepth =-.8ex \ULset}%
+ \markoverwith{\textcolor{yellow}{\rule[-.5ex]{.1pt}{2.5ex}}}%
+ \ULon}
+\newcommand\hlnote[1]{\let\helpcmd\hlnotesingle\parhelp#1\par\relax\relax}
+\long\def\parhelp#1\par#2\relax{%
+ \helpcmd{#1}\ifx\relax#2\else\par\parhelp#2\relax\fi%
+}
+
+\makeatletter
+\newcommand\ifmoving{%
+ \ifx\protect\@unexpandable@protect
+ \expandafter\@firstoftwo
+ \else
+ \expandafter\@secondoftwo
+ \fi
+}
+
+\newcommand{\gobbletwo}[2][]{\@bsphack\@esphack}
+\newcommand{\gobbleone}[1][]{\@bsphack\@esphack}
+
+\let\oldadded\added
+\let\olddeleted\deleted
+\let\oldhlnote\hlnote
+\let\oldnote\note
+\renewcommand{\added}{\ifmoving{\gobbleone}{\oldadded}}
+\renewcommand{\deleted}{\ifmoving{\gobbletwo}{\olddeleted}}
+\renewcommand{\hlnote}{\ifmoving{}{\oldhlnote}}
+\renewcommand{\note}{\ifmoving{\gobbletwo}{\oldnote}}
+\makeatother
+\definechangesauthor[name={FKA}, color=auth1]{FKA}
+\definechangesauthor[name={JFK}, color=auth2]{JFK}
+\definechangesauthor[name={Jesse Rosenthal}, color=auth3]{JR}
+\definechangesauthor[name={MCW}, color=auth4]{MCW}
+\definechangesauthor[name={SWS}, color=auth5]{SWS}
+
+\title{Track changes in LaTeX and HTML}
+\date{}
+
+\begin{document}
+\maketitle
+
+{
+\setcounter{tocdepth}{3}
+\tableofcontents
+}
+\hypertarget{track-changes-in-latex-and-html}{%
+\section{Track changes in LaTeX and HTML}\label{track-changes-in-latex-and-html}}
+
+A \note[id=MCW]{I agree!}\hlnote{\textbf{simple}} comment from me.
+
+This is a text with \added[id=MCW]{an exciting} insertion.
+
+This is/was a text with a \deleted[id=SWS]{short} deletion.
+
+\added[id=FKA]{Here is the text to be moved.}
+
+\deleted[id=JFK]{Here is the text to be moved.}
+
+Here is a \note[id=JFK]{Why?}\hlnote{com\added[id=SWS]{m}ent with nest\deleted[id=FKA]{t}ed changes}.
+
+Here is a multi-line paragraph containing some text and a long deletion \deleted[id=MCW]{short insertion} wrapping over two lines.
+
+This is \note[id=MCW]{A comment across paragraphs.}\hlnote{a new paragraph.
+
+And so} is this.
+
+One \note[id=JR]{This one has multiple paragraphs. \newline \newline See?}\hlnote{more}.
+
+\hypertarget{a-header-width-a-notecomment}{%
+\section{\texorpdfstring{A \emph{header} wi\deleted[id=FKA]{d}th \added[id=JFK]{a} \note[id=FKA]{Note}\hlnote{comment}}{A header with comment}}\label{a-header-width-a-notecomment}}
+
+Some unmodified text \ldots{}
+
+\newpage
+
+\ldots{} continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
+
+\end{document}
diff --git a/paper/lua-filters/track-changes/expected_reject.markdown b/paper/lua-filters/track-changes/expected_reject.markdown
new file mode 100644
index 0000000..5059f34
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_reject.markdown
@@ -0,0 +1,29 @@
+Track changes in LaTeX and HTML
+===============================
+
+A **simple** comment from me.
+
+This is a text with insertion.
+
+This is/was a text with a *short* deletion.
+
+Here is the text to be moved.
+
+Here is a coment with nestted changes.
+
+Here is a multi-line paragraph containing some text and a long deletion short insertion wrapping over two lines.
+
+This is a new paragraph.
+
+And so is this.
+
+One more.
+
+A *header* width comment {#a-header-width-a-notecomment}
+========================
+
+Some unmodified text ...
+
+\newpage
+
+... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
diff --git a/paper/lua-filters/track-changes/sample.md b/paper/lua-filters/track-changes/sample.md
new file mode 100644
index 0000000..42493ed
--- /dev/null
+++ b/paper/lua-filters/track-changes/sample.md
@@ -0,0 +1,43 @@
+---
+title: Track changes in LaTeX and HTML
+toc: true
+header-includes: |
+ ```{=latex}
+ \RequirePackage[debrief]{silence}
+ \ErrorsOff
+ \usepackage{fancyhdr}
+ \pagestyle{fancy}
+ \fancyhf{}
+ \fancyhead[C]{\leftmark}
+ ```
+...
+
+# Track changes in LaTeX and HTML
+
+A [I agree!]{.comment-start id="1" author="Mathias C. Walter" date="2016-05-21T22:14:00Z"}**simple**[]{.comment-end id="1"} comment from me.
+
+This is a text with [an *exciting*]{.insertion author="MCW" date="2014-06-25T10:40:00Z"} insertion.
+
+This is/was a text with a [*short*]{.deletion author="SWS" date="2014-06-25T10:42:00Z"} deletion.
+
+[Here is the text to be moved.]{.insertion author="FKA" date="2016-04-16T08:20:00Z"}
+
+[Here is the text to be moved.]{.deletion author="John F. Kennedy" date="2016-04-16T08:20:00Z"}
+
+Here is a [Why?]{.comment-start id="2" author="JFK" date="2016-07-29T16:50:00Z"}com[m]{.insertion author="SWS" date="2016-07-29T16:50:00Z"}ent with nest[t]{.deletion author="FKA" date="2016-04-16T08:20:00Z"}ed changes[]{.comment-end id="2"}.
+
+Here is a multi-line paragraph containing some text and a long deletion [short insertion]{.deletion author="MCW" date="2016-04-16T08:20:00Z"} wrapping over two lines.
+
+This is [A comment across paragraphs.]{.comment-start id="4" author="MCW" date="2016-05-09T16:13:00Z"}a new paragraph.
+
+And so[]{.comment-end id="4"} is this.
+
+One [This one has multiple paragraphs. ¶ ¶ See?]{.comment-start id="5" author="Jesse Rosenthal" date="2016-05-09T16:14:00Z"}more[]{.comment-end id="5"}.
+
+# A *header* wi[d]{.deletion author="FKA" date="2018-03-02T23:07:00Z"}th [a]{.insertion author="JFK" date="2018-03-02T23:07:00Z"} [Note]{.comment-start id="3" author="FKA" date="2017-08-24T22:14:00Z"}comment[]{.comment-end id="3"}
+
+Some unmodified text ...
+
+\newpage
+
+... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
diff --git a/paper/lua-filters/track-changes/test-track-changes.sh b/paper/lua-filters/track-changes/test-track-changes.sh
new file mode 100644
index 0000000..b7074ef
--- /dev/null
+++ b/paper/lua-filters/track-changes/test-track-changes.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+latex_result="$(cat -)"
+
+assert_contains ()
+{
+ printf '%s' "$latex_result" | grep -qF "$1" -
+ if [ $? -ne 0 ]; then
+ printf 'Output does not contain `%s`.\n' "$1" >&2
+ exit 1
+ fi
+}
+
+# whether we are using the change package
+assert_contains <<EOF
+\usepackage{changes}
+EOF
+
+# Author colors
+assert_contains '\definechangesauthor[name={JFK}, color=auth2]{JFK}'
+
+# Additions, notes, and deletions
+assert_contains <<EOF
+Here is a \note[id=JFK]{Why?}\hlnote{com\added[id=SWS]{m}ent with nest\deleted[id=FKA]{t}ed changes}.
+EOF
diff --git a/paper/lua-filters/track-changes/track-changes.lua b/paper/lua-filters/track-changes/track-changes.lua
new file mode 100644
index 0000000..4c447ea
--- /dev/null
+++ b/paper/lua-filters/track-changes/track-changes.lua
@@ -0,0 +1,247 @@
+local authors = {}
+
+--- Report whether an output format is TeX-based.
+-- @param format output format name (the filter passes the FORMAT global)
+-- @return true for 'latex', 'tex' and 'context', false for anything else
+local function is_tex(format)
+  local tex_formats = { latex = true, tex = true, context = true }
+  return tex_formats[format] == true
+end
+
+--- Report whether an output format is an HTML flavour.
+-- @param format output format name (the filter passes the FORMAT global)
+-- @return true for 'html', 'html4' and 'html5', false for anything else
+local function is_html (format)
+  local html_formats = { html = true, html4 = true, html5 = true }
+  return html_formats[format] == true
+end
+
+--- Report whether an output format is a word-processor format.
+-- @param format output format name (the filter passes the FORMAT global)
+-- @return true for 'docx' and 'odt', false for anything else
+local function is_wordprocessing (format)
+  local office_formats = { docx = true, odt = true }
+  return office_formats[format] == true
+end
+
+--- LaTeX preamble appended to `header-includes` when all changes are shown
+-- in a TeX format. It loads the `changes` package, defines the author colours
+-- auth1 .. auth6 and the \note / \hlnote commands emitted for comments, and
+-- redefines \added, \deleted, \hlnote and \note behind an \ifmoving switch.
+-- Declared local so that the filter does not create a stray global.
+local header_track_changes = [[
+
+\makeatletter
+\PassOptionsToPackage{textsize=scriptsize}{todonotes}
+\PassOptionsToPackage{markup=underlined,authormarkup=none,commentmarkup=todo}{changes}
+\usepackage{changes}
+\@ifpackagelater{changes}{2018/11/03}{%
+}{%
+ \usepackage{todonotes}
+ \setremarkmarkup{\todo[color=Changes@Color#1!20]{\sffamily\textbf{#1:}~#2}}
+}%
+\makeatother
+\definecolor{auth1}{HTML}{4477AA}
+\definecolor{auth2}{HTML}{117733}
+\definecolor{auth3}{HTML}{999933}
+\definecolor{auth4}{HTML}{CC6677}
+\definecolor{auth5}{HTML}{AA4499}
+\definecolor{auth6}{HTML}{332288}
+\setlength{\marginparwidth}{3cm}
+\newcommand{\note}[2][]{\added[#1,remark={#2}]{}}
+\newcommand\hlnotesingle{%
+ \bgroup
+ \expandafter\def\csname sout\space\endcsname{\bgroup \ULdepth =-.8ex \ULset}%
+ \markoverwith{\textcolor{yellow}{\rule[-.5ex]{.1pt}{2.5ex}}}%
+ \ULon}
+\newcommand\hlnote[1]{\let\helpcmd\hlnotesingle\parhelp#1\par\relax\relax}
+\long\def\parhelp#1\par#2\relax{%
+ \helpcmd{#1}\ifx\relax#2\else\par\parhelp#2\relax\fi%
+}
+
+\makeatletter
+\newcommand\ifmoving{%
+ \ifx\protect\@unexpandable@protect
+ \expandafter\@firstoftwo
+ \else
+ \expandafter\@secondoftwo
+ \fi
+}
+
+\newcommand{\gobbletwo}[2][]{\@bsphack\@esphack}
+\newcommand{\gobbleone}[1][]{\@bsphack\@esphack}
+
+\let\oldadded\added
+\let\olddeleted\deleted
+\let\oldhlnote\hlnote
+\let\oldnote\note
+\renewcommand{\added}{\ifmoving{\gobbleone}{\oldadded}}
+\renewcommand{\deleted}{\ifmoving{\gobbletwo}{\olddeleted}}
+\renewcommand{\hlnote}{\ifmoving{}{\oldhlnote}}
+\renewcommand{\note}{\ifmoving{\gobbletwo}{\oldnote}}
+\makeatother
+]]
+
+--- Build the initials of a name.
+-- The name is split into runs of alphanumeric characters and apostrophes;
+-- honorifics (dr, mr, ms, mrs, prof, mx, sir, in any letter case) are skipped
+-- and the upper-cased first character of every remaining word is collected.
+-- @param s the full name
+-- @return the concatenated initials, "" when no word remains
+local function initials(s)
+  -- honorifics that contribute no initial (looked up in lower case)
+  local titles = {
+    dr = true, mr = true, ms = true, mrs = true, prof = true,
+    mx = true, sir = true,
+  }
+
+  local letters = {}
+  for word in s:gmatch '[%w\']+' do
+    if titles[word:lower()] == nil then
+      local first = word:sub(1, 1)
+      table.insert(letters, first:upper())
+    end
+  end
+  return table.concat(letters)
+end
+
+--- Filter used when building the HTML title of a comment: a Str that consists
+-- solely of the pilcrow "¶" is replaced by a Str holding the character
+-- reference '&#10;'. For any other Str the handler returns nothing.
+-- Declared local so that the filter does not create a stray global.
+local relinerHtml = {
+  Str = function (s)
+    if s.text == "¶" then
+      return pandoc.Str('&#10;')
+    end
+  end
+}
+
+--- Filter used when building the LaTeX text of a comment: a Str that consists
+-- solely of the pilcrow "¶" is replaced by a Str holding '\newline'.
+-- For any other Str the handler returns nothing.
+-- Declared local so that the filter does not create a stray global.
+local relinerTex = {
+  Str = function (s)
+    if s.text == "¶" then
+      return pandoc.Str('\\newline')
+    end
+  end
+}
+
+--- Format-independent variant of the pilcrow filter: a Str that consists
+-- solely of "¶" is replaced by a LineBreak element.
+-- For any other Str the handler returns nothing.
+-- Declared local so that the filter does not create a stray global.
+local reliner = {
+  Str = function (s)
+    if s.text == "¶" then
+      return pandoc.LineBreak()
+    end
+  end
+}
+
+--- Turn the pilcrow markers inside a comment into line breaks.
+-- Spans carrying the "comment-start" class are walked with `reliner`;
+-- for every other span nothing is returned.
+-- Declared local: the name is only used inside this file.
+-- @param elem a Span element
+local function SpanReliner(elem)
+  -- the class list is read from elem.classes, falling back to elem.attr.classes
+  local classes = elem.classes or elem.attr.classes
+  if classes:includes("comment-start") then
+    return pandoc.walk_inline(elem, reliner)
+  end
+end
+
+-- LaTeX command emitted for each tracking class.
+local toTex = {["comment-start"] = "\\note", insertion = "\\added", deletion = "\\deleted"}
+
+--- Render a track-changes span as raw LaTeX.
+-- The first class of the span selects the command from `toTex`. When the span
+-- has an `author` attribute, the author is recorded in `authors` under an id
+-- (the initials for names containing a space, the name itself otherwise) and
+-- the id is passed as `[id=...]`. A span without an `author` attribute used to
+-- raise an error; it is now rendered without the id option.
+-- A comment start yields `\note...{text}\hlnote{` and thereby leaves a group
+-- open; the "comment-end" span emits the closing `}`.
+-- @param elem a Span element
+-- @return a raw LaTeX inline, or nothing for spans that are not tracking spans
+local function TrackingSpanToTex(elem)
+  local command = toTex[elem.classes[1]]
+  if command ~= nil then
+    local s = command
+    local author = elem.attributes.author
+    if author ~= nil then
+      local inits = author:find' ' and initials(author) or author
+      authors[inits] = author
+      s = s .. '[id=' .. inits .. ']'
+    end
+    s = s .. '{'
+    if elem.classes:includes("comment-start") then
+      s = s .. pandoc.utils.stringify(pandoc.walk_inline(elem, relinerTex)) .. '}\\hlnote{'
+    else
+      s = s .. pandoc.utils.stringify(elem.content) .. '}'
+    end
+    return pandoc.RawInline('latex', s)
+  elseif elem.classes:includes("comment-end") then
+    return pandoc.RawInline('latex', '}')
+  end
+end
+
+--- Iterate over a table in sorted key order.
+-- The keys are collected once, sorted with `table.sort` (using `f` as the
+-- comparison function when given) and then handed out one by one.
+-- @param t table to traverse
+-- @param f optional comparison function for `table.sort`
+-- @return an iterator yielding key, value pairs; it yields nil when exhausted
+local function pairsByKeys(t, f)
+  local keys = {}
+  for key in pairs(t) do
+    keys[#keys + 1] = key
+  end
+  table.sort(keys, f)
+
+  local pos = 0
+  return function ()
+    pos = pos + 1
+    local key = keys[pos]
+    return key, t[key]
+  end
+end
+
+--- Add packages to the header includes.
+-- Appends the LaTeX preamble stored in `header_track_changes`, followed by one
+-- `\definechangesauthor` declaration per recorded author, to the
+-- `header-includes` metadata. An existing `header-includes` value that is not
+-- a MetaList is wrapped in one first.
+-- The header only defines the colours auth1 .. auth6, so the colour index
+-- wraps around; previously a seventh author was given the undefined `auth7`.
+-- @param meta document metadata
+-- @return the metadata with the extended `header-includes`
+local function add_track_changes(meta)
+  local header_includes
+  if meta['header-includes'] and meta['header-includes'].t == 'MetaList' then
+    header_includes = meta['header-includes']
+  else
+    header_includes = pandoc.MetaList{meta['header-includes']}
+  end
+  header_includes[#header_includes + 1] =
+    pandoc.MetaBlocks{pandoc.RawBlock('latex', header_track_changes)}
+  local color_count = 6 -- number of authN colours defined in the header
+  local a = 0
+  for key,value in pairsByKeys(authors) do -- sorted author list; otherwise make test may fail
+    local color = 'auth' .. (a % color_count + 1)
+    header_includes[#header_includes + 1] =
+      pandoc.MetaBlocks{pandoc.RawBlock('latex', '\\definechangesauthor[name={' .. value .. '}, color=' .. color .. ']{' .. key .. '}')}
+    a = a + 1
+  end
+  meta['header-includes'] = header_includes
+  return meta
+end
+
+-- HTML element emitted for each tracking class.
+local toHtml = {["comment-start"] = "mark", insertion = "ins", deletion = "del"}
+
+--- Escape the characters that are special in HTML text and attribute values.
+local function escape_html(text)
+  local entities = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
+  return (text:gsub('[&<>"]', entities))
+end
+
+--- Render a track-changes span as raw HTML.
+-- The first class of the span selects the element from `toHtml`. Every span
+-- attribute is copied onto the element, prefixed with `data-` unless its name
+-- is `date`. A comment start opens a `<mark>` whose `title` holds the comment
+-- text; the "comment-end" span closes it. Insertions and deletions become
+-- complete `<ins>` / `<del>` elements.
+-- Attribute values, the title and the element text are HTML-escaped, and a
+-- span without an `author` attribute no longer raises an error.
+-- @param elem a Span element
+-- @return a raw HTML inline, or nothing for spans that are not tracking spans
+local function TrackingSpanToHtml(elem)
+  local tag = toHtml[elem.classes[1]]
+  if tag ~= nil then
+    local author = elem.attributes.author
+    if author ~= nil then
+      local inits = author:find' ' and initials(author) or author
+      authors[inits] = author
+    end
+    local s = '<' .. tag
+    for k,v in pairs(elem.attributes) do
+      local hattr = k
+      if hattr ~= 'date' then hattr = 'data-' .. hattr end
+      s = s .. ' ' .. hattr .. '="' .. escape_html(v) .. '"'
+    end
+    if elem.classes:includes("comment-start") then
+      if elem.identifier then
+        s = s .. ' data-id="' .. escape_html(elem.identifier) .. '"'
+      end
+      local title = escape_html(pandoc.utils.stringify(pandoc.walk_inline(elem, relinerHtml)))
+      -- relinerHtml inserts '&#10;' on purpose; restore exactly that reference
+      title = title:gsub('&amp;#10;', '&#10;')
+      s = s .. ' title="' .. title .. '">'
+    else
+      s = s .. '>' .. escape_html(pandoc.utils.stringify(elem.content)) .. '</' .. tag .. '>'
+    end
+    return pandoc.RawInline('html', s)
+  elseif elem.classes:includes("comment-end") then
+    return pandoc.RawInline('html', '</mark>')
+  end
+end
+
+--- Resolve a tracking span as if every change had been accepted.
+-- Comment markers and deletions are replaced by an empty list, insertions by
+-- their content. For any other span nothing is returned.
+-- @param elem a Span element
+local function SpanAcceptChanges(elem)
+  local classes = elem.classes
+  if classes:includes("comment-start") or classes:includes("comment-end") then
+    return {}
+  end
+  if classes:includes("insertion") then
+    return elem.content
+  end
+  if classes:includes("deletion") then
+    return {}
+  end
+end
+
+--- Resolve a tracking span as if every change had been rejected.
+-- Comment markers and insertions are replaced by an empty list, deletions by
+-- their content. For any other span nothing is returned.
+-- @param elem a Span element
+local function SpanRejectChanges(elem)
+  local classes = elem.classes
+  if classes:includes("comment-start") or classes:includes("comment-end") then
+    return {}
+  end
+  if classes:includes("insertion") then
+    return {}
+  end
+  if classes:includes("deletion") then
+    return elem.content
+  end
+end
+
+--- Entry point: apply the selected track-changes mode to the whole document.
+-- The mode is PANDOC_READER_OPTIONS.trackChanges when that is set; otherwise
+-- it is derived from the `trackChanges` metadata field ('all', 'accept' or
+-- 'reject'). A missing or unrecognised value behaves like 'accept'.
+-- The `trackChanges` field is removed from the metadata in every case.
+-- Cleaned up relative to the first version: the first assignment to `tc` was
+-- dead code, and `if #M then` was always true (0 is truthy in Lua), so the
+-- document is now visibly always rebuilt and returned.
+-- @param doc the Pandoc document
+-- @return a Pandoc document with the filtered blocks and updated metadata
+function Pandoc(doc)
+  local meta = doc.meta
+  local trackChangesOptions = {all = 'AllChanges', accept = 'AcceptChanges', reject = 'RejectChanges' }
+  local tc = meta['trackChanges']
+  if type(tc) == 'table' then
+    tc = pandoc.utils.stringify(tc)
+  end
+  tc = tc or 'accept'
+  local trackChanges = PANDOC_READER_OPTIONS and PANDOC_READER_OPTIONS.trackChanges or trackChangesOptions[tc]
+  meta.trackChanges = nil -- remove it from the metadata
+
+  -- Span handler for this mode and output format; stays nil when all changes
+  -- are requested for a format that has no dedicated rendering
+  local span_handler
+  if trackChanges == 'AllChanges' then
+    if is_html(FORMAT) then
+      span_handler = TrackingSpanToHtml
+    elseif is_tex(FORMAT) then
+      span_handler = TrackingSpanToTex
+    elseif is_wordprocessing(FORMAT) then
+      span_handler = SpanReliner
+    end
+  elseif trackChanges == 'RejectChanges' then
+    span_handler = SpanRejectChanges
+  else -- otherwise assumes AcceptChanges
+    span_handler = SpanAcceptChanges
+  end
+
+  local blocks = doc.blocks
+  if span_handler then
+    -- the blocks are wrapped in a Div so that they can be walked in one call
+    blocks = pandoc.walk_block(pandoc.Div(blocks), { Span = span_handler }).content
+  end
+  if trackChanges == 'AllChanges' and is_tex(FORMAT) then
+    meta = add_track_changes(meta)
+  end
+  return pandoc.Pandoc(blocks, meta)
+end
diff --git a/paper/lua-filters/wordcount/Makefile b/paper/lua-filters/wordcount/Makefile
new file mode 100644
index 0000000..7dfba48
--- /dev/null
+++ b/paper/lua-filters/wordcount/Makefile
@@ -0,0 +1,2 @@
+test:
+ @pandoc --lua-filter=wordcount.lua sample.md | diff --strip-trailing-cr -u expected.txt -
diff --git a/paper/lua-filters/wordcount/README.md b/paper/lua-filters/wordcount/README.md
new file mode 100644
index 0000000..45efc2f
--- /dev/null
+++ b/paper/lua-filters/wordcount/README.md
@@ -0,0 +1,11 @@
+# wordcount
+
+This filter counts the words in the body of a document (omitting
+metadata like titles and abstracts), including words in code.
+It should be more accurate than `wc -w` run directly on a
+Markdown document, since the latter will count markup
+characters, like the `#` in front of an ATX header, or
+tags in HTML documents, as words.
+
+To run it, `pandoc --lua-filter wordcount.lua myfile.md`.
+The word count will be printed to stdout.
diff --git a/paper/lua-filters/wordcount/expected.txt b/paper/lua-filters/wordcount/expected.txt
new file mode 100644
index 0000000..dc608fc
--- /dev/null
+++ b/paper/lua-filters/wordcount/expected.txt
@@ -0,0 +1 @@
+15 words in body
diff --git a/paper/lua-filters/wordcount/sample.md b/paper/lua-filters/wordcount/sample.md
new file mode 100644
index 0000000..240bee0
--- /dev/null
+++ b/paper/lua-filters/wordcount/sample.md
@@ -0,0 +1,12 @@
+---
+title: Metadata words don't count
+abstract: ignored!
+---
+
+# Word count
+
+This document has *a **lot** of [words](url "title")* (15).[^1]
+
+ code is counted
+
+[^1]: Footnotes count.
diff --git a/paper/lua-filters/wordcount/wordcount.lua b/paper/lua-filters/wordcount/wordcount.lua
new file mode 100644
index 0000000..19aec11
--- /dev/null
+++ b/paper/lua-filters/wordcount/wordcount.lua
@@ -0,0 +1,29 @@
+-- counts words in a document
+
+-- running total, updated by the handlers below
+local words = 0
+
+--- Filter that adds to `words`: every Str containing at least one
+-- non-punctuation character counts as one word, and every run of
+-- non-whitespace characters in inline code and code blocks counts as one word.
+-- The counter, the filter table and the gsub results are local, so nothing
+-- leaks into the global environment (`_` and `n` used to be set as globals).
+local wordcount = {
+  Str = function(el)
+    -- we don't count a word if it's entirely punctuation:
+    if el.text:match("%P") then
+      words = words + 1
+    end
+  end,
+
+  Code = function(el)
+    -- gsub's second result is the number of matches
+    local _, n = el.text:gsub("%S+","")
+    words = words + n
+  end,
+
+  CodeBlock = function(el)
+    local _, n = el.text:gsub("%S+","")
+    words = words + n
+  end
+}
+
+--- Count the words of the document body, print the total and quit.
+-- The process is ended with exit status 0 right after printing.
+-- @param el the Pandoc document
+function Pandoc(el)
+  -- skip metadata, just count body:
+  local body = pandoc.Div(el.blocks)
+  pandoc.walk_block(body, wordcount)
+  local report = words .. " words in body"
+  print(report)
+  os.exit(0)
+end
diff --git a/paper/media/image1.png b/paper/media/image1.png
new file mode 100644
index 0000000..2a61d5b
--- /dev/null
+++ b/paper/media/image1.png
Binary files differ
diff --git a/paper/media/image2.png b/paper/media/image2.png
new file mode 100644
index 0000000..3902872
--- /dev/null
+++ b/paper/media/image2.png
Binary files differ
diff --git a/paper/media/image3.png b/paper/media/image3.png
new file mode 100644
index 0000000..1045b7d
--- /dev/null
+++ b/paper/media/image3.png
Binary files differ
diff --git a/paper/media/image4.png b/paper/media/image4.png
new file mode 100644
index 0000000..0306b17
--- /dev/null
+++ b/paper/media/image4.png
Binary files differ
diff --git a/paper/media/image5.png b/paper/media/image5.png
new file mode 100644
index 0000000..f16264c
--- /dev/null
+++ b/paper/media/image5.png
Binary files differ
diff --git a/paper/media/image6.png b/paper/media/image6.png
new file mode 100644
index 0000000..7c9cf31
--- /dev/null
+++ b/paper/media/image6.png
Binary files differ
diff --git a/paper/media/image7.png b/paper/media/image7.png
new file mode 100644
index 0000000..8abda7f
--- /dev/null
+++ b/paper/media/image7.png
Binary files differ
diff --git a/paper/media/image8.png b/paper/media/image8.png
new file mode 100644
index 0000000..8e34c5c
--- /dev/null
+++ b/paper/media/image8.png
Binary files differ
diff --git a/paper/mutagenicity.md b/paper/mutagenicity.md
new file mode 100644
index 0000000..c316543
--- /dev/null
+++ b/paper/mutagenicity.md
@@ -0,0 +1,532 @@
+---
+title: A comparison of random forest, support vector machine, deep learning and lazar algorithms for predicting mutagenicity
+#subtitle: Performance comparison with a new expanded dataset
+author:
+ - Christoph Helma:
+ institute: ist
+ email: helma@in-silico.ch
+ correspondence: "yes"
+ - Verena Schöning:
+ institute: zeller
+ - Philipp Boss:
+ institute: zeller
+ - Jürgen Drewe:
+ institute: zeller
+institute:
+ - ist:
+ name: in silico toxicology gmbh
+ address: "Rastatterstrasse 41, 4057 Basel, Switzerland"
+ - zeller:
+ name: Zeller AG
+ address: "Seeblickstrasse 4, 8590 Romanshorn, Switzerland"
+bibliography: bibliography.bib
+keywords: mutagenicity, (Q)SAR, lazar, random forest, support vector machine, deep learning
+documentclass: scrartcl
+...
+
+Abstract
+========
+
+k-nearest neighbor (`lazar`), random forest, support vector machine and deep
+learning algorithms were applied to a new *Salmonella* mutagenicity dataset
+with 8281 unique chemical structures. Algorithm performance was evaluated
+using 5-fold crossvalidation.
+TODO
+- results
+- conclusion
+
+Introduction
+============
+
+TODO: algo history
+
+TODO: dataset history
+
+TODO: open problems
+
+The main objective of this study was
+
+ - to generate a new training dataset, by combining the most comprehensive public mutagenicity datasets
+ - to compare the performance of global models (RF, SVM, Neural Nets) with local models (`lazar`)
+
+Materials and Methods
+=====================
+
+Data
+----
+
+For all methods, the same training dataset was used. The
+training dataset was compiled from the following sources:
+
+- Kazius/Bursi Dataset (4337 compounds, @Kazius2005): <http://cheminformatics.org/datasets/bursi/cas_4337.zip>
+
+- Hansen Dataset (6513 compounds, @Hansen2009): <http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv>
+
+- EFSA Dataset (695 compounds): <https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls>
+
+Mutagenicity classifications from Kazius and Hansen datasets were used
+without further processing. To achieve consistency with these
+datasets, EFSA compounds were classified as mutagenic, if at least one
+positive result was found for TA98 or TA100 *Salmonella* strains.
+
+Dataset merges were based on unique SMILES (*Simplified Molecular Input
+Line Entry Specification*) strings of the compound structures.
+Duplicated experimental data with the same outcome was merged into a
+single value, because it is likely that it originated from the same
+experiment. Contradictory results were kept as multiple measurements in
+the database. The combined training dataset contains 8281 unique
+structures.
+
+Source code for all data download, extraction and merge operations is
+publicly available from the git repository
+<https://git.in-silico.ch/mutagenicity-paper> under a GPL3 License.
+
+TODO: check/fix git repo
+
+Algorithms
+----------
+
+### `lazar`
+
+`lazar` (*lazy structure activity relationships*) is a modular framework
+for read-across model development and validation. It follows the
+following basic workflow: For a given chemical structure `lazar`:
+
+- searches in a database for similar structures (neighbours) with
+ experimental data,
+
+- builds a local QSAR model with these neighbours and
+
+- uses this model to predict the unknown activity of the query
+ compound.
+
+This procedure resembles an automated version of read-across predictions
+in toxicology; in machine learning terms it would be classified as a
+k-nearest-neighbour algorithm.
+
+Apart from this basic workflow, `lazar` is completely modular and allows
+the researcher to use any algorithm for similarity searches and local
+QSAR (*Quantitative structure--activity relationship*) modelling.
+Algorithms used within this study are described in the following
+sections.
+
+#### Neighbour identification
+
+Similarity calculations were based on MolPrint2D fingerprints (@Bender2004) from the OpenBabel cheminformatics library
+(@OBoyle2011a). The MolPrint2D fingerprint uses
+atom environments as molecular representation, which resembles basically
+the chemical concept of functional groups. For each atom in a molecule,
+it represents the chemical environment using the atom types of connected
+atoms.
+
+MolPrint2D fingerprints are generated dynamically from chemical
+structures and do not rely on predefined lists of fragments (such as
+OpenBabel FP3, FP4 or MACCs fingerprints or lists of
+toxicophores/toxicophobes). This has the advantage that they may capture
+substructures of toxicological relevance that are not included in other
+fingerprints.
+
+From MolPrint2D fingerprints a feature vector with all atom environments
+of a compound can be constructed that can be used to calculate chemical
+similarities.
+
+The chemical similarity between two compounds a and b is expressed as
+the ratio of the number of atom environments common to both structures,
+|A ∩ B|, to the total number of atom environments, |A ∪ B|
+(Jaccard/Tanimoto index), where A and B denote the sets of atom
+environments of a and b.
+
+$$sim = \frac{\left| A\ \cap B \right|}{\left| A\ \cup B \right|}$$
+
+Threshold selection is a trade-off between prediction accuracy (high
+threshold) and the number of predictable compounds (low threshold). As
+it is in many practical cases desirable to make predictions even in the
+absence of closely related neighbours, we follow a tiered approach:
+
+- First a similarity threshold of 0.5 is used to collect neighbours,
+ to create a local QSAR model and to make a prediction for the query
+ compound.
+
+- If any of these steps fails, the procedure is repeated with a
+ similarity threshold of 0.2 and the prediction is flagged with a
+ warning that it might be out of the applicability domain of the
+ training data.
+
+- Similarity thresholds of 0.5 and 0.2 are the default values chosen
+ by the software developers and remained unchanged during the
+ course of these experiments.
+
+Compounds with the same structure as the query structure are
+automatically eliminated from neighbours to obtain unbiased predictions
+in the presence of duplicates.
+
+#### Local QSAR models and predictions
+
+Only similar compounds (neighbours) above the threshold are used for
+local QSAR models. In this investigation, we are using a weighted
+majority vote from the neighbour's experimental data for mutagenicity
+classifications. Probabilities for both classes
+(mutagenic/non-mutagenic) are calculated according to the following
+formula and the class with the higher probability is used as prediction
+outcome.
+
+$$p_{c} = \ \frac{\sum_{}^{}\text{sim}_{n,c}}{\sum_{}^{}\text{sim}_{n}}$$
+
+$p_{c}$ Probability of class c (e.g. mutagenic or non-mutagenic)\
+$\sum_{}^{}\text{sim}_{n,c}$ Sum of similarities of neighbours with
+class c\
+$\sum_{}^{}\text{sim}_{n}$ Sum of similarities of all neighbours
+
+#### Applicability domain
+
+The applicability domain (AD) of `lazar` models is determined by the
+structural diversity of the training data. If no similar compounds are
+found in the training data no predictions will be generated. Warnings
+are issued if the similarity threshold had to be lowered from 0.5 to 0.2
+in order to enable predictions. Predictions without warnings can be
+considered as close to the applicability domain and predictions with
+warnings as more distant from the applicability domain. Quantitative
+applicability domain information can be obtained from the similarities
+of individual neighbours.
+
+#### Availability
+
+- `lazar` experiments for this manuscript:
+ <https://git.in-silico.ch/mutagenicity-paper>
+ (source code, GPL3)
+
+- `lazar` framework:
+ <https://git.in-silico.ch/lazar>
+ (source code, GPL3)
+
+- `lazar` GUI:
+ <https://git.in-silico.ch/lazar-gui>
+ (source code, GPL3)
+
+- Public web interface:
+ <https://lazar.in-silico.ch>
+
+### Random Forest, Support Vector Machines, and Deep Learning in R-project
+
+For the Random Forest (RF), Support Vector Machines (SVM), and Deep
+Learning (DL) models, molecular descriptors were calculated
+with the PaDEL-Descriptors program (<http://www.yapcwsoft.com> version 2.21, @Yap2011).
+
+TODO: sentence ??
+
+From these descriptors were
+chosen, which were actually used for the generation of the DL model.
+
+
+In comparison to `lazar`, three other models (Random Forest (RF), Support
+Vector Machines (SVM), and Deep Learning (DL)) were evaluated.
+
+For the generation of these models, molecular 1D and 2D descriptors of
+the training dataset were calculated using PaDEL-Descriptors (<http://www.yapcwsoft.com> version
+2.21, @Yap2011).
+
+As the training dataset contained over 8280 instances, it was decided to
+delete instances with missing values during data pre-processing.
+Furthermore, substances with equivocal outcome were removed. The final
+training dataset contained 8080 instances with known mutagenic
+potential. The RF, SVM, and DL models were generated using the R
+software (R-project for Statistical Computing,
+<https://www.r-project.org/>; version 3.3.1). Specific R packages used
+are identified for each step in the description below. During feature
+selection, descriptors with near zero variance were removed using the
+'*nearZeroVar*'-function (package 'caret'). If the percentage of the
+most common value was more than 90% or when the frequency ratio of the
+most common value to the second most common value was greater than 95:5
+(e.g. 95 instances of the most common value and only 5 or less instances
+of the second most common value), a descriptor was classified as having
+a near zero variance. After that, highly correlated descriptors were
+removed using the '*findCorrelation*'-function (package 'caret') with a
+cut-off of 0.9. This resulted in a training dataset with 516
+descriptors. These descriptors were scaled to be in the range between 0
+and 1 using the '*preProcess*'-function (package 'caret'). The scaling
+routine was saved in order to apply the same scaling on the testing
+dataset. As these three steps did not consider the outcome, it was
+decided that they do not need to be included in the cross-validation of
+the model. To further reduce the number of features, a LASSO (*least
+absolute shrinkage and selection operator*) regression was performed
+using the '*glmnet*'-function (package '*glmnet*'). The reduced dataset
+was used for the generation of the pre-trained models.
+
+For the RF model, the '*randomForest*'-function (package
+'*randomForest*') was used. A forest with 1000 trees with maximal
+terminal nodes of 200 was grown for the prediction.
+
+The '*svm*'-function (package 'e1071') with a *radial basis function
+kernel* was used for the SVM model.
+
+The DL model was generated using the '*h2o.deeplearning*'-function
+(package '*h2o*'). The DL contained four hidden layers with 70, 50, 50,
+and 10 neurons, respectively. Other hyperparameters were set as follows:
+l1=1.0E-7, l2=1.0E-11, epsilon = 1.0E-10, rho = 0.8, and quantile\_alpha
+= 0.5. For all other hyperparameters, the default values were used.
+Weights and biases were in a first step determined with an unsupervised
+DL model. These values were then used for the actual, supervised DL
+model.
+
+To validate these models, an internal cross-validation approach was
+chosen. The training dataset was randomly split in training data, which
+contained 95% of the data, and validation data, which contained 5% of the
+data. A feature selection with LASSO on the training data was performed,
+reducing the number of descriptors to approximately 100. This step was
+repeated five times. Based on each of the five different training data,
+the predictive models were trained and the performance tested with the
+validation data. This step was repeated 10 times. Furthermore, a
+y-randomisation using the RF model was performed. During
+y-randomisation, the outcome (y-variable) is randomly permuted. The
+theory is that after randomisation of the outcome, the model should not
+be able to correlate the outcome to the properties (descriptor values)
+of the substances. The performance of the model should therefore
+indicate a by-chance prediction with an accuracy of about 50%. If this
+is true, it can be concluded that correlation between actual outcome and
+properties of the substances is real and not by chance (@Rücker2007).
+
+![](media/image1.png){width="6.26875in" height="5.486111111111111in"}
+
+Figure 1: Flowchart of the generation and validation of the models
+generated in R-project
+
+#### Applicability domain
+
+The AD of the training dataset and the PA dataset was evaluated using
+the Jaccard distance. A Jaccard distance of '0' indicates that the
+substances are similar, whereas a value of '1' shows that the substances
+are different. The Jaccard distance was below 0.2 for all PAs relative
+to the training dataset. Therefore, PA dataset is within the AD of the
+training dataset and the models can be used to predict the genotoxic
+potential of the PA dataset.
+
+#### y-randomisation
+
+After y-randomisation of the outcome, the accuracy and CCR are around
+50%, indicating a chance in the distribution of the results. This shows,
+that the outcome is actually related to the predictors and not by
+chance.
+
+### Deep Learning in TensorFlow
+
+Alternatively, a DL model was established with Python-based TensorFlow
+program (<https://www.tensorflow.org/>) using the high-level API Keras
+(<https://www.tensorflow.org/guide/keras>) to build the models.
+
+Data pre-processing was done by rank transformation using the
+'*QuantileTransformer*' procedure. A sequential model has been used.
+Four layers have been used: input layer, two hidden layers (with 12, 8
+and 8 nodes, respectively) and one output layer. For the output layer, a
+sigmoidal activation function and for all other layers the ReLU
+('*Rectified Linear Unit*') activation function was used. Additionally,
+a L^2^-penalty of 0.001 was used for the input layer. For training of
+the model, the ADAM algorithm was used to minimise the cross-entropy
+loss using the default parameters of Keras. Training was performed for
+100 epochs with a batch size of 64. The model was implemented with
+Python 3.6 and Keras. For training of the model, a 6-fold
+cross-validation was used. Accuracy was estimated by ROC-AUC and
+confusion matrix.
+
+Validation
+----------
+
+Results
+=======
+
+`lazar`
+-----
+
+Random Forest
+-------------
+
+The validation showed that the RF model has an accuracy of 64%, a
+sensitivity of 66% and a specificity of 63%. The confusion matrix of the
+model, calculated for 8080 instances, is provided in Table 1.
+
+Table 1: Confusion matrix of the RF model
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2274 1163 3437
+ ***TN*** 1736 2907 4643
+ ***Total*** 4010 4070 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+Support Vector Machines
+-----------------------
+
+The validation showed that the SVM model has an accuracy of 62%, a
+sensitivity of 65% and a specificity of 60%. The confusion matrix of SVM
+model, calculated for 8080 instances, is provided in Table 2.
+
+Table 2: Confusion matrix of the SVM model
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2057 1107 3164
+ ***TN*** 1953 2963 4916
+ ***Total*** 4010 4070 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+Deep Learning (R-project)
+-------------------------
+
+The validation showed that the DL model generated in R has an accuracy
+of 59%, a sensitivity of 89% and a specificity of 30%. The confusion
+matrix of the model, normalised to 8080 instances, is provided in Table
+3.
+
+Table 3: Confusion matrix of the DL model (R-project)
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 3575 435 4010
+ ***TN*** 2853 1217 4070
+ ***Total*** 6428 1652 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+DL model (TensorFlow)
+---------------------
+
+The validation showed that the DL model generated in TensorFlow has an
+accuracy of 68%, a sensitivity of 70% and a specificity of 46%. The
+confusion matrix of the model, normalised to 8080 instances, is provided
+in Table 4.
+
+Table 4: Confusion matrix of the DL model (TensorFlow)
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2851 1227 4078
+ ***TN*** 1825 2177 4002
+ ***Total*** 4676 3404 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+The ROC curves from the 6-fold validation are shown in Figure 7.
+
+![](media/image7.png){width="3.825in"
+height="2.7327045056867894in"}
+
+Figure 7: Six-fold cross-validation of the TensorFlow DL model shows an
+average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68%.
+
+In summary, the validation results of the four methods are presented in
+the following table.
+
+Table 5: Results of the cross-validation of the four models and after
+y-randomisation
+
+ ----------------------------------------------------------------------
+ Accuracy CCR Sensitivity Specificity
+ ----------------------- ---------- ------- ------------- -------------
+ RF model 64.1% 64.4% 66.2% 62.6%
+
+ SVM model 62.1% 62.6% 65.0% 60.3%
+
+ DL model\ 59.3% 59.5% 89.2% 29.9%
+ (R-project)
+
+ DL model (TensorFlow) 68% 62.2% 69.9% 45.6%
+
+ y-randomisation 50.5% 50.4% 50.3% 50.6%
+ ----------------------------------------------------------------------
+
+CCR (correct classification rate)
+
+Discussion
+==========
+
+General model performance
+
+Based on the results of the cross-validation for all models, `lazar`, RF,
+SVM, DL (R-project) and DL (TensorFlow) it can be stated that the
+prediction results are not optimal due to different reasons. The
+accuracy as measured during cross-validation of the four models (RF,
+SVM, DL (R-project and TensorFlow)) was partly low with CCR values
+between 59.3 and 68%, with the R-generated DL model and the
+TensorFlow-generated DL model showing the worst and the best
+performance, respectively. The validation of the R-generated DL model
+revealed a high sensitivity (89.2%) but an unacceptably low specificity
+of 29.9% indicating a high number of false positive estimates. The
+TensorFlow-generated DL model, however, showed an acceptable but not
+optimal accuracy of 68%, a sensitivity of 69.9% and a specificity of
+45.6%. The low specificity indicates that both DL models tend to
+predict too many instances as positive (genotoxic), and therefore have a
+high false positive rate. This allows at least with the TensorFlow
+generated DL model to make group statements, but the confidence for
+estimations of single PAs appears to be insufficiently low.
+
+Several factors have likely contributed to the low to moderate
+performance of the used methods as shown during the cross-validation:
+
+1. The outcome in the training dataset was based on the results of Ames
+ tests for genotoxicity ([ICH 2011](#_ENREF_63)), an *in vitro* test
+ in different strains of the bacteria *Salmonella typhimurium*. In
+ this test, mutagenicity is evaluated with and without prior
+ metabolic activation of the test substance. Metabolic activation
+ could result in the formation of genotoxic metabolites from
+ non-genotoxic parent compounds. However, no distinction was made in
+ the training dataset between substances that needed metabolic
+ activation before being mutagenic and those that were mutagenic
+ without metabolic activation. `lazar` is able to handle this
+ 'inaccuracy' in the training dataset well due to the way the
+ algorithm works: `lazar` predicts the genotoxic potential based on the
+ neighbours of substances with comparable structural features,
+ considering mutagenic and not mutagenic neighbours. Based on the
+ structural similarity, a probability for mutagenicity and no
+ mutagenicity is calculated independently from each other (meaning
+ that the sum of probabilities does not necessarily add up to 100%).
+ The class with the higher outcome is then the overall outcome for
+ the substance.
+
+> In contrast, the other models need to be trained first to recognise
+> the structural features that are responsible for genotoxicity.
+> Therefore, the mixture of substances being mutagenic with and without
+> metabolic activation in the training dataset may have adversely
+> affected the ability to separate the dataset in two distinct classes
+> and thus explains the relatively low performance of these models.
+
+2. Machine learning algorithms try to find an optimized solution in a
+ high-dimensional (one dimension per each predictor) space. Sometimes
+ these methods do not find the global optimum of estimates but only
+ local (not optimal) solutions. Strategies to find the global
+ solutions are systematic variation (grid search) of the
+ hyperparameters of the methods, which may be very time consuming in
+ particular in large datasets.
+
+
+Conclusions
+===========
+
+In this study, an attempt was made to predict the genotoxic potential of
+PAs using five different machine learning techniques (`lazar`, RF, SVM, DL
+(R-project and TensorFlow)). The results of all models fitted only partly
+to the findings in literature, with best results obtained with the
+TensorFlow DL model. Therefore, modelling allows statements on the
+relative risks of genotoxicity of the different PA groups. Individual
+predictions for selective PAs appear, however, not reliable on the
+current basis of the used training dataset.
+
+This study emphasises the importance of critical assessment of
+predictions by QSAR models. This includes not only extensive literature
+research to assess the plausibility of the predictions, but also a good
+knowledge of the metabolism of the test substances and understanding for
+possible mechanisms of toxicity.
+
+In further studies, additional machine learning techniques or a modified
+(extended) training dataset should be used for an additional attempt to
+predict the genotoxic potential of PAs.
+
+References
+==========
diff --git a/paper/outfile.docx b/paper/outfile.docx
new file mode 100644
index 0000000..dc3070c
--- /dev/null
+++ b/paper/outfile.docx
Binary files differ
diff --git a/paper/outfile.enriched.json b/paper/outfile.enriched.json
new file mode 100644
index 0000000..a547003
--- /dev/null
+++ b/paper/outfile.enriched.json
@@ -0,0 +1 @@
+{"blocks":[{"t":"Header","c":[1,["introduction",[],[]],[{"t":"Str","c":"Introduction"}]]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"algo"},{"t":"Space"},{"t":"Str","c":"history"}]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"history"}]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"open"},{"t":"Space"},{"t":"Str","c":"problems"}]},{"t":"Header","c":[1,["materials-and-methods",[],[]],[{"t":"Str","c":"Materials"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Methods"}]]},{"t":"Header","c":[2,["mutagenicity-data",[],[]],[{"t":"Str","c":"Mutagenicity"},{"t":"Space"},{"t":"Str","c":"data"}]]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"methods,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"compiled"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"Space"},{"t":"Str","c":"sources:"}]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Str","c":"Kazius/Bursi"},{"t":"Space"},{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"(4337"},{"t":"Space"},{"t":"Str","c":"compounds,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Kazius2005","citationHash":0}],[{"t":"Str","c":"@Kazius2005"}]]},{"t":"Str","c":"):"},{"t":"Space"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"http://cheminformatics.org/datasets/bursi/cas_4337.zip"}],["http://cheminform
atics.org/datasets/bursi/cas_4337.zip",""]]}]}],[{"t":"Para","c":[{"t":"Str","c":"Hansen"},{"t":"Space"},{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"(6513"},{"t":"Space"},{"t":"Str","c":"compounds,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Hansen2009","citationHash":0}],[{"t":"Str","c":"@Hansen2009"}]]},{"t":"Str","c":"):"},{"t":"Space"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"}],["http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv",""]]}]}],[{"t":"Para","c":[{"t":"Str","c":"EFSA"},{"t":"Space"},{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"(695"},{"t":"Space"},{"t":"Str","c":"compounds):"},{"t":"Space"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls"}],["https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls",""]]}]}]]},{"t":"Para","c":[{"t":"Str","c":"Mutagenicity"},{"t":"Space"},{"t":"Str","c":"classifications"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"Kazius"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Hansen"},{"t":"Space"},{"t":"Str","c":"datasets"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"SoftBreak"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"further"},{"t":"Space"},{"t":"Str","c":"processing."},{"t":"Space"},{"t":"Str","c":"To"},{"t":"Space"},{"t":"Str","c":"achieve"},{"t":"Space"},{"t":"Str","c":"consistency"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"SoftBreak"},{"t":"Str","c":"datasets,"},{"t":"Space"},{"t":"Str","c":"EFSA"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"classified"},{"t":"Space"},{"t":"S
tr","c":"as"},{"t":"Space"},{"t":"Str","c":"mutagenic,"},{"t":"Space"},{"t":"Str","c":"if"},{"t":"Space"},{"t":"Str","c":"at"},{"t":"Space"},{"t":"Str","c":"least"},{"t":"Space"},{"t":"Str","c":"one"},{"t":"SoftBreak"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"result"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"found"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"TA98"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"T100"},{"t":"Space"},{"t":"Str","c":"Salmonella"},{"t":"Space"},{"t":"Str","c":"strains."}]},{"t":"Para","c":[{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"merges"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"unique"},{"t":"Space"},{"t":"Str","c":"SMILES"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"Simplified"},{"t":"Space"},{"t":"Str","c":"Molecular"},{"t":"Space"},{"t":"Str","c":"Input"},{"t":"SoftBreak"},{"t":"Str","c":"Line"},{"t":"Space"},{"t":"Str","c":"Entry"},{"t":"Space"},{"t":"Str","c":"Specification"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"strings"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"compound"},{"t":"Space"},{"t":"Str","c":"structures."},{"t":"SoftBreak"},{"t":"Str","c":"Duplicated"},{"t":"Space"},{"t":"Str","c":"experimental"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"merged"},{"t":"Space"},{"t":"Str","c":"into"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"single"},{"t":"Space"},{"t":"Str","c":"value,"},{"t":"Space"},{"t":"Str","c":"because"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"
Str","c":"likely"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"originated"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"SoftBreak"},{"t":"Str","c":"experiment."},{"t":"Space"},{"t":"Str","c":"Contradictory"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"kept"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"multiple"},{"t":"Space"},{"t":"Str","c":"measurements"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"database."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"combined"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"contains"},{"t":"Space"},{"t":"Str","c":"8281"},{"t":"Space"},{"t":"Str","c":"unique"},{"t":"SoftBreak"},{"t":"Str","c":"structures."}]},{"t":"Para","c":[{"t":"Str","c":"Source"},{"t":"Space"},{"t":"Str","c":"code"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"download,"},{"t":"Space"},{"t":"Str","c":"extraction"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"merge"},{"t":"Space"},{"t":"Str","c":"operations"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"SoftBreak"},{"t":"Str","c":"publicly"},{"t":"Space"},{"t":"Str","c":"available"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"git"},{"t":"Space"},{"t":"Str","c":"repository"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/pyrrolizidine"}],["https://git.in-silico.ch/pyrrolizidine",""]]},{"t":"Space"},{"t":"Str","c":"under"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"GPL3"},{"t":"Space"},{"t":"Str","c":"License."}]},{"t":"Para"
,"c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"check/fix"},{"t":"Space"},{"t":"Str","c":"git"},{"t":"Space"},{"t":"Str","c":"repo"}]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"Random"},{"t":"Space"},{"t":"Str","c":"Forest"},{"t":"Space"},{"t":"Str","c":"(RF),"},{"t":"Space"},{"t":"Str","c":"Support"},{"t":"Space"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines"},{"t":"Space"},{"t":"Str","c":"(SVM),"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Deep"},{"t":"SoftBreak"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"(DL)"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Str","c":"molecular"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"SoftBreak"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"PaDEL-Descriptors"},{"t":"Space"},{"t":"Str","c":"program"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"http://www.yapcwsoft.com"}],["http://www.yapcwsoft.com",""]]},{"t":"Space"},{"t":"Str","c":"version"},{"t":"Space"},{"t":"Str","c":"2.21,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Yap2011","citationHash":0}],[{"t":"Str","c":"@Yap2011"}]]},{"t":"Str","c":")."}]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"sentence"},{"t":"Space"},{"t":"Str","c":"??"}]},{"t":"Para","c":[{"t":"Str","c":"From"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"SoftBreak"},{"t":"Str","c":"chosen,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"actually"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{
"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model."}]},{"t":"Header","c":[2,["algorithms",[],[]],[{"t":"Str","c":"Algorithms"}]]},{"t":"Header","c":[3,["lazar",[],[]],[{"t":"Code","c":[["",[],[]],"lazar"]}]]},{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"lazy"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Str","c":"activity"},{"t":"Space"},{"t":"Str","c":"relationships"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"modular"},{"t":"Space"},{"t":"Str","c":"framework"},{"t":"SoftBreak"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"read-across"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"development"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"validation."},{"t":"Space"},{"t":"Str","c":"It"},{"t":"Space"},{"t":"Str","c":"follows"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"following"},{"t":"Space"},{"t":"Str","c":"basic"},{"t":"Space"},{"t":"Str","c":"workflow:"},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"given"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":":"}]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Str","c":"searches"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"database"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"similar"},{"t":"Space"},{"t":"Str","c":"structures"},{"t":"Space"},{"t":"Str","c":"(neighbours)"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"experimental"},{"t":"Space"},{"t":"Str","c"
:"data,"}]}],[{"t":"Para","c":[{"t":"Str","c":"builds"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"and"}]}],[{"t":"Para","c":[{"t":"Str","c":"uses"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"unknown"},{"t":"Space"},{"t":"Str","c":"activity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"query"},{"t":"SoftBreak"},{"t":"Str","c":"compound."}]}]]},{"t":"Para","c":[{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"procedure"},{"t":"Space"},{"t":"Str","c":"resembles"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"automated"},{"t":"Space"},{"t":"Str","c":"version"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"read"},{"t":"Space"},{"t":"Str","c":"across"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"toxicology,"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"terms"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"would"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"classified"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"k-nearest-neighbour"},{"t":"Space"},{"t":"Str","c":"algorithm."}]},{"t":"Para","c":[{"t":"Str","c":"Apart"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"basic"},{"t":"Space"},{"t":"Str","c":"workflow,"},{"t":"Space"},{"t":"Cod
e","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"completely"},{"t":"Space"},{"t":"Str","c":"modular"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"allows"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"researcher"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"use"},{"t":"Space"},{"t":"Str","c":"any"},{"t":"Space"},{"t":"Str","c":"algorithm"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"searches"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"local"},{"t":"SoftBreak"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"Quantitative"},{"t":"Space"},{"t":"Str","c":"structure–activity"},{"t":"Space"},{"t":"Str","c":"relationship"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"modelling."},{"t":"SoftBreak"},{"t":"Str","c":"Algorithms"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"within"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"study"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"described"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"SoftBreak"},{"t":"Str","c":"sections."}]},{"t":"Header","c":[4,["neighbour-identification",[],[]],[{"t":"Str","c":"Neighbour"},{"t":"Space"},{"t":"Str","c":"identification"}]]},{"t":"Para","c":[{"t":"Str","c":"Similarity"},{"t":"Space"},{"t":"Str","c":"calculations"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c":"fingerprints"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Bender2004","citationHash":0}],[{"t":"Str","
c":"@Bender2004"}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"OpenBabel"},{"t":"Space"},{"t":"Str","c":"cheminformatics"},{"t":"Space"},{"t":"Str","c":"library"},{"t":"SoftBreak"},{"t":"Str","c":"("},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"OBoyle2011a","citationHash":0}],[{"t":"Str","c":"@OBoyle2011a"}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c":"fingerprint"},{"t":"Space"},{"t":"Str","c":"uses"},{"t":"SoftBreak"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"molecular"},{"t":"Space"},{"t":"Str","c":"representation,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"resembles"},{"t":"Space"},{"t":"Str","c":"basically"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"concept"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"functional"},{"t":"Space"},{"t":"Str","c":"groups."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"molecule,"},{"t":"SoftBreak"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"represents"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"environment"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"types"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"connected"},{"t":"SoftBreak"},{"t":"Str","c":"atoms."}]},{"t":"Para","c":[{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c"
:"fingerprints"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"dynamically"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"SoftBreak"},{"t":"Str","c":"structures"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"do"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"rely"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"predefined"},{"t":"Space"},{"t":"Str","c":"lists"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"fragments"},{"t":"Space"},{"t":"Str","c":"(such"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"SoftBreak"},{"t":"Str","c":"OpenBabel"},{"t":"Space"},{"t":"Str","c":"FP3,"},{"t":"Space"},{"t":"Str","c":"FP4"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"MACCs"},{"t":"Space"},{"t":"Str","c":"fingerprints"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"lists"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"toxicophores/toxicophobes)."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"advantage"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"they"},{"t":"Space"},{"t":"Str","c":"may"},{"t":"Space"},{"t":"Str","c":"capture"},{"t":"SoftBreak"},{"t":"Str","c":"substructures"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"toxicological"},{"t":"Space"},{"t":"Str","c":"relevance"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"included"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"SoftBreak"},{"t":"Str","c":"fingerprints."}]},{"t":"Para","c":[{"t":"Str","c":"From"},{"t":"Space"},{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c":"fingerprints"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"
Space"},{"t":"Str","c":"feature"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"compound"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"constructed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"calculate"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"SoftBreak"},{"t":"Str","c":"similarities."}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"two"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"b"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"expressed"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"proportion"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"both"},{"t":"Space"},{"t":"Str","c":"structures"},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"∩"},{"t":"Space"},{"t":"Str","c":"B"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"total"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"Space"},{"t":"Str","c":"A"},{"t":"
Space"},{"t":"Str","c":"U"},{"t":"Space"},{"t":"Str","c":"B"},{"t":"Space"},{"t":"Str","c":"(Jaccard/Tanimoto"},{"t":"SoftBreak"},{"t":"Str","c":"index)."}]},{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"},"sim = \\frac{\\left| A\\ \\cap B \\right|}{\\left| A\\ \\cup B \\right|}"]}]},{"t":"Para","c":[{"t":"Str","c":"Threshold"},{"t":"Space"},{"t":"Str","c":"selection"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"trade-off"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"(high"},{"t":"SoftBreak"},{"t":"Str","c":"threshold)"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"predictable"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"(low"},{"t":"Space"},{"t":"Str","c":"threshold)."},{"t":"Space"},{"t":"Str","c":"As"},{"t":"SoftBreak"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"many"},{"t":"Space"},{"t":"Str","c":"practical"},{"t":"Space"},{"t":"Str","c":"cases"},{"t":"Space"},{"t":"Str","c":"desirable"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"make"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"even"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"absence"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"closely"},{"t":"Space"},{"t":"Str","c":"related"},{"t":"Space"},{"t":"Str","c":"neighbours,"},{"t":"Space"},{"t":"Str","c":"we"},{"t":"Space"},{"t":"Str","c":"follow"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"tiered"},{"t":"Space"},{"t":"Str","c":"approach:"}]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Str","c":"First"},{"t
":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.5"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"collect"},{"t":"Space"},{"t":"Str","c":"neighbours,"},{"t":"SoftBreak"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"create"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"make"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"query"},{"t":"SoftBreak"},{"t":"Str","c":"compound."}]}],[{"t":"Para","c":[{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"any"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"steps"},{"t":"Space"},{"t":"Str","c":"fails,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"procedure"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"repeated"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"flagged"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"warning"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"might"},{"t":"Space"},{"t":"Str","c"
:"be"},{"t":"Space"},{"t":"Str","c":"out"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data."}]}],[{"t":"Para","c":[{"t":"Str","c":"Similarity"},{"t":"Space"},{"t":"Str","c":"thresholds"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.5"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"default"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"chosen"},{"t":"SoftBreak"},{"t":"Str","c":">"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"software"},{"t":"Space"},{"t":"Str","c":"developers"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"remained"},{"t":"Space"},{"t":"Str","c":"unchanged"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":">"},{"t":"Space"},{"t":"Str","c":"course"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"experiments."}]}]]},{"t":"Para","c":[{"t":"Str","c":"Compounds"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"query"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"SoftBreak"},{"t":"Str","c":"automatically"},{"t":"Space"},{"t":"Str","c":"eliminated"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"obtain"},{"t
":"Space"},{"t":"Str","c":"unbiased"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"presence"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"duplicates."}]},{"t":"Header","c":[4,["local-qsar-models-and-predictions",[],[]],[{"t":"Str","c":"Local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"predictions"}]]},{"t":"Para","c":[{"t":"Str","c":"Only"},{"t":"Space"},{"t":"Str","c":"similar"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"(neighbours)"},{"t":"Space"},{"t":"Str","c":"above"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"models."},{"t":"Space"},{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"investigation,"},{"t":"Space"},{"t":"Str","c":"we"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"weighted"},{"t":"SoftBreak"},{"t":"Str","c":"majority"},{"t":"Space"},{"t":"Str","c":"vote"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"neighbour’s"},{"t":"Space"},{"t":"Str","c":"experimental"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"SoftBreak"},{"t":"Str","c":"classifications."},{"t":"Space"},{"t":"Str","c":"Probabilities"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"both"},{"t":"Space"},{"t":"Str","c":"classes"},{"t":"SoftBreak"},{"t":"Str","c":"(mutagenic/non-mutagenic)"},{"t":"Space"},{"t":"St
r","c":"are"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"according"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"SoftBreak"},{"t":"Str","c":"formula"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"class"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"higher"},{"t":"Space"},{"t":"Str","c":"probability"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"SoftBreak"},{"t":"Str","c":"outcome."}]},{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"},"p_{c} = \\ \\frac{\\sum_{}^{}\\text{sim}_{n,c}}{\\sum_{}^{}\\text{sim}_{n}}"]}]},{"t":"Para","c":[{"t":"Math","c":[{"t":"InlineMath"},"p_{c}"]},{"t":"Space"},{"t":"Str","c":"Probability"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"class"},{"t":"Space"},{"t":"Str","c":"c"},{"t":"Space"},{"t":"Str","c":"(e.g. 
mutagenic"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"non-mutagenic)"},{"t":"LineBreak"},{"t":"Math","c":[{"t":"InlineMath"},"\\sum_{}^{}\\text{sim}_{n,c}"]},{"t":"Space"},{"t":"Str","c":"Sum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"similarities"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"class"},{"t":"Space"},{"t":"Str","c":"c"},{"t":"LineBreak"},{"t":"Math","c":[{"t":"InlineMath"},"\\sum_{}^{}\\text{sim}_{n}"]},{"t":"Space"},{"t":"Str","c":"Sum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"neighbours"}]},{"t":"Header","c":[4,["applicability-domain",[],[]],[{"t":"Str","c":"Applicability"},{"t":"Space"},{"t":"Str","c":"domain"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"(AD)"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"determined"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"diversity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data."},{"t":"Space"},{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"Space"},{"t":"Str","c":"similar"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"SoftBreak"},{"t":"Str","c":"found"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"Space"},{
"t":"Str","c":"will"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"generated."},{"t":"Space"},{"t":"Str","c":"Warnings"},{"t":"SoftBreak"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"issued"},{"t":"Space"},{"t":"Str","c":"if"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"had"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"lowered"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"0.5"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"order"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"enable"},{"t":"Space"},{"t":"Str","c":"predictions."},{"t":"Space"},{"t":"Str","c":"Predictions"},{"t":"Space"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"warnings"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"SoftBreak"},{"t":"Str","c":"considered"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"close"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"warnings"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"more"},{"t":"Space"},{"t":"Str","c":"distant"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain."},{"t":"Space"},{"t":"Str","c":"Quantitative"},{"t":"SoftBreak"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"information"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":
"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"obtained"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"similarities"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"individual"},{"t":"Space"},{"t":"Str","c":"neighbours."}]},{"t":"Header","c":[4,["availability",[],[]],[{"t":"Str","c":"Availability"}]]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"experiments"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"manuscript:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/pyrrolizidine"}],["https://git.in-silico.ch/pyrrolizidine",""]]},{"t":"SoftBreak"},{"t":"Str","c":"(source"},{"t":"Space"},{"t":"Str","c":"code,"},{"t":"Space"},{"t":"Str","c":"GPL3)"}]}],[{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"framework:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/lazar"}],["https://git.in-silico.ch/lazar",""]]},{"t":"SoftBreak"},{"t":"Str","c":"(source"},{"t":"Space"},{"t":"Str","c":"code,"},{"t":"Space"},{"t":"Str","c":"GPL3)"}]}],[{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"GUI:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/lazar-gui"}],["https://git.in-silico.ch/lazar-gui",""]]},{"t":"SoftBreak"},{"t":"Str","c":"(source"},{"t":"Space"},{"t":"Str","c":"code,"},{"t":"Space"},{"t":"Str","c":"GPL3)"}]}],[{"t":"Para","c":[{"t":"Str","c":"Public"},{"t":"Space"},{"t":"Str","c":"web"},{"t":"Space"},{"t":"Str","c":"interface:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://lazar.in-silico.ch"}],["https://lazar.in-silico.ch",""]]}]}]]},{"t":"Header","c":[3,["random-forest-support-vector-machines-and-deep-learning-in-r-project",[],[]],[{"t":
"Str","c":"Random"},{"t":"Space"},{"t":"Str","c":"Forest,"},{"t":"Space"},{"t":"Str","c":"Support"},{"t":"Space"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"R-project"}]]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"comparison"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":","},{"t":"Space"},{"t":"Str","c":"three"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"(Random"},{"t":"Space"},{"t":"Str","c":"Forest"},{"t":"Space"},{"t":"Str","c":"(RF),"},{"t":"Space"},{"t":"Str","c":"Support"},{"t":"SoftBreak"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines"},{"t":"Space"},{"t":"Str","c":"(SVM),"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"(DL))"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"evaluated."}]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Str","c":"molecular"},{"t":"Space"},{"t":"Str","c":"1D"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"2D"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"PaDEL-Descriptors"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"
],[]],[{"t":"Str","c":"http://www.yapcwsoft.com"}],["http://www.yapcwsoft.com",""]]},{"t":"Space"},{"t":"Str","c":"version"},{"t":"SoftBreak"},{"t":"Str","c":"2.21,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Yap2011","citationHash":0}],[{"t":"Str","c":"@Yap2011"}]]},{"t":"Str","c":")."}]},{"t":"Para","c":[{"t":"Str","c":"As"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"over"},{"t":"Space"},{"t":"Str","c":"8280"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"decided"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"SoftBreak"},{"t":"Str","c":"delete"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"missing"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"pre-processing."},{"t":"SoftBreak"},{"t":"Str","c":"Furthermore,"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"equivocal"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"removed."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"final"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"known"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"SoftBreak"},{"t":"Str","c":"potential."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"RF,"},{"t":"Space"},{"t"
:"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"R"},{"t":"SoftBreak"},{"t":"Str","c":"software"},{"t":"Space"},{"t":"Str","c":"(R-project"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"Statistical"},{"t":"Space"},{"t":"Str","c":"Computing,"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://www.r-project.org/"}],["https://www.r-project.org/",""]]},{"t":"Emph","c":[{"t":"Str","c":";"}]},{"t":"Space"},{"t":"Str","c":"version"},{"t":"Space"},{"t":"Str","c":"3.3.1),"},{"t":"Space"},{"t":"Str","c":"specific"},{"t":"Space"},{"t":"Str","c":"R"},{"t":"Space"},{"t":"Str","c":"packages"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"SoftBreak"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"identified"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"description"},{"t":"Space"},{"t":"Str","c":"below."},{"t":"Space"},{"t":"Str","c":"During"},{"t":"Space"},{"t":"Str","c":"feature"},{"t":"SoftBreak"},{"t":"Str","c":"selection,"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"near"},{"t":"Space"},{"t":"Str","c":"zero"},{"t":"Space"},{"t":"Str","c":"variance"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"removed"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"NearZeroVar"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"caret"}]]},{"t":"Str","c":")."},{"t
":"Space"},{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"percentage"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"more"},{"t":"Space"},{"t":"Str","c":"than"},{"t":"Space"},{"t":"Str","c":"90%"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"when"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"frequency"},{"t":"Space"},{"t":"Str","c":"ratio"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"second"},{"t":"Space"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"greater"},{"t":"Space"},{"t":"Str","c":"than"},{"t":"Space"},{"t":"Str","c":"95:5"},{"t":"SoftBreak"},{"t":"Str","c":"(e.g. 
95"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"Space"},{"t":"Str","c":"5"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"less"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"second"},{"t":"Space"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value),"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"descriptor"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"classified"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"having"},{"t":"SoftBreak"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"near"},{"t":"Space"},{"t":"Str","c":"zero"},{"t":"Space"},{"t":"Str","c":"variance."},{"t":"Space"},{"t":"Str","c":"After"},{"t":"Space"},{"t":"Str","c":"that,"},{"t":"Space"},{"t":"Str","c":"highly"},{"t":"Space"},{"t":"Str","c":"correlated"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"SoftBreak"},{"t":"Str","c":"removed"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"findCorrelation"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"caret"}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"cut-off"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.9."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"resulted"},{"t":"Space"},{"t":"Str","c":"i
n"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"516"},{"t":"SoftBreak"},{"t":"Str","c":"descriptors."},{"t":"Space"},{"t":"Str","c":"These"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"scaled"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"range"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"0"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"1"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"preProcess"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"caret"}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"scaling"},{"t":"SoftBreak"},{"t":"Str","c":"routine"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"saved"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"order"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"apply"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"scaling"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"testing"},{"t":"SoftBreak"},{"t":"Str","c":"dataset."},{"t":"Space"},{"t":"Str","c":"As"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"three"},{"t":"Space"},{"t":"Str","c":"steps"},{"t":"Space"},{"t":"Str","c":"did"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"consider"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t
":"Str","c":"outcome,"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"SoftBreak"},{"t":"Str","c":"decided"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"they"},{"t":"Space"},{"t":"Str","c":"do"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"need"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"included"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model."},{"t":"Space"},{"t":"Str","c":"To"},{"t":"Space"},{"t":"Str","c":"further"},{"t":"Space"},{"t":"Str","c":"reduce"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"features,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"LASSO"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"least"},{"t":"SoftBreak"},{"t":"Str","c":"absolute"},{"t":"Space"},{"t":"Str","c":"shrinkage"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"selection"},{"t":"Space"},{"t":"Str","c":"operator"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"regression"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed"},{"t":"SoftBreak"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"glmnet"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"glmnet"}]}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"reduced"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"SoftBreak"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Spa
ce"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"pre-trained"},{"t":"Space"},{"t":"Str","c":"models."}]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"randomForest"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"randomForest"}]}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"forest"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"1000"},{"t":"Space"},{"t":"Str","c":"trees"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"maximal"},{"t":"SoftBreak"},{"t":"Str","c":"terminal"},{"t":"Space"},{"t":"Str","c":"nodes"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"200"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"grown"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"prediction."}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"svm"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"e1071"}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"radial"},{"t":"Space"},{"t":"Str","c":"basis"},{"t":"Space"},{"t":"Str","c":"function"},{"t":"SoftBreak"},{"t":"Str","c":"kernel"}]},{"t":
"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model."}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"h2o.deeplearning"}]}]]},{"t":"Str","c":"-function"},{"t":"SoftBreak"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"h2o"}]}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"hidden"},{"t":"Space"},{"t":"Str","c":"layer"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"70,"},{"t":"Space"},{"t":"Str","c":"50,"},{"t":"Space"},{"t":"Str","c":"50,"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"10"},{"t":"Space"},{"t":"Str","c":"neurons,"},{"t":"Space"},{"t":"Str","c":"respectively."},{"t":"Space"},{"t":"Str","c":"Other"},{"t":"Space"},{"t":"Str","c":"hyperparameter"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"set"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"follows:"},{"t":"SoftBreak"},{"t":"Str","c":"l1=1.0E-7,"},{"t":"Space"},{"t":"Str","c":"l2=1.0E-11,"},{"t":"Space"},{"t":"Str","c":"epsilon"},{"t":"Space"},{"t":"Str","c":"="},{"t":"Space"},{"t":"Str","c":"1.0E-10,"},{"t":"Space"},{"t":"Str","c":"rho"},{"t":"Space"},{"t":"Str","c":"="},{"t":"Space"},{"t":"Str","c":"0.8,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"quantile_alpha"},{"t":"SoftBreak"},{"t":"Str","c":"="},{"t":"Space"},
{"t":"Str","c":"0.5."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"hyperparameter,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"default"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"SoftBreak"},{"t":"Str","c":"Weights"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"biases"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"first"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"determined"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"unsupervised"},{"t":"SoftBreak"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model."},{"t":"Space"},{"t":"Str","c":"These"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"then"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"actual,"},{"t":"Space"},{"t":"Str","c":"supervised"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"SoftBreak"},{"t":"Str","c":"model."}]},{"t":"Para","c":[{"t":"Str","c":"To"},{"t":"Space"},{"t":"Str","c":"validate"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"internal"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"approach"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"SoftBreak"},{"t":"Str","c":"chosen."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"randomly"},{"t":"Space"},{"t":"Str","c":"split"},{"t":"Space"},{"t":"Str","c":"in"
},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"SoftBreak"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"95%"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"5%"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"data."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"feature"},{"t":"Space"},{"t":"Str","c":"selection"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"LASSO"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed,"},{"t":"SoftBreak"},{"t":"Str","c":"reducing"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"approximately"},{"t":"Space"},{"t":"Str","c":"100."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"SoftBreak"},{"t":"Str","c":"repeated"},{"t":"Space"},{"t":"Str","c":"five"},{"t":"Space"},{"t":"Str","c":"times."},{"t":"Space"},{"t":"Str","c":"Based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"five"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{
"t":"Space"},{"t":"Str","c":"predictive"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"trained"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"tested"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"data."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"repeated"},{"t":"Space"},{"t":"Str","c":"10"},{"t":"Space"},{"t":"Str","c":"times."},{"t":"Space"},{"t":"Str","c":"Furthermore,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"y-randomisation"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed."},{"t":"Space"},{"t":"Str","c":"During"},{"t":"SoftBreak"},{"t":"Str","c":"y-randomisation,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"(y-variable)"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"randomly"},{"t":"Space"},{"t":"Str","c":"permuted."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"theory"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"after"},{"t":"Space"},{"t":"Str","c":"randomisation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"should"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"SoftBreak"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"able"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Spa
ce"},{"t":"Str","c":"correlate"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"properties"},{"t":"Space"},{"t":"Str","c":"(descriptor"},{"t":"Space"},{"t":"Str","c":"values)"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substances."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"should"},{"t":"Space"},{"t":"Str","c":"therefore"},{"t":"SoftBreak"},{"t":"Str","c":"indicate"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"chance"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"about"},{"t":"Space"},{"t":"Str","c":"50%."},{"t":"Space"},{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"SoftBreak"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"true,"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"concluded"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"correlation"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"actual"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"SoftBreak"},{"t":"Str","c":"properties"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"real"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"
},{"t":"Str","c":"chance"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Rücker2007","citationHash":0}],[{"t":"Str","c":"@Rücker2007"}]]},{"t":"Str","c":")."}]},{"t":"Para","c":[{"t":"Image","c":[["",[],[["width","6.26875in"],["height","5.486111111111111in"]]],[],["media/image1.png",""]]}]},{"t":"Para","c":[{"t":"Str","c":"Figure"},{"t":"Space"},{"t":"Str","c":"1:"},{"t":"Space"},{"t":"Str","c":"Flowchart"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"SoftBreak"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"R-project"}]},{"t":"Header","c":[4,["applicability-domain-1",[],[]],[{"t":"Str","c":"Applicability"},{"t":"Space"},{"t":"Str","c":"domain"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"AD"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"evaluated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"Jaccard"},{"t":"Space"},{"t":"Str","c":"distance."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"Jaccard"},{"t":"Space"},{"t":"Str","c":"distance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"0"}]]},{"t":"Space"},{"t":"Str","c":"indicates"},{"t":"Space"},{"t":"Str","c":"that"
},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"similar,"},{"t":"Space"},{"t":"Str","c":"whereas"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"1"}]]},{"t":"Space"},{"t":"Str","c":"shows"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"SoftBreak"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"different."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"Jaccard"},{"t":"Space"},{"t":"Str","c":"distance"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"below"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str","c":"relative"},{"t":"SoftBreak"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset."},{"t":"Space"},{"t":"Str","c":"Therefore,"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"within"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"AD"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"SoftBreak"},{"t":"Str","c":"potential"},{"t":"S
pace"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"dataset."}]},{"t":"Header","c":[4,["y-randomisation",[],[]],[{"t":"Str","c":"y-randomisation"}]]},{"t":"Para","c":[{"t":"Str","c":"After"},{"t":"Space"},{"t":"Str","c":"y-randomisation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"CCR"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"around"},{"t":"SoftBreak"},{"t":"Str","c":"50%,"},{"t":"Space"},{"t":"Str","c":"indicating"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"chance"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"distribution"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"results."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"shows,"},{"t":"SoftBreak"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"actually"},{"t":"Space"},{"t":"Str","c":"related"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"predictors"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"SoftBreak"},{"t":"Str","c":"chance."}]},{"t":"Header","c":[3,["deep-learning-in-tensorflow",[],[]],[{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"TensorFlow"}]]},{"t":"Para","c":[{"t":"Str","c":"Alternatively,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str",
"c":"was"},{"t":"Space"},{"t":"Str","c":"established"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"Python-based"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"SoftBreak"},{"t":"Str","c":"program"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://www.tensorflow.org/"}],["https://www.tensorflow.org/",""]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"high-level"},{"t":"Space"},{"t":"Str","c":"API"},{"t":"Space"},{"t":"Str","c":"Keras"},{"t":"SoftBreak"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://www.tensorflow.org/guide/keras"}],["https://www.tensorflow.org/guide/keras",""]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"build"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"models."}]},{"t":"Para","c":[{"t":"Str","c":"Data"},{"t":"Space"},{"t":"Str","c":"pre-processing"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"done"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"rank"},{"t":"Space"},{"t":"Str","c":"transformation"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"QuantileTransformer"}]}]]},{"t":"Space"},{"t":"Str","c":"procedure."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"sequential"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"been"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"SoftBreak"},{"t":"Str","c":"Four"},{"t":"Space"},{"t":"Str","c":"layers"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str","c":"been"},{"t":"Space"},{"t":"Str","c":"used:"},{"t":"Space"},{"t":"Str","c":"input"},{"t":"Space"},{"t":"Str","c":"layer,"},{"t":"Space"},{"t":"Str","c":"two"},{"t":"Space"},{"t":
"Str","c":"hidden"},{"t":"Space"},{"t":"Str","c":"layers"},{"t":"Space"},{"t":"Str","c":"(with"},{"t":"Space"},{"t":"Str","c":"12,"},{"t":"Space"},{"t":"Str","c":"8"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"8"},{"t":"Space"},{"t":"Str","c":"nodes,"},{"t":"Space"},{"t":"Str","c":"respectively)"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"one"},{"t":"Space"},{"t":"Str","c":"output"},{"t":"Space"},{"t":"Str","c":"layer."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"output"},{"t":"Space"},{"t":"Str","c":"layer,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"sigmoidal"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"function"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"layers"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ReLU"},{"t":"SoftBreak"},{"t":"Str","c":"("},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"Rectified"},{"t":"Space"},{"t":"Str","c":"Linear"},{"t":"Space"},{"t":"Str","c":"Unit"}]}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"function"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"Additionally,"},{"t":"SoftBreak"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"L"},{"t":"Superscript","c":[{"t":"Str","c":"2"}]},{"t":"Str","c":"-penalty"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.001"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"input"},{"t":"Space"},{"t":"Str","c":"layer."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"S
pace"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ADAM"},{"t":"Space"},{"t":"Str","c":"algorithm"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"minimise"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-entropy"},{"t":"SoftBreak"},{"t":"Str","c":"loss"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"default"},{"t":"Space"},{"t":"Str","c":"parameters"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"Keras."},{"t":"Space"},{"t":"Str","c":"Training"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"100"},{"t":"Space"},{"t":"Str","c":"epochs"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"batch"},{"t":"Space"},{"t":"Str","c":"size"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"64."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"implemented"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"Python"},{"t":"Space"},{"t":"Str","c":"3.6"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Keras."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"6-fold"},{"t":"SoftBreak"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"Accuracy"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{
"t":"Str","c":"estimated"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"ROC-AUC"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"SoftBreak"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix."}]},{"t":"Header","c":[2,["validation",[],[]],[{"t":"Str","c":"Validation"}]]},{"t":"Header","c":[1,["results",[],[]],[{"t":"Str","c":"Results"}]]},{"t":"Header","c":[2,["lazar-1",[],[]],[{"t":"Code","c":[["",[],[]],"lazar"]}]]},{"t":"Header","c":[2,["random-forest",[],[]],[{"t":"Str","c":"Random"},{"t":"Space"},{"t":"Str","c":"Forest"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"64%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"66%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"63%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"1."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":
"1:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"2274"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1163"}]}],[{"t":"Plain","c":[{"t":"Str","c":"3437"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"1736"}]}],[{"t":"Plain","c":[{"t":"Str","c":"2907"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4643"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"4010"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4070"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Header","c
":[2,["support-vector-machines",[],[]],[{"t":"Str","c":"Support"},{"t":"Space"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"62%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"65%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"60%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"SoftBreak"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"2."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"2:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Pred
icted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"2057"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1107"}]}],[{"t":"Plain","c":[{"t":"Str","c":"3164"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"1953"}]}],[{"t":"Plain","c":[{"t":"Str","c":"2963"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4916"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"4010"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4070"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Header","c":[2,["deep-learning-r-project",[],[]],[{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"(R-project)"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"mod
el"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"R"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"59%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"89%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"30%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"confusion"},{"t":"SoftBreak"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"normalised"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"SoftBreak"},{"t":"Str","c":"3."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"3:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(R-project)"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"
Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"3575"}]}],[{"t":"Plain","c":[{"t":"Str","c":"435"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4010"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"2853"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1217"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4070"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"6428"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1652"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Header","c":[2,["dl-model-tensorflow",[],[]],[{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"SoftBreak"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str
","c":"of"},{"t":"Space"},{"t":"Str","c":"68%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"70%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"46%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"normalised"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"4."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"4:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t
":"Plain","c":[{"t":"Str","c":"2851"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1227"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4078"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"1825"}]}],[{"t":"Plain","c":[{"t":"Str","c":"2177"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4002"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"4676"}]}],[{"t":"Plain","c":[{"t":"Str","c":"3404"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"ROC"},{"t":"Space"},{"t":"Str","c":"curves"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"6-fold"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"shown"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Figure"},{"t":"Space"},{"t":"Str","c":"7."}]},{"t":"Para","c":[{"t":"Image","c":[["",[],[["width","3.825in"],["height","2.7327045056867894in"]]],[],["media/image7.png",""]]}]},{"t":"Para","c":[{"t":"Str","c":"Figure"},{"t":"Space"},{"t":"Str","c":"7:"},{"t":"Space"},{"t":"Str","c":"Six-fold"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"show"}
,{"t":"Space"},{"t":"Str","c":"an"},{"t":"SoftBreak"},{"t":"Str","c":"average"},{"t":"Space"},{"t":"Str","c":"area"},{"t":"Space"},{"t":"Str","c":"under"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ROC-curve"},{"t":"Space"},{"t":"Str","c":"(ROC-AUC;"},{"t":"Space"},{"t":"Str","c":"measure"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"accuracy)"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"68%."}]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"summary,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"methods"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"presented"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"Space"},{"t":"Str","c":"table."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"5"},{"t":"Space"},{"t":"Str","c":"Results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"after"},{"t":"SoftBreak"},{"t":"Str","c":"y-randomisation"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignLeft"},{"t":"AlignLeft"},{"t":"AlignLeft"}],[0.3287671232876712,0.1506849315068493,0.1095890410958904,0.1917808219178082,0.1917808219178082],[[],[{"t":"Plain","c":[{"t":"Str","c":"Accuracy"}]}],[{"t":"Plain","c":[{"t":"Str","c":"CCR"}]}],[{"t":"Plain","c":[{"t":"Str","c":"Sensitivity"}]}],[{"t":"Plain","c":[{"t":"Str","c":"Specificity"}]}]],[[[{"t":"Plain","c":[{"t":"
Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"}]}],[{"t":"Plain","c":[{"t":"Str","c":"64.1%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"64.4%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"66.2%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.6%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.1%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.6%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"65.0%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"60.3%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"LineBreak"},{"t":"Str","c":"(R-project)"}]}],[{"t":"Plain","c":[{"t":"Str","c":"59.3%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"59.5%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"89.2%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"29.9%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"}]}],[{"t":"Plain","c":[{"t":"Str","c":"68%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.2%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"69.9%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"45.6%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"y-randomisation"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.5%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.4%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.3%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.6%"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"CCR"},{"t":"Space"},{"t":"Str","c":"(correct"},{"t":"Space"},{"t":"Str","c":"classification"},{"t":"Space"},{"t":"Str","c":"rate)"}]},{"t":"Header","c":[1,["discussion",[],[]],[{"t":"Str","c":"Discussion"}]]},{"t":"Para","c":[{"t":"Str","c":"General"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"performance"}]},{"t":"Para","c":[{"t":"Str","c":"Based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":
"cross-validation"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":","},{"t":"Space"},{"t":"Str","c":"RF,"},{"t":"SoftBreak"},{"t":"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"(R-project)"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"stated"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"optimal"},{"t":"Space"},{"t":"Str","c":"due"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"reasons."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"measured"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"(RF,"},{"t":"SoftBreak"},{"t":"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"(R-project"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"TensorFlow))"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"partly"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"CCR"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"SoftBreak"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"59.3"},{"t":"Space"},{"t":"S
tr","c":"and"},{"t":"Space"},{"t":"Str","c":"68%,"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"R-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"TensorFlow-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"showing"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"worst"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"best"},{"t":"SoftBreak"},{"t":"Str","c":"performance,"},{"t":"Space"},{"t":"Str","c":"respectively."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"R-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"SoftBreak"},{"t":"Str","c":"revealed"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"high"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"(89.2%)"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"unacceptably"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"29.9%"},{"t":"Space"},{"t":"Str","c":"indicating"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"high"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"false"},{"t":"Space"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"estimates."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"TensorFlow-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"S
tr","c":"however,"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"acceptable"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"SoftBreak"},{"t":"Str","c":"optimal"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"68%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"69.9%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"45.6%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"indicates"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"both"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"tends"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"SoftBreak"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"too"},{"t":"Space"},{"t":"Str","c":"many"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"(genotoxic),"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"therefore"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"high"},{"t":"Space"},{"t":"Str","c":"false"},{"t":"Space"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"rate."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"allows"},{"t":"Space"},{"t":"Str","c":"at"},{"t":"Space"},{"t":"Str","c":"least"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"SoftBreak"},{"t":"Str","c":"generated"},{"t":"Space"},
{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"make"},{"t":"Space"},{"t":"Str","c":"group"},{"t":"Space"},{"t":"Str","c":"statements,"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"confidence"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"estimations"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"single"},{"t":"Space"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str","c":"appears"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"insufficiently"},{"t":"Space"},{"t":"Str","c":"low."}]},{"t":"Para","c":[{"t":"Str","c":"Several"},{"t":"Space"},{"t":"Str","c":"factors"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str","c":"likely"},{"t":"Space"},{"t":"Str","c":"contributed"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"moderate"},{"t":"SoftBreak"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"methods"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"shown"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-validation:"}]},{"t":"OrderedList","c":[[1,{"t":"Decimal"},{"t":"Period"}],[[{"t":"Plain","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"St
r","c":"of"},{"t":"Space"},{"t":"Str","c":"AMES"},{"t":"SoftBreak"},{"t":"Str","c":"tests"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"genotoxicity"},{"t":"Space"},{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"ICH"},{"t":"Space"},{"t":"Str","c":"2011"}],["#_ENREF_63",""]]},{"t":"Str","c":"(),"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"vitro"}]},{"t":"Space"},{"t":"Str","c":"test"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"strains"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"bacteria"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"Salmonella"},{"t":"Space"},{"t":"Str","c":"typhimurium"}]},{"t":"Str","c":"."},{"t":"Space"},{"t":"Str","c":"In"},{"t":"SoftBreak"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"test,"},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"evaluated"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"prior"},{"t":"SoftBreak"},{"t":"Str","c":"metabolic"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"test"},{"t":"Space"},{"t":"Str","c":"substance."},{"t":"Space"},{"t":"Str","c":"Metabolic"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"SoftBreak"},{"t":"Str","c":"could"},{"t":"Space"},{"t":"Str","c":"result"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"formation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"metabolites"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"SoftBreak"},{"t":"Str","c":"non-genotoxic"},{"t":"Space"},{"t":"Str","c":"parent"},{"t":"
Space"},{"t":"Str","c":"compounds."},{"t":"Space"},{"t":"Str","c":"However,"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"Space"},{"t":"Str","c":"distinction"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"made"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"needed"},{"t":"Space"},{"t":"Str","c":"metabolic"},{"t":"SoftBreak"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"before"},{"t":"Space"},{"t":"Str","c":"being"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"those"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"SoftBreak"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"metabolic"},{"t":"Space"},{"t":"Str","c":"activation."},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"able"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"handle"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"inaccuracy"}]]},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"well"},{"t":"Space"},{"t":"Str","c":"due"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"way"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"algorithm"},{"t":"Space"},{"t":"Str","c":"works:"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"predicts"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"
Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"potential"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"comparable"},{"t":"Space"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"features,"},{"t":"SoftBreak"},{"t":"Str","c":"considering"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"neighbours."},{"t":"Space"},{"t":"Str","c":"Based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"similarity,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"probability"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"SoftBreak"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"independently"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"(meaning"},{"t":"SoftBreak"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"sum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"probabilities"},{"t":"Space"},{"t":"Str","c":"does"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"necessarily"},{"t":"Space"},{"t":"Str","c":"adds"},{"t":"Space"},{"t":"Str","c":"up"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"100%)."},{"t":"SoftBreak"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"class"
},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"higher"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"then"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"overall"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substance."}]}]]]},{"t":"BlockQuote","c":[{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"contrast,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"need"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"trained"},{"t":"Space"},{"t":"Str","c":"first"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"recognise"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"features"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"responsible"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"genotoxicity."},{"t":"SoftBreak"},{"t":"Str","c":"Therefore,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"mixture"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"being"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"without"},{"t":"SoftBreak"},{"t":"Str","c":"metabolic"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"may"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str"
,"c":"adversely"},{"t":"SoftBreak"},{"t":"Str","c":"affected"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ability"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"separate"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"two"},{"t":"Space"},{"t":"Str","c":"distinct"},{"t":"Space"},{"t":"Str","c":"classes"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"thus"},{"t":"Space"},{"t":"Str","c":"explains"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"relatively"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"models."}]}]},{"t":"OrderedList","c":[[2,{"t":"Decimal"},{"t":"Period"}],[[{"t":"Plain","c":[{"t":"Str","c":"Machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"algorithms"},{"t":"Space"},{"t":"Str","c":"try"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"find"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"optimized"},{"t":"Space"},{"t":"Str","c":"solution"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"high-dimensional"},{"t":"Space"},{"t":"Str","c":"(one"},{"t":"Space"},{"t":"Str","c":"dimension"},{"t":"Space"},{"t":"Str","c":"per"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"predictor)"},{"t":"Space"},{"t":"Str","c":"space."},{"t":"Space"},{"t":"Str","c":"Sometimes"},{"t":"SoftBreak"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"methods"},{"t":"Space"},{"t":"Str","c":"do"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"find"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"global"},{"t":"Space"},{"t":"Str","c":"optimum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Spac
e"},{"t":"Str","c":"estimates"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"SoftBreak"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"(not"},{"t":"Space"},{"t":"Str","c":"optimal)"},{"t":"Space"},{"t":"Str","c":"solutions."},{"t":"Space"},{"t":"Str","c":"Strategies"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"find"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"global"},{"t":"SoftBreak"},{"t":"Str","c":"solutions"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"systematic"},{"t":"Space"},{"t":"Str","c":"variation"},{"t":"Space"},{"t":"Str","c":"(grid"},{"t":"Space"},{"t":"Str","c":"search)"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"hyperparameters"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"methods,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"may"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"very"},{"t":"Space"},{"t":"Str","c":"time"},{"t":"Space"},{"t":"Str","c":"consuming"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"particular"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"large"},{"t":"Space"},{"t":"Str","c":"datasets."}]}]]]},{"t":"Header","c":[1,["conclusions",[],[]],[{"t":"Str","c":"Conclusions"}]]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"study,"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"attempt"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"made"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"potential"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str",
"c":"using"},{"t":"Space"},{"t":"Str","c":"five"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"techniques"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":","},{"t":"Space"},{"t":"Str","c":"RF,"},{"t":"Space"},{"t":"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"SoftBreak"},{"t":"Str","c":"(R-project"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"TensorFlow)."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"fitted"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"Space"},{"t":"Str","c":"partly"},{"t":"SoftBreak"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"findings"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"literature,"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"best"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"obtained"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"TensorFlow"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model."},{"t":"Space"},{"t":"Str","c":"Therefore,"},{"t":"Space"},{"t":"Str","c":"modelling"},{"t":"Space"},{"t":"Str","c":"allows"},{"t":"Space"},{"t":"Str","c":"statements"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"relative"},{"t":"Space"},{"t":"Str","c":"risks"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"genotoxicity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"groups."},{"t":"Space"},{"t":"St
r","c":"Individual"},{"t":"SoftBreak"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"selective"},{"t":"Space"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str","c":"appear,"},{"t":"Space"},{"t":"Str","c":"however,"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"reliable"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"current"},{"t":"Space"},{"t":"Str","c":"basis"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset."}]},{"t":"Para","c":[{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"study"},{"t":"Space"},{"t":"Str","c":"emphasises"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"importance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"critical"},{"t":"Space"},{"t":"Str","c":"assessment"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"models."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"includes"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"Space"},{"t":"Str","c":"extensive"},{"t":"Space"},{"t":"Str","c":"literature"},{"t":"SoftBreak"},{"t":"Str","c":"research"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"assess"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"plausibility"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"predictions,"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"also"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"good"},{"t":"SoftBreak"},{"t":"Str","c":"knowledge"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str
","c":"the"},{"t":"Space"},{"t":"Str","c":"metabolism"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"test"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"understanding"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"possible"},{"t":"Space"},{"t":"Str","c":"mechanisms"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"toxicity."}]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"further"},{"t":"Space"},{"t":"Str","c":"studies,"},{"t":"Space"},{"t":"Str","c":"additional"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"techniques"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"modified"},{"t":"SoftBreak"},{"t":"Str","c":"(extended)"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"should"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"additional"},{"t":"Space"},{"t":"Str","c":"attempt"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"SoftBreak"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"potential"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"PAs."}]},{"t":"Header","c":[1,["references",[],[]],[{"t":"Str","c":"References"}]]}],"pandoc-api-version":[1,17,5,4],"meta":{"bibliography":{"t":"MetaInlines","c":[{"t":"Str","c":"bibliography.bib"}]},"documentclass":{"t":"MetaInlines","c":[{"t":"Str","c":"scrartcl"}]},"keywords":{"t":"MetaInlines","c":[{"t":"Str","c":"mutagenicity,"},{"t":"Space"},{"t":"Str","c":"(Q)SAR,"},{"t":"Space"},{"t":"Str","c":"lazar,"},{"t":"Space"},{"t":"Str","c":"random"},
{"t":"Space"},{"t":"Str","c":"forest,"},{"t":"Space"},{"t":"Str","c":"support"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"machine,"},{"t":"Space"},{"t":"Str","c":"deep"},{"t":"Space"},{"t":"Str","c":"learning"}]},"author":{"t":"MetaList","c":[{"t":"MetaMap","c":{"email":{"t":"MetaInlines","c":[{"t":"Str","c":"helma@in-silico.ch"}]},"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Christoph Helma"}]},"correspondence":{"t":"MetaInlines","c":[{"t":"Str","c":"yes"}]},"id":{"t":"MetaString","c":"Christoph Helma"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"1"}]}}},{"t":"MetaMap","c":{"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Verena Schöning"}]},"id":{"t":"MetaString","c":"Verena Schöning"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"2"}]}}},{"t":"MetaMap","c":{"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Philipp Boss"}]},"id":{"t":"MetaString","c":"Philipp Boss"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"2"}]}}},{"t":"MetaMap","c":{"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Jürgen Drewe"}]},"id":{"t":"MetaString","c":"Jürgen 
Drewe"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"2"}]}}}]},"abstract":{"t":"MetaBlocks","c":[{"t":"Para","c":[{"t":"Str","c":"k-nearest"},{"t":"Space"},{"t":"Str","c":"neighbor"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":"),"},{"t":"Space"},{"t":"Str","c":"random"},{"t":"Space"},{"t":"Str","c":"forest,"},{"t":"Space"},{"t":"Str","c":"support"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"deep"},{"t":"SoftBreak"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"algorithms"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"applied"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"new"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"Salmonella"}]},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"SoftBreak"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"8281"},{"t":"Space"},{"t":"Str","c":"unique"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"structures."},{"t":"Space"},{"t":"Str","c":"Algorithm"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"evaluated"},{"t":"SoftBreak"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"5-fold"},{"t":"Space"},{"t":"Str","c":"crossvalidation."},{"t":"SoftBreak"},{"t":"Str","c":"TODO"},{"t":"SoftBreak"},{"t":"Str","c":"-"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"SoftBreak"},{"t":"Str","c":"-"},{"t":"Space"},{"t":"Str","c":"conclusion"}]}]},"title":{"t":"MetaInlines","c":[{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"comparison"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"random"},{"t":"Space"},{"t":"Str","c":"forest,"},{"t":"Space"},{"t":"Str","c":"support"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"machine
,"},{"t":"Space"},{"t":"Str","c":"deep"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"lazar"},{"t":"Space"},{"t":"Str","c":"algorithms"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"predicting"},{"t":"Space"},{"t":"Str","c":"mutagenicity"}]},"cito_cites":{"t":"MetaMap","c":{"citation":{"t":"MetaList","c":[{"t":"MetaString","c":"Kazius2005"},{"t":"MetaString","c":"Hansen2009"},{"t":"MetaString","c":"Yap2011"},{"t":"MetaString","c":"Bender2004"},{"t":"MetaString","c":"OBoyle2011a"},{"t":"MetaString","c":"Yap2011"},{"t":"MetaString","c":"Rücker2007"}]}}},"institute":{"t":"MetaList","c":[{"t":"MetaMap","c":{"address":{"t":"MetaInlines","c":[{"t":"Str","c":"Rastatterstrasse"},{"t":"Space"},{"t":"Str","c":"41,"},{"t":"Space"},{"t":"Str","c":"4057"},{"t":"Space"},{"t":"Str","c":"Basel,"},{"t":"Space"},{"t":"Str","c":"Switzerland"}]},"name":{"t":"MetaInlines","c":[{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"silico"},{"t":"Space"},{"t":"Str","c":"toxicology"},{"t":"Space"},{"t":"Str","c":"gmbh"}]},"id":{"t":"MetaString","c":"ist"}}},{"t":"MetaMap","c":{"address":{"t":"MetaInlines","c":[{"t":"Str","c":"Seeblickstrasse"},{"t":"Space"},{"t":"Str","c":"4,"},{"t":"Space"},{"t":"Str","c":"8590"},{"t":"Space"},{"t":"Str","c":"Romanshorn,"},{"t":"Space"},{"t":"Str","c":"Switzerland"}]},"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Zeller"},{"t":"Space"},{"t":"Str","c":"AG"}]},"id":{"t":"MetaString","c":"zeller"}}}]}}}
diff --git a/paper/outfile.epub b/paper/outfile.epub
new file mode 100644
index 0000000..b64e7d8
--- /dev/null
+++ b/paper/outfile.epub
Binary files differ
diff --git a/paper/outfile.html b/paper/outfile.html
new file mode 100644
index 0000000..d2a84a9
--- /dev/null
+++ b/paper/outfile.html
@@ -0,0 +1,822 @@
+<!doctype html>
+<!--
+Template created by Andrew G. York, based on this theme by Diana Mounter:
+https://github.com/broccolini/dinky, which mentioned that
+attribution is appreciated. Thanks, broccolini! -->
+<html lang="en">
+<head>
+ <base target="_blank" />
+ <meta charset="utf-8" />
+ <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+ <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no" />
+ <meta name="keywords" content="mutagenicity, (Q)SAR, lazar, random forest, support vector machine, deep learning" />
+ <title>A comparison of random forest, support vector machine, deep learning and lazar algorithms for predicting mutagenicity</title>
+ <style type="text/css">code{white-space: pre;}</style>
+ <style type="text/css">@font-face {
+font-family: 'Arvo';
+font-style: italic;
+font-weight: 400;
+src: local('Arvo Italic'), local('Arvo-Italic'), url(data:font/ttf;base64,) format('truetype');
+}
+@font-face {
+font-family: 'Arvo';
+font-style: normal;
+font-weight: 400;
+src: local('Arvo'), url(data:font/ttf;base64,) format('truetype');
+}
+@font-face {
+font-family: 'Arvo';
+font-style: normal;
+font-weight: 700;
+src: local('Arvo Bold'), local('Arvo-Bold'), url(data:font/ttf;base64,) format('truetype');
+}
+
+html, body, div, span, applet, object, iframe,
+h1, h2, h3, h4, h5, h6, p, blockquote, pre,
+a, abbr, acronym, address, big, cite, code,
+del, dfn, em, img, ins, kbd, q, s, samp,
+small, strike, strong, sub, sup, tt, var,
+b, u, i, center,
+dl, dt, dd, ol, ul, li,
+fieldset, form, label, legend,
+table, caption, tbody, tfoot, thead, tr, th, td,
+article, aside, canvas, details, embed,
+figure, figcaption, footer, header, hgroup,
+article, menu, nav, output, ruby, section, summary,
+time, mark, audio, video {
+margin: 0;
+padding: 0;
+border: 0;
+font: inherit;
+vertical-align: baseline;
+}
+
+body {
+padding: 10px 50px 0 0;
+font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
+font-size: 15px;
+color: #030303;
+background-color: #FCFBF8;
+margin: 0;
+line-height: 1.8em;
+-webkit-font-smoothing: antialiased;
+}
+h1, h2, h3, h4, h5, h6 {
+color: #232323;
+margin: 36px 0 10px;
+}
+p, ul, ol, table, dl {
+margin: 0 0 22px;
+}
+sub, sup {
+font-size: 80%;
+}
+sub {
+vertical-align: sub;
+}
+sup {
+vertical-align: super;
+}
+.author-list {
+margin:0 0 0px;
+font-weight: 700;
+}
+.author-affiliations {
+margin:0 0 0px;
+font-style: italic;
+}
+.contact_email {
+font-style: italic;
+}
+.abstract {
+margin: 0% 1% 2%;
+font-weight: 700;
+}
+h1, h2, h3 {
+border-bottom: 1px solid #ccc;
+font-family: Arvo, Monaco, serif;
+font-weight: normal;
+line-height: 1.3;
+padding-bottom: 5px;
+}
+h1 {
+font-size: 30px;
+}
+h2 {
+font-size: 24px;
+}
+h3 {
+font-size: 18px;
+}
+h4, h5 {
+font-family: Arvo, Monaco, serif;
+font-weight: 700;
+}
+h6 {
+font-family: Arvo, Monaco, serif;
+font-weight: 200;
+}
+a {
+font-weight:200;
+text-decoration:none;
+}
+a:hover {
+text-decoration: underline;
+}
+a small {
+font-size: 12px;
+}
+em {
+font-style: italic;
+}
+strong {
+font-weight:700;
+}
+sup {
+vertical-align: super;
+font-size: smaller;
+}
+ul {
+list-style-position: inside;
+list-style: disc;
+padding-left: 25px;
+}
+ol {
+list-style-position: inside;
+list-style: decimal;
+padding-left: 25px;
+}
+blockquote {
+margin: 0;
+padding: 0 0 0 20px;
+font-style: italic;
+}
+dl, dt, dd, dl p {
+color: #444;
+}
+dl dt {
+font-weight: bold;
+}
+dl dd {
+padding-left: 20px;
+font-style: italic;
+}
+dl p {
+padding-left: 20px;
+font-style: italic;
+}
+hr {
+border: 0;
+background: #ccc;
+height: 1px;
+margin: 0 0 24px;
+}
+
+img {
+position: relative;
+margin: 0 auto;
+height: auto;
+max-width: 100%;
+padding: 0px;
+margin: 0px 0 0px 0;
+border: 0px solid #ccc;
+}
+p img {
+display: inline;
+margin: 0;
+padding: 0;
+vertical-align: middle;
+text-align: center;
+border: none;
+}
+figure {
+border: 1px solid #ccc;
+background: #FFFFFF;
+}
+figcaption {
+font-size: 12px;
+background: #FFFFFF;
+line-height: 150%;
+margin-right: 1%;
+margin-left: 1%;
+}
+
+code, pre {
+font-family: Monaco, "Bitstream Vera Sans Mono", "Lucida Console", Terminal, monospace;
+color: #000;
+background: #e7e7e7;
+font-size: 12px;
+}
+pre {
+padding: 4px 12px;
+border-radius:4px;
+border:1px solid #D7D8C8;
+overflow: auto;
+overflow-y: hidden;
+margin-bottom: 32px;
+}
+
+table {
+width: 100%;
+border: 1px solid #ccc;
+margin-bottom: 32px;
+text-align: left;
+}
+table.figure_controls {
+font-size: 12px;
+line-height: 100%;
+margin-bottom: 0px;
+}
+th {
+background: #232323;
+color: #FDFEFB;
+font-family: 'Arvo', Helvetica, Arial, sans-serif;
+font-size: 18px;
+font-weight: normal;
+padding: 10px;
+}
+td {
+background: #eee;
+padding: 0px;
+}
+
+.wrapper {
+width:960px;
+}
+
+.page-header {
+background-color: #474747;
+border-bottom-right-radius: 4px;
+border-top-right-radius: 4px;
+border: 1px solid #000;
+color: #FDFDFB;
+float: left;
+margin: 30px 25px 0 0;
+padding: 34px 25px 22px 50px;
+position: fixed;
+width: 170px;
+-webkit-font-smoothing: antialiased;
+}
+.subtitle {
+font-size: 16px;
+}
+.page-header h1 {
+font-family: Arvo, sans-serif;
+font-size: 30px;
+font-weight: 300;
+line-height: 1.3em;
+border-bottom: none;
+margin-top: 0;
+}
+.page-header h1,
+.page-header a {
+color: #fff;
+}
+.page-header a {
+text-decoration: underline;
+}
+a.name {
+white-space: nowrap;
+}
+.page-header ul {
+list-style:none;
+padding:0;
+}
+.page-header li {
+list-style-type: none;
+width: 135px;
+height: 15px;
+margin-bottom: 12px;
+line-height: 1em;
+padding: 6px 6px 6px 7px;
+background: #1100AF;
+background: -moz-linear-gradient(top, #1100AF 0%, #110082 100%);
+background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#f8f8f8), color-stop(100%, #dddddd));
+background: -webkit-linear-gradient(top, #1100AF 0%,#110082 100%);
+background: -o-linear-gradient(top, #1100AF 0%,#110082 100%);
+background: -ms-linear-gradient(top, #1100AF 0%,#110082 100%);
+background: linear-gradient(top, #1100AF 0%,#110082 100%);
+border-radius:4px;
+border:1px solid #0D0D0D;
+-webkit-box-shadow: inset 0px 1px 1px 0 rgba(38,2,233, 1);
+box-shadow: inset 0px 1px 1px 0 rgba(38,2,233, 1);
+}
+.page-header li:hover {
+background: #1D00C3;
+background: -moz-linear-gradient(top, #1D00C3 0%, #190195 100%);
+background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#f8f8f8), color-stop(100%,#dddddd));
+background: -webkit-linear-gradient(top, #1D00C3 0%,#190195 100%);
+background: -o-linear-gradient(top, #1D00C3 0%,#190195 100%);
+background: -ms-linear-gradient(top, #1D00C3 0%,#190195 100%);
+background: linear-gradient(top, #1D00C3 0%,#190195 100%);
+}
+.buttons {
+-webkit-font-smoothing: antialiased;
+background: url() no-repeat;
+font-weight: normal;
+height: 30px;
+padding: 2px 2px 2px 22px;
+text-shadow: rgba(0, 0, 0, 0.4) 0 -1px 0;
+}
+a.buttons {
+text-decoration: none;
+}
+.buttons.github {
+background: url() no-repeat 1px;
+}
+.buttons.pdf {
+background: url() no-repeat 1px;
+}
+.buttons:hover {
+color: #fff;
+text-decoration: none;
+}
+
+article {
+width: 650px;
+float: right;
+padding-bottom: 50px;
+}
+
+footer {
+width: 170px;
+float: left;
+position: fixed;
+bottom: 10px;
+padding-left: 50px;
+}
+@media print, screen and (max-width: 960px) {
+div.wrapper {
+width: auto;
+margin: 0;
+}
+.page-header, article, footer {
+float: none;
+position: static;
+width: auto;
+}
+footer {
+border-top: 1px solid #ccc;
+margin: 0 84px 0 50px;
+padding: 0;
+}
+.page-header {
+padding-right: 320px;
+}
+article {
+padding: 20px 84px 20px 50px;
+margin: 0 0 20px;
+}
+.page-header a small {
+display: inline;
+}
+.page-header ul {
+position: absolute;
+right: 130px;
+top: 84px;
+}
+}
+@media print, screen and (max-width: 720px) {
+body {
+word-wrap:break-word;
+}
+.page-header {
+padding: 10px 20px 0;
+margin-right: 0;
+}
+article {
+margin: 0 0 30px;
+padding: 10px 0 10px 20px;
+}
+footer {
+margin: 0 0 0 30px;
+}
+.page-header ul, .page-header p.view {
+position: static;
+}
+}
+@media print, screen and (max-width: 480px) {
+.page-header ul li.download {
+display: none;
+}
+footer {
+margin: 0 0 0 20px;
+}
+footer a {
+display:block;
+}
+}
+@media print {
+body {
+padding:0.4in;
+font-size:12pt;
+color:#444;
+}
+}
+.onlyprint {display: none;}
+@media print {
+.onlyprint {display: block;}
+}
+</style>
+ <script data-external="1" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js?config=TeX-AMS_CHTML-full"></script>
+ <!--[if lt IE 9]>
+ <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
+ <![endif]-->
+</head>
+<body>
+<div class="wrapper">
+<article typeof="ScholarlyArticle" vocab="http://schema.org/">
+ <!-- <header class="article-header"> -->
+<h1 property="headline">A comparison of random forest, support vector machine, deep learning and lazar algorithms for predicting mutagenicity</h1>
+<!-- </header> -->
+<p class="author-list">
+ <span property="author" typeof="Person">
+ Christoph Helma</span><sup><a href="mailto:helma@in-silico.ch">✉</a> 1</sup>,
+ <span property="author" typeof="Person">
+ Verena Schöning</span><sup>2</sup>,
+ <span property="author" typeof="Person">
+ Philipp Boss</span><sup>2</sup>,
+ <span property="author" typeof="Person">
+ Jürgen Drewe</span><sup>2</sup></p>
+<div class="author_affiliations">
+ <div class="affiliation"><sup>1</sup>in silico toxicology gmbh, Rastatterstrasse 41, 4057 Basel, Switzerland
+ </div>
+ <div class="affiliation"><sup>2</sup>Zeller AG, Seeblickstrasse 4, 8590 Romanshorn, Switzerland
+ </div>
+</div>
+<div class="author-info">
+ <div class="author-correspondence">
+ Correspondence: Christoph Helma <a href="mailto:helma@in-silico.ch">&lt;helma@in-silico.ch&gt;</a>
+ </div>
+ </div>
+<div class="abstract" property="description"><p>k-nearest neighbor (<code>lazar</code>), random forest, support vector machine and deep learning algorithms were applied to a new <em>Salmonella</em> mutagenicity dataset with 8281 unique chemical structures. Algorithm performance was evaluated using 5-fold crossvalidation. TODO - results - conclusion</p></div>
+
+
+
+<div property="articleBody" class="article-body">
+<h1 id="introduction">Introduction</h1>
+<p>TODO: algo history</p>
+<p>TODO: dataset history</p>
+<p>TODO: open problems</p>
+<h1 id="materials-and-methods">Materials and Methods</h1>
+<h2 id="mutagenicity-data">Mutagenicity data</h2>
+<p>For all methods, the same training dataset was used. The training dataset was compiled from the following sources:</p>
+<ul>
+<li><p>Kazius/Bursi Dataset (4337 compounds, <span class="citation" data-cites="Kazius2005">Kazius, McGuire, and Bursi (2005)</span>): <a href="http://cheminformatics.org/datasets/bursi/cas_4337.zip" class="uri">http://cheminformatics.org/datasets/bursi/cas_4337.zip</a></p></li>
+<li><p>Hansen Dataset (6513 compounds, <span class="citation" data-cites="Hansen2009">Hansen et al. (2009)</span>): <a href="http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv" class="uri">http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv</a></p></li>
+<li><p>EFSA Dataset (695 compounds): <a href="https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls" class="uri">https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls</a></p></li>
+</ul>
+<p>Mutagenicity classifications from Kazius and Hansen datasets were used without further processing. To achieve consistency between these datasets, EFSA compounds were classified as mutagenic, if at least one positive result was found for TA98 or TA100 Salmonella strains.</p>
+<p>Dataset merges were based on unique SMILES (<em>Simplified Molecular Input Line Entry Specification</em>) strings of the compound structures. Duplicated experimental data with the same outcome was merged into a single value, because it is likely that it originated from the same experiment. Contradictory results were kept as multiple measurements in the database. The combined training dataset contains 8281 unique structures.</p>
+<p>Source code for all data download, extraction and merge operations is publicly available from the git repository <a href="https://git.in-silico.ch/pyrrolizidine" class="uri">https://git.in-silico.ch/pyrrolizidine</a> under a GPL3 License.</p>
+<p>TODO: check/fix git repo</p>
+<p>For the Random Forest (RF), Support Vector Machines (SVM), and Deep Learning (DL) models, molecular descriptors were calculated with the PaDEL-Descriptors program (<a href="http://www.yapcwsoft.com" class="uri">http://www.yapcwsoft.com</a> version 2.21, <span class="citation" data-cites="Yap2011">Yap (2011)</span>).</p>
+<p>TODO: sentence ??</p>
+<p>From these, the descriptors that were actually used for the generation of the DL model were chosen.</p>
+<h2 id="algorithms">Algorithms</h2>
+<h3 id="lazar"><code>lazar</code></h3>
+<p><code>lazar</code> (<em>lazy structure activity relationships</em>) is a modular framework for read-across model development and validation. It follows the following basic workflow: For a given chemical structure <code>lazar</code>:</p>
+<ul>
+<li><p>searches in a database for similar structures (neighbours) with experimental data,</p></li>
+<li><p>builds a local QSAR model with these neighbours and</p></li>
+<li><p>uses this model to predict the unknown activity of the query compound.</p></li>
+</ul>
+<p>This procedure resembles an automated version of read-across predictions in toxicology; in machine learning terms it would be classified as a k-nearest-neighbour algorithm.</p>
+<p>Apart from this basic workflow, <code>lazar</code> is completely modular and allows the researcher to use any algorithm for similarity searches and local QSAR (<em>Quantitative structure–activity relationship</em>) modelling. Algorithms used within this study are described in the following sections.</p>
+<h4 id="neighbour-identification">Neighbour identification</h4>
+<p>Similarity calculations were based on MolPrint2D fingerprints (<span class="citation" data-cites="Bender2004">Bender et al. (2004)</span>) from the OpenBabel cheminformatics library (<span class="citation" data-cites="OBoyle2011a">O’Boyle et al. (2011)</span>). The MolPrint2D fingerprint uses atom environments as molecular representation, which resembles basically the chemical concept of functional groups. For each atom in a molecule, it represents the chemical environment using the atom types of connected atoms.</p>
+<p>MolPrint2D fingerprints are generated dynamically from chemical structures and do not rely on predefined lists of fragments (such as OpenBabel FP3, FP4 or MACCs fingerprints or lists of toxicophores/toxicophobes). This has the advantage that they may capture substructures of toxicological relevance that are not included in other fingerprints.</p>
+<p>From MolPrint2D fingerprints a feature vector with all atom environments of a compound can be constructed that can be used to calculate chemical similarities.</p>
+<p>The chemical similarity between two compounds a and b is expressed as the proportion between atom environments common in both structures A ∩ B and the total number of atom environments A ∪ B (Jaccard/Tanimoto index).</p>
+<p><span class="math display">\[sim = \frac{\left| A\ \cap B \right|}{\left| A\ \cup B \right|}\]</span></p>
+<p>Threshold selection is a trade-off between prediction accuracy (high threshold) and the number of predictable compounds (low threshold). As it is in many practical cases desirable to make predictions even in the absence of closely related neighbours, we follow a tiered approach:</p>
+<ul>
+<li><p>First a similarity threshold of 0.5 is used to collect neighbours, to create a local QSAR model and to make a prediction for the query compound.</p></li>
+<li><p>If any of these steps fails, the procedure is repeated with a similarity threshold of 0.2 and the prediction is flagged with a warning that it might be out of the applicability domain of the training data.</p></li>
+<li><p>Similarity thresholds of 0.5 and 0.2 are the default values chosen by the software developers and remained unchanged during the course of these experiments.</p></li>
+</ul>
+<p>Compounds with the same structure as the query structure are automatically eliminated from neighbours to obtain unbiased predictions in the presence of duplicates.</p>
+<h4 id="local-qsar-models-and-predictions">Local QSAR models and predictions</h4>
+<p>Only similar compounds (neighbours) above the threshold are used for local QSAR models. In this investigation, we are using a weighted majority vote from the neighbour’s experimental data for mutagenicity classifications. Probabilities for both classes (mutagenic/non-mutagenic) are calculated according to the following formula and the class with the higher probability is used as prediction outcome.</p>
+<p><span class="math display">\[p_{c} = \ \frac{\sum_{}^{}\text{sim}_{n,c}}{\sum_{}^{}\text{sim}_{n}}\]</span></p>
+<p><span class="math inline">\(p_{c}\)</span> Probability of class c (e.g. mutagenic or non-mutagenic)<br />
+<span class="math inline">\(\sum_{}^{}\text{sim}_{n,c}\)</span> Sum of similarities of neighbours with class c<br />
+<span class="math inline">\(\sum_{}^{}\text{sim}_{n}\)</span> Sum of similarities of all neighbours</p>
+<h4 id="applicability-domain">Applicability domain</h4>
+<p>The applicability domain (AD) of <code>lazar</code> models is determined by the structural diversity of the training data. If no similar compounds are found in the training data no predictions will be generated. Warnings are issued if the similarity threshold had to be lowered from 0.5 to 0.2 in order to enable predictions. Predictions without warnings can be considered as close to the applicability domain and predictions with warnings as more distant from the applicability domain. Quantitative applicability domain information can be obtained from the similarities of individual neighbours.</p>
+<h4 id="availability">Availability</h4>
+<ul>
+<li><p><code>lazar</code> experiments for this manuscript: <a href="https://git.in-silico.ch/pyrrolizidine" class="uri">https://git.in-silico.ch/pyrrolizidine</a> (source code, GPL3)</p></li>
+<li><p><code>lazar</code> framework: <a href="https://git.in-silico.ch/lazar" class="uri">https://git.in-silico.ch/lazar</a> (source code, GPL3)</p></li>
+<li><p><code>lazar</code> GUI: <a href="https://git.in-silico.ch/lazar-gui" class="uri">https://git.in-silico.ch/lazar-gui</a> (source code, GPL3)</p></li>
+<li><p>Public web interface: <a href="https://lazar.in-silico.ch" class="uri">https://lazar.in-silico.ch</a></p></li>
+</ul>
+<h3 id="random-forest-support-vector-machines-and-deep-learning-in-r-project">Random Forest, Support Vector Machines, and Deep Learning in R-project</h3>
+<p>In comparison to <code>lazar</code>, three other models (Random Forest (RF), Support Vector Machines (SVM), and Deep Learning (DL)) were evaluated.</p>
+<p>For the generation of these models, molecular 1D and 2D descriptors of the training dataset were calculated using PaDEL-Descriptors (<a href="http://www.yapcwsoft.com" class="uri">http://www.yapcwsoft.com</a> version 2.21, <span class="citation" data-cites="Yap2011">Yap (2011)</span>).</p>
+<p>As the training dataset contained over 8280 instances, it was decided to delete instances with missing values during data pre-processing. Furthermore, substances with equivocal outcome were removed. The final training dataset contained 8080 instances with known mutagenic potential. The RF, SVM, and DL models were generated using the R software (R-project for Statistical Computing, <a href="https://www.r-project.org/" class="uri">https://www.r-project.org/</a><em>;</em> version 3.3.1), specific R packages used are identified for each step in the description below. During feature selection, descriptors with near zero variance were removed using ‘<em>NearZeroVar</em>’-function (package ‘caret’). If the percentage of the most common value was more than 90% or when the frequency ratio of the most common value to the second most common value was greater than 95:5 (e.g. 95 instances of the most common value and only 5 or less instances of the second most common value), a descriptor was classified as having a near zero variance. After that, highly correlated descriptors were removed using the ‘<em>findCorrelation</em>’-function (package ‘caret’) with a cut-off of 0.9. This resulted in a training dataset with 516 descriptors. These descriptors were scaled to be in the range between 0 and 1 using the ‘<em>preProcess</em>’-function (package ‘caret’). The scaling routine was saved in order to apply the same scaling on the testing dataset. As these three steps did not consider the outcome, it was decided that they do not need to be included in the cross-validation of the model. To further reduce the number of features, a LASSO (<em>least absolute shrinkage and selection operator</em>) regression was performed using the ‘<em>glmnet</em>’-function (package ‘<em>glmnet</em>’). The reduced dataset was used for the generation of the pre-trained models.</p>
+<p>For the RF model, the ‘<em>randomForest</em>’-function (package ‘<em>randomForest</em>’) was used. A forest with 1000 trees with maximal terminal nodes of 200 was grown for the prediction.</p>
+<p>The ‘<em>svm</em>’-function (package ‘e1071’) with a <em>radial basis function kernel</em> was used for the SVM model.</p>
+<p>The DL model was generated using the ‘<em>h2o.deeplearning</em>’-function (package ‘<em>h2o</em>’). The DL model contained four hidden layers with 70, 50, 50, and 10 neurons, respectively. Other hyperparameters were set as follows: l1=1.0E-7, l2=1.0E-11, epsilon = 1.0E-10, rho = 0.8, and quantile_alpha = 0.5. For all other hyperparameters, the default values were used. Weights and biases were in a first step determined with an unsupervised DL model. These values were then used for the actual, supervised DL model.</p>
+<p>To validate these models, an internal cross-validation approach was chosen. The training dataset was randomly split in training data, which contained 95% of the data, and validation data, which contained 5% of the data. A feature selection with LASSO on the training data was performed, reducing the number of descriptors to approximately 100. This step was repeated five times. Based on each of the five different training data, the predictive models were trained and the performance tested with the validation data. This step was repeated 10 times. Furthermore, a y-randomisation using the RF model was performed. During y-randomisation, the outcome (y-variable) is randomly permuted. The theory is that after randomisation of the outcome, the model should not be able to correlate the outcome to the properties (descriptor values) of the substances. The performance of the model should therefore indicate a by-chance prediction with an accuracy of about 50%. If this is true, it can be concluded that correlation between actual outcome and properties of the substances is real and not by chance (<span class="citation" data-cites="Rücker2007">Rücker, Rücker, and Meringer (2007)</span>).</p>
+<p><img src="" style="width:6.26875in;height:5.48611in" /></p>
+<p>Figure 1: Flowchart of the generation and validation of the models generated in R-project</p>
+<h4 id="applicability-domain-1">Applicability domain</h4>
+<p>The AD of the training dataset and the PA dataset was evaluated using the Jaccard distance. A Jaccard distance of ‘0’ indicates that the substances are similar, whereas a value of ‘1’ shows that the substances are different. The Jaccard distance was below 0.2 for all PAs relative to the training dataset. Therefore, the PA dataset is within the AD of the training dataset and the models can be used to predict the genotoxic potential of the PA dataset.</p>
+<h4 id="y-randomisation">y-randomisation</h4>
+<p>After y-randomisation of the outcome, the accuracy and CCR are around 50%, indicating a chance distribution of the results. This shows that the outcome is actually related to the predictors and not by chance.</p>
+<h3 id="deep-learning-in-tensorflow">Deep Learning in TensorFlow</h3>
+<p>Alternatively, a DL model was established with Python-based TensorFlow program (<a href="https://www.tensorflow.org/" class="uri">https://www.tensorflow.org/</a>) using the high-level API Keras (<a href="https://www.tensorflow.org/guide/keras" class="uri">https://www.tensorflow.org/guide/keras</a>) to build the models.</p>
+<p>Data pre-processing was done by rank transformation using the ‘<em>QuantileTransformer</em>’ procedure. A sequential model has been used. Four layers have been used: input layer, two hidden layers (with 12, 8 and 8 nodes, respectively) and one output layer. For the output layer, a sigmoidal activation function and for all other layers the ReLU (‘<em>Rectified Linear Unit</em>’) activation function was used. Additionally, a L<sup>2</sup>-penalty of 0.001 was used for the input layer. For training of the model, the ADAM algorithm was used to minimise the cross-entropy loss using the default parameters of Keras. Training was performed for 100 epochs with a batch size of 64. The model was implemented with Python 3.6 and Keras. For training of the model, a 6-fold cross-validation was used. Accuracy was estimated by ROC-AUC and confusion matrix.</p>
+<h2 id="validation">Validation</h2>
+<h1 id="results">Results</h1>
+<h2 id="lazar-1"><code>lazar</code></h2>
+<h2 id="random-forest">Random Forest</h2>
+<p>The validation showed that the RF model has an accuracy of 64%, a sensitivity of 66% and a specificity of 63%. The confusion matrix of the model, calculated for 8080 instances, is provided in Table 1.</p>
+<p>Table 1: Confusion matrix of the RF model</p>
+<table>
+<thead>
+<tr class="header">
+<th></th>
+<th style="text-align: left;">Predicted genotoxicity</th>
+<th></th>
+<th></th>
+<th></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>Measured genotoxicity</td>
+<td style="text-align: left;"></td>
+<td><strong><em>PP</em></strong></td>
+<td><strong><em>PN</em></strong></td>
+<td><strong><em>Total</em></strong></td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>TP</em></strong></td>
+<td>2274</td>
+<td>1163</td>
+<td>3437</td>
+</tr>
+<tr class="odd">
+<td></td>
+<td style="text-align: left;"><strong><em>TN</em></strong></td>
+<td>1736</td>
+<td>2907</td>
+<td>4643</td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>Total</em></strong></td>
+<td>4010</td>
+<td>4070</td>
+<td>8080</td>
+</tr>
+</tbody>
+</table>
+<p>PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: True negative</p>
+<h2 id="support-vector-machines">Support Vector Machines</h2>
+<p>The validation showed that the SVM model has an accuracy of 62%, a sensitivity of 65% and a specificity of 60%. The confusion matrix of SVM model, calculated for 8080 instances, is provided in Table 2.</p>
+<p>Table 2: Confusion matrix of the SVM model</p>
+<table>
+<thead>
+<tr class="header">
+<th></th>
+<th style="text-align: left;">Predicted genotoxicity</th>
+<th></th>
+<th></th>
+<th></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>Measured genotoxicity</td>
+<td style="text-align: left;"></td>
+<td><strong><em>PP</em></strong></td>
+<td><strong><em>PN</em></strong></td>
+<td><strong><em>Total</em></strong></td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>TP</em></strong></td>
+<td>2057</td>
+<td>1107</td>
+<td>3164</td>
+</tr>
+<tr class="odd">
+<td></td>
+<td style="text-align: left;"><strong><em>TN</em></strong></td>
+<td>1953</td>
+<td>2963</td>
+<td>4916</td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>Total</em></strong></td>
+<td>4010</td>
+<td>4070</td>
+<td>8080</td>
+</tr>
+</tbody>
+</table>
+<p>PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: True negative</p>
+<h2 id="deep-learning-r-project">Deep Learning (R-project)</h2>
+<p>The validation showed that the DL model generated in R has an accuracy of 59%, a sensitivity of 89% and a specificity of 30%. The confusion matrix of the model, normalised to 8080 instances, is provided in Table 3.</p>
+<p>Table 3: Confusion matrix of the DL model (R-project)</p>
+<table>
+<thead>
+<tr class="header">
+<th></th>
+<th style="text-align: left;">Predicted genotoxicity</th>
+<th></th>
+<th></th>
+<th></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>Measured genotoxicity</td>
+<td style="text-align: left;"></td>
+<td><strong><em>PP</em></strong></td>
+<td><strong><em>PN</em></strong></td>
+<td><strong><em>Total</em></strong></td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>TP</em></strong></td>
+<td>3575</td>
+<td>435</td>
+<td>4010</td>
+</tr>
+<tr class="odd">
+<td></td>
+<td style="text-align: left;"><strong><em>TN</em></strong></td>
+<td>2853</td>
+<td>1217</td>
+<td>4070</td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>Total</em></strong></td>
+<td>6428</td>
+<td>1652</td>
+<td>8080</td>
+</tr>
+</tbody>
+</table>
+<p>PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: True negative</p>
+<h2 id="dl-model-tensorflow">DL model (TensorFlow)</h2>
+<p>The validation showed that the DL model generated in TensorFlow has an accuracy of 68%, a sensitivity of 70% and a specificity of 46%. The confusion matrix of the model, normalised to 8080 instances, is provided in Table 4.</p>
+<p>Table 4: Confusion matrix of the DL model (TensorFlow)</p>
+<table>
+<thead>
+<tr class="header">
+<th></th>
+<th style="text-align: left;">Predicted genotoxicity</th>
+<th></th>
+<th></th>
+<th></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>Measured genotoxicity</td>
+<td style="text-align: left;"></td>
+<td><strong><em>PP</em></strong></td>
+<td><strong><em>PN</em></strong></td>
+<td><strong><em>Total</em></strong></td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>TP</em></strong></td>
+<td>2851</td>
+<td>1227</td>
+<td>4078</td>
+</tr>
+<tr class="odd">
+<td></td>
+<td style="text-align: left;"><strong><em>TN</em></strong></td>
+<td>1825</td>
+<td>2177</td>
+<td>4002</td>
+</tr>
+<tr class="even">
+<td></td>
+<td style="text-align: left;"><strong><em>Total</em></strong></td>
+<td>4676</td>
+<td>3404</td>
+<td>8080</td>
+</tr>
+</tbody>
+</table>
+<p>PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: True negative</p>
+<p>The ROC curves from the 6-fold validation are shown in Figure 7.</p>
+<p><img src="" style="width:3.825in;height:2.7327in" /></p>
+<p>Figure 7: Six-fold cross-validation of the TensorFlow DL model shows an average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68%.</p>
+<p>In summary, the validation results of the four methods are presented in the following table.</p>
+<p>Table 5 Results of the cross-validation of the four models and after y-randomisation</p>
+<table style="width:97%;">
+<colgroup>
+<col style="width: 32%"></col>
+<col style="width: 15%"></col>
+<col style="width: 10%"></col>
+<col style="width: 19%"></col>
+<col style="width: 19%"></col>
+</colgroup>
+<thead>
+<tr class="header">
+<th></th>
+<th style="text-align: left;">Accuracy</th>
+<th style="text-align: left;">CCR</th>
+<th style="text-align: left;">Sensitivity</th>
+<th style="text-align: left;">Specificity</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>RF model</td>
+<td style="text-align: left;">64.1%</td>
+<td style="text-align: left;">64.4%</td>
+<td style="text-align: left;">66.2%</td>
+<td style="text-align: left;">62.6%</td>
+</tr>
+<tr class="even">
+<td>SVM model</td>
+<td style="text-align: left;">62.1%</td>
+<td style="text-align: left;">62.6%</td>
+<td style="text-align: left;">65.0%</td>
+<td style="text-align: left;">60.3%</td>
+</tr>
+<tr class="odd">
+<td>DL model<br />
+(R-project)</td>
+<td style="text-align: left;">59.3%</td>
+<td style="text-align: left;">59.5%</td>
+<td style="text-align: left;">89.2%</td>
+<td style="text-align: left;">29.9%</td>
+</tr>
+<tr class="even">
+<td>DL model (TensorFlow)</td>
+<td style="text-align: left;">68%</td>
+<td style="text-align: left;">62.2%</td>
+<td style="text-align: left;">69.9%</td>
+<td style="text-align: left;">45.6%</td>
+</tr>
+<tr class="odd">
+<td>y-randomisation</td>
+<td style="text-align: left;">50.5%</td>
+<td style="text-align: left;">50.4%</td>
+<td style="text-align: left;">50.3%</td>
+<td style="text-align: left;">50.6%</td>
+</tr>
+</tbody>
+</table>
+<p>CCR (correct classification rate)</p>
+<h1 id="discussion">Discussion</h1>
+<p>General model performance</p>
+<p>Based on the results of the cross-validation for all models, <code>lazar</code>, RF, SVM, DL (R-project) and DL (TensorFlow) it can be stated that the prediction results are not optimal due to different reasons. The accuracy as measured during cross-validation of the four models (RF, SVM, DL (R-project and TensorFlow)) was partly low with CCR values between 59.3 and 68%, with the R-generated DL model and the TensorFlow-generated DL model showing the worst and the best performance, respectively. The validation of the R-generated DL model revealed a high sensitivity (89.2%) but an unacceptably low specificity of 29.9% indicating a high number of false positive estimates. The TensorFlow-generated DL model, however, showed an acceptable but not optimal accuracy of 68%, a sensitivity of 69.9% and a specificity of 45.6%. The low specificity indicates that both DL models tend to predict too many instances as positive (genotoxic), and therefore have a high false positive rate. This allows at least with the TensorFlow generated DL model to make group statements, but the confidence for estimations of single PAs appears to be insufficient.</p>
+<p>Several factors have likely contributed to the low to moderate performance of the used methods as shown during the cross-validation:</p>
+<ol type="1">
+<li>The outcome in the training dataset was based on the results of AMES tests for genotoxicity (<a href="#_ENREF_63">ICH 2011</a>), an <em>in vitro</em> test in different strains of the bacteria <em>Salmonella typhimurium</em>. In this test, mutagenicity is evaluated with and without prior metabolic activation of the test substance. Metabolic activation could result in the formation of genotoxic metabolites from non-genotoxic parent compounds. However, no distinction was made in the training dataset between substances that needed metabolic activation before being mutagenic and those that were mutagenic without metabolic activation. <code>lazar</code> is able to handle this ‘inaccuracy’ in the training dataset well due to the way the algorithm works: <code>lazar</code> predicts the genotoxic potential based on the neighbours of substances with comparable structural features, considering mutagenic and not mutagenic neighbours. Based on the structural similarity, a probability for mutagenicity and no mutagenicity is calculated independently from each other (meaning that the sum of probabilities does not necessarily add up to 100%). The class with the higher outcome is then the overall outcome for the substance.</li>
+</ol>
+<blockquote>
+<p>In contrast, the other models need to be trained first to recognise the structural features that are responsible for genotoxicity. Therefore, the mixture of substances being mutagenic with and without metabolic activation in the training dataset may have adversely affected the ability to separate the dataset in two distinct classes and thus explains the relatively low performance of these models.</p>
+</blockquote>
+<ol start="2" type="1">
+<li>Machine learning algorithms try to find an optimized solution in a high-dimensional (one dimension per each predictor) space. Sometimes these methods do not find the global optimum of estimates but only local (not optimal) solutions. Strategies to find the global solutions are systematic variation (grid search) of the hyperparameters of the methods, which may be very time consuming in particular in large datasets.</li>
+</ol>
+<h1 id="conclusions">Conclusions</h1>
+<p>In this study, an attempt was made to predict the genotoxic potential of PAs using five different machine learning techniques (<code>lazar</code>, RF, SVM, DL (R-project and TensorFlow)). The results of all models fitted only partly to the findings in literature, with best results obtained with the TensorFlow DL model. Therefore, modelling allows statements on the relative risks of genotoxicity of the different PA groups. Individual predictions for selective PAs appear, however, not reliable on the current basis of the used training dataset.</p>
+<p>This study emphasises the importance of critical assessment of predictions by QSAR models. This includes not only extensive literature research to assess the plausibility of the predictions, but also a good knowledge of the metabolism of the test substances and understanding for possible mechanisms of toxicity.</p>
+<p>In further studies, additional machine learning techniques or a modified (extended) training dataset should be used for an additional attempt to predict the genotoxic potential of PAs.</p>
+<h1 id="references" class="unnumbered">References</h1>
+<div id="refs" class="references" role="doc-bibliography">
+<div id="ref-Bender2004">
+<p>Bender, Andreas, Hamse Y. Mussa, Robert C. Glen, and Stephan Reiling. 2004. “Molecular Similarity Searching Using Atom Environments, Information-Based Feature Selection, and a Naïve Bayesian Classifier.” <em>Journal of Chemical Information and Computer Sciences</em> 44 (1): 170–78. <a href="https://doi.org/10.1021/ci034207y">https://doi.org/10.1021/ci034207y</a>.</p>
+</div>
+<div id="ref-Hansen2009">
+<p>Hansen, Katja, Sebastian Mika, Timon Schroeter, Andreas Sutter, Antonius ter Laak, Thomas Steger-Hartmann, Nikolaus Heinrich, and Klaus-Robert Müller. 2009. “Benchmark Data Set for in Silico Prediction of Ames Mutagenicity.” <em>Journal of Chemical Information and Modeling</em> 49 (9): 2077–81. <a href="https://doi.org/10.1021/ci900161g">https://doi.org/10.1021/ci900161g</a>.</p>
+</div>
+<div id="ref-Kazius2005">
+<p>Kazius, J., R. McGuire, and R. Bursi. 2005. “Derivation and Validation of Toxicophores for Mutagenicity Prediction.” <em>J Med Chem</em>, no. 48: 312–20.</p>
+</div>
+<div id="ref-OBoyle2011a">
+<p>O’Boyle, Noel, Michael Banck, Craig James, Chris Morley, Tim Vandermeersch, and Geoffrey Hutchison. 2011. “Open Babel: An open chemical toolbox.” <em>J. Cheminf.</em> 3 (1): 33. <a href="https://doi.org/10.1186/1758-2946-3-33">https://doi.org/10.1186/1758-2946-3-33</a>.</p>
+</div>
+<div id="ref-Rücker2007">
+<p>Rücker, C, G Rücker, and M. Meringer. 2007. “Y-Randomization and Its Variants in Qspr/Qsar.” <em>J. Chem. Inf. Model.</em>, no. 47: 2345–57.</p>
+</div>
+<div id="ref-Yap2011">
+<p>Yap, CW. 2011. “PaDEL-Descriptor: An Open Source Software to Calculate Molecular Descriptors and Fingerprints.” <em>Journal of Computational Chemistry</em>, no. 32: 1466–74.</p>
+</div>
+</div>
+</div>
+</article>
+<footer>
+ <p><small>Generated using <a href="https://github.com/pandoc-scholar/pandoc-scholar">pandoc scholar</a></small></p>
+</footer>
+</div>
+ <!--[if !IE]><script>fixScale(document);</script><![endif]-->
+</body>
+</html>
diff --git a/paper/outfile.latex b/paper/outfile.latex
new file mode 100644
index 0000000..9af84b1
--- /dev/null
+++ b/paper/outfile.latex
@@ -0,0 +1,779 @@
+\documentclass[]{scrartcl}
+\usepackage{lmodern}
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\usepackage{fixltx2e} % provides \textsubscript
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[T1]{fontenc}
+ \usepackage[utf8]{inputenc}
+\else % if luatex or xelatex
+ \ifxetex
+ \usepackage{mathspec}
+ \else
+ \usepackage{fontspec}
+ \fi
+ \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
+\fi
+% use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+% use microtype if available
+\IfFileExists{microtype.sty}{%
+\usepackage{microtype}
+\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+\usepackage[unicode=true]{hyperref}
+\hypersetup{
+ pdftitle={A comparison of random forest, support vector machine, deep learning and lazar algorithms for predicting mutagenicity},
+ pdfkeywords={mutagenicity, (Q)SAR, lazar, random forest, support vector machine, deep
+learning},
+ pdfborder={0 0 0},
+ breaklinks=true}
+\urlstyle{same} % don't use monospace font for urls
+\usepackage{longtable,booktabs}
+\usepackage{graphicx,grffile}
+\makeatletter
+\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
+\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
+\makeatother
+% Scale images if necessary, so that they will not overflow the page
+% margins by default, and it is still possible to overwrite the defaults
+% using explicit options in \includegraphics[width, height, ...]{}
+\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
+\IfFileExists{parskip.sty}{%
+\usepackage{parskip}
+}{% else
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{6pt plus 2pt minus 1pt}
+}
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+\setcounter{secnumdepth}{0}
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+\let\oldparagraph\paragraph
+\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+\let\oldsubparagraph\subparagraph
+\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+
+\title{A comparison of random forest, support vector machine, deep learning and
+lazar algorithms for predicting mutagenicity}
+\usepackage{authblk}
+\author[%
+ 1%
+ ]{%
+ Christoph Helma%
+ %
+ \textsuperscript{*\,}%
+ %%
+ %
+}
+\author[%
+ 2%
+ ]{%
+ Verena Schöning%
+ %
+ %
+}
+\author[%
+ 2%
+ ]{%
+ Philipp Boss%
+ %
+ %
+}
+\author[%
+ 2%
+ ]{%
+ Jürgen Drewe%
+ %
+ %
+}
+\affil[1]{\normalsize in silico toxicology gmbh, \footnotesize Rastatterstrasse 41, 4057 Basel, Switzerland}
+\affil[2]{\normalsize Zeller AG, \footnotesize Seeblickstrasse 4, 8590 Romanshorn, Switzerland}
+\date{}
+
+\makeatletter
+\def\@maketitle{%
+ \newpage \null \vskip 2em
+ \begin {center}%
+ \let \footnote \thanks
+ {\LARGE \@title \par}%
+ \vskip 1.5em%
+ {\large \lineskip .5em%
+ \begin {tabular}[t]{c}%
+ \@author
+ \end {tabular}\par}%
+ \vskip 0.2em{\textsuperscript{*}\,Correspondence:
+ Christoph Helma <helma@in-silico.ch>\\
+ }%
+ % \vskip 1em{\large \@date}%
+ \end {center}%
+ \par
+ \vskip 1.5em}
+\makeatother
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+k-nearest neighbor (\texttt{lazar}), random forest, support vector
+machine and deep learning algorithms were applied to a new
+\emph{Salmonella} mutagenicity dataset with 8281 unique chemical
+structures. Algorithm performance was evaluated using 5-fold
+crossvalidation. TODO - results - conclusion
+\end{abstract}
+
+\hypertarget{introduction}{%
+\section{Introduction}\label{introduction}}
+
+TODO: algo history
+
+TODO: dataset history
+
+TODO: open problems
+
+\hypertarget{materials-and-methods}{%
+\section{Materials and Methods}\label{materials-and-methods}}
+
+\hypertarget{mutagenicity-data}{%
+\subsection{Mutagenicity data}\label{mutagenicity-data}}
+
+For all methods, the same training dataset was used. The training
+dataset was compiled from the following sources:
+
+\begin{itemize}
+\item
+ Kazius/Bursi Dataset (4337 compounds, Kazius, McGuire, and Bursi
+ (2005)): \url{http://cheminformatics.org/datasets/bursi/cas_4337.zip}
+\item
+ Hansen Dataset (6513 compounds, Hansen et al. (2009)):
+ \url{http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv}
+\item
+ EFSA Dataset (695 compounds):
+ \url{https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX\%20data\%20and\%20dictionary.xls}
+\end{itemize}
+
+Mutagenicity classifications from the Kazius and Hansen datasets were
+used without further processing. To achieve consistency between these
+datasets, EFSA compounds were classified as mutagenic if at least one
+positive result was found for the TA98 or TA100 \emph{Salmonella} strains.
+
+Dataset merges were based on unique SMILES (\emph{Simplified Molecular
+Input Line Entry Specification}) strings of the compound structures.
+Duplicated experimental data with the same outcome was merged into a
+single value, because it is likely that it originated from the same
+experiment. Contradictory results were kept as multiple measurements in
+the database. The combined training dataset contains 8281 unique
+structures.
+
+Source code for all data download, extraction and merge operations is
+publicly available from the git repository
+\url{https://git.in-silico.ch/pyrrolizidine} under a GPL3 License.
+
+TODO: check/fix git repo
+
+For the Random Forest (RF), Support Vector Machines (SVM), and Deep
+Learning (DL) models, molecular descriptors were calculated with the
+PaDEL-Descriptors program (\url{http://www.yapcwsoft.com} version 2.21,
+Yap (2011)).
+
+TODO: sentence ??
+
+From these descriptors, a subset was chosen for the actual generation
+of the DL model.
+
+\hypertarget{algorithms}{%
+\subsection{Algorithms}\label{algorithms}}
+
+\hypertarget{lazar}{%
+\subsubsection{\texorpdfstring{\texttt{lazar}}{lazar}}\label{lazar}}
+
+\texttt{lazar} (\emph{lazy structure activity relationships}) is a
+modular framework for read-across model development and validation. It
+follows the following basic workflow: For a given chemical structure
+\texttt{lazar}:
+
+\begin{itemize}
+\item
+ searches in a database for similar structures (neighbours) with
+ experimental data,
+\item
+ builds a local QSAR model with these neighbours and
+\item
+ uses this model to predict the unknown activity of the query compound.
+\end{itemize}
+
+This procedure resembles an automated version of read-across predictions
+in toxicology; in machine learning terms it would be classified as a
+k-nearest-neighbour algorithm.
+
+Apart from this basic workflow, \texttt{lazar} is completely modular and
+allows the researcher to use any algorithm for similarity searches and
+local QSAR (\emph{Quantitative structure--activity relationship})
+modelling. Algorithms used within this study are described in the
+following sections.
+
+\hypertarget{neighbour-identification}{%
+\paragraph{Neighbour identification}\label{neighbour-identification}}
+
+Similarity calculations were based on MolPrint2D fingerprints (Bender et
+al. (2004)) from the OpenBabel cheminformatics library (O'Boyle et al.
+(2011)). The MolPrint2D fingerprint uses atom environments as molecular
+representation, which resembles basically the chemical concept of
+functional groups. For each atom in a molecule, it represents the
+chemical environment using the atom types of connected atoms.
+
+MolPrint2D fingerprints are generated dynamically from chemical
+structures and do not rely on predefined lists of fragments (such as
+OpenBabel FP3, FP4 or MACCS fingerprints or lists of
+toxicophores/toxicophobes). This has the advantage that they may capture
+substructures of toxicological relevance that are not included in other
+fingerprints.
+
+From MolPrint2D fingerprints a feature vector with all atom environments
+of a compound can be constructed that can be used to calculate chemical
+similarities.
+
+The chemical similarity between two compounds a and b (with sets of atom
+environments A and B) is expressed as the ratio of the number of atom
+environments common to both structures, \(|A \cap B|\), to the total
+number of atom environments, \(|A \cup B|\) (Jaccard/Tanimoto index).
+
+\[sim = \frac{\left| A\ \cap B \right|}{\left| A\ \cup B \right|}\]
+
+Threshold selection is a trade-off between prediction accuracy (high
+threshold) and the number of predictable compounds (low threshold). As
+it is in many practical cases desirable to make predictions even in the
+absence of closely related neighbours, we follow a tiered approach:
+
+\begin{itemize}
+\item
+ First a similarity threshold of 0.5 is used to collect neighbours, to
+ create a local QSAR model and to make a prediction for the query
+ compound.
+\item
+ If any of these steps fails, the procedure is repeated with a
+ similarity threshold of 0.2 and the prediction is flagged with a
+ warning that it might be out of the applicability domain of the
+ training data.
+\item
+ Similarity thresholds of 0.5 and 0.2 are the default values chosen
+ by the software developers and remained unchanged during the
+ course of these experiments.
+\end{itemize}
+
+Compounds with the same structure as the query structure are
+automatically eliminated from neighbours to obtain unbiased predictions
+in the presence of duplicates.
+
+\hypertarget{local-qsar-models-and-predictions}{%
+\paragraph{Local QSAR models and
+predictions}\label{local-qsar-models-and-predictions}}
+
+Only similar compounds (neighbours) above the threshold are used for
+local QSAR models. In this investigation, we are using a weighted
+majority vote from the neighbours' experimental data for mutagenicity
+classifications. Probabilities for both classes
+(mutagenic/non-mutagenic) are calculated according to the following
+formula and the class with the higher probability is used as prediction
+outcome.
+
+\[p_{c} = \frac{\sum \text{sim}_{n,c}}{\sum \text{sim}_{n}}\]
+
+\(p_{c}\) Probability of class c (e.g.~mutagenic or non-mutagenic)\\
+\(\sum \text{sim}_{n,c}\) Sum of similarities of neighbours with
+class c\\
+\(\sum \text{sim}_{n}\) Sum of similarities of all neighbours
+
+\hypertarget{applicability-domain}{%
+\paragraph{Applicability domain}\label{applicability-domain}}
+
+The applicability domain (AD) of \texttt{lazar} models is determined by
+the structural diversity of the training data. If no similar compounds
+are found in the training data no predictions will be generated.
+Warnings are issued if the similarity threshold had to be lowered from
+0.5 to 0.2 in order to enable predictions. Predictions without warnings
+can be considered as close to the applicability domain and predictions
+with warnings as more distant from the applicability domain.
+Quantitative applicability domain information can be obtained from the
+similarities of individual neighbours.
+
+\hypertarget{availability}{%
+\paragraph{Availability}\label{availability}}
+
+\begin{itemize}
+\item
+ \texttt{lazar} experiments for this manuscript:
+ \url{https://git.in-silico.ch/pyrrolizidine} (source code, GPL3)
+\item
+ \texttt{lazar} framework: \url{https://git.in-silico.ch/lazar} (source
+ code, GPL3)
+\item
+ \texttt{lazar} GUI: \url{https://git.in-silico.ch/lazar-gui} (source
+ code, GPL3)
+\item
+ Public web interface: \url{https://lazar.in-silico.ch}
+\end{itemize}
+
+\hypertarget{random-forest-support-vector-machines-and-deep-learning-in-r-project}{%
+\subsubsection{Random Forest, Support Vector Machines, and Deep Learning
+in
+R-project}\label{random-forest-support-vector-machines-and-deep-learning-in-r-project}}
+
+In comparison to \texttt{lazar}, three other models (Random Forest (RF),
+Support Vector Machines (SVM), and Deep Learning (DL)) were evaluated.
+
+For the generation of these models, molecular 1D and 2D descriptors of
+the training dataset were calculated using PaDEL-Descriptors
+(\url{http://www.yapcwsoft.com} version 2.21, Yap (2011)).
+
+As the training dataset contained over 8280 instances, it was decided to
+delete instances with missing values during data pre-processing.
+Furthermore, substances with equivocal outcome were removed. The final
+training dataset contained 8080 instances with known mutagenic
+potential. The RF, SVM, and DL models were generated using the R
+software (R-project for Statistical Computing,
+\url{https://www.r-project.org/}; version 3.3.1); the specific R
+packages used are identified for each step in the description below.
+During feature selection, descriptors with near-zero variance were
+removed using the `\emph{nearZeroVar}'-function (package `caret'). If the
+percentage of the most common value was more than 90\% or when the
+frequency ratio of the most common value to the second most common value
+was greater than 95:5 (e.g.~95 instances of the most common value and
+only 5 or less instances of the second most common value), a descriptor
+was classified as having a near zero variance. After that, highly
+correlated descriptors were removed using the
+`\emph{findCorrelation}'-function (package `caret') with a cut-off of
+0.9. This resulted in a training dataset with 516 descriptors. These
+descriptors were scaled to be in the range between 0 and 1 using the
+`\emph{preProcess}'-function (package `caret'). The scaling routine was
+saved in order to apply the same scaling on the testing dataset. As
+these three steps did not consider the outcome, it was decided that they
+do not need to be included in the cross-validation of the model. To
+further reduce the number of features, a LASSO (\emph{least absolute
+shrinkage and selection operator}) regression was performed using the
+`\emph{glmnet}'-function (package `\emph{glmnet}'). The reduced dataset
+was used for the generation of the pre-trained models.
+
+For the RF model, the `\emph{randomForest}'-function (package
+`\emph{randomForest}') was used. A forest with 1000 trees with maximal
+terminal nodes of 200 was grown for the prediction.
+
+The `\emph{svm}'-function (package `e1071') with a \emph{radial basis
+function kernel} was used for the SVM model.
+
+The DL model was generated using the `\emph{h2o.deeplearning}'-function
+(package `\emph{h2o}'). The DL model contained four hidden layers with 70,
+50, 50, and 10 neurons, respectively. Other hyperparameters were set as
+follows: l1=1.0E-7, l2=1.0E-11, epsilon = 1.0E-10, rho = 0.8, and
+quantile\_alpha = 0.5. For all other hyperparameters, the default values
+were used. Weights and biases were in a first step determined with an
+unsupervised DL model. These values were then used for the actual,
+supervised DL model.
+
+To validate these models, an internal cross-validation approach was
+chosen. The training dataset was randomly split into training data, which
+contained 95\% of the data, and validation data, which contained 5\% of
+the data. A feature selection with LASSO on the training data was
+performed, reducing the number of descriptors to approximately 100. This
+step was repeated five times. Based on each of the five different
+training data, the predictive models were trained and the performance
+tested with the validation data. This step was repeated 10 times.
+Furthermore, a y-randomisation using the RF model was performed. During
+y-randomisation, the outcome (y-variable) is randomly permuted. The
+theory is that after randomisation of the outcome, the model should not
+be able to correlate the outcome to the properties (descriptor values)
+of the substances. The performance of the model should therefore
+indicate a by-chance prediction with an accuracy of about 50\%. If this
+is true, it can be concluded that the correlation between actual outcome
+and properties of the substances is real and not by chance (Rücker,
+Rücker, and Meringer (2007)).
+
+\includegraphics[width=6.26875in,height=5.48611in]{media/image1.png}
+
+Figure 1: Flowchart of the generation and validation of the models
+generated in R-project
+
+\hypertarget{applicability-domain-1}{%
+\paragraph{Applicability domain}\label{applicability-domain-1}}
+
+The AD of the training dataset and the PA (pyrrolizidine alkaloid)
+dataset was evaluated using the Jaccard distance. A Jaccard distance of
+`0' indicates that the substances are similar, whereas a value of `1'
+shows that the substances are different. The Jaccard distance was below
+0.2 for all PAs relative to the training dataset. Therefore, the PA
+dataset is within the AD of the training dataset and the models can be
+used to predict the genotoxic potential of the PA dataset.
+
+\hypertarget{y-randomisation}{%
+\paragraph{y-randomisation}\label{y-randomisation}}
+
+After y-randomisation of the outcome, the accuracy and CCR are around
+50\%, indicating a chance distribution of the results. This shows that
+the outcome is actually related to the predictors and is not due to
+chance.
+
+\hypertarget{deep-learning-in-tensorflow}{%
+\subsubsection{Deep Learning in
+TensorFlow}\label{deep-learning-in-tensorflow}}
+
+Alternatively, a DL model was established with Python-based TensorFlow
+program (\url{https://www.tensorflow.org/}) using the high-level API
+Keras (\url{https://www.tensorflow.org/guide/keras}) to build the
+models.
+
+Data pre-processing was done by rank transformation using the
+`\emph{QuantileTransformer}' procedure. A sequential model has been
+used. Four layers have been used: input layer, two hidden layers (with
+12, 8 and 8 nodes, respectively) and one output layer. For the output
+layer, a sigmoidal activation function and for all other layers the ReLU
+(`\emph{Rectified Linear Unit}') activation function was used.
+Additionally, an L\textsuperscript{2}-penalty of 0.001 was used for the
+input layer. For training of the model, the ADAM algorithm was used to
+minimise the cross-entropy loss using the default parameters of Keras.
+Training was performed for 100 epochs with a batch size of 64. The model
+was implemented with Python 3.6 and Keras. For training of the model, a
+6-fold cross-validation was used. Accuracy was estimated by ROC-AUC and
+confusion matrix.
+
+\hypertarget{validation}{%
+\subsection{Validation}\label{validation}}
+
+\hypertarget{results}{%
+\section{Results}\label{results}}
+
+\hypertarget{lazar-1}{%
+\subsection{\texorpdfstring{\texttt{lazar}}{lazar}}\label{lazar-1}}
+
+\hypertarget{random-forest}{%
+\subsection{Random Forest}\label{random-forest}}
+
+The validation showed that the RF model has an accuracy of 64\%, a
+sensitivity of 66\% and a specificity of 63\%. The confusion matrix of
+the model, calculated for 8080 instances, is provided in Table 1.
+
+Table 1: Confusion matrix of the RF model
+
+\begin{longtable}[]{@{}lllll@{}}
+\toprule
+& Predicted genotoxicity & & &\tabularnewline
+\midrule
+\endhead
+Measured genotoxicity & & \textbf{\emph{PP}} & \textbf{\emph{PN}} &
+\textbf{\emph{Total}}\tabularnewline
+& \textbf{\emph{TP}} & 2274 & 1163 & 3437\tabularnewline
+& \textbf{\emph{TN}} & 1736 & 2907 & 4643\tabularnewline
+& \textbf{\emph{Total}} & 4010 & 4070 & 8080\tabularnewline
+\bottomrule
+\end{longtable}
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+\hypertarget{support-vector-machines}{%
+\subsection{Support Vector Machines}\label{support-vector-machines}}
+
+The validation showed that the SVM model has an accuracy of 62\%, a
+sensitivity of 65\% and a specificity of 60\%. The confusion matrix of
+SVM model, calculated for 8080 instances, is provided in Table 2.
+
+Table 2: Confusion matrix of the SVM model
+
+\begin{longtable}[]{@{}lllll@{}}
+\toprule
+& Predicted genotoxicity & & &\tabularnewline
+\midrule
+\endhead
+Measured genotoxicity & & \textbf{\emph{PP}} & \textbf{\emph{PN}} &
+\textbf{\emph{Total}}\tabularnewline
+& \textbf{\emph{TP}} & 2057 & 1107 & 3164\tabularnewline
+& \textbf{\emph{TN}} & 1953 & 2963 & 4916\tabularnewline
+& \textbf{\emph{Total}} & 4010 & 4070 & 8080\tabularnewline
+\bottomrule
+\end{longtable}
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+\hypertarget{deep-learning-r-project}{%
+\subsection{Deep Learning (R-project)}\label{deep-learning-r-project}}
+
+The validation showed that the DL model generated in R has an accuracy
+of 59\%, a sensitivity of 89\% and a specificity of 30\%. The confusion
+matrix of the model, normalised to 8080 instances, is provided in Table
+3.
+
+Table 3: Confusion matrix of the DL model (R-project)
+
+\begin{longtable}[]{@{}lllll@{}}
+\toprule
+& Predicted genotoxicity & & &\tabularnewline
+\midrule
+\endhead
+Measured genotoxicity & & \textbf{\emph{PP}} & \textbf{\emph{PN}} &
+\textbf{\emph{Total}}\tabularnewline
+& \textbf{\emph{TP}} & 3575 & 435 & 4010\tabularnewline
+& \textbf{\emph{TN}} & 2853 & 1217 & 4070\tabularnewline
+& \textbf{\emph{Total}} & 6428 & 1652 & 8080\tabularnewline
+\bottomrule
+\end{longtable}
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+\hypertarget{dl-model-tensorflow}{%
+\subsection{DL model (TensorFlow)}\label{dl-model-tensorflow}}
+
+The validation showed that the DL model generated in TensorFlow has an
+accuracy of 68\%, a sensitivity of 70\% and a specificity of 46\%. The
+confusion matrix of the model, normalised to 8080 instances, is provided
+in Table 4.
+
+Table 4: Confusion matrix of the DL model (TensorFlow)
+
+\begin{longtable}[]{@{}lllll@{}}
+\toprule
+& Predicted genotoxicity & & &\tabularnewline
+\midrule
+\endhead
+Measured genotoxicity & & \textbf{\emph{PP}} & \textbf{\emph{PN}} &
+\textbf{\emph{Total}}\tabularnewline
+& \textbf{\emph{TP}} & 2851 & 1227 & 4078\tabularnewline
+& \textbf{\emph{TN}} & 1825 & 2177 & 4002\tabularnewline
+& \textbf{\emph{Total}} & 4676 & 3404 & 8080\tabularnewline
+\bottomrule
+\end{longtable}
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+The ROC curves from the 6-fold validation are shown in Figure 7.
+
+\includegraphics[width=3.825in,height=2.7327in]{media/image7.png}
+
+Figure 7: Six-fold cross-validation of the TensorFlow DL model shows an
+average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68\%.
+
+In summary, the validation results of the four methods are presented in
+the following table.
+
+Table 5: Results of the cross-validation of the four models and after
+y-randomisation
+
+\begin{longtable}[]{@{}lllll@{}}
+\toprule
+\begin{minipage}[b]{0.28\columnwidth}\raggedright
+\strut
+\end{minipage} & \begin{minipage}[b]{0.13\columnwidth}\raggedright
+Accuracy\strut
+\end{minipage} & \begin{minipage}[b]{0.09\columnwidth}\raggedright
+CCR\strut
+\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\raggedright
+Sensitivity\strut
+\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\raggedright
+Specificity\strut
+\end{minipage}\tabularnewline
+\midrule
+\endhead
+\begin{minipage}[t]{0.28\columnwidth}\raggedright
+RF model\strut
+\end{minipage} & \begin{minipage}[t]{0.13\columnwidth}\raggedright
+64.1\%\strut
+\end{minipage} & \begin{minipage}[t]{0.09\columnwidth}\raggedright
+64.4\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+66.2\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+62.6\%\strut
+\end{minipage}\tabularnewline
+\begin{minipage}[t]{0.28\columnwidth}\raggedright
+SVM model\strut
+\end{minipage} & \begin{minipage}[t]{0.13\columnwidth}\raggedright
+62.1\%\strut
+\end{minipage} & \begin{minipage}[t]{0.09\columnwidth}\raggedright
+62.6\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+65.0\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+60.3\%\strut
+\end{minipage}\tabularnewline
+\begin{minipage}[t]{0.28\columnwidth}\raggedright
+DL model\\
+(R-project)\strut
+\end{minipage} & \begin{minipage}[t]{0.13\columnwidth}\raggedright
+59.3\%\strut
+\end{minipage} & \begin{minipage}[t]{0.09\columnwidth}\raggedright
+59.5\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+89.2\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+29.9\%\strut
+\end{minipage}\tabularnewline
+\begin{minipage}[t]{0.28\columnwidth}\raggedright
+DL model (TensorFlow)\strut
+\end{minipage} & \begin{minipage}[t]{0.13\columnwidth}\raggedright
+68\%\strut
+\end{minipage} & \begin{minipage}[t]{0.09\columnwidth}\raggedright
+62.2\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+69.9\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+45.6\%\strut
+\end{minipage}\tabularnewline
+\begin{minipage}[t]{0.28\columnwidth}\raggedright
+y-randomisation\strut
+\end{minipage} & \begin{minipage}[t]{0.13\columnwidth}\raggedright
+50.5\%\strut
+\end{minipage} & \begin{minipage}[t]{0.09\columnwidth}\raggedright
+50.4\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+50.3\%\strut
+\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedright
+50.6\%\strut
+\end{minipage}\tabularnewline
+\bottomrule
+\end{longtable}
+
+CCR (correct classification rate)
+
+\hypertarget{discussion}{%
+\section{Discussion}\label{discussion}}
+
+General model performance
+
+Based on the results of the cross-validation for all models,
+\texttt{lazar}, RF, SVM, DL (R-project) and DL (TensorFlow), it can be
+stated that the prediction results are not optimal due to different
+reasons. The accuracy as measured during cross-validation of the four
+models (RF, SVM, DL (R-project and TensorFlow)) was partly low with CCR
+values between 59.3 and 68\%, with the R-generated DL model and the
+TensorFlow-generated DL model showing the worst and the best
+performance, respectively. The validation of the R-generated DL model
+revealed a high sensitivity (89.2\%) but an unacceptably low specificity
+of 29.9\% indicating a high number of false positive estimates. The
+TensorFlow-generated DL model, however, showed an acceptable but not
+optimal accuracy of 68\%, a sensitivity of 69.9\% and a specificity of
+45.6\%. The low specificity indicates that both DL models tend to
+predict too many instances as positive (genotoxic), and therefore have a
+high false positive rate. At least with the TensorFlow-generated DL
+model, this allows group statements to be made, but the confidence in
+estimates for single PAs appears to be insufficient.
+
+Several factors have likely contributed to the low to moderate
+performance of the used methods as shown during the cross-validation:
+
+\begin{enumerate}
+\def\labelenumi{\arabic{enumi}.}
+\tightlist
+\item
+ The outcome in the training dataset was based on the results of Ames
+ tests for genotoxicity (ICH 2011), an
+ \emph{in vitro} test in different strains of the bacterium
+ \emph{Salmonella typhimurium}. In this test, mutagenicity is evaluated
+ with and without prior metabolic activation of the test substance.
+ Metabolic activation could result in the formation of genotoxic
+ metabolites from non-genotoxic parent compounds. However, no
+ distinction was made in the training dataset between substances that
+ needed metabolic activation before being mutagenic and those that were
+ mutagenic without metabolic activation. \texttt{lazar} is able to
+ handle this `inaccuracy' in the training dataset well due to the way
+ the algorithm works: \texttt{lazar} predicts the genotoxic potential
+ based on the neighbours of substances with comparable structural
+ features, considering mutagenic and not mutagenic neighbours. Based on
+ the structural similarity, a probability for mutagenicity and no
+ mutagenicity is calculated independently from each other (meaning that
+ the sum of probabilities does not necessarily add up to 100\%). The
+ class with the higher outcome is then the overall outcome for the
+ substance.
+\end{enumerate}
+
+\begin{quote}
+In contrast, the other models need to be trained first to recognise the
+structural features that are responsible for genotoxicity. Therefore,
+the mixture of substances being mutagenic with and without metabolic
+activation in the training dataset may have adversely affected the
+ability to separate the dataset in two distinct classes and thus
+explains the relatively low performance of these models.
+\end{quote}
+
+\begin{enumerate}
+\def\labelenumi{\arabic{enumi}.}
+\setcounter{enumi}{1}
+\tightlist
+\item
+ Machine learning algorithms try to find an optimized solution in a
+ high-dimensional (one dimension per each predictor) space. Sometimes
+ these methods do not find the global optimum of estimates but only
+ local (not optimal) solutions. Strategies to find the global solutions
+ are systematic variation (grid search) of the hyperparameters of the
+ methods, which may be very time consuming in particular in large
+ datasets.
+\end{enumerate}
+
+\hypertarget{conclusions}{%
+\section{Conclusions}\label{conclusions}}
+
+In this study, an attempt was made to predict the genotoxic potential of
+PAs using five different machine learning techniques (\texttt{lazar},
+RF, SVM, DL (R-project and TensorFlow)). The results of all models fitted
+only partly to the findings in literature, with best results obtained
+with the TensorFlow DL model. Therefore, modelling allows statements on
+the relative risks of genotoxicity of the different PA groups.
+Individual predictions for selected PAs appear, however, not reliable
+on the current basis of the used training dataset.
+
+This study emphasises the importance of critical assessment of
+predictions by QSAR models. This includes not only extensive literature
+research to assess the plausibility of the predictions, but also a good
+knowledge of the metabolism of the test substances and understanding for
+possible mechanisms of toxicity.
+
+In further studies, additional machine learning techniques or a modified
+(extended) training dataset should be used for an additional attempt to
+predict the genotoxic potential of PAs.
+
+\hypertarget{references}{%
+\section*{References}\label{references}}
+\addcontentsline{toc}{section}{References}
+
+\hypertarget{refs}{}
+\leavevmode\hypertarget{ref-Bender2004}{}%
+Bender, Andreas, Hamse Y. Mussa, Robert C. Glen, and Stephan Reiling.
+2004. ``Molecular Similarity Searching Using Atom Environments,
+Information-Based Feature Selection, and a Naïve Bayesian Classifier.''
+\emph{Journal of Chemical Information and Computer Sciences} 44 (1):
+170--78. \url{https://doi.org/10.1021/ci034207y}.
+
+\leavevmode\hypertarget{ref-Hansen2009}{}%
+Hansen, Katja, Sebastian Mika, Timon Schroeter, Andreas Sutter, Antonius
+ter Laak, Thomas Steger-Hartmann, Nikolaus Heinrich, and Klaus-Robert
+Müller. 2009. ``Benchmark Data Set for in Silico Prediction of Ames
+Mutagenicity.'' \emph{Journal of Chemical Information and Modeling} 49
+(9): 2077--81. \url{https://doi.org/10.1021/ci900161g}.
+
+\leavevmode\hypertarget{ref-Kazius2005}{}%
+Kazius, J., R. McGuire, and R. Bursi. 2005. ``Derivation and Validation
+of Toxicophores for Mutagenicity Prediction.'' \emph{J Med Chem}, no.
+48: 312--20.
+
+\leavevmode\hypertarget{ref-OBoyle2011a}{}%
+O'Boyle, Noel, Michael Banck, Craig James, Chris Morley, Tim
+Vandermeersch, and Geoffrey Hutchison. 2011. ``Open Babel: An open
+chemical toolbox.'' \emph{J. Cheminf.} 3 (1): 33.
+\url{https://doi.org/10.1186/1758-2946-3-33}.
+
+\leavevmode\hypertarget{ref-Ruxfccker2007}{}%
+Rücker, C, G Rücker, and M. Meringer. 2007. ``Y-Randomization and Its
+Variants in Qspr/Qsar.'' \emph{J. Chem. Inf. Model.}, no. 47: 2345--57.
+
+\leavevmode\hypertarget{ref-Yap2011}{}%
+Yap, CW. 2011. ``PaDEL-Descriptor: An Open Source Software to Calculate
+Molecular Descriptors and Fingerprints.'' \emph{Journal of Computational
+Chemistry}, no. 32: 1466--74.
+
+\end{document}
diff --git a/paper/outfile.odt b/paper/outfile.odt
new file mode 100644
index 0000000..eace967
--- /dev/null
+++ b/paper/outfile.odt
Binary files differ
diff --git a/paper/outfile.pdf b/paper/outfile.pdf
new file mode 100644
index 0000000..71422d6
--- /dev/null
+++ b/paper/outfile.pdf
Binary files differ
diff --git a/paper/pandoc-options.inc.mk b/paper/pandoc-options.inc.mk
new file mode 100644
index 0000000..32ec745
--- /dev/null
+++ b/paper/pandoc-options.inc.mk
@@ -0,0 +1,47 @@
+# Settings for Pandoc
+# ===================
+
+# Template locations. Each is assigned with `?=`, so a value that is already
+# set (environment, command line or including Makefile) takes precedence.
+TEMPLATE_FILE_LATEX ?= $(PANDOC_SCHOLAR_PATH)/templates/pandoc-scholar.latex
+TEMPLATE_FILE_HTML ?= $(PANDOC_SCHOLAR_PATH)/templates/pandoc-scholar.html
+TEMPLATE_FILE_JATS ?= $(PANDOC_SCHOLAR_PATH)/templates/pandoc-scholar.jats
+
+TEMPLATE_STYLE_HTML ?= $(PANDOC_SCHOLAR_PATH)/templates/styles/pandoc-scholar.css
+
+## Pandoc options
+PANDOC_READER_OPTIONS ?=
+
+# The default writer options are only assembled when PANDOC_WRITER_OPTIONS is
+# not already defined. The bibliography flags are added only when
+# BIBLIOGRAPHY_FILE is defined.
+ifndef PANDOC_WRITER_OPTIONS
+PANDOC_WRITER_OPTIONS = --standalone
+PANDOC_WRITER_OPTIONS += --filter=pandoc-citeproc
+ifdef BIBLIOGRAPHY_FILE
+PANDOC_WRITER_OPTIONS += --metadata "bibliography:$(BIBLIOGRAPHY_FILE)"
+PANDOC_WRITER_OPTIONS += --bibliography=$(BIBLIOGRAPHY_FILE)
+endif
+endif
+
+# Per-format options; empty unless preset. LaTeX defaults to the xelatex engine.
+PANDOC_ODT_OPTIONS ?=
+PANDOC_DOCX_OPTIONS ?=
+PANDOC_HTML_OPTIONS ?=
+PANDOC_EPUB_OPTIONS ?=
+ifndef PANDOC_LATEX_OPTIONS
+PANDOC_LATEX_OPTIONS = --pdf-engine=xelatex
+endif
+
+# Append reference documents and templates to the per-format options whenever
+# the corresponding variable is defined. These `+=` lines are outside the
+# ifndef guards above, so they also extend option values preset by the caller.
+# NOTE(review): TEMPLATE_FILE_EPUB and PANDOC_JATS_OPTIONS get no default
+# assignment in this file -- presumably they are set elsewhere or left
+# undefined on purpose; confirm.
+ifdef ODT_REFERENCE_FILE
+PANDOC_ODT_OPTIONS += --reference-doc=$(ODT_REFERENCE_FILE)
+endif
+ifdef DOCX_REFERENCE_FILE
+PANDOC_DOCX_OPTIONS += --reference-doc=$(DOCX_REFERENCE_FILE)
+endif
+ifdef TEMPLATE_FILE_LATEX
+PANDOC_LATEX_OPTIONS += --template=$(TEMPLATE_FILE_LATEX)
+endif
+ifdef TEMPLATE_FILE_HTML
+PANDOC_HTML_OPTIONS += --template=$(TEMPLATE_FILE_HTML)
+endif
+ifdef TEMPLATE_FILE_EPUB
+PANDOC_EPUB_OPTIONS += --template=$(TEMPLATE_FILE_EPUB)
+endif
+ifdef TEMPLATE_FILE_JATS
+PANDOC_JATS_OPTIONS += --template=$(TEMPLATE_FILE_JATS)
+endif
diff --git a/paper/scholar-filters/dkjson.lua b/paper/scholar-filters/dkjson.lua
new file mode 100644
index 0000000..fa50b9f
--- /dev/null
+++ b/paper/scholar-filters/dkjson.lua
@@ -0,0 +1,714 @@
+-- Module options:
+-- When true, the module calls json.use_lpeg (inside pcall) while loading.
+local always_try_using_lpeg = true
+-- When true, the module table is also stored in _G under global_module_name.
+local register_global_module_table = false
+local global_module_name = 'json'
+
+--[==[
+
+David Kolf's JSON module for Lua 5.1/5.2
+
+Version 2.5
+
+
+For the documentation see the corresponding readme.txt or visit
+<http://dkolf.de/src/dkjson-lua.fsl/>.
+
+You can contact the author by sending an e-mail to 'david' at the
+domain 'dkolf.de'.
+
+
+Copyright (C) 2010-2013 David Heiko Kolf
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--]==]
+
+-- global dependencies:
+local pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset =
+ pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset
+local error, require, pcall, select = error, require, pcall, select
+local floor, huge = math.floor, math.huge
+local strrep, gsub, strsub, strbyte, strchar, strfind, strlen, strformat =
+ string.rep, string.gsub, string.sub, string.byte, string.char,
+ string.find, string.len, string.format
+local strmatch = string.match
+local concat = table.concat
+
+-- The module table; it is returned at the end of the file.
+local json = { version = "dkjson 2.5" }
+
+-- Optionally publish the module as a global, depending on
+-- register_global_module_table.
+if register_global_module_table then
+ _G[global_module_name] = json
+end
+
+local _ENV = nil -- blocking globals in Lua 5.2
+
+-- Best effort: a failure to load the debug library is swallowed by pcall and
+-- the regular getmetatable stays in use.
+pcall (function()
+ -- Enable access to blocked metatables.
+ -- Don't worry, this module doesn't change anything in them.
+ local debmeta = require "debug".getmetatable
+ if debmeta then getmetatable = debmeta end
+end)
+
+-- Sentinel value whose __tojson metamethod makes it encode as "null".
+json.null = setmetatable ({}, {
+ __tojson = function () return "null" end
+})
+
+-- Decide whether `tbl` should be written as a JSON array.
+-- Every key must be a positive integer, except for an optional numeric field
+-- 'n' holding the declared length. Returns false for anything else and for
+-- tables that would yield an array with too many holes; otherwise returns
+-- true and the highest index (or 'n', if larger).
+local function isarray (tbl)
+  local highest, count, declared = 0, 0, 0
+  for key, val in pairs (tbl) do
+    if key == 'n' and type (val) == 'number' then
+      declared = val
+      if highest < val then
+        highest = val
+      end
+    elseif type (key) == 'number' and key >= 1 and floor (key) == key then
+      if highest < key then
+        highest = key
+      end
+      count = count + 1
+    else
+      return false
+    end
+  end
+  -- don't create an array with too many holes
+  local sparse = highest > 10 and highest > declared and highest > count * 2
+  if sparse then
+    return false
+  end
+  return true, highest
+end
+
+-- Short JSON escapes for the characters that have one.
+local escapecodes = {
+  ["\""] = "\\\"",
+  ["\\"] = "\\\\",
+  ["\b"] = "\\b",
+  ["\f"] = "\\f",
+  ["\n"] = "\\n",
+  ["\r"] = "\\r",
+  ["\t"] = "\\t",
+}
+
+-- Convert one character (a UTF-8 sequence of up to four bytes) into its JSON
+-- escape. Characters listed in `escapecodes` use their short escape; all
+-- others become "\uXXXX", or a UTF-16 surrogate pair for values above 0xffff.
+-- Returns "" when the bytes do not match one of the sequence forms below or
+-- the decoded value exceeds 0x10ffff.
+local function escapeutf8 (uchar)
+ local value = escapecodes[uchar]
+ if value then
+ return value
+ end
+ local a, b, c, d = strbyte (uchar, 1, 4)
+ -- absent bytes are treated as 0 so the comparisons below never see nil
+ a, b, c, d = a or 0, b or 0, c or 0, d or 0
+ -- decode by lead byte; continuation bytes are only checked for >= 0x80
+ if a <= 0x7f then
+ value = a
+ elseif 0xc0 <= a and a <= 0xdf and b >= 0x80 then
+ value = (a - 0xc0) * 0x40 + b - 0x80
+ elseif 0xe0 <= a and a <= 0xef and b >= 0x80 and c >= 0x80 then
+ value = ((a - 0xe0) * 0x40 + b - 0x80) * 0x40 + c - 0x80
+ elseif 0xf0 <= a and a <= 0xf7 and b >= 0x80 and c >= 0x80 and d >= 0x80 then
+ value = (((a - 0xf0) * 0x40 + b - 0x80) * 0x40 + c - 0x80) * 0x40 + d - 0x80
+ else
+ return ""
+ end
+ if value <= 0xffff then
+ return strformat ("\\u%.4x", value)
+ elseif value <= 0x10ffff then
+ -- encode as UTF-16 surrogate pair
+ value = value - 0x10000
+ local highsur, lowsur = 0xD800 + floor (value/0x400), 0xDC00 + (value % 0x400)
+ return strformat ("\\u%.4x\\u%.4x", highsur, lowsur)
+ else
+ return ""
+ end
+end
+
+-- gsub with a cheap pre-check: gsub always assembles a new string in a
+-- buffer, even without a match, so strings that do not contain the pattern
+-- are returned unchanged after a single find.
+local function fsub (str, pattern, repl)
+  if not strfind (str, pattern) then
+    return str
+  end
+  return gsub (str, pattern, repl)
+end
+
+-- Return `value` wrapped in double quotes as a JSON string literal.
+-- Control characters, the double quote, the backslash and byte 127 are
+-- always escaped; a further set of multi-byte sequences is escaped through
+-- escapeutf8 when one of their lead bytes occurs in the string.
+-- Also exported as json.quotestring.
+local function quotestring (value)
+ -- based on the regexp "escapable" in https://github.com/douglascrockford/JSON-js
+ value = fsub (value, "[%z\1-\31\"\\\127]", escapeutf8)
+ -- one find for all lead bytes used below, so strings without them skip the
+ -- individual substitutions
+ if strfind (value, "[\194\216\220\225\226\239]") then
+ value = fsub (value, "\194[\128-\159\173]", escapeutf8)
+ value = fsub (value, "\216[\128-\132]", escapeutf8)
+ value = fsub (value, "\220\143", escapeutf8)
+ value = fsub (value, "\225\158[\180\181]", escapeutf8)
+ value = fsub (value, "\226\128[\140-\143\168-\175]", escapeutf8)
+ value = fsub (value, "\226\129[\160-\175]", escapeutf8)
+ value = fsub (value, "\239\187\191", escapeutf8)
+ value = fsub (value, "\239\191[\176-\191]", escapeutf8)
+ end
+ return "\"" .. value .. "\""
+end
+json.quotestring = quotestring
+
+-- Replace the first plain-text (non-pattern) occurrence of `o` in `str`
+-- with `n`; `str` is returned unchanged when `o` does not occur.
+local function replace(str, o, n)
+  local first, last = strfind (str, o, 1, true)
+  if not first then
+    return str
+  end
+  local head = strsub (str, 1, first - 1)
+  local tail = strsub (str, last + 1, -1)
+  return head .. n .. tail
+end
+
+-- locale independent num2str and str2num functions
+-- decpoint: the decimal separator that tostring() produced at the last update
+-- numfilter: pattern matching runs of characters other than digits, signs,
+-- 'e'/'E' and that separator
+local decpoint, numfilter
+
+-- Refresh decpoint and numfilter from the current output of tostring(0.5).
+local function updatedecpoint ()
+ -- the first character that is not '0', '5' or '+' is taken as the separator
+ decpoint = strmatch(tostring(0.5), "([^05+])")
+ -- build a filter that can be used to remove group separators
+ -- (pattern magic characters in decpoint are escaped with '%')
+ numfilter = "[^0-9%-%+eE" .. gsub(decpoint, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0") .. "]+"
+end
+
+updatedecpoint()
+
+-- Format a number for JSON: strip every character matched by numfilter from
+-- tostring(num), then turn the locale's decimal separator into ".".
+local function num2str (num)
+  local filtered = fsub (tostring (num), numfilter, "")
+  return replace (filtered, decpoint, ".")
+end
+
+-- Parse a JSON number string. The "." is swapped for the cached decimal
+-- separator before calling tonumber; if that fails, the separator is
+-- refreshed and the conversion is tried once more. Returns nil when both
+-- attempts fail.
+local function str2num (str)
+  local parsed = tonumber (replace (str, ".", decpoint))
+  if parsed then
+    return parsed
+  end
+  updatedecpoint ()
+  parsed = tonumber (replace (str, ".", decpoint))
+  return parsed
+end
+
+-- Append a line break followed by the indentation for `level` to `buffer`,
+-- which currently holds `buflen` entries. Returns the new entry count.
+local function addnewline2 (level, buffer, buflen)
+  local newline_at, indent_at = buflen + 1, buflen + 2
+  buffer[newline_at] = "\n"
+  buffer[indent_at] = strrep (" ", level)
+  return indent_at
+end
+
+-- Public helper: when state.indent is set, append a line break plus
+-- indentation to state.buffer and update state.bufferlen. Does nothing
+-- otherwise.
+function json.addnewline (state)
+  if not state.indent then
+    return
+  end
+  local level = state.level or 0
+  local buffer = state.buffer
+  local buflen = state.bufferlen or #buffer
+  state.bufferlen = addnewline2 (level, buffer, buflen)
+end
+
+local encode2 -- forward declaration
+
+-- Write one `"key":value` member of a JSON object into `buffer`.
+-- A separating comma is written first when `prev` is set, and a line break
+-- with indentation when `indent` is set. Returns the results of encode2 for
+-- the value, or nil and a message when the key is neither string nor number.
+local function addpair (key, value, prev, indent, level, buffer, buflen, tables, globalorder, state)
+  local keytype = type (key)
+  local supported = keytype == 'string' or keytype == 'number'
+  if not supported then
+    return nil, "type '" .. keytype .. "' is not supported as a key by JSON."
+  end
+  if prev then
+    buffer[buflen + 1] = ","
+    buflen = buflen + 1
+  end
+  if indent then
+    buflen = addnewline2 (level, buffer, buflen)
+  end
+  buffer[buflen + 1] = quotestring (key)
+  buffer[buflen + 2] = ":"
+  return encode2 (value, indent, level, buffer, buflen + 2, tables, globalorder, state)
+end
+
+-- Append the result of a custom encoder or exception handler to `buffer`.
+-- Only string results are appended. The position is taken from
+-- state.bufferlen, and the resulting length is returned.
+local function appendcustom(res, buffer, state)
+  local buflen = state.bufferlen
+  if type (res) ~= 'string' then
+    return buflen
+  end
+  buffer[buflen + 1] = res
+  return buflen + 1
+end
+
+-- Report an encoding problem. Without a state.exception handler this returns
+-- nil and the default message (which falls back to `reason`). With a handler,
+-- state.bufferlen is synchronised first; a truthy handler result is appended
+-- to the buffer and the new length returned, otherwise nil and the handler's
+-- message (or the default message) are returned.
+local function exception(reason, value, state, buffer, buflen, defaultmessage)
+  if not defaultmessage then
+    defaultmessage = reason
+  end
+  local handler = state.exception
+  if handler then
+    state.bufferlen = buflen
+    local ret, msg = handler (reason, value, state, defaultmessage)
+    if ret then
+      return appendcustom(ret, buffer, state)
+    end
+    return nil, msg or defaultmessage
+  end
+  return nil, defaultmessage
+end
+
+-- Ready-made exception handler: returns the default message, wrapped in
+-- angle brackets, as a quoted JSON string.
+function json.encodeexception(reason, value, state, defaultmessage)
+  local text = "<" .. defaultmessage .. ">"
+  return quotestring(text)
+end
+
+-- Recursive worker behind json.encode: serialise `value` into `buffer`.
+--
+-- value:       the Lua value to encode
+-- indent:      truthy to emit line breaks and indentation inside objects
+-- level:       current nesting depth, used for indentation
+-- buffer:      array of string fragments being assembled
+-- buflen:      number of fragments already in `buffer`
+-- tables:      set of tables currently being encoded (reference cycle check)
+-- globalorder: optional array of keys giving the member order for objects
+-- state:       the encode state table (exception handler, bufferlen, ...)
+--
+-- Returns the new fragment count, or nil and a message on failure.
+encode2 = function (value, indent, level, buffer, buflen, tables, globalorder, state)
+  local valtype = type (value)
+  local valmeta = getmetatable (value)
+  valmeta = type (valmeta) == 'table' and valmeta -- only tables
+  local valtojson = valmeta and valmeta.__tojson
+  if valtojson then
+    -- values with a custom encoder
+    if tables[value] then
+      return exception('reference cycle', value, state, buffer, buflen)
+    end
+    tables[value] = true
+    state.bufferlen = buflen
+    local ret, msg = valtojson (value, state)
+    if not ret then return exception('custom encoder failed', value, state, buffer, buflen, msg) end
+    tables[value] = nil
+    buflen = appendcustom(ret, buffer, state)
+  elseif value == nil then
+    buflen = buflen + 1
+    buffer[buflen] = "null"
+  elseif valtype == 'number' then
+    local s
+    if value ~= value or value >= huge or -value >= huge then
+      -- This is the behaviour of the original JSON implementation.
+      s = "null"
+    else
+      s = num2str (value)
+    end
+    buflen = buflen + 1
+    buffer[buflen] = s
+  elseif valtype == 'boolean' then
+    buflen = buflen + 1
+    buffer[buflen] = value and "true" or "false"
+  elseif valtype == 'string' then
+    buflen = buflen + 1
+    buffer[buflen] = quotestring (value)
+  elseif valtype == 'table' then
+    if tables[value] then
+      return exception('reference cycle', value, state, buffer, buflen)
+    end
+    tables[value] = true
+    level = level + 1
+    local isa, n = isarray (value)
+    -- an empty table is an array unless its metatable marks it as an object
+    if n == 0 and valmeta and valmeta.__jsontype == 'object' then
+      isa = false
+    end
+    local msg
+    if isa then -- JSON array
+      buflen = buflen + 1
+      buffer[buflen] = "["
+      for i = 1, n do
+        buflen, msg = encode2 (value[i], indent, level, buffer, buflen, tables, globalorder, state)
+        if not buflen then return nil, msg end
+        if i < n then
+          buflen = buflen + 1
+          buffer[buflen] = ","
+        end
+      end
+      buflen = buflen + 1
+      buffer[buflen] = "]"
+    else -- JSON object
+      local prev = false
+      buflen = buflen + 1
+      buffer[buflen] = "{"
+      local order = valmeta and valmeta.__jsonorder or globalorder
+      if order then
+        local used = {}
+        n = #order
+        for i = 1, n do
+          local k = order[i]
+          local v = value[k]
+          -- compare against nil so that `false` members keep their ordered
+          -- position instead of being deferred to the unordered pass below
+          if v ~= nil then
+            used[k] = true
+            buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
+            -- stop on failure; continuing would do arithmetic on a nil length
+            if not buflen then return nil, msg end
+            prev = true -- add a separator before the next element
+          end
+        end
+        -- members that are not named in the order list follow in pairs() order
+        for k,v in pairs (value) do
+          if not used[k] then
+            buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
+            if not buflen then return nil, msg end
+            prev = true -- add a separator before the next element
+          end
+        end
+      else -- unordered
+        for k,v in pairs (value) do
+          buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
+          if not buflen then return nil, msg end
+          prev = true -- add a separator before the next element
+        end
+      end
+      if indent then
+        buflen = addnewline2 (level - 1, buffer, buflen)
+      end
+      buflen = buflen + 1
+      buffer[buflen] = "}"
+    end
+    tables[value] = nil
+  else
+    return exception ('unsupported type', value, state, buffer, buflen,
+      "type '" .. valtype .. "' is not supported by JSON.")
+  end
+  return buflen
+end
+
+-- Encode `value` as JSON.
+-- Without a caller-supplied state.buffer the JSON text is returned as a
+-- string. With one, the fragments are appended to that buffer,
+-- state.bufferlen is updated and true is returned. Encoding failures are
+-- raised as errors attributed to the caller.
+function json.encode (value, state)
+  if not state then
+    state = {}
+  end
+  local callerbuffer = state.buffer
+  local out = callerbuffer or {}
+  state.buffer = out
+  updatedecpoint()
+  local newlen, errmsg = encode2 (value, state.indent, state.level or 0,
+    out, state.bufferlen or 0, state.tables or {}, state.keyorder, state)
+  if not newlen then
+    error (errmsg, 2)
+  end
+  if callerbuffer == out then
+    state.bufferlen = newlen
+    return true
+  end
+  state.bufferlen = nil
+  state.buffer = nil
+  return concat (out)
+end
+
+-- Describe byte position `where` of `str` as "line L, column C" by counting
+-- the newline characters located before that position.
+local function loc (str, where)
+  local lineno, linestart = 1, 0
+  local nl = strfind (str, "\n", 1, true)
+  while nl and nl < where do
+    lineno = lineno + 1
+    linestart = nl
+    nl = strfind (str, "\n", nl + 1, true)
+  end
+  return "line " .. lineno .. ", column " .. (where - linestart)
+end
+
+-- Build the (nil, position, message) error triple for a construct `what`
+-- that started at `where` and was never closed. The reported position is one
+-- past the end of `str`.
+local function unterminated (str, what, where)
+  local message = "unterminated " .. what .. " at " .. loc (str, where)
+  return nil, strlen (str) + 1, message
+end
+
+-- Skip whitespace, UTF-8 byte order marks and `//` or `/* */` comments,
+-- starting at `pos`. Returns the position of the next significant character,
+-- or nil when the string ends first (this includes an unclosed `/*` comment
+-- and a `//` comment without a following line break).
+local function scanwhite (str, pos)
+ while true do
+ pos = strfind (str, "%S", pos)
+ if not pos then return nil end
+ local sub2 = strsub (str, pos, pos + 1)
+ if sub2 == "\239\187" and strsub (str, pos + 2, pos + 2) == "\191" then
+ -- UTF-8 Byte Order Mark
+ pos = pos + 3
+ elseif sub2 == "//" then
+ -- line comment: continue scanning at the line break that ends it
+ pos = strfind (str, "[\n\r]", pos + 2)
+ if not pos then return nil end
+ elseif sub2 == "/*" then
+ -- block comment: continue scanning after the closing "*/"
+ pos = strfind (str, "*/", pos + 2)
+ if not pos then return nil end
+ pos = pos + 2
+ else
+ return pos
+ end
+ end
+end
+
+-- Characters that may follow a backslash in a JSON string, mapped to the
+-- character they stand for.
+local escapechars = {
+  ["\""] = "\"",
+  ["\\"] = "\\",
+  ["/"] = "/",
+  ["b"] = "\b",
+  ["f"] = "\f",
+  ["n"] = "\n",
+  ["r"] = "\r",
+  ["t"] = "\t",
+}
+
+-- Encode the code point `value` as a UTF-8 string of one to four bytes.
+-- Returns nil for negative values and for values above 0x10ffff.
+local function unichar (value)
+ if value < 0 then
+ return nil
+ elseif value <= 0x007f then
+ -- one byte
+ return strchar (value)
+ elseif value <= 0x07ff then
+ -- two bytes: lead byte 0xc0 plus one continuation byte
+ return strchar (0xc0 + floor(value/0x40),
+ 0x80 + (floor(value) % 0x40))
+ elseif value <= 0xffff then
+ -- three bytes: lead byte 0xe0 plus two continuation bytes
+ return strchar (0xe0 + floor(value/0x1000),
+ 0x80 + (floor(value/0x40) % 0x40),
+ 0x80 + (floor(value) % 0x40))
+ elseif value <= 0x10ffff then
+ -- four bytes: lead byte 0xf0 plus three continuation bytes
+ return strchar (0xf0 + floor(value/0x40000),
+ 0x80 + (floor(value/0x1000) % 0x40),
+ 0x80 + (floor(value/0x40) % 0x40),
+ 0x80 + (floor(value) % 0x40))
+ else
+ return nil
+ end
+end
+
+-- Parse the JSON string literal whose opening quote is at `pos`.
+-- Returns the decoded string and the position just past the closing quote.
+-- When no closing quote is found, the error triple of unterminated() is
+-- returned instead.
+local function scanstring (str, pos)
+ local lastpos = pos + 1
+ -- decoded pieces are collected in `buffer` and joined at the end
+ local buffer, n = {}, 0
+ while true do
+ local nextpos = strfind (str, "[\"\\]", lastpos)
+ if not nextpos then
+ return unterminated (str, "string", pos)
+ end
+ -- copy the plain text before the quote or backslash
+ if nextpos > lastpos then
+ n = n + 1
+ buffer[n] = strsub (str, lastpos, nextpos - 1)
+ end
+ if strsub (str, nextpos, nextpos) == "\"" then
+ lastpos = nextpos + 1
+ break
+ else
+ local escchar = strsub (str, nextpos + 1, nextpos + 1)
+ local value
+ if escchar == "u" then
+ value = tonumber (strsub (str, nextpos + 2, nextpos + 5), 16)
+ if value then
+ local value2
+ if 0xD800 <= value and value <= 0xDBff then
+ -- we have the high surrogate of UTF-16. Check if there is a
+ -- low surrogate escaped nearby to combine them.
+ if strsub (str, nextpos + 6, nextpos + 7) == "\\u" then
+ value2 = tonumber (strsub (str, nextpos + 8, nextpos + 11), 16)
+ if value2 and 0xDC00 <= value2 and value2 <= 0xDFFF then
+ value = (value - 0xD800) * 0x400 + (value2 - 0xDC00) + 0x10000
+ else
+ value2 = nil -- in case it was out of range for a low surrogate
+ end
+ end
+ end
+ value = value and unichar (value)
+ if value then
+ -- skip one \uXXXX escape, or two when a surrogate pair was combined
+ if value2 then
+ lastpos = nextpos + 12
+ else
+ lastpos = nextpos + 6
+ end
+ end
+ end
+ end
+ -- simple escapes, and the fallback for \u escapes that could not be
+ -- decoded: an unknown escape character is kept as the character itself
+ if not value then
+ value = escapechars[escchar] or escchar
+ lastpos = nextpos + 2
+ end
+ n = n + 1
+ buffer[n] = value
+ end
+ end
+ if n == 1 then
+ return buffer[1], lastpos
+ elseif n > 1 then
+ return concat (buffer), lastpos
+ else
+ return "", lastpos
+ end
+end
+
+local scanvalue -- forward declaration
+
+-- Parse the contents of a JSON object or array whose opening bracket is at
+-- `startpos`.
+--
+-- what:       'object' or 'array'; selects the metatable and names the
+--             construct in error messages
+-- closechar:  the character that ends the construct
+-- nullval, objectmeta, arraymeta: passed through to scanvalue
+--
+-- Returns the new table and the position after the closing character, or
+-- nil, a position and a message on error.
+local function scantable (what, closechar, str, startpos, nullval, objectmeta, arraymeta)
+ -- NOTE(review): `len` is not read anywhere in this function.
+ local len = strlen (str)
+ local tbl, n = {}, 0
+ local pos = startpos + 1
+ if what == 'object' then
+ setmetatable (tbl, objectmeta)
+ else
+ setmetatable (tbl, arraymeta)
+ end
+ while true do
+ pos = scanwhite (str, pos)
+ if not pos then return unterminated (str, what, startpos) end
+ local char = strsub (str, pos, pos)
+ if char == closechar then
+ return tbl, pos + 1
+ end
+ local val1, err
+ val1, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta)
+ if err then return nil, pos, err end
+ pos = scanwhite (str, pos)
+ if not pos then return unterminated (str, what, startpos) end
+ char = strsub (str, pos, pos)
+ -- a value followed by ":" is stored as a key/value pair; any other value
+ -- is appended at the next integer index. This does not depend on `what`.
+ if char == ":" then
+ if val1 == nil then
+ return nil, pos, "cannot use nil as table index (at " .. loc (str, pos) .. ")"
+ end
+ pos = scanwhite (str, pos + 1)
+ if not pos then return unterminated (str, what, startpos) end
+ local val2
+ val2, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta)
+ if err then return nil, pos, err end
+ tbl[val1] = val2
+ pos = scanwhite (str, pos)
+ if not pos then return unterminated (str, what, startpos) end
+ char = strsub (str, pos, pos)
+ else
+ n = n + 1
+ tbl[n] = val1
+ end
+ -- a separating comma is skipped when present but is not required
+ if char == "," then
+ pos = pos + 1
+ end
+ end
+end
+
+-- Parse one JSON value starting at `pos` (default 1), after skipping leading
+-- whitespace and comments.
+--
+-- nullval:    the Lua value returned for JSON null
+-- objectmeta, arraymeta: metatables given to decoded objects and arrays
+--
+-- Returns the value and the position after it, or nil, a position and a
+-- message when no valid value is found.
+scanvalue = function (str, pos, nullval, objectmeta, arraymeta)
+ pos = pos or 1
+ pos = scanwhite (str, pos)
+ if not pos then
+ return nil, strlen (str) + 1, "no valid JSON value (reached the end)"
+ end
+ local char = strsub (str, pos, pos)
+ if char == "{" then
+ return scantable ('object', "}", str, pos, nullval, objectmeta, arraymeta)
+ elseif char == "[" then
+ return scantable ('array', "]", str, pos, nullval, objectmeta, arraymeta)
+ elseif char == "\"" then
+ return scanstring (str, pos)
+ else
+ -- the pattern only selects a number-like candidate; str2num decides
+ -- whether it really is a number
+ local pstart, pend = strfind (str, "^%-?[%d%.]+[eE]?[%+%-]?%d*", pos)
+ if pstart then
+ local number = str2num (strsub (str, pstart, pend))
+ if number then
+ return number, pend + 1
+ end
+ end
+ -- bare words: only true, false and null are accepted
+ pstart, pend = strfind (str, "^%a%w*", pos)
+ if pstart then
+ local name = strsub (str, pstart, pend)
+ if name == "true" then
+ return true, pend + 1
+ elseif name == "false" then
+ return false, pend + 1
+ elseif name == "null" then
+ return nullval, pend + 1
+ end
+ end
+ return nil, pos, "no valid JSON value at " .. loc (str, pos)
+ end
+end
+
+-- Return the metatables to use for decoded objects and arrays: the
+-- arguments exactly as passed when any were given (explicit nils included),
+-- otherwise two fresh default metatables carrying the matching __jsontype.
+local function optionalmetatables(...)
+  if select("#", ...) == 0 then
+    return {__jsontype = 'object'}, {__jsontype = 'array'}
+  end
+  return ...
+end
+
+-- Decode the JSON value in `str`, starting at `pos`. `nullval` is the Lua
+-- value used for JSON null; the optional extra arguments are the metatables
+-- for decoded objects and arrays. Returns the results of scanvalue.
+function json.decode (str, pos, nullval, ...)
+  local objmeta, arrmeta = optionalmetatables(...)
+  return scanvalue (str, pos, nullval, objmeta, arrmeta)
+end
+
+-- Replace json.decode with an implementation built on the LPeg library.
+-- Raises an error when LPeg cannot be loaded or reports version "0.11".
+-- On success, sets json.using_lpeg, turns json.use_lpeg into a function that
+-- just returns the module, and returns the module table.
+function json.use_lpeg ()
+ local g = require ("lpeg")
+
+ if g.version() == "0.11" then
+ error "due to a bug in LPeg 0.11, it cannot be used for JSON matching"
+ end
+
+ local pegmatch = g.match
+ local P, S, R = g.P, g.S, g.R
+
+ -- Match-time callback that records the first error (message and position)
+ -- in the state table and makes the match fail.
+ local function ErrorCall (str, pos, msg, state)
+ if not state.msg then
+ state.msg = msg .. " at " .. loc (str, pos)
+ state.pos = pos
+ end
+ return false
+ end
+
+ -- Pattern that fails with the given message; the state table is the second
+ -- extra argument passed to pegmatch.
+ local function Err (msg)
+ return g.Cmt (g.Cc (msg) * g.Carg (2), ErrorCall)
+ end
+
+ local SingleLineComment = P"//" * (1 - S"\n\r")^0
+ local MultiLineComment = P"/*" * (1 - P"*/")^0 * P"*/"
+ local Space = (S" \n\r\t" + P"\239\187\191" + SingleLineComment + MultiLineComment)^0
+
+ local PlainChar = 1 - S"\"\\\n\r"
+ local EscapeSequence = (P"\\" * g.C (S"\"\\/bfnrt" + Err "unsupported escape sequence")) / escapechars
+ local HexDigit = R("09", "af", "AF")
+ -- Combine two \uXXXX escapes into one character when they form a valid
+ -- high/low surrogate pair; otherwise the match fails.
+ local function UTF16Surrogate (match, pos, high, low)
+ high, low = tonumber (high, 16), tonumber (low, 16)
+ if 0xD800 <= high and high <= 0xDBff and 0xDC00 <= low and low <= 0xDFFF then
+ return true, unichar ((high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000)
+ else
+ return false
+ end
+ end
+ -- Convert a single \uXXXX escape to UTF-8.
+ local function UTF16BMP (hex)
+ return unichar (tonumber (hex, 16))
+ end
+ local U16Sequence = (P"\\u" * g.C (HexDigit * HexDigit * HexDigit * HexDigit))
+ local UnicodeEscape = g.Cmt (U16Sequence * U16Sequence, UTF16Surrogate) + U16Sequence/UTF16BMP
+ local Char = UnicodeEscape + EscapeSequence + PlainChar
+ local String = P"\"" * g.Cs (Char ^ 0) * (P"\"" + Err "unterminated string")
+ local Integer = P"-"^(-1) * (P"0" + (R"19" * R"09"^0))
+ local Fractal = P"." * R"09"^0
+ local Exponent = (S"eE") * (S"+-")^(-1) * R"09"^1
+ local Number = (Integer * Fractal^(-1) * Exponent^(-1))/str2num
+ -- null yields the first extra argument of pegmatch (the caller's nullval)
+ local Constant = P"true" * g.Cc (true) + P"false" * g.Cc (false) + P"null" * g.Carg (1)
+ local SimpleValue = Number + String + Constant
+ local ArrayContent, ObjectContent
+
+ -- The functions parsearray and parseobject parse only a single value/pair
+ -- at a time and store them directly to avoid hitting the LPeg limits.
+ local function parsearray (str, pos, nullval, state)
+ local obj, cont
+ local npos
+ local t, nt = {}, 0
+ repeat
+ obj, cont, npos = pegmatch (ArrayContent, str, pos, nullval, state)
+ if not npos then break end
+ pos = npos
+ nt = nt + 1
+ t[nt] = obj
+ until cont == 'last'
+ return pos, setmetatable (t, state.arraymeta)
+ end
+
+ local function parseobject (str, pos, nullval, state)
+ local obj, key, cont
+ local npos
+ local t = {}
+ repeat
+ key, obj, cont, npos = pegmatch (ObjectContent, str, pos, nullval, state)
+ if not npos then break end
+ pos = npos
+ t[key] = obj
+ until cont == 'last'
+ return pos, setmetatable (t, state.objectmeta)
+ end
+
+ local Array = P"[" * g.Cmt (g.Carg(1) * g.Carg(2), parsearray) * Space * (P"]" + Err "']' expected")
+ local Object = P"{" * g.Cmt (g.Carg(1) * g.Carg(2), parseobject) * Space * (P"}" + Err "'}' expected")
+ local Value = Space * (Array + Object + SimpleValue)
+ local ExpectedValue = Value + Space * Err "value expected"
+ -- each content pattern captures 'cont' when a comma follows and 'last'
+ -- otherwise, followed by the position after the match
+ ArrayContent = Value * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp()
+ local Pair = g.Cg (Space * String * Space * (P":" + Err "colon expected") * ExpectedValue)
+ ObjectContent = Pair * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp()
+ local DecodeValue = ExpectedValue * g.Cp ()
+
+ -- LPeg-based replacement for json.decode. Returns the value and the
+ -- position after it, or nil, the error position and the message of the
+ -- first recorded error.
+ function json.decode (str, pos, nullval, ...)
+ local state = {}
+ state.objectmeta, state.arraymeta = optionalmetatables(...)
+ local obj, retpos = pegmatch (DecodeValue, str, pos, nullval, state)
+ if state.msg then
+ return nil, state.pos, state.msg
+ else
+ return obj, retpos
+ end
+ end
+
+ -- use this function only once:
+ json.use_lpeg = function () return json end
+
+ json.using_lpeg = true
+
+ return json -- so you can get the module using json = require "dkjson".use_lpeg()
+end
+
+-- Try to switch to the LPeg-based decoder. pcall swallows any error raised
+-- by json.use_lpeg, in which case the pure-Lua json.decode stays in place.
+if always_try_using_lpeg then
+ pcall (json.use_lpeg)
+end
+
+return json
+
diff --git a/paper/scholar-filters/json-ld.lua b/paper/scholar-filters/json-ld.lua
new file mode 100644
index 0000000..b9cd101
--- /dev/null
+++ b/paper/scholar-filters/json-ld.lua
@@ -0,0 +1,233 @@
+-- json-ld.lua: add a JSON-LD metadata field describing the document.
+--
+-- Copyright (c) 2017-2018 Albert Krewinkel
+--
+-- This program is free software; you can redistribute it and/or modify it
+-- under the terms of the GNU public license version 2 or later.
+-- See the LICENSE file for details.
+local SCRIPT_DIR = PANDOC_SCRIPT_FILE:gsub('/[^/]*$', '')
+
+package.path = SCRIPT_DIR .. '/?.lua;' .. package.path
+
+local json = require "dkjson"
+local List = require 'pandoc.List'
+
+-- Convert a metadata value to a plain string. nil and strings are returned
+-- as they are; everything else is handed to pandoc.utils.stringify.
+local function stringify(x)
+  if x == nil or type(x) == 'string' then
+    return x
+  end
+  return pandoc.utils.stringify(x)
+end
+
+-- Turn a list of institute entries into a list of JSON-LD "Organization"
+-- objects carrying the stringified name and url where present.
+local function Organizations(orgs)
+  local converted = {}
+  for pos, organization in ipairs(orgs) do
+    local name, url = organization.name, organization.url
+    local entry = {}
+    entry["@type"] = "Organization"
+    entry["name"] = name and stringify(name)
+    entry['url'] = url and stringify(url)
+    converted[pos] = entry
+  end
+  return converted
+end
+
+-- Turn a list of author entries into a pandoc.MetaList of JSON-LD "Person"
+-- objects. The ORCID, when present, becomes the @id URL; institutes are
+-- converted with Organizations.
+local function Authors(authors)
+  local people = pandoc.MetaList{}
+  for pos, person in ipairs(authors) do
+    local orcid, name = person.orcid, person.name
+    local institute, email, url = person.institute, person.email, person.url
+    people[pos] = {
+      ['@type'] = "Person",
+      ['@id'] = orcid and ("https://orcid.org/" .. stringify(orcid)),
+      ["name"] = name and stringify(name),
+      ["affiliation"] = institute and Organizations(institute),
+      ['email'] = email and stringify(email),
+      ['url'] = url and stringify(url),
+    }
+  end
+  return people
+end
+
+-- Group the DOIs of cited works by their CiTO citation property.
+--
+-- bibjson:                array of bibliography records (tables with an `id`
+--                         and optionally a `DOI` field); nil counts as empty
+-- cites_by_cito_property: table mapping a CiTO property name to an array of
+--                         citation ids; nil counts as empty
+--
+-- Returns a table mapping "cito:<property>" to an array of
+-- {["@id"] = "http://dx.doi.org/<DOI>"} entries. Citations without a
+-- matching record, or whose record has no DOI, are left out.
+local function Cito (bibjson, cites_by_cito_property)
+  bibjson = bibjson or {}
+  cites_by_cito_property = cites_by_cito_property or {}
+
+  -- Index the records by id once instead of scanning the whole list for
+  -- every citation. The first record with a given id wins, as it would in a
+  -- front-to-back search.
+  local record_by_id = {}
+  for i = 1, #bibjson do
+    local record = bibjson[i]
+    local id = record.id
+    if id ~= nil and record_by_id[id] == nil then
+      record_by_id[id] = record
+    end
+  end
+
+  local result = {}
+  for citation_type, typed_citation_ids in pairs(cites_by_cito_property) do
+    for i = 1, #typed_citation_ids do
+      local bibentry = record_by_id[typed_citation_ids[i]]
+      if bibentry and bibentry.DOI then
+        -- all names are local so nothing leaks into the global environment
+        local cito_type_str = "cito:" .. citation_type
+        local citation_ld = {
+          ["@id"] = "http://dx.doi.org/" .. bibentry.DOI
+        }
+        if not result[cito_type_str] then
+          result[cito_type_str] = {}
+        end
+        table.insert(result[cito_type_str], citation_ld)
+      end
+    end
+  end
+  return result
+end
+
+-- Build schema.org descriptions for all cited works.
+--
+-- bibjson:      array of bibliography records (CSL-JSON style tables); nil
+--               counts as empty
+-- citation_ids: table whose keys are the ids of the cited records; nil
+--               counts as empty
+--
+-- Returns an array with one JSON-LD object per cited id that has a matching
+-- record. The order follows pairs() and is therefore unspecified.
+local function Citations (bibjson, citation_ids)
+  bibjson = bibjson or {}
+  citation_ids = citation_ids or {}
+
+  -- Linear search for the first record with the given id.
+  local function find_citation(id)
+    for i = 1, #bibjson do
+      if bibjson[i].id == id then
+        return bibjson[i]
+      end
+    end
+  end
+
+  -- Join the available name parts as "family, given". Missing parts are
+  -- skipped so that table.concat never meets a nil; `literal` is the fallback
+  -- when neither part is present.
+  local function author_name(person)
+    local parts = {}
+    if person.family then parts[#parts + 1] = person.family end
+    if person.given then parts[#parts + 1] = person.given end
+    if #parts == 0 and person.literal then
+      parts[1] = person.literal
+    end
+    return table.concat(parts, ", ")
+  end
+
+  -- Convert one bibliography record into a JSON-LD object.
+  local function CitationSchema(record)
+    -- not named `type`, to keep the builtin function usable
+    local schema_type
+    if record.type == "report" then
+      schema_type = "Report"
+    elseif record.type == "article-journal" then
+      schema_type = "ScholarlyArticle"
+    else
+      schema_type = "Article"
+    end
+
+    local authors = {}
+    if record.author then
+      for i = 1, #record.author do
+        authors[i] = {
+          name = author_name(record.author[i])
+        }
+      end
+    end
+
+    -- only the first entry of issued["date-parts"] is used; a missing entry
+    -- leaves the date out instead of raising an error
+    local date_parts = record.issued
+      and record.issued["date-parts"]
+      and record.issued["date-parts"][1]
+
+    return {
+      ["@context"] = {
+        ["@vocab"] = "http://schema.org/",
+        ["title"] = "headline",
+        ["page"] = "pagination",
+        ["date"] = "datePublished",
+        ["publisher"] = "publisher",
+        ["author"] = "author",
+      },
+      ["@type"] = schema_type,
+      ["@id"] = record.DOI and ("http://dx.doi.org/" .. record.DOI),
+      ["title"] = record.title,
+      ["author"] = Authors(authors),
+      ["date"] = date_parts and table.concat(date_parts, "-"),
+      ["publisher"] = record.publisher and
+        { ["@type"] = "Organization", ["name"] = record.publisher },
+      ["page"] = record.page,
+    }
+  end
+
+  local res = {}
+  for cit_id, _ in pairs(citation_ids) do
+    local citation_record = find_citation(cit_id)
+    if citation_record then
+      res[#res + 1] = CitationSchema(citation_record)
+    end
+  end
+  return res
+end
+
+-- Assemble the JSON-LD description of the document as a plain Lua table.
+--
+-- meta: document metadata. Fields read here: accessible_for_free, author,
+--       title, subtitle, date, cito_cites and bibliography_records. Missing
+--       author, cito_cites or bibliography_records are treated as empty.
+--
+-- Returns a table with "@context", "@type" and the document properties, plus
+-- one "cito:<property>" entry for every CiTO property that has cited DOIs.
+function json_ld (meta)
+  -- only referenced by the commented-out "image" entry below
+  local default_image = "https://upload.wikimedia.org/wikipedia/commons/f/fa/Globe.svg"
+  local accessible_for_free
+  if meta.accessible_for_free ~= nil then
+    accessible_for_free = meta.accessible_for_free
+  else
+    accessible_for_free = true
+  end
+  local context = {
+    ["@vocab"] = "http://schema.org/",
+    ["cito"] = "http://purl.org/spar/cito/",
+    ["author"] = "author",
+    ["name"] = "name",
+    ["title"] = "headline",
+    ["subtitle"] = "alternativeTitle",
+    ["publisher"] = "publisher",
+    ["date"] = "datePublished",
+    ["isFree"] = accessible_for_free and "isAccessibleForFree" or nil,
+    ["image"] = "image",
+    ["citation"] = "citation",
+  }
+
+  -- documents without citations or without a bibliography have none of these
+  -- fields; fall back to empty tables instead of failing in pairs/ipairs
+  local cito_cites = meta.cito_cites or {}
+  local bibliography_records = meta.bibliography_records or {}
+  local authors = meta.author or {}
+
+  local citation_ids = {}
+  for _, ids in pairs(cito_cites) do
+    for _, id in ipairs(ids) do citation_ids[id] = true end
+  end
+  local result = {
+    ["@context"] = context,
+    ["@type"] = "ScholarlyArticle",
+    ["author"] = Authors(authors),
+    ["name"] = stringify(meta.title),
+    ["title"] = stringify(meta.title),
+    ["subtitle"] = meta.subtitle and stringify(meta.subtitle),
+    ["date"] = meta.date and stringify(meta.date) or os.date("%Y-%m-%d"),
+    -- -- ["image"] = meta.image or default_image,
+    ["isFree"] = accessible_for_free,
+    ["citation"] = Citations(bibliography_records, citation_ids),
+  }
+  for k, v in pairs(Cito(bibliography_records, cito_cites)) do
+    result[k] = v
+  end
+  return result
+end
+
+-- Read a bibliography file by running `pandoc-citeproc --bib2json` on it and
+-- decoding the JSON it prints.
+--
+-- bibfilename: the bibliography path, as a string or a metadata value that
+--              stringify can convert; nil yields an empty list
+--
+-- Returns the decoded records, or an empty table when the command cannot be
+-- started or its output does not decode to a table.
+local function bibliography(bibfilename)
+  if not bibfilename then
+    return {}
+  end
+  -- NOTE(review): the file name is placed into the shell command unquoted, so
+  -- names with spaces or shell metacharacters will not work as intended.
+  local command = "pandoc-citeproc --bib2json " .. stringify(bibfilename)
+  local bibfile = io.popen(command, "r")
+  if not bibfile then
+    return {}
+  end
+  local jsonstr = bibfile:read("*a")
+  bibfile:close()
+  -- json.decode returns nil (plus position and message) on malformed input;
+  -- callers take the length of the result, so hand back a table in that case
+  local records = json.decode(jsonstr or "")
+  if type(records) ~= 'table' then
+    return {}
+  end
+  return records
+end
+
+-- Create a lookup function for `institutes`: the returned function converts
+-- its argument to a number (via stringify and tonumber) and returns the
+-- institute stored at that index.
+local function institute_resolver (institutes)
+  local function resolve (inst_idx)
+    local position = tonumber(stringify(inst_idx))
+    return institutes[position]
+  end
+  return resolve
+end
+
+-- Pandoc filter entry point: compute the JSON-LD description of the document
+-- and store it, encoded as a JSON string, in meta.jsonld.
+--
+-- The description is built from a shallow copy of the metadata in which each
+-- author's institute indices are replaced by the institute entries they
+-- refer to, and the bibliography is loaded into bibliography_records. Apart
+-- from the new jsonld field, `meta` is returned unchanged. Documents without
+-- author or institute fields are handled as having none.
+function Meta (meta)
+  -- shallow copy, so the replacements below do not touch the original tables
+  local function clone (obj)
+    local result = {}
+    for k, v in pairs(obj) do result[k] = v end
+    return result
+  end
+  local metadata = clone(meta)
+
+  local institutes = meta.institute or {}
+  local resolve_institute = function (idx)
+    return institutes[tonumber(idx)]
+  end
+  local tmp_authors = {}
+  for i, author_orig in ipairs(meta.author or {}) do
+    local author = clone(author_orig)
+    if author.institute then
+      author.institute = List.map(author.institute, resolve_institute)
+    end
+    tmp_authors[i] = author
+  end
+  metadata.author = tmp_authors
+
+  metadata.bibliography_records = bibliography(meta.bibliography)
+  local jsonld_object = json_ld(metadata)
+  meta.jsonld = json.encode(jsonld_object)
+
+  return meta
+end
diff --git a/paper/scholar-filters/template-helper.lua b/paper/scholar-filters/template-helper.lua
new file mode 100644
index 0000000..86af339
--- /dev/null
+++ b/paper/scholar-filters/template-helper.lua
@@ -0,0 +1,43 @@
+--[[
+template-helper: generate meta fields to be used in templates.
+
+Copyright © 2017–2018 Albert Krewinkel
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+]]
+
+local List = require 'pandoc.List'
+
+-- Pandoc filter entry point: prepare metadata fields for use in templates.
+--
+-- For every author, the original institute indices are kept in
+-- `institute_indices` and `institute` is replaced by the institute entries
+-- they refer to. `meta.has_equal_contributors` and `meta.has_correspondence`
+-- become truthy when any author has the corresponding fields set. Every
+-- institute receives its position as a string in `index`. Documents without
+-- author or institute fields are handled as having none.
+function Meta (meta)
+  -- kept in a local so a missing field is not added to the metadata
+  local institutes = meta.institute or {}
+
+  local function resolve_institute (idx)
+    return institutes[tonumber(idx)]
+  end
+
+  for _, author in ipairs(meta.author or {}) do
+    local institute_indices = List:new(author.institute)
+    local resolved = institute_indices:map(resolve_institute)
+    author.institute_indices = institute_indices
+    author.institute = resolved
+    meta.has_equal_contributors = meta.has_equal_contributors
+      or author.equal_contributor
+    -- correspondence only counts when the author also has an email
+    meta.has_correspondence = meta.has_correspondence
+      or author.correspondence and author.email
+  end
+
+  for i, institute in ipairs(institutes) do
+    institute.index = tostring(i)
+  end
+
+  return meta
+end
+
diff --git a/paper/templates/images/arrow-down.png b/paper/templates/images/arrow-down.png
new file mode 100644
index 0000000..5c55c6a
--- /dev/null
+++ b/paper/templates/images/arrow-down.png
Binary files differ
diff --git a/paper/templates/images/octocat-small.png b/paper/templates/images/octocat-small.png
new file mode 100644
index 0000000..57c1e44
--- /dev/null
+++ b/paper/templates/images/octocat-small.png
Binary files differ
diff --git a/paper/templates/images/pdf.png b/paper/templates/images/pdf.png
new file mode 100644
index 0000000..43270eb
--- /dev/null
+++ b/paper/templates/images/pdf.png
Binary files differ
diff --git a/paper/templates/pandoc-scholar.html b/paper/templates/pandoc-scholar.html
new file mode 100644
index 0000000..6b457c7
--- /dev/null
+++ b/paper/templates/pandoc-scholar.html
@@ -0,0 +1,119 @@
+<!doctype html>
+<!--
+Template created by Andrew G. York, based on this theme by Diana Mounter:
+https://github.com/broccolini/dinky, which mentioned that
+attribution is appreciated. Thanks, broccolini! -->
+<html lang="en">
+<head>
+ <base target="_blank"/>
+ <meta charset="utf-8"/>
+ <meta http-equiv="X-UA-Compatible" content="IE=edge"/>
+ <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no"/>
+$for(author-meta)$
+ <meta name="author" content="$author-meta$"/>
+$endfor$
+$if(date-meta)$
+ <meta name="dcterms.date" content="$date-meta$"/>
+$endif$
+$if(keywords)$
+ <meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$"/>
+$endif$
+ <title>$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$</title>
+ <style type="text/css">code{white-space: pre;}</style>
+$if(quotes)$
+ <style type="text/css">q { quotes: "“" "”" "‘" "’"; }</style>
+$endif$
+$if(highlighting-css)$
+ <style type="text/css">
+$highlighting-css$
+ </style>
+$endif$
+$for(css)$
+ <link rel="stylesheet" href="$css$">
+$endfor$
+$if(math)$
+$if(mathjax)$
+$-- MathJax is handled specially. We need to add the data-external attribute
+$-- so it doesn't get inlined (and thus broken) by the --self-contained option.
+$-- (2.7.2 is the default MathJax version as of Pandoc 2.2.1.)
+ <script data-external="1" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js?config=TeX-AMS_CHTML-full"></script>
+$else$
+ $math$
+$endif$
+$endif$
+ <!--[if lt IE 9]>
+ <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
+ <![endif]-->
+$for(header-includes)$
+ $header-includes$
+$endfor$
+</head>
+<body>
+<div class="wrapper">
+$if(project)$
+<header class="page-header">
+ <h1 class="header">$project.title$</h1>
+ <ul>
+$if(project.zip-url)$
+ <li class="download"><a class="buttons" href="$project.zip-url$">Download ZIP</a></li>
+$endif$
+$if(project.github-url)$
+ <li><a class="buttons github" href="$project.github-url$">View On GitHub</a></li>
+$endif$
+$if(project.pdf-url)$
+ <li><a class="buttons pdf" href="$project.pdf-url$">Download PDF</a></li>
+$endif$
+ </ul>
+ <p class="header">This project is maintained by $project.maintainer$</p>
+</header>
+$endif$
+<article typeof="ScholarlyArticle" vocab="http://schema.org/">
+ <!-- <header class="article-header"> -->
+<h1 property="headline">$title$</h1>
+$if(subtitle)$
+<p property="alternativeHeadline" class="subtitle">$subtitle$</p>
+$endif$
+<!-- </header> -->
+<p class="author-list">
+$for(author)$
+ $if(author.last)$and $endif$<span property="author" typeof="Person">
+ $author.name$</span><sup>$if(author.correspondence)$$if(author.email)$<a href="mailto:$author.email$">✉</a> $endif$$endif$$for(author.institute_indices)$$author.institute_indices$$sep$,$endfor$$if(author.equal_contributor)$,$if(equal_contributor_symbol)$$equal_contributor_symbol$$else$*$endif$$endif$</sup>$sep$,
+$endfor$
+</p>
+<div class="author_affiliations">
+$for(institute)$
+ <div class="affiliation"><sup>$institute.index$</sup>$institute.name$$if(institute.address)$, $institute.address$$endif$
+ </div>
+$endfor$
+</div>
+<div class="author-info">
+ $if(has_equal_contributors)$
+ <div class="author-contrib">
+ <sup>$if(equal_contributor_symbol)$$equal_contributor_symbol$$else$*$endif$</sup>These authors contributed equally to this work
+ </div>
+ $endif$
+ $if(has_correspondence)$
+ <div class="author-correspondence">
+ Correspondence: $for(author)$$if(author.correspondence)$$if(author.email)$$author.name$ <a href="mailto:$author.email$">&lt;$author.email$&gt;</a> $endif$$endif$$endfor$
+ </div>
+ $endif$
+</div>
+$if(abstract)$<p class="abstract" property="description">$abstract$</p>$endif$
+$if(doi)$<p><a href="https://doi.org/$doi$">doi: $doi$</a></p>$endif$
+
+$for(include-before)$
+$include-before$
+
+$endfor$
+
+<div property="articleBody" class="article-body">
+$body$
+</div>
+</article>
+<footer>
+ <p><small>Generated using <a href="https://github.com/pandoc-scholar/pandoc-scholar">pandoc scholar</a></small></p>
+</footer>
+</div>
+ <!--[if !IE]><script>fixScale(document);</script><![endif]-->
+</body>
+</html>
diff --git a/paper/templates/pandoc-scholar.latex b/paper/templates/pandoc-scholar.latex
new file mode 100644
index 0000000..11e10b9
--- /dev/null
+++ b/paper/templates/pandoc-scholar.latex
@@ -0,0 +1,309 @@
+\documentclass[$if(fontsize)$$fontsize$,$endif$$if(lang)$$babel-lang$,$endif$$if(papersize)$$papersize$paper,$endif$$for(classoption)$$classoption$$sep$,$endfor$]{$documentclass$}
+$if(beamerarticle)$
+\usepackage{beamerarticle} % needs to be loaded first
+$endif$
+$if(fontfamily)$
+\usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$}
+$else$
+\usepackage{lmodern}
+$endif$
+$if(linestretch)$
+\usepackage{setspace}
+\setstretch{$linestretch$}
+$endif$
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\usepackage{fixltx2e} % provides \textsubscript
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[$if(fontenc)$$fontenc$$else$T1$endif$]{fontenc}
+ \usepackage[utf8]{inputenc}
+$if(euro)$
+ \usepackage{eurosym}
+$endif$
+\else % if luatex or xelatex
+ \ifxetex
+ \usepackage{mathspec}
+ \else
+ \usepackage{fontspec}
+ \fi
+ \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
+$for(fontfamilies)$
+ \newfontfamily{$fontfamilies.name$}[$fontfamilies.options$]{$fontfamilies.font$}
+$endfor$
+$if(euro)$
+ \newcommand{\euro}{€}
+$endif$
+$if(mainfont)$
+ \setmainfont[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$]{$mainfont$}
+$endif$
+$if(sansfont)$
+ \setsansfont[$for(sansfontoptions)$$sansfontoptions$$sep$,$endfor$]{$sansfont$}
+$endif$
+$if(monofont)$
+ \setmonofont[Mapping=tex-ansi$if(monofontoptions)$,$for(monofontoptions)$$monofontoptions$$sep$,$endfor$$endif$]{$monofont$}
+$endif$
+$if(mathfont)$
+ \setmathfont(Digits,Latin,Greek)[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$}
+$endif$
+$if(CJKmainfont)$
+ \usepackage{xeCJK}
+ \setCJKmainfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$}
+$endif$
+\fi
+% use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+% use microtype if available
+\IfFileExists{microtype.sty}{%
+\usepackage{microtype}
+\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+$if(geometry)$
+\usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry}
+$endif$
+\usepackage[unicode=true]{hyperref}
+$if(colorlinks)$
+\PassOptionsToPackage{usenames,dvipsnames}{color} % color is loaded by hyperref
+$endif$
+\hypersetup{
+$if(title-meta)$
+ pdftitle={$title-meta$},
+$endif$
+$if(author-meta)$
+ pdfauthor={$author-meta$},
+$endif$
+$if(keywords)$
+ pdfkeywords={$for(keywords)$$keywords$$sep$; $endfor$},
+$endif$
+$if(colorlinks)$
+ colorlinks=true,
+ linkcolor=$if(linkcolor)$$linkcolor$$else$Maroon$endif$,
+ citecolor=$if(citecolor)$$citecolor$$else$Blue$endif$,
+ urlcolor=$if(urlcolor)$$urlcolor$$else$Blue$endif$,
+$else$
+ pdfborder={0 0 0},
+$endif$
+ breaklinks=true}
+\urlstyle{same} % don't use monospace font for urls
+$if(lang)$
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[shorthands=off,$for(babel-otherlangs)$$babel-otherlangs$,$endfor$main=$babel-lang$]{babel}
+$if(babel-newcommands)$
+ $babel-newcommands$
+$endif$
+\else
+ \usepackage{polyglossia}
+ \setmainlanguage[$polyglossia-lang.options$]{$polyglossia-lang.name$}
+$for(polyglossia-otherlangs)$
+ \setotherlanguage[$polyglossia-otherlangs.options$]{$polyglossia-otherlangs.name$}
+$endfor$
+\fi
+$endif$
+$if(natbib)$
+\usepackage{natbib}
+\bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$}
+$endif$
+$if(biblatex)$
+\usepackage[$if(biblio-style)$style=$biblio-style$,$endif$$for(biblatexoptions)$$biblatexoptions$$sep$,$endfor$]{biblatex}
+$for(bibliography)$
+\addbibresource{$bibliography$}
+$endfor$
+$endif$
+$if(listings)$
+\usepackage{listings}
+$endif$
+$if(lhs)$
+\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{}
+$endif$
+$if(highlighting-macros)$
+$highlighting-macros$
+$endif$
+$if(verbatim-in-note)$
+\usepackage{fancyvrb}
+\VerbatimFootnotes % allows verbatim text in footnotes
+$endif$
+$if(tables)$
+\usepackage{longtable,booktabs}
+$endif$
+$if(graphics)$
+\usepackage{graphicx,grffile}
+\makeatletter
+\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
+\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
+\makeatother
+% Scale images if necessary, so that they will not overflow the page
+% margins by default, and it is still possible to overwrite the defaults
+% using explicit options in \includegraphics[width, height, ...]{}
+\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
+$endif$
+$if(links-as-notes)$
+% Make links footnotes instead of hotlinks:
+\renewcommand{\href}[2]{#2\footnote{\url{#1}}}
+$endif$
+$if(strikeout)$
+\usepackage[normalem]{ulem}
+% avoid problems with \sout in headers with hyperref:
+\pdfstringdefDisableCommands{\renewcommand{\sout}{}}
+$endif$
+$if(indent)$
+$else$
+\IfFileExists{parskip.sty}{%
+\usepackage{parskip}
+}{% else
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{6pt plus 2pt minus 1pt}
+}
+$endif$
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+$if(numbersections)$
+\setcounter{secnumdepth}{$if(secnumdepth)$$secnumdepth$$else$5$endif$}
+$else$
+\setcounter{secnumdepth}{0}
+$endif$
+$if(subparagraph)$
+$else$
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+\let\oldparagraph\paragraph
+\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+\let\oldsubparagraph\subparagraph
+\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+$endif$
+$if(dir)$
+\ifxetex
+ % load bidi as late as possible as it modifies e.g. graphicx
+ $if(latex-dir-rtl)$
+ \usepackage[RTLdocument]{bidi}
+ $else$
+ \usepackage{bidi}
+ $endif$
+\fi
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \TeXXeTstate=1
+ \newcommand{\RL}[1]{\beginR #1\endR}
+ \newcommand{\LR}[1]{\beginL #1\endL}
+ \newenvironment{RTL}{\beginR}{\endR}
+ \newenvironment{LTR}{\beginL}{\endL}
+\fi
+$endif$
+$for(header-includes)$
+$header-includes$
+$endfor$
+
+$if(title)$
+\title{$title$$if(thanks)$\thanks{$thanks$}$endif$}
+$endif$
+$if(subtitle)$
+\usepackage{etoolbox}
+\makeatletter
+\providecommand{\subtitle}[1]{% add subtitle to \maketitle
+ \apptocmd{\@title}{\par {\large #1}}{}{}
+}
+\makeatother
+\subtitle{$subtitle$}
+$endif$
+\usepackage{authblk}
+$for(author)$
+\author[$for(author.institute_indices)$%
+ $author.institute_indices$%
+ $sep$,$endfor$]{%
+ $author.name$%
+ $if(author.correspondence)$$if(author.email)$%
+ \textsuperscript{*\,}%
+ %$endif$$endif$%
+ $if(author.equal_contributor)$
+ \textsuperscript{$if(equal_contributor_symbol)$$equal_contributor_symbol$$else$\textdagger$endif$\,}%
+ $endif$%
+}
+$endfor$
+$for(institute)$
+\affil[$institute.index$]{\normalsize $institute.name$$if(institute.address)$, \footnotesize $institute.address$$endif$}
+$endfor$
+\date{$date$}
+
+\makeatletter
+\def\@maketitle{%
+ \newpage \null \vskip 2em
+ \begin {center}%
+ \let \footnote \thanks
+ {\LARGE \@title \par}%
+ \vskip 1.5em%
+ {\large \lineskip .5em%
+ \begin {tabular}[t]{c}%
+ \@author
+ \end {tabular}\par}%
+ $if(has_equal_contributors)$
+ \vskip 0.5em{\textsuperscript{$if(equal_contributor_symbol)$$equal_contributor_symbol$$else$\textdagger$endif$}\,%
+ These authors contributed equally to this work.}
+ $endif$
+ $if(has_correspondence)$
+ \vskip 0.2em{\textsuperscript{*}\,Correspondence:
+ $for(author)$$if(author.correspondence)$$if(author.email)$
+ $author.name$ <$author.email$>\\
+ $endif$$endif$$endfor$}%
+ %$endif$
+ \vskip 1em{\large \@date}%
+ \end {center}%
+ \par
+ \vskip 1.5em}
+\makeatother
+
+\begin{document}
+
+$if(title)$
+\maketitle
+$endif$
+
+$if(abstract)$
+\begin{abstract}
+$abstract$
+\end{abstract}
+$endif$
+
+$for(include-before)$
+$include-before$
+
+$endfor$
+$if(toc)$
+{
+$if(colorlinks)$
+\hypersetup{linkcolor=$if(toccolor)$$toccolor$$else$black$endif$}
+$endif$
+\setcounter{tocdepth}{$toc-depth$}
+\tableofcontents
+}
+$endif$
+$if(lot)$
+\listoftables
+$endif$
+$if(lof)$
+\listoffigures
+$endif$
+$body$
+
+$if(natbib)$
+$if(bibliography)$
+$if(biblio-title)$
+$if(book-class)$
+\renewcommand\bibname{$biblio-title$}
+$else$
+\renewcommand\refname{$biblio-title$}
+$endif$
+$endif$
+\bibliography{$for(bibliography)$$bibliography$$sep$,$endfor$}
+
+$endif$
+$endif$
+$if(biblatex)$
+\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$
+
+$endif$
+$for(include-after)$
+$include-after$
+
+$endfor$
+\end{document}
diff --git a/paper/templates/styles/pandoc-scholar.css b/paper/templates/styles/pandoc-scholar.css
new file mode 100644
index 0000000..7588b69
--- /dev/null
+++ b/paper/templates/styles/pandoc-scholar.css
@@ -0,0 +1,478 @@
+@import url(https://fonts.googleapis.com/css?family=Arvo:400,700,400italic);
+
+/* MeyerWeb Reset */
+
+html, body, div, span, applet, object, iframe,
+h1, h2, h3, h4, h5, h6, p, blockquote, pre,
+a, abbr, acronym, address, big, cite, code,
+del, dfn, em, img, ins, kbd, q, s, samp,
+small, strike, strong, sub, sup, tt, var,
+b, u, i, center,
+dl, dt, dd, ol, ul, li,
+fieldset, form, label, legend,
+table, caption, tbody, tfoot, thead, tr, th, td,
+article, aside, canvas, details, embed,
+figure, figcaption, footer, header, hgroup,
+article, menu, nav, output, ruby, section, summary,
+time, mark, audio, video {
+ margin: 0;
+ padding: 0;
+ border: 0;
+ font: inherit;
+ vertical-align: baseline;
+}
+
+
+/* Base text styles */
+
+body {
+ padding: 10px 50px 0 0;
+ font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
+ font-size: 15px;
+ color: #030303;
+ background-color: #FCFBF8;
+ margin: 0;
+ line-height: 1.8em;
+ -webkit-font-smoothing: antialiased;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #232323;
+ margin: 36px 0 10px;
+}
+
+p, ul, ol, table, dl {
+ margin: 0 0 22px;
+}
+
+sub, sup {
+ font-size: 80%;
+}
+
+sub {
+ vertical-align: sub;
+}
+
+sup {
+  vertical-align: super; /* the CSS keyword is "super"; "sup" is ignored by browsers */
+}
+
+.author-list {
+ margin:0 0 0px;
+ font-weight: 700;
+}
+
+.author-affiliations, .author_affiliations { /* the HTML template emits the underscore spelling */
+  margin:0 0 0px;
+  font-style: italic;
+}
+
+.contact_email {
+ font-style: italic;
+}
+
+.abstract {
+ margin: 0% 1% 2%;
+ font-weight: 700;
+}
+
+h1, h2, h3 {
+ border-bottom: 1px solid #ccc;
+ font-family: Arvo, Monaco, serif;
+ font-weight: normal;
+ line-height: 1.3;
+ padding-bottom: 5px;
+}
+
+h1 {
+ font-size: 30px;
+}
+
+h2 {
+ font-size: 24px;
+}
+
+h3 {
+ font-size: 18px;
+}
+
+h4, h5 {
+ font-family: Arvo, Monaco, serif;
+ font-weight: 700;
+}
+
+h6 {
+ font-family: Arvo, Monaco, serif;
+ font-weight: 200;
+}
+
+a {
+ font-weight:200;
+ text-decoration:none;
+}
+
+a:hover {
+ text-decoration: underline;
+}
+
+a small {
+ font-size: 12px;
+}
+
+em {
+ font-style: italic;
+}
+
+strong {
+ font-weight:700;
+}
+
+sup {
+ vertical-align: super;
+ font-size: smaller;
+}
+
+ul {
+ list-style-position: inside;
+ list-style: disc;
+ padding-left: 25px;
+}
+
+ol {
+ list-style-position: inside;
+ list-style: decimal;
+ padding-left: 25px;
+}
+
+blockquote {
+ margin: 0;
+ padding: 0 0 0 20px;
+ font-style: italic;
+}
+
+dl, dt, dd, dl p {
+  color: #444; /* text colour is set with "color"; CSS has no "font-color" property */
+}
+
+dl dt {
+ font-weight: bold;
+}
+
+dl dd {
+ padding-left: 20px;
+ font-style: italic;
+}
+
+dl p {
+ padding-left: 20px;
+ font-style: italic;
+}
+
+hr {
+ border: 0;
+ background: #ccc;
+ height: 1px;
+ margin: 0 0 24px;
+}
+
+/* Images */
+
+img {
+ position: relative;
+ margin: 0 auto;
+ height: auto;
+ max-width: 100%;
+ padding: 0px;
+ margin: 0px 0 0px 0;
+ border: 0px solid #ccc;
+}
+
+p img {
+ display: inline;
+ margin: 0;
+ padding: 0;
+ vertical-align: middle;
+ text-align: center;
+ border: none;
+}
+
+figure {
+ border: 1px solid #ccc;
+ background: #FFFFFF;
+}
+
+figcaption {
+ font-size: 12px;
+ background: #FFFFFF;
+ line-height: 150%;
+ margin-right: 1%;
+ margin-left: 1%;
+}
+
+/* Code blocks */
+
+code, pre {
+ font-family: Monaco, "Bitstream Vera Sans Mono", "Lucida Console", Terminal, monospace;
+ color: #000;
+ background: #e7e7e7;
+ font-size: 12px;
+}
+
+pre {
+ padding: 4px 12px;
+ border-radius:4px;
+ border:1px solid #D7D8C8;
+ overflow: auto;
+ overflow-y: hidden;
+ margin-bottom: 32px;
+}
+
+
+/* Tables */
+
+table {
+ width: 100%;
+ border: 1px solid #ccc;
+ margin-bottom: 32px;
+ text-align: left;
+ }
+
+table.figure_controls {
+ font-size: 12px;
+ line-height: 100%;
+ margin-bottom: 0px;
+}
+
+th {
+ background: #232323;
+ color: #FDFEFB;
+ font-family: 'Arvo', Helvetica, Arial, sans-serif;
+ font-size: 18px;
+ font-weight: normal;
+ padding: 10px;
+}
+
+td {
+ background: #eee;
+ padding: 0px;
+}
+
+
+/* Wrapper */
+.wrapper {
+ width:960px;
+}
+
+
+/* Header */
+
+.page-header {
+ background-color: #474747;
+ border-bottom-right-radius: 4px;
+ border-top-right-radius: 4px;
+ border: 1px solid #000;
+ color: #FDFDFB;
+ float: left;
+ margin: 30px 25px 0 0;
+ padding: 34px 25px 22px 50px;
+ position: fixed;
+ width: 170px;
+ -webkit-font-smoothing: antialiased;
+}
+
+.subtitle {
+ font-size: 16px;
+}
+
+.page-header h1 {
+ font-family: Arvo, sans-serif;
+ font-size: 30px;
+ font-weight: 300;
+ line-height: 1.3em;
+ border-bottom: none;
+ margin-top: 0;
+}
+
+
+.page-header h1,
+.page-header a {
+ color: #fff;
+}
+
+.page-header a {
+ text-decoration: underline;
+}
+
+a.name {
+ white-space: nowrap;
+}
+
+.page-header ul {
+ list-style:none;
+ padding:0;
+}
+
+.page-header li {
+  list-style-type: none;
+  width: 135px;
+  height: 15px;
+  margin-bottom: 12px;
+  line-height: 1em;
+  padding: 6px 6px 6px 7px;
+  background: #1100AF; /* solid fallback when no gradient syntax is supported */
+  background: -moz-linear-gradient(top, #1100AF 0%, #110082 100%);
+  background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#1100AF), color-stop(100%,#110082));
+  background: -webkit-linear-gradient(top, #1100AF 0%,#110082 100%);
+  background: -o-linear-gradient(top, #1100AF 0%,#110082 100%);
+  background: -ms-linear-gradient(top, #1100AF 0%,#110082 100%);
+  background: linear-gradient(to bottom, #1100AF 0%,#110082 100%); /* standard syntax names the target side */
+  border-radius:4px;
+  border:1px solid #0D0D0D;
+  -webkit-box-shadow: inset 0px 1px 1px 0 rgba(38,2,233, 1);
+  box-shadow: inset 0px 1px 1px 0 rgba(38,2,233, 1);
+}
+
+.page-header li:hover {
+  background: #1D00C3; /* solid fallback when no gradient syntax is supported */
+  background: -moz-linear-gradient(top, #1D00C3 0%, #190195 100%);
+  background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#1D00C3), color-stop(100%,#190195));
+  background: -webkit-linear-gradient(top, #1D00C3 0%,#190195 100%);
+  background: -o-linear-gradient(top, #1D00C3 0%,#190195 100%);
+  background: -ms-linear-gradient(top, #1D00C3 0%,#190195 100%);
+  background: linear-gradient(to bottom, #1D00C3 0%,#190195 100%); /* standard syntax names the target side */
+}
+
+.buttons {
+ -webkit-font-smoothing: antialiased;
+ background: url(../images/arrow-down.png) no-repeat;
+ font-weight: normal;
+ height: 30px;
+ padding: 2px 2px 2px 22px;
+ text-shadow: rgba(0, 0, 0, 0.4) 0 -1px 0;
+}
+
+a.buttons {
+ text-decoration: none;
+}
+
+.buttons.github {
+ background: url(../images/octocat-small.png) no-repeat 1px;
+}
+
+.buttons.pdf {
+ background: url(../images/pdf.png) no-repeat 1px;
+}
+
+.buttons:hover {
+ color: #fff;
+ text-decoration: none;
+}
+
+
+/* Article - for main page content */
+
+article {
+ width: 650px;
+ float: right;
+ padding-bottom: 50px;
+}
+
+
+/* Footer */
+
+footer {
+ width: 170px;
+ float: left;
+ position: fixed;
+ bottom: 10px;
+ padding-left: 50px;
+}
+
+@media print, screen and (max-width: 960px) {
+ div.wrapper {
+ width: auto;
+ margin: 0;
+ }
+
+ .page-header, article, footer {
+ float: none;
+ position: static;
+ width: auto;
+ }
+
+ footer {
+ border-top: 1px solid #ccc;
+ margin: 0 84px 0 50px;
+ padding: 0;
+ }
+
+ .page-header {
+ padding-right: 320px;
+ }
+
+ article {
+ padding: 20px 84px 20px 50px;
+ margin: 0 0 20px;
+ }
+
+ .page-header a small {
+ display: inline;
+ }
+
+ .page-header ul {
+ position: absolute;
+ right: 130px;
+ top: 84px;
+ }
+}
+
+@media print, screen and (max-width: 720px) {
+ body {
+ word-wrap:break-word;
+ }
+
+ .page-header {
+ padding: 10px 20px 0;
+ margin-right: 0;
+ }
+
+ article {
+ margin: 0 0 30px;
+ padding: 10px 0 10px 20px;
+ }
+
+ footer {
+ margin: 0 0 0 30px;
+ }
+
+ .page-header ul, .page-header p.view {
+ position: static;
+ }
+}
+
+@media print, screen and (max-width: 480px) {
+ .page-header ul li.download {
+ display: none;
+ }
+
+ footer {
+ margin: 0 0 0 20px;
+ }
+
+ footer a {
+ display:block;
+ }
+}
+
+@media print {
+ body {
+ padding:0.4in;
+ font-size:12pt;
+ color:#444;
+ }
+}
+
+.onlyprint {display: none;}
+@media print {
+ .onlyprint {display: block;}
+}
diff --git a/paper/writers/jsonld.lua b/paper/writers/jsonld.lua
new file mode 100644
index 0000000..697dc9f
--- /dev/null
+++ b/paper/writers/jsonld.lua
@@ -0,0 +1,14 @@
+--
+-- jsonld.lua
+--
+-- Copyright (c) 2017 Albert Krewinkel, Robert Winkler
+--
+-- This program is free software; you can redistribute it and/or modify it
+-- under the terms of the GNU public license version 2 or later.
+-- See the LICENSE file for details.
+
+--- Produce the writer's final output.
+-- The result is the `jsonld` field of the document metadata; `body` and
+-- `variables` are not used.
+function Doc (body, meta, variables)
+  local jsonld = meta.jsonld
+  return jsonld
+end
+
+-- Every global that is not defined resolves to a function returning the
+-- empty string.
+-- NOTE(review): presumably so that writer callbacks this file does not
+-- define produce no output -- confirm against pandoc's custom-writer docs.
+local function empty_string ()
+  return ''
+end
+
+setmetatable(_G, {
+  __index = function ()
+    return empty_string
+  end
+})