',
+ html = '',
+ latex = '\\newpage{}',
+ ooxml = '',
+}
+
+--- Override the default page break markup from metadata or the environment.
+-- The HTML class is read from the `newpage_html_class` metadata field or the
+-- `PANDOC_NEWPAGE_HTML_CLASS` environment variable; the ODT paragraph style
+-- from `newpage_odt_style` or `PANDOC_NEWPAGE_ODT_STYLE`. A metadata value
+-- takes precedence over the environment; empty strings are ignored.
+-- @param meta document metadata
+local function pagebreaks_from_config (meta)
+  local html_class =
+    (meta.newpage_html_class and stringify(meta.newpage_html_class))
+    or os.getenv 'PANDOC_NEWPAGE_HTML_CLASS'
+  if html_class and html_class ~= '' then
+    -- The format string needs a %s placeholder; without one the configured
+    -- class is silently dropped.
+    pagebreak.html = string.format('<div class="%s"></div>', html_class)
+  end
+
+  local odt_style =
+    (meta.newpage_odt_style and stringify(meta.newpage_odt_style))
+    or os.getenv 'PANDOC_NEWPAGE_ODT_STYLE'
+  if odt_style and odt_style ~= '' then
+    pagebreak.odt =
+      string.format('<text:p text:style-name="%s"/>', odt_style)
+  end
+end
+
+--- Return a block element causing a page break in the given format.
+-- @param format name of the output format (pandoc's FORMAT value)
+-- @return a RawBlock with format-specific markup or, for formats without a
+--   dedicated marker, a paragraph holding a single form feed character
+local function newpage(format)
+  if format == 'docx' then
+    return pandoc.RawBlock('openxml', pagebreak.ooxml)
+  elseif format:match 'latex' then
+    return pandoc.RawBlock('tex', pagebreak.latex)
+  elseif format:match 'odt' and pagebreak.odt then
+    -- pagebreak.odt may be unset when no style was configured; in that case
+    -- the form feed fallback below is used, as before.
+    return pandoc.RawBlock('opendocument', pagebreak.odt)
+  elseif format:match 'html.*' then
+    return pandoc.RawBlock('html', pagebreak.html)
+  elseif format:match 'epub' then
+    return pandoc.RawBlock('html', pagebreak.epub)
+  else
+    -- fall back to insert a form feed character
+    return pandoc.Para{pandoc.Str '\f'}
+  end
+end
+
+--- Test whether a raw TeX snippet is exactly a `\newpage` or `\pagebreak`
+-- command, optionally followed by `{`, `}` or `{}`.
+-- @param command text of the raw block
+-- @return the matched text if it is such a command, nil otherwise
+local function is_newpage_command(command)
+  local as_newpage = command:match '^\\newpage%{?%}?$'
+  if as_newpage then
+    return as_newpage
+  end
+  return (command:match '^\\pagebreak%{?%}?$')
+end
+
+--- Filter function called on each RawBlock element. A raw TeX block that
+-- contains only `\newpage` or `\pagebreak` is replaced by the page break
+-- marker of the output format; returning nil leaves the block unchanged.
+function RawBlock (el)
+  -- Don't do anything if the output is TeX
+  if FORMAT:match 'tex$' then
+    return nil
+  end
+  -- The block must be TeX or LaTeX and hold nothing but the command.
+  local is_tex_block = el.format:match 'tex'
+  if not (is_tex_block and is_newpage_command(el.text)) then
+    return nil
+  end
+  -- FORMAT is set by pandoc to the targeted output format.
+  return newpage(FORMAT)
+end
+
+--- Filter function called on each Para element: a paragraph whose content
+-- is a single element with the text of one form feed character is turned
+-- into a page break for the output format.
+function Para (el)
+  local only_inline = #el.content == 1 and el.content[1]
+  if only_inline and only_inline.text == '\f' then
+    return newpage(FORMAT)
+  end
+end
+
+-- Two filters, applied by pandoc in list order: the Meta filter reads the
+-- page break configuration first, then the second filter replaces the page
+-- break markers in the document body.
+return {
+ {Meta = pagebreaks_from_config},
+ {RawBlock = RawBlock, Para = Para}
+}
diff --git a/paper/lua-filters/pagebreak/sample.md b/paper/lua-filters/pagebreak/sample.md
new file mode 100644
index 0000000..dc49ce1
--- /dev/null
+++ b/paper/lua-filters/pagebreak/sample.md
@@ -0,0 +1,14 @@
+Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec
+hendrerit tempor tellus. Donec pretium posuere tellus.
+
+\newpage
+
+Cum sociis natoque penatibus et magnis dis parturient montes,
+nascetur ridiculus mus. Nulla posuere. Donec vitae dolor.
+
+
+
+Pellentesque dapibus suscipit ligula. Donec posuere augue in
+quam. Suspendisse potenti.
+
+Final paragraph without a preceding pagebreak.
diff --git a/paper/lua-filters/plantuml/Makefile b/paper/lua-filters/plantuml/Makefile
new file mode 100644
index 0000000..feff6f4
--- /dev/null
+++ b/paper/lua-filters/plantuml/Makefile
@@ -0,0 +1,3 @@
+# Smoke test: convert readme.md to output.html with the plantuml filter.
+test:
+ @pandoc --self-contained --lua-filter=plantuml.lua --metadata title=README readme.md -o output.html
+
diff --git a/paper/lua-filters/plantuml/output.html b/paper/lua-filters/plantuml/output.html
new file mode 100644
index 0000000..67c4b58
--- /dev/null
+++ b/paper/lua-filters/plantuml/output.html
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+ readme
+
+
+
+
+
PlantUML Pandoc filter
+
PlantUML Pandoc filter to process code blocks with class “plantuml” containing PlantUML notation into images.
+
+
For textual output formats, use –extract-media=DIR
+
For HTML formats, you may alternatively use –self-contained
download PlantUML from http://plantuml.com (needs JAVA)
+
3 ways to set the environment
+
+
plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder
+
set a Environment Variable PLANTUML with the path to plantuml.jar
+
+
Windows - with powershell: Set-Item env:PLANTUML “c:.jar”
+
+
change path to plantuml.jar in plantuml.lua
+
+
+
This script based on the example “Converting ABC code to music notation” from https://pandoc.org/lua-filters.html
+
This script was only tested with markdown to html on a windows environment!
+
+
diff --git a/paper/lua-filters/plantuml/plantuml.lua b/paper/lua-filters/plantuml/plantuml.lua
new file mode 100644
index 0000000..7903e6a
--- /dev/null
+++ b/paper/lua-filters/plantuml/plantuml.lua
@@ -0,0 +1,56 @@
+--[[
+# PlantUML Pandoc filter
+PlantUML Pandoc filter to process code blocks with class "plantuml" containing PlantUML notation into images.
+
+* For textual output formats, use --extract-media=DIR
+* For HTML formats, you may alternatively use --self-contained
+
+## Example in markdown-file
+```plantuml
+@startuml
+Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response
+Alice -> Bob: Another authentication Request Alice <-- Bob: another authentication Response
+@enduml
+```
+## Run pandoc
+```
+pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm
+```
+
+## Prerequisites
+* download PlantUML from http://plantuml.com (needs JAVA)
+* 3 ways to set the environment
+ 1. plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder
+ 2. set an environment variable PLANTUML with the path to plantuml.jar
+ * Windows - with powershell: Set-Item env:PLANTUML "c:\bin\plantuml.jar"
+ 3. change path to plantuml.jar in plantuml.lua
+
+This script based on the example "Converting ABC code to music notation" from https://pandoc.org/lua-filters.html
+**This script was only tested with markdown to html on a windows environment!**
+]]
+
+-- Path to plantuml.jar. The PLANTUML environment variable is used when set;
+-- otherwise the fallback string below is used as is.
+-- If you use option 3, change the fallback path to plantuml.jar like this:
+-- local plantumlPath = os.getenv("PLANTUML") or "c:\\bin\\plantuml.jar"
+local plantumlPath = os.getenv("PLANTUML") or "plantuml.jar"
+
+-- Image type requested from PlantUML and the MIME type under which the
+-- result is stored. SVG has a much better quality; for PNG use instead:
+-- local filetype = "png"
+-- local mimetype = "image/png"
+local filetype = "svg"
+local mimetype = "image/svg+xml"
+
+--- Call plantuml.jar with some parameters (see the PlantUML help).
+-- The diagram source is piped to `java -jar <plantumlPath>` and the data
+-- written by the process is returned.
+-- @param puml PlantUML source text
+-- @param filetype output type, passed to PlantUML as `-t<filetype>`
+-- @param plantumlPath path to plantuml.jar
+-- @return the output of the PlantUML process
+local function plantuml(puml, filetype, plantumlPath)
+  local args = {
+    "-jar", plantumlPath, "-t" .. filetype, "-pipe", "-charset", "UTF8"
+  }
+  local rendered = pandoc.pipe("java", args, puml)
+  return rendered
+end
+
+--- Filter function for code blocks: a block whose first class is "plantuml"
+-- is rendered with PlantUML and replaced by a paragraph holding the image.
+-- The image data is stored in pandoc's mediabag under a file name built from
+-- the SHA-1 hash of the data. Other code blocks are left unchanged.
+function CodeBlock(block)
+  if block.classes[1] ~= "plantuml" then
+    return
+  end
+  local image_data = plantuml(block.text, filetype, plantumlPath)
+  local media_name = pandoc.sha1(image_data) .. "." .. filetype
+  pandoc.mediabag.insert(media_name, mimetype, image_data)
+  local alt_text = {pandoc.Str("PlantUML Diagramm")}
+  return pandoc.Para{ pandoc.Image(alt_text, media_name) }
+end
diff --git a/paper/lua-filters/plantuml/readme.md b/paper/lua-filters/plantuml/readme.md
new file mode 100644
index 0000000..de5ba74
--- /dev/null
+++ b/paper/lua-filters/plantuml/readme.md
@@ -0,0 +1,30 @@
+# PlantUML Pandoc filter
+PlantUML Pandoc filter to process code blocks with class "plantuml" containing PlantUML notation into images.
+
+* For textual output formats, use --extract-media=DIR
+* For HTML formats, you may alternatively use --self-contained
+
+## Example in markdown-file
+```plantuml
+@startuml
+Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response
+Alice -> Bob: Another authentication Request Alice <-- Bob: another authentication Response
+@enduml
+```
+## Run pandoc
+```
+pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm
+```
+
+## Prerequisites
+* download PlantUML from http://plantuml.com (needs JAVA)
+* 3 ways to set the environment
+ 1. plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder
+ 2. set an environment variable PLANTUML with the path to plantuml.jar
+ * Windows - with powershell: Set-Item env:PLANTUML "c:\bin\plantuml.jar"
+ 3. change path to plantuml.jar in plantuml.lua
+
+
+This script based on the example "Converting ABC code to music notation" from https://pandoc.org/lua-filters.html
+
+This script was only tested with markdown to html on a windows environment!
diff --git a/paper/lua-filters/runtests.sh b/paper/lua-filters/runtests.sh
new file mode 100755
index 0000000..bbcbbba
--- /dev/null
+++ b/paper/lua-filters/runtests.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Run `make test` in every filter directory given as an argument.
+# Prints PASS or FAIL per directory; the exit status is 1 if any directory
+# failed and 0 otherwise.
+
+FILTERS=$*
+err=0
+for d in $FILTERS ; do
+    if make --no-print-directory -C $d test; then
+        echo "PASS $d"
+    else
+        echo "FAIL $d"
+        err=1
+    fi
+done
+exit $err
+
diff --git a/paper/lua-filters/scholarly-metadata/Makefile b/paper/lua-filters/scholarly-metadata/Makefile
new file mode 100644
index 0000000..fadf7dd
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/Makefile
@@ -0,0 +1,8 @@
+# `test` runs the filter on sample.md and diffs the Markdown output against
+# expected.md; the `expected.md` target regenerates that reference output.
+test: sample.md scholarly-metadata.lua
+ @pandoc --lua-filter=scholarly-metadata.lua --standalone --to=markdown $< \
+ | diff -u expected.md -
+
+expected.md: sample.md scholarly-metadata.lua
+ pandoc --lua-filter=scholarly-metadata.lua --standalone --output $@ $<
+
+.PHONY: test
diff --git a/paper/lua-filters/scholarly-metadata/README.md b/paper/lua-filters/scholarly-metadata/README.md
new file mode 100644
index 0000000..7fb1d2a
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/README.md
@@ -0,0 +1,91 @@
+# scholarly-metadata
+
+The filter turns metadata entries for authors and their
+affiliations into a canonical form. This allows users to
+conveniently declare document authors and their affiliations,
+while making it possible to rely on default object metadata
+structures when using the data in other filters or when accessing
+the data from custom templates.
+
+
+## Canonical format for authors and affiliations
+
+Authors and affiliations entries are treated as *named objects*.
+All named objects will have an ID and a name, i.e. they are
+metadata objects with *at least* those two keys:
+
+ - id: namedObjectExample
+ name: Example for a named object.
+
+The filter converts the *author* and *institute* metadata fields
+into lists of named objects.
+
+E.g., the following YAML data
+
+ author:
+ - Jane Doe:
+ email: 'jane.doe@example.edu'
+ - John Q. Doe
+
+
+will be transformed into
+
+ author:
+ - email: 'jane.doe\@example.edu'
+ id: Jane Doe
+ name: Jane Doe
+ - id: 'John Q. Doe'
+ name: 'John Q. Doe'
+
+Internally, `id` will be a simple string, while `name` is of type
+`MetaInlines`.
+
+
+## Referencing affiliations
+
+Author affiliations are a common feature of scholarly
+publications. It is possible to add institutes to each author
+object. Three methods of doing this are supported.
+
+1. **Referencing institutes by list index**: affiliations can be
+ listed in the *institute* metadata field and then referenced
+ by using the numerical index:
+
+ institute:
+ - Acme Corporation
+ - Federation of Planets
+ author:
+ - Jane Doe:
+ institute: [1, 2]
+ - John Q. Doe:
+ institute: [2]
+
+ This is also the canonical representation used to keep track
+ of author affiliations.
+
+2. **Referencing institutes by ID**: using numerical indices is
+ error prone and difficult to maintain when adding or removing
+ authors or affiliations. It is hence possible to use IDs
+ instead:
+
+ institute:
+ - acme: Acme Corporation
+ - federation: Federation of Planets
+ author:
+ - Jane Doe:
+ institute: [acme, federation]
+ - John Q. Doe:
+ institute: [federation]
+
+3. **Adding institute as an attribute**: sometimes it might be
+ more convenient to give an affiliation directly in the
+ author's YAML object. Those objects can still be referenced
+ by ID from authors listed below such entry.
+
+ author:
+ - Jane Doe:
+ institute:
+ - Acme Corporation
+ - federation: Federation of Planets
+ - John Q. Doe:
+ institute: [federation]
diff --git a/paper/lua-filters/scholarly-metadata/expected.md b/paper/lua-filters/scholarly-metadata/expected.md
new file mode 100644
index 0000000..353436f
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/expected.md
@@ -0,0 +1,41 @@
+---
+author:
+- id: Jane Doe
+ institute:
+ - 1
+ - 2
+ name: Jane Doe
+- id: 'John Q. Doe'
+ institute:
+ - 1
+ name: 'John Q. Doe'
+- id: Peder Ås
+ institute:
+ - 1
+ name: Peder Ås
+- id: Juan Pérez
+ institute:
+ - 3
+ name: Juan Pérez
+- id: Max Mustermann
+ name: Max Mustermann
+institute:
+- address: '23 Science Street, Eureka, Mississippi, USA'
+ id: fosg
+ name: Formatting Open Science Group
+- id: fop
+ name: Federation of Planets
+- id: Acme Corporation
+ name: Acme Corporation
+---
+
+Abstract
+========
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim
+veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea
+commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
+velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint
+occaecat cupidatat non proident, sunt in culpa qui officia deserunt
+mollit anim id est laborum.
diff --git a/paper/lua-filters/scholarly-metadata/sample.md b/paper/lua-filters/scholarly-metadata/sample.md
new file mode 100644
index 0000000..855272e
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/sample.md
@@ -0,0 +1,30 @@
+---
+author:
+ - Jane Doe:
+ institute:
+ - fosg
+ - fop
+ - John Q. Doe:
+ institute: fosg
+ - Peder Ås:
+ institute: fosg
+ - Juan Pérez:
+ institute:
+ - name: Acme Corporation
+ - Max Mustermann
+institute:
+ - fosg:
+ name: Formatting Open Science Group
+ address: 23 Science Street, Eureka, Mississippi, USA
+ - fop: Federation of Planets
+...
+
+# Abstract
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
+eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut
+enim ad minim veniam, quis nostrud exercitation ullamco laboris
+nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in
+reprehenderit in voluptate velit esse cillum dolore eu fugiat
+nulla pariatur. Excepteur sint occaecat cupidatat non proident,
+sunt in culpa qui officia deserunt mollit anim id est laborum.
diff --git a/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua b/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua
new file mode 100644
index 0000000..3ec529c
--- /dev/null
+++ b/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua
@@ -0,0 +1,180 @@
+--[[
+ScholarlyMeta – normalize author/affiliation meta variables
+
+Copyright (c) 2017-2019 Albert Krewinkel, Robert Winkler
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+]]
+local List = require 'pandoc.List'
+
+--- Split a string at commas and strip leading and trailing whitespace from
+-- each piece.
+-- @param str string to split
+-- @return List of the trimmed pieces
+local function comma_separated_values(str)
+  local pieces = List:new{}
+  for raw in str:gmatch('([^,]*)') do
+    -- gsub also returns a match count; only the string is kept
+    local trimmed = raw:gsub('^%s*', '')
+    trimmed = trimmed:gsub('%s*$', '')
+    pieces[#pieces + 1] = trimmed
+  end
+  return pieces
+end
+
+--- Ensure the return value is a list.
+-- A MetaList is copied into a List. MetaInlines whose text contains at least
+-- two comma-separated values become the List of those strings. Anything else
+-- (non-tables, MetaBlocks, MetaMap, single-valued MetaInlines) is wrapped in
+-- a singleton List; nil yields an empty List.
+local function ensure_list (val)
+  if type(val) == 'table' then
+    if val.t == 'MetaList' then
+      return List:new(val)
+    elseif val.t == 'MetaInlines' then
+      -- check if this is really a comma-separated list
+      local parts = comma_separated_values(pandoc.utils.stringify(val))
+      if #parts >= 2 then
+        return parts
+      end
+    end
+  end
+  -- singleton list (or empty list if val == nil)
+  return List:new{val}
+end
+
+--- Build a predicate for ID lookups.
+-- @param id the ID to look for
+-- @return function taking an object and returning whether its `id` field
+--   equals the given ID
+local function has_id (id)
+  local function matches_id (candidate)
+    return candidate.id == id
+  end
+  return matches_id
+end
+
+--- Copy key-value pairs of the first table into the second, but only for
+-- keys whose value in the second table is unset (nil) or false.
+-- @param a table to copy entries from
+-- @param b table receiving the missing entries; modified in place
+-- @returns the second argument
+function add_missing_entries(a, b)
+  for key, value in pairs(a) do
+    if not b[key] then
+      b[key] = value
+    end
+  end
+  return b
+end
+
+--- Create an object with a name and an ID.
+-- * A non-table value is used as both name and ID.
+-- * A `MetaInlines` value is the name; its stringified form is the ID.
+-- * A table with a `name` field is copied; the ID is the stringified `id`
+--   field, or the stringified name if there is no `id`.
+-- * A table with exactly one entry uses the key as the ID. The value is
+--   either the name itself (string or `MetaInlines`) or a table of further
+--   attributes, in which case the key also serves as the name unless the
+--   attributes contain a `name`.
+-- Any other table raises an error.
+-- @param obj metadata value describing the object
+-- @return table with at least the fields `name` and `id`
+function to_named_object (obj)
+  local named = {}
+  if type(obj) ~= 'table' then
+    -- if the object isn't a table, just use its value as a name.
+    named.name = pandoc.MetaInlines{pandoc.Str(tostring(obj))}
+    named.id = tostring(obj)
+  elseif obj.t == 'MetaInlines' then
+    -- Treat inlines as the name
+    named.name = obj
+    named.id = pandoc.utils.stringify(obj)
+  elseif obj.name ~= nil then
+    -- object has name attribute → just create a copy of the object
+    add_missing_entries(obj, named)
+    named.id = pandoc.utils.stringify(named.id or named.name)
+  elseif next(obj) and next(obj, next(obj)) == nil then
+    -- the entry's key is taken as the name, the value contains the
+    -- attributes. Both are locals so that they do not leak into the global
+    -- environment.
+    local key, attribs = next(obj)
+    if type(attribs) == "string" or attribs.t == 'MetaInlines' then
+      named.name = attribs
+    else
+      add_missing_entries(attribs, named)
+      named.name = named.name or pandoc.MetaInlines{pandoc.Str(tostring(key))}
+    end
+    named.id = named.id and pandoc.utils.stringify(named.id) or key
+  else
+    -- this is not a named object adhering to the usual conventions.
+    error('not a named object: ' .. tostring(obj))
+  end
+  return named
+end
+
+--- Resolve institute placeholders to full named objects.
+-- Each placeholder is looked up in the known institutes by numerical index,
+-- then by ID; if neither matches, it is turned into a new named object.
+-- @param institute nil, a single string or number, or a list of placeholders
+-- @param known_institutes List of named objects
+-- @return List of named objects, one per placeholder
+local function resolve_institutes (institute, known_institutes)
+  local pending = institute
+  if institute == nil then
+    pending = {}
+  elseif type(institute) == "string" or type(institute) == "number" then
+    pending = {institute}
+  end
+
+  local resolved = List:new{}
+  for idx, inst in ipairs(pending) do
+    local found = known_institutes[tonumber(inst)]
+    if not found then
+      found = known_institutes:find_if(has_id(pandoc.utils.stringify(inst)))
+    end
+    if not found then
+      found = to_named_object(inst)
+    end
+    resolved[idx] = found
+  end
+  return resolved
+end
+
+--- Insert a named object into a list. If the list already holds an object
+-- with the same ID, the properties that object lacks are copied over from
+-- the new one and the existing object stays in place; otherwise the new
+-- object is appended.
+-- @param list List of named objects; modified in place
+-- @param namedObj the object to insert
+-- @return the object now stored in the list, and the length of the list
+function merge_on_id (list, namedObj)
+  local existing, position = list:find_if(has_id(namedObj.id))
+  local merged = namedObj
+  if existing then
+    merged = add_missing_entries(namedObj, existing)
+  end
+  list[position or (#list + 1)] = merged
+  return merged, #list
+end
+
+--- Concatenate the lists in a list into one flat List (one level deep).
+-- @param lists list of lists
+-- @return List holding the elements of all sublists, in order
+local function flatten (lists)
+  local flat = List:new{}
+  for _, sublist in ipairs(lists) do
+    flat:extend(sublist)
+  end
+  return flat
+end
+
+--- Canonicalize authors and institutes.
+-- Both raw values are turned into lists of named objects. Institutes given
+-- in author objects are resolved against, and merged into, the list of
+-- institutes; each author's `institute` field finally holds a MetaList with
+-- the stringified positions of its institutes in that list.
+-- @param raw_author value of the `author` metadata field
+-- @param raw_institute value of the `institute` metadata field
+-- @return the list of authors and the list of institutes
+local function canonicalize(raw_author, raw_institute)
+  local institutes = ensure_list(raw_institute):map(to_named_object)
+  local authors = ensure_list(raw_author):map(to_named_object)
+
+  for _, author in ipairs(authors) do
+    local referenced = ensure_list(author.institute)
+    author.institute = resolve_institutes(referenced, institutes)
+  end
+
+  -- Merge institutes defined in author objects with those defined in the
+  -- top-level list, author by author and in the order given.
+  for _, author in ipairs(authors) do
+    for _, inst in ipairs(author.institute) do
+      merge_on_id(institutes, inst)
+    end
+  end
+
+  -- replace institutes with their indices
+  local function index_of (inst)
+    local _, position = institutes:find_if(has_id(inst.id))
+    return tostring(position)
+  end
+  for _, author in ipairs(authors) do
+    author.institute = pandoc.MetaList(author.institute:map(index_of))
+  end
+
+  return authors, institutes
+end
+
+
+-- The filter: replace the `author` and `institute` metadata fields with
+-- their canonical forms.
+return {
+ {
+ Meta = function(meta)
+ meta.author, meta.institute = canonicalize(meta.author, meta.institute)
+ return meta
+ end
+ }
+}
diff --git a/paper/lua-filters/scrlttr2/Makefile b/paper/lua-filters/scrlttr2/Makefile
new file mode 100644
index 0000000..acd4c7e
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/Makefile
@@ -0,0 +1,9 @@
+# `test` first builds sample.pdf, then pipes the standalone LaTeX output for
+# sample.md through expected-strings.sh and finally removes sample.pdf again.
+test: sample.md scrlttr2.lua sample.pdf
+ @pandoc --to=latex --lua-filter=scrlttr2.lua -s sample.md | \
+ sh expected-strings.sh
+ @rm sample.pdf
+
+%.pdf: %.md scrlttr2.lua
+ @pandoc --lua-filter=scrlttr2.lua --output=$@ $<
+
+.PHONY: test
diff --git a/paper/lua-filters/scrlttr2/README.md b/paper/lua-filters/scrlttr2/README.md
new file mode 100644
index 0000000..2a4e440
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/README.md
@@ -0,0 +1,60 @@
+# scrlttr2
+
+This filter allows writing DIN 5008 letters using the [scrlttr2]
+LaTeX document class from KOMA script. It converts metadata to
+the appropriate KOMA variables and allows using the default LaTeX
+template shipped with pandoc.
+
+[scrlttr2]: https://www.ctan.org/pkg/scrlttr2
+
+## Base variables
+
+ - `opening`: phrase used as an opening;
+ defaults to "Dear Sir/Madam,"
+ - `closing`: closing phrase; defaults to "Sincerely,"
+ - `address`: recipient's street address;
+ defaults to "no address given"
+ - `date`: the date of the letter; defaults to the current day.
+
+## KOMA Variables
+
+Currently, the following metadata fields are translated to KOMA
+variables:
+
+- `fromaddress` (alias: `return-address`): address of the sender
+- `fromfax` (alias: `fax`): sender's fax number
+- `fromemail` (alias: `email`): sender's email
+- `fromlogo` (alias: `logo`): image to be used as the sender's logo
+- `fromname` (alias: `author`): sender name
+- `fromphone` (alias: `phone`): sender's phone number
+- `fromurl` (alias: `url`): sender's URL
+- `customer`: customer number
+- `invoice`: invoice number
+- `myref`: sender's reference
+- `place`: sender's place used near date
+- `signature`: sender's signature
+- `subject`: letter's subject
+- `title`: letter title
+- `yourref`: addressee's reference
+
+The values of these variables are converted to MetaInlines. If a
+list is given, then each list item is used as a line, e.g.,
+
+ fromaddress:
+ - 35 Industry Way
+ - Springfield
+
+The `KOMAoptions` value is inferred from the given variables, but
+can be overwritten by specifying it explicitly.
+
+See the scrlttr2 documentation for details.
+
+## Intended Usage
+
+Many sender variables don't change, so it is sensible to provide
+default values for these. Authors using Markdown to draft letters
+can use a separate YAML file for this. E.g., if there is a file
+`default.yml` which contains the sender's details, then only the
+addressee's data must be specified.
+
+ pandoc --lua-filter=scrlttr2 letter.md default.yml -o out.pdf
diff --git a/paper/lua-filters/scrlttr2/expected-strings.sh b/paper/lua-filters/scrlttr2/expected-strings.sh
new file mode 100644
index 0000000..f2b54c2
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/expected-strings.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Read LaTeX output from stdin and check that it contains the strings which
+# the scrlttr2 filter is expected to produce for sample.md. Exits with
+# status 1 at the first missing string.
+
+latex_result="$(cat -)"
+
+# Exit with an error message unless the captured output contains the literal
+# string given as $1.
+assert_contains ()
+{
+    if ! printf '%s' "$latex_result" | grep -qF "$1" -; then
+        printf 'Output does not contain `%s`.\n' "$1" >&2
+        exit 1
+    fi
+}
+
+# whether we are using the scrlttr2 class
+assert_contains '{scrlttr2}'
+
+assert_contains '\setkomavar{fromname}{Jane Doe}'
+assert_contains '\setkomavar{fromaddress}{35 Industry Way\\ Springfield}'
+assert_contains '\setkomavar{subject}{Letter of Reference}'
+assert_contains '\setkomavar{date}{February 29, 2020}'
+
+# Custom opening and default closing
+assert_contains '\opening{To Whom It May Concern,}'
+assert_contains '\closing{Sincerely,}'
+
+# Author and date
+assert_contains '\author{Jane Doe}'
+assert_contains '\date{February 29, 2020}'
+
+# Recipient address
+assert_contains '\begin{letter}{Fireworks Inc.\\ 123 Fake St\\ 58008 Springfield}'
diff --git a/paper/lua-filters/scrlttr2/sample.md b/paper/lua-filters/scrlttr2/sample.md
new file mode 100644
index 0000000..5f13554
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/sample.md
@@ -0,0 +1,16 @@
+---
+author: Jane Doe
+fromaddress:
+ - 35 Industry Way
+ - Springfield
+opening: To Whom It May Concern,
+subject: Letter of Reference
+date: February 29, 2020
+address:
+ - Fireworks Inc.
+ - 123 Fake St
+ - 58008 Springfield
+...
+
+I strongly recommend to embiggen your team by giving John Doe the position of a
+yak shaver. He has shown cromulent performance as a bike shedder.
diff --git a/paper/lua-filters/scrlttr2/scrlttr2.lua b/paper/lua-filters/scrlttr2/scrlttr2.lua
new file mode 100644
index 0000000..78f38fd
--- /dev/null
+++ b/paper/lua-filters/scrlttr2/scrlttr2.lua
@@ -0,0 +1,161 @@
+-- Ensure unpack also works if pandoc was compiled against Lua 5.1
+local unpack = unpack or table.unpack
+local List = require 'pandoc.List'
+local stringify = (require 'pandoc.utils')['stringify']
+
+--- Default values, used when the metadata field of the same name is unset:
+-- the opening and closing phrases of the letter and the recipient's address.
+local default = {
+ opening = 'Dear Sir/Madam,',
+ closing = 'Sincerely,',
+ address = 'no address given'
+}
+
+--- Return a list of inlines representing a call to a LaTeX command.
+-- Each argument is wrapped in raw LaTeX braces. Table arguments are taken
+-- to be lists of inlines and inserted as they are; other values are
+-- converted with `tostring` and inserted as raw LaTeX.
+-- @param command name of the command, without the leading backslash
+-- @param ... arguments of the command
+-- @return plain table of inlines
+local function latex_command (command, ...)
+  local inlines = {pandoc.RawInline('latex', '\\' .. command)}
+  for _, arg in ipairs{...} do
+    inlines[#inlines + 1] = pandoc.RawInline('latex', '{')
+    if type(arg) == 'table' then
+      List.extend(inlines, arg)
+    else
+      inlines[#inlines + 1] = pandoc.RawInline('latex', tostring(arg))
+    end
+    inlines[#inlines + 1] = pandoc.RawInline('latex', '}')
+  end
+  return inlines
+end
+
+--- Convert the given meta-value to MetaInlines.
+-- MetaInlines are returned unchanged. Any other table is treated as a list
+-- of lines, which are joined with raw LaTeX line breaks (`\\ `). A nil,
+-- string or boolean value becomes a single Str of its `tostring` form; all
+-- remaining values are stringified with pandoc.utils.stringify.
+local function ensure_inlines (val)
+  if type(val) == 'table' then
+    if val.t == 'MetaInlines' then
+      return val
+    end
+    local joined = List:new{}
+    for i = 1, #val do
+      if i > 1 then
+        -- separator between lines, so none trails the last one
+        joined[#joined + 1] = pandoc.RawInline('latex', '\\\\ ')
+      end
+      joined:extend(val[i])
+    end
+    return pandoc.MetaInlines(joined)
+  end
+  if not val or type(val) == 'string' or type(val) == 'boolean' then
+    return pandoc.MetaInlines{pandoc.Str(tostring(val))}
+  end
+  return pandoc.MetaInlines{pandoc.Str(pandoc.utils.stringify(val))}
+end
+
+--- Convert the given value to a MetaList.
+-- A MetaList is returned unchanged and nil (or false) yields an empty
+-- MetaList. Any other value is wrapped in a singleton MetaList so that, for
+-- example, a single `header-includes` entry is kept rather than discarded.
+-- @param val metadata value or nil
+-- @return a MetaList
+local function ensure_meta_list (val)
+  if not val then
+    return pandoc.MetaList{}
+  elseif type(val) == 'table' and val.t == 'MetaList' then
+    return val
+  else
+    return pandoc.MetaList{val}
+  end
+end
+
+--- Set supported variables as KOMA variables.
+-- Builds one `\setkomavar` command for every supported metadata field that
+-- is set. For the fax, URL, logo, email and phone variables the variable
+-- name is also collected, and a final `\KOMAoptions` command switches each
+-- of them on with `<name>=true` unless `KOMAoptions` is given in the
+-- metadata, in which case that value is used instead.
+-- @param meta document metadata
+-- @return list of commands, each a list of inlines
+function setkomavar_commands (meta)
+  local set_vars = {}
+  local res = {}
+  --- Add a `\setkomavar` command unless the value is nil; if `enable` is
+  -- truthy, remember the name for the `\KOMAoptions` command.
+  local function set_koma_var (name, value, enable)
+    if value ~= nil then
+      res[#res + 1] = latex_command('setkomavar', name, ensure_inlines(value))
+      if enable then
+        set_vars[#set_vars + 1] = name
+      end
+    end
+  end
+
+  set_koma_var('fromname', meta.fromname or meta.author)
+  set_koma_var('fromaddress', meta.fromaddress or meta['return-address'])
+  set_koma_var('subject', meta.subject)
+  set_koma_var('title', meta.title)
+  set_koma_var('signature', meta.signature)
+  set_koma_var('customer', meta.customer)
+  set_koma_var('yourref', meta.yourref)
+  set_koma_var('myref', meta.myref)
+  set_koma_var('invoice', meta.invoice)
+  set_koma_var('place', meta.place)
+
+  set_koma_var('fromfax', meta.fromfax or meta.fax, true)
+  set_koma_var('fromurl', meta.fromurl or meta.url, true)
+  set_koma_var('fromlogo', meta.fromlogo or meta.logo, true)
+  set_koma_var('fromemail', meta.fromemail or meta.email, true)
+  set_koma_var('fromphone', meta.fromphone or meta.phone, true)
+
+  -- don't set date if date is set to `false`
+  if meta.date == nil or meta.date == true then
+    local date_format = meta['date-format']
+    if date_format then
+      -- format the current date with the user-supplied os.date format
+      set_koma_var('date', os.date(stringify(date_format)))
+    else
+      set_koma_var('date', pandoc.MetaInlines{pandoc.RawInline('latex', '\\today')})
+    end
+  elseif meta.date then
+    set_koma_var('date', meta.date)
+  end
+
+  if meta['KOMAoptions'] or #set_vars >= 1 then
+    res[#res + 1] = latex_command(
+      'KOMAoptions',
+      meta['KOMAoptions']
+        or table.concat(set_vars, '=true,') .. '=true'
+    )
+  end
+
+  return res
+end
+
+--- Bring metadata into a form suitable for the scrlttr2 KOMA class.
+-- Appends the KOMA variable commands to `header-includes`, the start of the
+-- letter environment and the opening to `include-before`, and the closing
+-- and end of the letter environment to `include-after`. The document class
+-- is set to scrlttr2 unless one is given.
+-- @param meta document metadata; modified in place
+-- @return the modified metadata
+local function make_koma_metadata(meta)
+  local header_includes = ensure_meta_list(meta['header-includes'])
+  List.extend(header_includes, setkomavar_commands(meta))
+
+  local include_before = ensure_meta_list(meta['include-before'])
+  local recipient = ensure_inlines(meta.address or default.address)
+  local begin_letter = latex_command('begin', 'letter', recipient)
+  local opening = latex_command('opening', meta.opening or default.opening)
+  List.extend(
+    include_before,
+    {pandoc.MetaInlines(begin_letter), pandoc.MetaInlines(opening)}
+  )
+
+  local include_after = ensure_meta_list(meta['include-after'])
+  local closing = latex_command('closing', meta.closing or default.closing)
+  local end_letter = latex_command('end', 'letter')
+  List.extend(
+    include_after,
+    {pandoc.MetaInlines(closing), pandoc.MetaInlines(end_letter)}
+  )
+
+  -- unset or reset some unwanted vars
+  -- NOTE(review): `data` looks like a typo for `date` (set via komavar
+  -- 'date'); as written, `meta.date` is left untouched. Confirm the intent
+  -- before changing it, since the output is checked for a \date command.
+  meta.data = nil
+  meta.title = nil -- set via komavar 'subject'
+  meta.indent = true -- disable parskip
+  -- set documentclass to scrlttr2 if it's unset
+  meta.documentclass = meta.documentclass or pandoc.MetaString'scrlttr2'
+
+  meta['header-includes'] = header_includes
+  meta['include-before'] = include_before
+  meta['include-after'] = include_after
+
+  return meta
+end
+
+-- The filter consists of a single Meta function rewriting the metadata.
+return {
+ {Meta = make_koma_metadata}
+}
diff --git a/paper/lua-filters/section-refs/Makefile b/paper/lua-filters/section-refs/Makefile
new file mode 100644
index 0000000..dcceb70
--- /dev/null
+++ b/paper/lua-filters/section-refs/Makefile
@@ -0,0 +1,26 @@
+# Every test_<name> target runs pandoc on sample.md with the options from
+# OPTIONS_test_<name> and diffs the native output against
+# expected_<name>.native.
+OPTIONS_test_default := -t native \
+ -M bibliography=bibliography.bib \
+ -F pandoc-citeproc \
+ --lua-filter=section-refs.lua
+
+OPTIONS_test_no_citeproc := -t native \
+ --lua-filter=section-refs.lua
+
+OPTIONS_test_refs_name := -t native \
+ -M bibliography=bibliography.bib \
+ -M reference-section-title="Works Cited" \
+ -F pandoc-citeproc \
+ --lua-filter=section-refs.lua
+
+OPTIONS_test_section_level := -t native \
+ -M bibliography=bibliography.bib \
+ -M reference-section-title="Works Cited" \
+ -M section-refs-level=2\
+ -F pandoc-citeproc \
+ --lua-filter=section-refs.lua
+
+.PHONY: test
+test: test_default test_no_citeproc test_refs_name test_section_level
+
+test_%: expected_%.native sample.md bibliography.bib
+ @pandoc sample.md $(OPTIONS_$@) | diff --strip-trailing-cr -u $< -
diff --git a/paper/lua-filters/section-refs/README.md b/paper/lua-filters/section-refs/README.md
new file mode 100644
index 0000000..941bc36
--- /dev/null
+++ b/paper/lua-filters/section-refs/README.md
@@ -0,0 +1,19 @@
+# section-refs
+
+This filter allows the user to put bibliographies at the end of each
+section, containing only those references in the section. It works on
+the output of `pandoc-citeproc`, and so must be run after
+`pandoc-citeproc`. For example:
+
+~~~
+pandoc input.md -F pandoc-citeproc --lua-filter section-refs.lua
+~~~
+
+It allows customization through two metadata fields:
+`reference-section-title` and `section-refs-level` (default 1). The
+`section-refs-level` variable controls what level the bibliography will
+occur at the end of. The header of the generated references section will
+be one level higher than `section-refs-level` (so if it occurs at the
+end of a level-1 section, it will receive a level-2 header, and so on).
+
+This filter requires pandoc version >= 2.1.
diff --git a/paper/lua-filters/section-refs/bibliography.bib b/paper/lua-filters/section-refs/bibliography.bib
new file mode 100644
index 0000000..7ce54d5
--- /dev/null
+++ b/paper/lua-filters/section-refs/bibliography.bib
@@ -0,0 +1,70 @@
+@BOOK{ainsworth:sheppard,
+ title = {Jack Sheppard: A Romance},
+ author = {William Harrison Ainsworth},
+ address = {London},
+ publisher = {George Routledge \& Sons},
+ year = {1900},
+ shorttitle = {Jack Sheppard},
+}
+
+@Article{altick:aldine,
+ author = {Richard D. Altick},
+ title = {From Aldine to Everyman: Cheap Reprint Series of the
+ English Classics 1830--1906},
+ journal = {Studies in Bibliography},
+ year = 1958,
+ volume = 11,
+ pages = {3--24}
+}
+
+@BOOK{cohen:jokes,
+ title = {Jokes: Philosophical Thoughts on Joking Matters},
+ publisher = {University of Chicago Press},
+ year = 1999,
+ author = {Ted Cohen},
+ address = {Chicago},
+ shorttitle = {Jokes}
+}
+
+@Book{dames:physiology,
+ author = {Nicholas Dames},
+ title = {The Physiology of the Novel: Reading, Neural
+ Science, and the Form of Victorian Fiction},
+ publisher = {Oxford University Press},
+ year = 2007,
+ address = {Oxford},
+ shorttitle = {Physiology}
+}
+
+@Book{kant:critique2,
+ author = {Immanuel Kant},
+ editor = {Mary Gregor},
+ translator = {Mary Gregor},
+ title = {Critique of Practical Reason},
+ publisher = {Cambridge University Press},
+ year = 2001,
+ address = {Cambridge, UK},
+ shorttitle = {Practical}
+}
+
+@Book{lukacs:european,
+ author = {Georg Luk{\'a}cs},
+ title = {Studies in European Realism: A Sociological Survey
+ of the Writings of Balzac, Stendhal, Zola, Tolstoy,
+ Gorki, and Others},
+ publisher = {The Merlin Press},
+ year = 1989,
+ translator = {Edith Bone},
+ address = {London},
+ shorttitle = {Studies}
+}
+
+@Book{trollope:autobiography,
+ author = {Anthony Trollope},
+ editor = {Michael Sadleir and Frederick Page},
+ title = {An Autobiography},
+ publisher = {Oxford University Press},
+ year = 1999,
+ address = {Oxford},
+ origdate = 1883
+}
diff --git a/paper/lua-filters/section-refs/expected_default.native b/paper/lua-filters/section-refs/expected_default.native
new file mode 100644
index 0000000..b1c6945
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_default.native
@@ -0,0 +1,25 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."]
+,Div ("refs-1",["references"],[])
+ [Div ("ref-ainsworth:sheppard",[],[])
+ [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]]
+ ,Div ("ref-altick:aldine",[],[])
+ [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-kant:critique2",[],[])
+ [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."]
+,Div ("refs-2",["references"],[])
+ [Div ("ref-cohen:jokes",[],[])
+ [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]]
+ ,Div ("ref-lukacs:european",[],[])
+ [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]]
+ ,Div ("ref-trollope:autobiography",[],[])
+ [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]]
diff --git a/paper/lua-filters/section-refs/expected_no_citeproc.native b/paper/lua-filters/section-refs/expected_no_citeproc.native
new file mode 100644
index 0000000..2219b6e
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_no_citeproc.native
@@ -0,0 +1,7 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@ainsworth:sheppard",Space,Str "27]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@dames:physiology]"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@kant:critique2",Space,Str "29]"],Str "."]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@altick:aldine",Space,Str "20]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@dames:physiology]"],Str "."]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@lukacs:european",Space,Str "125]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@cohen:jokes",Space,Str "3]"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@trollope:autobiography",Space,Str "392]"],Str "."]]
diff --git a/paper/lua-filters/section-refs/expected_refs_name.native b/paper/lua-filters/section-refs/expected_refs_name.native
new file mode 100644
index 0000000..0d026ed
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_refs_name.native
@@ -0,0 +1,27 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."]
+,Div ("refs-1",["references"],[])
+ [Header 2 ("bibliography-1",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-ainsworth:sheppard",[],[])
+ [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]]
+ ,Div ("ref-altick:aldine",[],[])
+ [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-kant:critique2",[],[])
+ [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."]
+,Div ("refs-2",["references"],[])
+ [Header 2 ("bibliography-2",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-cohen:jokes",[],[])
+ [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]]
+ ,Div ("ref-lukacs:european",[],[])
+ [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]]
+ ,Div ("ref-trollope:autobiography",[],[])
+ [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]]
diff --git a/paper/lua-filters/section-refs/expected_section_level.native b/paper/lua-filters/section-refs/expected_section_level.native
new file mode 100644
index 0000000..1d3c89e
--- /dev/null
+++ b/paper/lua-filters/section-refs/expected_section_level.native
@@ -0,0 +1,31 @@
+[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"]
+,Div ("refs-1",["references"],[])
+ [Header 3 ("bibliography-1",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]]
+,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."]
+,Div ("refs-2",["references"],[])
+ [Header 3 ("bibliography-2",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-ainsworth:sheppard",[],[])
+ [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]
+ ,Div ("ref-kant:critique2",[],[])
+ [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]]
+,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."]
+,Div ("refs-3",["references"],[])
+ [Header 3 ("bibliography-3",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-altick:aldine",[],[])
+ [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]]
+ ,Div ("ref-dames:physiology",[],[])
+ [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]
+,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"]
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."]
+,Div ("refs-4",["references"],[])
+ [Header 3 ("bibliography-4",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]
+ ,Div ("ref-cohen:jokes",[],[])
+ [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]]
+ ,Div ("ref-lukacs:european",[],[])
+ [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]]
+ ,Div ("ref-trollope:autobiography",[],[])
+ [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]]
diff --git a/paper/lua-filters/section-refs/sample.md b/paper/lua-filters/section-refs/sample.md
new file mode 100644
index 0000000..00ddb86
--- /dev/null
+++ b/paper/lua-filters/section-refs/sample.md
@@ -0,0 +1,18 @@
+# Here is one section
+
+## A subsection
+
+Here is something [@ainsworth:sheppard 27]. And here is something else
+[@dames:physiology]. Finally, we want to make sure that we have one last
+citation here [@kant:critique2 29].
+
+## Another subsection.
+
+Here is something [@altick:aldine 20]. And here is something repeated
+[@dames:physiology].
+
+# Here is another section
+
+Here is something [@lukacs:european 125]. And here is something else
+[@cohen:jokes 3]. Finally, we want to make sure that we have one last
+citation here [@trollope:autobiography 392].
diff --git a/paper/lua-filters/section-refs/section-refs.lua b/paper/lua-filters/section-refs/section-refs.lua
new file mode 100644
index 0000000..68e61d0
--- /dev/null
+++ b/paper/lua-filters/section-refs/section-refs.lua
@@ -0,0 +1,138 @@
+function is_ref_div (blk) -- true only for a Div block whose identifier is "refs"
+  if blk.t ~= "Div" then return false else return blk.identifier == "refs" end
+end
+
+function is_ref_header (blk) -- true only for a Header whose identifier is "bibliography"
+  if blk.t ~= "Header" then return false else return blk.identifier == "bibliography" end
+end
+
+-- Return the content of the first Div in `blks` with identifier "refs", i.e.
+-- the list of reference entries, or nothing when there is no such Div.
+function get_all_refs (blks)
+  for _, b in ipairs(blks) do -- ipairs: scan in document order
+    if is_ref_div(b) then return b.content end
+  end
+end
+
+-- Return a new list with the blocks of `blks`, in the same order, minus the
+-- "refs" Div and the "bibliography" Header.
+function remove_all_refs (blks)
+  local out = {}
+  for _, b in ipairs(blks) do -- ipairs: pairs does not guarantee the order
+    if not (is_ref_div(b) or is_ref_header(b)) then out[#out + 1] = b end
+  end
+  return out
+end
+
+-- Find the reference entry for `citation`: the Div in `all_refs` whose
+-- identifier is "ref-" followed by the citation id. Returns the pair
+-- {position, entry}, so callers can sort by position, or nothing if absent.
+function citation_to_numbered_ref (citation, all_refs)
+  local wanted = "ref-" .. citation.id
+  for pos, entry in ipairs(all_refs) do
+    local found = entry.t == "Div" and entry.identifier == wanted
+    if found then return {pos, entry} end
+  end
+end
+
+
+-- Return the reference entries cited inside `blocks`, ordered as in
+-- `all_refs`. An entry is listed once per citation, so a work cited twice
+-- shows up twice; citations without an entry in `all_refs` are skipped.
+function get_partial_refs (blocks, all_refs)
+  local cites = {}
+  local citegetter = {
+    Cite = function (el)
+      for _, c in ipairs(el.citations) do
+        table.insert(cites, c)
+      end
+    end
+  }
+  -- ipairs throughout: these are lists, and pairs guarantees no order
+  for _, b in ipairs(blocks) do
+    pandoc.walk_block(b, citegetter)
+  end
+
+  -- first we make a list of the {number, ref} pairs so we can sort
+  -- them. Then after sorting, we're going to make a new list with
+  -- only the second element.
+  local numbered_refs = {}
+  for _, c in ipairs(cites) do
+    local r = citation_to_numbered_ref(c, all_refs)
+    if r then
+      table.insert(numbered_refs, r)
+    end
+  end
+  table.sort(numbered_refs, function(x, y) return x[1] < y[1] end)
+
+  local refs = {}
+  for _, nr in ipairs(numbered_refs) do
+    table.insert(refs, nr[2])
+  end
+  return refs
+end
+
+-- Split `blks` into sections at each Header of level <= `lvl` and append to
+-- every section a Div (id "refs-N", class "references") with the entries it
+-- cites. If `refs_title` is set, that Div starts with an unnumbered Header of
+-- level `lvl + 1` (id "bibliography-N"). Returns the resulting block list.
+function add_section_refs (blks, lvl, refs_title, all_refs)
+  local output_blks = {}
+  local section = {}
+  local refs_num = 0
+  -- Close the open section: append its references Div, then flush it.
+  local go = function ()
+    refs_num = refs_num + 1
+    local section_refs = get_partial_refs(section, all_refs)
+    if refs_title then
+      local hdr = pandoc.Header(lvl + 1, refs_title,
+        pandoc.Attr("bibliography-" .. tostring(refs_num), {"unnumbered"}))
+      table.insert(section_refs, 1, hdr)
+    end
+    local refs_div = pandoc.Div(section_refs,
+      pandoc.Attr("refs-" .. tostring(refs_num), {"references"}))
+    table.insert(section, refs_div)
+    -- ipairs keeps document order; pairs gives no ordering guarantee
+    for _, x in ipairs(section) do
+      table.insert(output_blks, x)
+    end
+  end
+
+  -- to avoid putting a bib after an intro paragraph.
+  local seen_hdr_before = false
+  for _, b in ipairs(blks) do
+    local starts_section = b.t == "Header" and b.level <= lvl
+    if starts_section and seen_hdr_before then
+      go()
+      section = {b}
+    else
+      -- the first section header, like any other block, joins the open section
+      seen_hdr_before = seen_hdr_before or starts_section
+      table.insert(section, b)
+    end
+  end
+  go()
+  return output_blks
+end
+
+-- Filter entry point: give every section its own references Div.
+function Pandoc(doc)
+  if PANDOC_VERSION == nil then -- if pandoc_version < 2.1
+    io.stderr:write("WARNING: pandoc >= 2.1 required for section-refs filter\n")
+    return doc
+  end
+  local refs_title = doc.meta["reference-section-title"]
+  -- a command-line title is a string: read it as md; "" has no blocks, so no title
+  if type(refs_title) == "string" then
+    local first = pandoc.read(refs_title, "markdown").blocks[1]
+    refs_title = first and first.content
+  end
+  -- a level set in YAML metadata is not a plain string: stringify it first
+  local lvl = doc.meta["section-refs-level"]
+  if type(lvl) == "table" or type(lvl) == "userdata" then lvl = pandoc.utils.stringify(lvl) end
+  lvl = tonumber(lvl) or 1
+  local all_refs = get_all_refs(doc.blocks)
+  if not all_refs then return nil end -- no "refs" Div (e.g. no pandoc-citeproc): keep doc
+  local unreffed = remove_all_refs(doc.blocks)
+  return pandoc.Pandoc(add_section_refs(unreffed, lvl, refs_title, all_refs), doc.meta)
+end
diff --git a/paper/lua-filters/short-captions/Makefile b/paper/lua-filters/short-captions/Makefile
new file mode 100644
index 0000000..756cf2b
--- /dev/null
+++ b/paper/lua-filters/short-captions/Makefile
@@ -0,0 +1,18 @@
+LF = --lua-filter=short-captions.lua
+F = -F pandoc-crossref
+
+test:
+ @pandoc $(LF) README.md -w latex | diff --strip-trailing-cr expected-1.tex -
+
+crossref:
+ @echo testing short-captions.lua after pandoc-crossref
+ @pandoc $(F) $(LF) README.md -w latex | diff --strip-trailing-cr expected-2.tex -
+ @echo testing short-captions.lua before pandoc-crossref
+ @pandoc $(LF) $(F) README.md -w latex | diff --strip-trailing-cr expected-2.tex -
+
+readme:
+ @pandoc -s $(F) $(LF) README.md -o README.pdf
+
+latex:
+ @pandoc -s $(F) $(LF) README.md -o README.tex
+
diff --git a/paper/lua-filters/short-captions/README.md b/paper/lua-filters/short-captions/README.md
new file mode 100644
index 0000000..2721004
--- /dev/null
+++ b/paper/lua-filters/short-captions/README.md
@@ -0,0 +1,52 @@
+---
+title: "short-captions.lua"
+lof: true
+---
+
+# Short captions in \LaTeX\ output
+
+For latex output, this filter uses the attribute `short-caption` for
+figures so that the attribute value appears in the List of Figures, if
+one is desired.
+
+# Usage
+
+Where you would have a figure in, say, markdown as
+
+ ![The caption](foo.png )
+
+You can now specify the figure as
+
+ ![The long caption](foo.png){short-caption="a short caption"}
+
+If the document metadata includes `lof:true`, then the List of Figures
+will use the short caption. This is particularly useful for students
+writing dissertations, who often have to include a List of Figures in
+the front matter, but where figure captions themselves can be quite
+lengthy.
+
+ pandoc --lua-filter=short-captions.lua article.md -o article.tex
+
+ pandoc --lua-filter=short-captions.lua article.md -o article.pdf
+
+
+
+# Example
+
+@Fig:shortcap is an interesting figure with a long caption, but a short
+caption in the List of Figures.
+
+![This is an *extremely* interesting figure that has a lot of detail I
+will need to describe in a few sentences. This figure has a short
+caption that will appear in the list of figures. Other attributes are
+preserved](fig.pdf){#fig:shortcap short-caption="A short caption with
+math $x^n + y^n = z^n$" width="50%"}
+
+
+# Limitations
+
+- The filter will process the `short-caption` attribute value as pandoc
+ markdown, regardless of the input format.
+- It does not work for tables and listings yet.
+- But it works with pandoc-crossref, regardless of the order of
+ application.
diff --git a/paper/lua-filters/short-captions/expected-1.tex b/paper/lua-filters/short-captions/expected-1.tex
new file mode 100644
index 0000000..a28bb7e
--- /dev/null
+++ b/paper/lua-filters/short-captions/expected-1.tex
@@ -0,0 +1,67 @@
+\hypertarget{short-captions-in-output}{%
+\section{\texorpdfstring{Short captions in
+\LaTeX~output}{Short captions in ~output}}\label{short-captions-in-output}}
+
+For latex output, this filter uses the attribute \texttt{short-caption}
+for figures so that the attribute value appears in the List of Figures,
+if one is desired.
+
+\hypertarget{usage}{%
+\section{Usage}\label{usage}}
+
+Where you would have a figure in, say, markdown as
+
+\begin{verbatim}
+![The caption](foo.png )
+\end{verbatim}
+
+You can now specify the figure as
+
+\begin{verbatim}
+![The long caption](foo.png){short-caption="a short caption"}
+\end{verbatim}
+
+If the document metadata includes \texttt{lof:true}, then the List of
+Figures will use the short caption. This is particularly useful for
+students writing dissertations, who often have to include a List of
+Figures in the front matter, but where figure captions themselves can be
+quite lengthy.
+
+\begin{verbatim}
+pandoc --lua-filter=short-captions.lua article.md -o article.tex
+
+pandoc --lua-filter=short-captions.lua article.md -o article.pdf
+\end{verbatim}
+
+\hypertarget{example}{%
+\section{Example}\label{example}}
+
+@Fig:shortcap is an interesting figure with a long caption, but a short
+caption in the List of Figures.
+
+\hypertarget{fig:shortcap}{%
+\begin{figure}
+\centering
+\includegraphics[width=0.5\textwidth,height=\textheight]{fig.pdf}
+\caption[{A short caption with math \(x^n + y^n = z^n\)}]{This is an
+\emph{extremely} interesting figure that has a lot of detail I will need
+to describe in a few sentences. This figure has a short caption that
+will appear in the list of figures. Other attributes are preserved}
+\label{fig:shortcap}
+\end{figure}
+}
+
+\hypertarget{limitations}{%
+\section{Limitations}\label{limitations}}
+
+\begin{itemize}
+\tightlist
+\item
+ The filter will process the \texttt{short-caption} attribute value as
+ pandoc markdown, regardless of the input format.
+\item
+ It does not work for tables and listings yet.
+\item
+ But it works with pandoc-crossref, regardless of the order of
+ application.
+\end{itemize}
diff --git a/paper/lua-filters/short-captions/expected-2.tex b/paper/lua-filters/short-captions/expected-2.tex
new file mode 100644
index 0000000..95444b3
--- /dev/null
+++ b/paper/lua-filters/short-captions/expected-2.tex
@@ -0,0 +1,67 @@
+\hypertarget{short-captions-in-output}{%
+\section{\texorpdfstring{Short captions in
+\LaTeX~output}{Short captions in ~output}}\label{short-captions-in-output}}
+
+For latex output, this filter uses the attribute \texttt{short-caption}
+for figures so that the attribute value appears in the List of Figures,
+if one is desired.
+
+\hypertarget{usage}{%
+\section{Usage}\label{usage}}
+
+Where you would have a figure in, say, markdown as
+
+\begin{verbatim}
+![The caption](foo.png )
+\end{verbatim}
+
+You can now specify the figure as
+
+\begin{verbatim}
+![The long caption](foo.png){short-caption="a short caption"}
+\end{verbatim}
+
+If the document metadata includes \texttt{lof:true}, then the List of
+Figures will use the short caption. This is particularly useful for
+students writing dissertations, who often have to include a List of
+Figures in the front matter, but where figure captions themselves can be
+quite lengthy.
+
+\begin{verbatim}
+pandoc --lua-filter=short-captions.lua article.md -o article.tex
+
+pandoc --lua-filter=short-captions.lua article.md -o article.pdf
+\end{verbatim}
+
+\hypertarget{example}{%
+\section{Example}\label{example}}
+
+Fig.~\ref{fig:shortcap} is an interesting figure with a long caption,
+but a short caption in the List of Figures.
+
+\hypertarget{fig:shortcap}{%
+\begin{figure}
+\centering
+\includegraphics[width=0.5\textwidth,height=\textheight]{fig.pdf}
+\caption[{A short caption with math \(x^n + y^n = z^n\)}]{This is an
+\emph{extremely} interesting figure that has a lot of detail I will need
+to describe in a few sentences. This figure has a short caption that
+will appear in the list of figures. Other attributes are preserved}
+\label{fig:shortcap}
+\end{figure}
+}
+
+\hypertarget{limitations}{%
+\section{Limitations}\label{limitations}}
+
+\begin{itemize}
+\tightlist
+\item
+ The filter will process the \texttt{short-caption} attribute value as
+ pandoc markdown, regardless of the input format.
+\item
+ It does not work for tables and listings yet.
+\item
+ But it works with pandoc-crossref, regardless of the order of
+ application.
+\end{itemize}
diff --git a/paper/lua-filters/short-captions/fig.pdf b/paper/lua-filters/short-captions/fig.pdf
new file mode 100644
index 0000000..cac7f39
Binary files /dev/null and b/paper/lua-filters/short-captions/fig.pdf differ
diff --git a/paper/lua-filters/short-captions/short-captions.lua b/paper/lua-filters/short-captions/short-captions.lua
new file mode 100644
index 0000000..9aaf309
--- /dev/null
+++ b/paper/lua-filters/short-captions/short-captions.lua
@@ -0,0 +1,37 @@
+-- This filter only acts when the output format is exactly "latex".
+-- For any other target the script returns here, before any filter
+-- function has been defined.
+if FORMAT ~= "latex" then
+ return
+end
+
+--- Wrap a string of LaTeX source in a raw inline element.
+-- @param str LaTeX code to pass through verbatim
+-- @return a pandoc.RawInline of format `latex` holding `str`
+local function latex(str)
+  local raw_inline = pandoc.RawInline('latex', str)
+  return raw_inline
+end
+
+--- Return the image of a figure paragraph, or nil.
+-- The first inline of `elem` is returned when it is an Image whose title
+-- is exactly 'fig:' (the marker this filter uses to recognise a figure).
+-- Elements without content, or with an empty content list, yield nil
+-- instead of raising an error.
+-- @param elem element with an optional `content` list
+-- @return the Image element, or nil if `elem` is not a figure paragraph
+function figure_image (elem)
+  local image = elem.content and elem.content[1]
+  if not image then
+    -- nothing to inspect: no content field or no first inline
+    return nil
+  end
+  if image.t == 'Image' and image.title == 'fig:' then
+    return image
+  end
+  return nil
+end
+
+--- Rewrite a figure paragraph so its `short-caption` attribute becomes the
+-- optional argument of `\caption`.
+-- The paragraph is left untouched (nil is returned) when it is not a
+-- figure, when the image has no caption or no `short-caption` attribute,
+-- or when the attribute value yields no inline content (e.g.
+-- `short-caption=""`).
+-- @param para the Para element handed over by pandoc
+-- @return a replacement Para made of raw LaTeX plus the image, or nil
+function Para (para)
+  local img = figure_image(para)
+  if not img or not img.caption or not img.attributes['short-caption'] then
+    return nil
+  end
+
+  -- The attribute value is parsed with pandoc.read's default reader. An
+  -- empty value produces no blocks, so guard before reading the content.
+  local short_block = pandoc.read(img.attributes['short-caption']).blocks[1]
+  local short_inlines = short_block and short_block.content
+  if not short_inlines then
+    return nil
+  end
+  local short_caption = pandoc.Span(short_inlines)
+
+  -- Default wrapper for figures without an identifier: a bare group.
+  -- This literal is not passed through string.format, hence a single '%'.
+  local hypertarget = "{%\n"
+  local label = "\n"
+  -- Only emit \hypertarget/\label when there is an identifier to refer to;
+  -- an empty identifier would otherwise produce \hypertarget{} and \label{}.
+  if img.identifier ~= "" and img.identifier ~= img.title then
+    hypertarget = string.format("\\hypertarget{%s}{%%\n",img.identifier)
+    label = string.format("\n\\label{%s}",img.identifier)
+  end
+  return pandoc.Para {
+    latex(hypertarget .. "\\begin{figure}\n\\centering\n"),
+    img,
+    latex("\n\\caption["), short_caption, latex("]"), pandoc.Span(img.caption),
+    latex(label .."\n\\end{figure}\n}\n")
+  }
+end
diff --git a/paper/lua-filters/spellcheck/Makefile b/paper/lua-filters/spellcheck/Makefile
new file mode 100644
index 0000000..9d51bff
--- /dev/null
+++ b/paper/lua-filters/spellcheck/Makefile
@@ -0,0 +1,2 @@
+test:
+ @pandoc --lua-filter=spellcheck.lua sample.md | sort | diff --strip-trailing-cr -u expected.txt -
diff --git a/paper/lua-filters/spellcheck/README.md b/paper/lua-filters/spellcheck/README.md
new file mode 100644
index 0000000..5f5d6a3
--- /dev/null
+++ b/paper/lua-filters/spellcheck/README.md
@@ -0,0 +1,42 @@
+# spellcheck
+
+This filter checks the spelling of words in the body of the
+document (omitting metadata). The external program `aspell` is
+used for the checking and must be available on your `PATH`.
+
+Why use this instead of just running `aspell` on the
+document's source? Because this filter is sensitive to
+the semantics of the document in ways that `aspell` is
+not:
+
+- Material in code spans, raw HTML, URLs in links,
+ and math is not spell-checked, eliminating a big
+ class of false positives.
+
+- The filter is sensitive to the `lang` specified in
+ the document's metadata; this will be treated as the
+ default language for the document.
+
+- It is also sensitive to `lang` attributes on native
+ divs and spans. Thus, for example, in an English
+ document, `[chevaux]{lang=fr}` will not be registered
+ as a spelling error.
+
+To run it,
+
+ pandoc --lua-filter spellcheck.lua sample.md
+
+A list of misspelled words (or at any rate, words not
+in the appropriate dictionary) will be printed to stdout.
+If the word is in a div or span with a non-default `lang`
+attribute, the relevant language will be indicated in
+brackets after the word, separated by a tab.
+
+To add words to the list for a language, you can add files
+with names `.aspell.LANG.pws` in your home directory. Example:
+
+```
+% cat ~/.aspell.en.pws
+personal_ws-1.1 en 0
+goopy
+```
diff --git a/paper/lua-filters/spellcheck/expected.txt b/paper/lua-filters/spellcheck/expected.txt
new file mode 100644
index 0000000..dd973c8
--- /dev/null
+++ b/paper/lua-filters/spellcheck/expected.txt
@@ -0,0 +1,2 @@
+missspeling [en]
+summer
diff --git a/paper/lua-filters/spellcheck/sample.md b/paper/lua-filters/spellcheck/sample.md
new file mode 100644
index 0000000..31f7834
--- /dev/null
+++ b/paper/lua-filters/spellcheck/sample.md
@@ -0,0 +1,15 @@
+---
+lang: fr-FR
+...
+
+Ces sont des mots français.
+Mais pas summer.
+
+[This is a sentence in English,
+with one missspeling.]{lang=en}
+
+::: {lang=en}
+Here's a div in English.
+Code is ignored: `baoeuthasoe`{.nolang}.
+So are [URLs](http://example.com/notaword).
+:::
diff --git a/paper/lua-filters/spellcheck/spellcheck.lua b/paper/lua-filters/spellcheck/spellcheck.lua
new file mode 100644
index 0000000..85ae281
--- /dev/null
+++ b/paper/lua-filters/spellcheck/spellcheck.lua
@@ -0,0 +1,70 @@
+-- lua filter for spell checking: requires 'aspell'.
+-- Copyright (C) 2017-2019 John MacFarlane, released under MIT license
+
+local text = require('text')
+local words = {}
+local deflang
+
+--- Remember that word `t` occurred in text of language `lang`.
+-- `words` maps a language to a table keyed by word; a word is stored once
+-- with the value 1 and later occurrences leave the entry unchanged.
+-- @param lang language code used as key into `words`
+-- @param t the word to record
+local function add_to_dict(lang, t)
+  local bucket = words[lang]
+  if not bucket then
+    bucket = {}
+    words[lang] = bucket
+  end
+  if not bucket[t] then
+    bucket[t] = 1
+  end
+end
+
+--- Determine the document's default language from its metadata.
+-- Stores the result in the file-level `deflang`, falling back to 'en' when
+-- `lang` is missing or no text can be extracted from it.
+-- @param meta the document metadata
+-- @return an empty table, which replaces the metadata so that it does not
+--   get spellchecked
+local function get_deflang(meta)
+  local lang = meta.lang
+  local found
+  if type(lang) == 'string' then
+    -- plain string value; previously ignored because indexing a string
+    -- with [1] yields nil
+    found = lang
+  elseif type(lang) == 'table' or type(lang) == 'userdata' then
+    -- list of inlines: take the text of the first element. `.text` is the
+    -- accessor used on Str elements elsewhere in this file; `.c` is kept
+    -- as a fallback for the pandoc versions the original code targeted.
+    local first = lang[1]
+    found = first and (first.text or first.c)
+  end
+  -- pandoc.utils.stringify(meta.lang) would be the nicer way to do this,
+  -- but it won't work in pandoc 2.0.6 (it requires pandoc commit
+  -- ecc46e229fde934f163d1f646383d24bfe2039e1).
+  if type(found) ~= 'string' or found == '' then
+    found = 'en'
+  end
+  deflang = found
+  return {} -- eliminate meta so it doesn't get spellchecked
+end
+
+--- Spell-check all words collected for one language and print the rejects.
+-- The words stored under `words[lang]` are sent, one per line, to
+-- `aspell list -l <lang>`; every non-empty line of its output is written
+-- to stdout. When `lang` differs from the default language, the word is
+-- followed by a tab and the language in square brackets.
+-- @param lang language code; must be a key of `words`
+local function run_spellcheck(lang)
+  local keys = {}
+  local wordlist = words[lang]
+  for k,_ in pairs(wordlist) do
+    keys[#keys + 1] = k
+  end
+  local inp = table.concat(keys, '\n')
+  local outp = pandoc.pipe('aspell', {'list','-l',lang}, inp)
+  -- Take whole lines rather than runs of %a: %a does not cover the bytes of
+  -- accented letters or apostrophes, so such words were reported only in
+  -- part. Requiring at least one character also skips blank lines.
+  for w in string.gmatch(outp, "([^\r\n]+)") do
+    io.write(w)
+    if lang ~= deflang then
+      io.write("\t[" .. lang .. "]")
+    end
+    io.write("\n")
+  end
+end
+
+--- Final step of the filter: collect, check, report, and stop.
+-- Records every Str found in the document's blocks under the default
+-- language, runs the spell checker once per collected language, and then
+-- terminates the process with exit status 0.
+-- @param el the Pandoc document
+local function results(el)
+  local collect_default = {
+    Str = function(e) add_to_dict(deflang, e.text) end
+  }
+  pandoc.walk_block(pandoc.Div(el.blocks), collect_default)
+  for lang in pairs(words) do
+    run_spellcheck(lang)
+  end
+  os.exit(0)
+end
+
+-- Record the text of a Str element under the default language.
+-- NOTE(review): not referenced by the filter list returned at the end of
+-- this file, which uses an inline function for Str instead.
+local function checkstr(el)
+ add_to_dict(deflang, el.text)
+end
+
+--- Collect the words of a span that carries its own `lang` attribute.
+-- A span without `lang` is left alone (nil is returned). Otherwise every
+-- Str inside it is recorded under that language and an empty list is
+-- returned.
+-- @param el the Span element
+-- @return nil, or an empty table
+local function checkspan(el)
+  local span_lang = el.attributes.lang
+  if span_lang then
+    local collector = {
+      Str = function(s) add_to_dict(span_lang, s.text) end
+    }
+    pandoc.walk_inline(el, collector)
+    return {} -- remove span, so it doesn't get checked again
+  end
+  return nil
+end
+
+--- Collect the words of a div that carries its own `lang` attribute.
+-- A div without `lang` is left alone (nil is returned). Otherwise every
+-- Str inside it is recorded under that language and an empty list is
+-- returned.
+-- @param el the Div element
+-- @return nil, or an empty table
+local function checkdiv(el)
+  local div_lang = el.attributes.lang
+  if div_lang then
+    local collector = {
+      Str = function(s) add_to_dict(div_lang, s.text) end
+    }
+    pandoc.walk_block(el, collector)
+    return {} -- remove div, so it doesn't get checked again
+  end
+  return nil
+end
+
+-- The script returns a list of three filters:
+--   1. Meta: get_deflang sets the default language and returns an empty
+--      table in place of the metadata;
+--   2. Div/Span: checkdiv and checkspan record the words of elements that
+--      have a `lang` attribute and return an empty list for them;
+--   3. Str/Pandoc: Str texts are recorded under the default language, and
+--      `results` runs the checker and exits.
+return {{Meta = get_deflang},
+ {Div = checkdiv, Span = checkspan},
+ {Str = function(e) add_to_dict(deflang, e.text) end, Pandoc = results}}
diff --git a/paper/lua-filters/table-short-captions/Makefile b/paper/lua-filters/table-short-captions/Makefile
new file mode 100644
index 0000000..3df47b1
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/Makefile
@@ -0,0 +1,24 @@
+LF = --lua-filter=table-short-captions.lua
+F = -F pandoc-crossref
+
+test: sample.md
+ @pandoc -s $(LF) -t native $< | \
+ diff -u expected-sample.native -
+
+test-with-crossref: sample.md
+ @pandoc -s $(LF) $(F) -t latex $< | \
+ diff -u expected-sample.tex -
+
+README.pdf: README.md
+ @pandoc $(LF) $(F) $< -o $@
+
+sample.tex: sample.md
+ @pandoc -s $(LF) $(F) -t latex $< -o $@
+
+sample.pdf: sample.md
+ @pandoc -s $(LF) $(F) -t latex $< -o $@
+
+# Remove generated LaTeX/PDF build artefacts. `-f` makes rm succeed when a
+# file is absent, so its verbose output no longer has to be piped into
+# `true`, which discarded it and could kill rm with SIGPIPE.
+clean:
+	rm -fv *.aux *.dvi *.fdb_latexmk *.fls *.log *.lot *.ps *.pdf sample.tex
+
+.PHONY: test test-with-crossref clean
diff --git a/paper/lua-filters/table-short-captions/README.md b/paper/lua-filters/table-short-captions/README.md
new file mode 100644
index 0000000..853b809
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/README.md
@@ -0,0 +1,66 @@
+---
+title: "table-short-captions.lua"
+---
+
+# Short captions in \LaTeX\ tables output
+
+For LaTeX output, this filter enables use of the attribute
+`short-caption` for tables. The attribute value will appear in the List
+of Tables.
+
+This filter also enables the class `.unlisted` for tables. This will
+prevent the table caption from appearing in the List of Tables.
+
+# Usage
+
+In Pandoc Markdown, you can add a caption to a table with
+
+ Table: This is the *italicised long caption* of my table, which has
+ a very long caption.
+
+If the document metadata includes `lot: true`, then the List of Tables
+will be inserted at the beginning of the document.
+
+The [pandoc-crossref](http://lierdakil.github.io/pandoc-crossref/)
+filter extends this, and enables you to specify a custom label for the
+table.
+
+ Table: This is the *italicised long caption* of my table, which has
+ a very long caption. {#tbl:full-of-juicy-data}
+
+This filter, when run _before_ pandoc-crossref, allows you to add short
+captions to the table as a `short-caption` attribute. What is between
+the quotes will be parsed as Markdown.
+
+**Important:** You _must_ put empty square brackets (`[]`) directly
+before the attribute block.
+
+ Table: This is the *italicised long caption* of my table, which has
+ a very long caption.
+ []{#tbl:full-of-juicy-data short-caption="Short caption for *juicy* data table."}
+
+Alternatively, if you wish to create a table which is unlisted in the
+List of Tables, you can use the `.unlisted` class in the attributes tag.
+
+ Table: This is the *italicised long caption* of my table, which will
+ not appear in the List of Tables. []{#tbl:full-of-juicy-data .unlisted}
+
+This filter should prove useful for students writing dissertations, who
+often have to include a List of Tables in the front matter, but where
+table captions themselves can be quite lengthy.
+
+ pandoc --lua-filter=table-short-captions.lua \
+ --filter pandoc-crossref \
+ article.md -o article.tex
+
+ pandoc --lua-filter=table-short-captions.lua \
+ --filter pandoc-crossref \
+ article.md -o article.pdf
+
+
+# Limitations
+
+- The filter will process the `short-caption` attribute value as pandoc
+ markdown, regardless of the input format.
+- pandoc-crossref should be run after it.
+- I have only tested this from a Markdown source.
diff --git a/paper/lua-filters/table-short-captions/expected-sample.native b/paper/lua-filters/table-short-captions/expected-sample.native
new file mode 100644
index 0000000..c387ebe
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/expected-sample.native
@@ -0,0 +1,68 @@
+Pandoc (Meta {unMeta = fromList [("lot",MetaBool True),("title",MetaInlines [Str "Tests",Space,Str "for",Space,Str "table-short-captions.lua"])]})
+[Para [Str "These",Space,Str "tests",Space,Str "are",Space,Str "written",Space,Str "so",Space,Str "that",Space,Str "if",Space,Strong [Str "bold",Space,Str "font"],Space,Str "appears",Space,Str "in",Space,Str "the",Space,Str "LOT,",Space,Str "something",Space,Str "is",Space,Str "wrong."]
+,Para [Str "The",Space,Str "tests",Space,Str "are",Space,Str "split",Space,Str "into",Space,Str "two:",Space,Str "expected",Space,Str "uses,",Space,Str "and",Space,Str "non-standard",Space,Str "uses/errors.",LineBreak,Str "The",Space,Str "non-standard",Space,Str "uses",Space,Str "are",Space,Str "presented",Space,Str "in",Space,Str "this",Space,Str "document",Space,Str "for",Space,Str "troubleshooting",Space,Str "purposes,",Space,Str "and",Space,Str "to",Space,Str "ensure",Space,Str "the",Space,Str "filter",Space,Str "doesn\8217t",Space,Str "crash",Space,Str "in",Space,Str "corner",Space,Str "cases."]
+,Header 1 ("standard-usage",[],[]) [Str "Standard",Space,Str "usage"]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl1,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label."] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl2,",Space,Str "in",Space,Str "standard",Space,Code ("",[],[]) "pandoc-crossref",Space,Str "form.",Space,Str "{#tbl:tbl-label2}"] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl3,",Space,Str "which",Space,Str "is",Space,Strong [Str "unlisted"],Str ".",Space,Span ("tbl:tbl-label3",["unlisted"],[]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl4,",Space,Str "which",Space,Str "has",Space,Str "an",Space,Strong [Str "overriding"],Space,Str "short-caption.",Space,Str "This",Space,Str "is",Space,Str "the",Space,Str "expected",Space,Str "usage.",Space,Span ("tbl:tbl-label4",[],[("short-caption","Table 4 *short* capt.")]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Header 1 ("non-standard-usageerrors",[],[]) [Str "Non-standard",Space,Str "usage/errors"]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl5,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label,",Space,Str "but",Space,Str "does",Space,Str "have",Space,Str "empty",Space,Str "braces",Space,Str "at",Space,Str "the",Space,Str "end.",Space,Str "{}"] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl6,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label,",Space,Str "but",Space,Str "does",Space,Str "have",Space,Str "an",Space,Str "empty",Space,Str "span",Space,Str "at",Space,Str "the",Space,Str "end.",Space,Span ("",[],[]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl7,",Space,Str "which",Space,Str "is",Space,Str "improperly",Space,Str "formatted,",Space,Str "and",Space,Str "will",Space,Str "appear",Space,Str "in",Space,Str "the",Space,Str "list",Space,Str "of",Space,Str "tables.",Space,Str "This",Space,Str "filter",Space,Str "requires",Space,Str "that",Space,Code ("",[],[]) ".unlisted",Space,Str "is",Space,Str "placed",Space,Str "in",Space,Str "a",Space,Str "span.",Space,Str "{#tbl:tbl-label7",Space,Str ".unlisted}"] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl8,",Space,Str "which",Space,Str "has",Space,Str "an",Space,Str "empty",Space,Str "short-caption.",Space,Str "An",Space,Str "empty",Space,Str "short-caption",Space,Str "does",Space,Str "nothing.",Space,Str "The",Space,Str "long",Space,Str "caption",Space,Str "will",Space,Str "still",Space,Str "be",Space,Str "used.",Space,Span ("tbl:tbl-label8",[],[("short-caption","")]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]
+,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl9,",Space,Str "which",Space,Str "is",Space,Strong [Str "unlisted"],Str ",",Space,Str "yet",Space,Str "has",Space,Str "a",Space,Str "short-caption.",Space,Span ("tbl:tbl-label9",["unlisted"],[("short-caption","Table 9 **unlisted** *short* capt.")]) []] [AlignDefault,AlignDefault] [0.0,0.0]
+ [[Plain [Str "cola"]]
+ ,[Plain [Str "colb"]]]
+ [[[Plain [Str "a1"]]
+ ,[Plain [Str "b1"]]]
+ ,[[Plain [Str "a2"]]
+ ,[Plain [Str "b2"]]]]]
diff --git a/paper/lua-filters/table-short-captions/expected-sample.tex b/paper/lua-filters/table-short-captions/expected-sample.tex
new file mode 100644
index 0000000..65a2073
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/expected-sample.tex
@@ -0,0 +1,291 @@
+% Options for packages loaded elsewhere
+\PassOptionsToPackage{unicode=true}{hyperref}
+\PassOptionsToPackage{hyphens}{url}
+%
+\documentclass[
+]{article}
+\usepackage{lmodern}
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[T1]{fontenc}
+ \usepackage[utf8]{inputenc}
+ \usepackage{textcomp} % provides euro and other symbols
+\else % if luatex or xelatex
+ \usepackage{unicode-math}
+ \defaultfontfeatures{Scale=MatchLowercase}
+ \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
+\fi
+% Use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+\IfFileExists{microtype.sty}{% use microtype if available
+ \usepackage[]{microtype}
+ \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+\makeatletter
+\@ifundefined{KOMAClassName}{% if non-KOMA class
+ \IfFileExists{parskip.sty}{%
+ \usepackage{parskip}
+ }{% else
+ \setlength{\parindent}{0pt}
+ \setlength{\parskip}{6pt plus 2pt minus 1pt}}
+}{% if KOMA class
+ \KOMAoptions{parskip=half}}
+\makeatother
+\usepackage{xcolor}
+\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
+\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
+\hypersetup{
+ pdftitle={Tests for table-short-captions.lua},
+ hidelinks,
+}
+\urlstyle{same} % disable monospaced font for URLs
+\usepackage{longtable,booktabs}
+% Allow footnotes in longtable head/foot
+\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
+\makesavenoteenv{longtable}
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+ \let\oldparagraph\paragraph
+ \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+ \let\oldsubparagraph\subparagraph
+ \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+
+% Set default figure placement to htbp
+\makeatletter
+\def\fps@figure{htbp}
+\makeatother
+
+% -- begin:latex-table-short-captions --
+\makeatletter\AtBeginDocument{%
+\def\LT@c@ption#1[#2]#3{% % Overwrite the workhorse macro used in formatting a longtable caption.
+ \LT@makecaption#1\fnum@table{#3}%
+ \ifdefined\pandoctableshortcapt % If pandoctableshortcapt is defined (even if blank), we should override default behaviour.
+ \let\@tempa\pandoctableshortcapt% % (Use let, we don't want to expand pandoctableshortcapt!)
+ \else % If not, fall back to default behaviour
+ \def\@tempa{#2}% % (Use the argument in square brackets)
+ \fi
+ \ifx\@tempa\@empty\else % If @tempa is blank, no lot entry! Otherwise, @tempa becomes the lot title.
+ {\let\\\space
+ \addcontentsline{lot}{table}{\protect\numberline{\thetable}{\@tempa}}}%
+ \fi}
+}\makeatother
+% -- end:latex-table-short-captions --
+\makeatletter
+\@ifpackageloaded{subfig}{}{\usepackage{subfig}}
+\@ifpackageloaded{caption}{}{\usepackage{caption}}
+\captionsetup[subfloat]{margin=0.5em}
+\AtBeginDocument{%
+\renewcommand*\figurename{Figure}
+\renewcommand*\tablename{Table}
+}
+\AtBeginDocument{%
+\renewcommand*\listfigurename{List of Figures}
+\renewcommand*\listtablename{List of Tables}
+}
+\@ifpackageloaded{float}{}{\usepackage{float}}
+\floatstyle{ruled}
+\@ifundefined{c@chapter}{\newfloat{codelisting}{h}{lop}}{\newfloat{codelisting}{h}{lop}[chapter]}
+\floatname{codelisting}{Listing}
+\newcommand*\listoflistings{\listof{codelisting}{List of Listings}}
+\makeatother
+
+\title{Tests for table-short-captions.lua}
+\date{}
+
+\begin{document}
+\maketitle
+
+\listoftables
+These tests are written so that if \textbf{bold font} appears in the
+LOT, something is wrong.
+
+The tests are split into two: expected uses, and non-standard
+uses/errors.\\
+The non-standard uses are presented in this document for troubleshooting
+purposes, and to ensure the filter doesn't crash in corner cases.
+
+\hypertarget{standard-usage}{%
+\section{Standard usage}\label{standard-usage}}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl1, which does
+not have a label.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\hypertarget{tbl:tbl-label2}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label2}This is the \emph{italicised long
+caption} of tbl2, in standard \texttt{pandoc-crossref}
+form.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\def\pandoctableshortcapt{} % .unlisted
+
+\hypertarget{tbl:tbl-label3}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label3}This is the \emph{italicised long
+caption} of tbl3, which is \textbf{unlisted}.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\undef\pandoctableshortcapt
+
+\def\pandoctableshortcapt{Table 4 \emph{short} capt.}
+
+\hypertarget{tbl:tbl-label4}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label4}This is the \emph{italicised long
+caption} of tbl4, which has an \textbf{overriding} short-caption. This
+is the expected usage.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\undef\pandoctableshortcapt
+
+\hypertarget{non-standard-usageerrors}{%
+\section{Non-standard usage/errors}\label{non-standard-usageerrors}}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl5, which does
+not have a label, but does have empty braces at the end.
+\{\}}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl6, which does
+not have a label, but does have an empty span at the end.
+}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\begin{longtable}[]{@{}ll@{}}
+\caption{This is the \emph{italicised long caption} of tbl7, which is
+improperly formatted, and will appear in the list of tables. This filter
+requires that \texttt{.unlisted} is placed in a span. \{\#tbl:tbl-label7
+.unlisted\}}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\hypertarget{tbl:tbl-label8}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label8}This is the \emph{italicised long
+caption} of tbl8, which has an empty short-caption. An empty
+short-caption does nothing. The long caption will still be
+used.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\def\pandoctableshortcapt{} % .unlisted
+
+\hypertarget{tbl:tbl-label9}{}
+\begin{longtable}[]{@{}ll@{}}
+\caption{\label{tbl:tbl-label9}This is the \emph{italicised long
+caption} of tbl9, which is \textbf{unlisted}, yet has a
+short-caption.}\tabularnewline
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endfirsthead
+\toprule
+cola & colb\tabularnewline
+\midrule
+\endhead
+a1 & b1\tabularnewline
+a2 & b2\tabularnewline
+\bottomrule
+\end{longtable}
+
+\undef\pandoctableshortcapt
+
+\end{document}
diff --git a/paper/lua-filters/table-short-captions/sample.md b/paper/lua-filters/table-short-captions/sample.md
new file mode 100644
index 0000000..74c27fb
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/sample.md
@@ -0,0 +1,84 @@
+---
+title: "Tests for table-short-captions.lua"
+lot: true
+---
+
+These tests are written so that if **bold font** appears in the LOT, something is wrong.
+
+The tests are split into two: expected uses, and non-standard uses/errors.
+The non-standard uses are presented in this document for troubleshooting purposes, and to ensure the filter doesn't crash in corner cases.
+
+# Standard usage
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl1, which does not have a label.
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl2, in standard `pandoc-crossref` form. {#tbl:tbl-label2}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl3, which is **unlisted**. []{#tbl:tbl-label3 .unlisted}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl4, which has an **overriding** short-caption. This is the expected usage. []{#tbl:tbl-label4 short-caption="Table 4 *short* capt."}
+
+
+# Non-standard usage/errors
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl5, which does not have a label, but does have empty braces at the end. {}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl6, which does not have a label, but does have an empty span at the end. []{}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl7, which is improperly formatted, and will appear in the list of tables. This filter requires that `.unlisted` is placed in a span. {#tbl:tbl-label7 .unlisted}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl8, which has an empty short-caption. An empty short-caption does nothing. The long caption will still be used. []{#tbl:tbl-label8 short-caption=""}
+
+
+| cola | colb |
+| ---- | ---- |
+| a1 | b1 |
+| a2 | b2 |
+
+Table: This is the *italicised long caption* of tbl9, which is **unlisted**, yet has a short-caption. []{#tbl:tbl-label9 .unlisted short-caption="Table 9 **unlisted** *short* capt."}
diff --git a/paper/lua-filters/table-short-captions/table-short-captions.lua b/paper/lua-filters/table-short-captions/table-short-captions.lua
new file mode 100644
index 0000000..6f4970b
--- /dev/null
+++ b/paper/lua-filters/table-short-captions/table-short-captions.lua
@@ -0,0 +1,160 @@
+---LaTeXTableShortCapts – enable `.unlisted` and `short-caption=""` properties
+-- for Pandoc conversion to LaTeX
+
+--[[
+Copyright (c) 2019 Blake Riley
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+]]
+local List = require 'pandoc.List'
+
+-- Only LaTeX output needs this filter: for any other target format, return an
+-- empty filter list so that no filter function is run at all. The comparison
+-- is exact, so only FORMAT == "latex" enables the filter.
+if FORMAT ~= "latex" then
+ return {}
+end
+
+--- LaTeX code for injection into the document header
+-- (appended to header-includes by add_longtable_caption_mod).
+-- At \begin{document} it redefines longtable's internal \LT@c@ption so that
+-- the list-of-tables title is taken from \pandoctableshortcapt when that macro
+-- is defined, and from the argument in square brackets otherwise; an empty
+-- title suppresses the list-of-tables entry altogether.
+-- Note: assigned without `local`, so this is a global variable.
+longtable_caption_mod = [[
+% -- begin:latex-table-short-captions --
+\makeatletter\AtBeginDocument{%
+\def\LT@c@ption#1[#2]#3{% % Overwrite the workhorse macro used in formatting a longtable caption.
+ \LT@makecaption#1\fnum@table{#3}%
+ \@ifundefined{pandoctableshortcapt}
+ {\def\@tempa{#2}} % Use default behaviour: argument in square brackets
+ {\let\@tempa\pandoctableshortcapt} % If defined (even if blank), use to override
+ \ifx\@tempa\@empty\else % If @tempa is blank, no lot entry! Otherwise, @tempa becomes the lot title.
+ {\let\\\space
+ \addcontentsline{lot}{table}{\protect\numberline{\thetable}{\@tempa}}}%
+ \fi}
+}\makeatother
+% -- end:latex-table-short-captions --
+]]
+
+--- Creates a def shortcaption block to be placed before the table.
+-- The block renders as `\def\pandoctableshortcapt{<short caption>}`. When no
+-- short caption is given, the macro body is left empty and a trailing
+-- `% .unlisted` TeX comment is appended.
+-- @tparam ?string sc : The short-caption property value (parsed with
+--                      pandoc.read so inline markup is preserved)
+-- @treturn Plain : The def shortcaption block
+local function defshortcapt(sc)
+  local scblock = List:new{}
+  scblock:extend {pandoc.RawInline('tex', "\\def\\pandoctableshortcapt{")}
+  if sc then
+    local parsed = pandoc.read(sc)
+    local first = parsed.blocks[1]
+    if first and (first.t == 'Para' or first.t == 'Plain') then
+      -- `content` is the documented accessor for a paragraph's inlines.
+      scblock:extend (first.content)
+    else
+      -- Nothing was parsed (e.g. a whitespace-only value) or the text did not
+      -- parse as a paragraph: fall back to its plain-text rendering instead of
+      -- indexing a missing or non-inline block.
+      local text = pandoc.utils.stringify(parsed)
+      if #text > 0 then
+        scblock:extend {pandoc.Str(text)}
+      end
+    end
+  end
+  scblock:extend {pandoc.RawInline('tex', "}")}
+  if not sc then
+    scblock:extend {pandoc.RawInline('tex', " % .unlisted")}
+  end
+  return pandoc.Plain(scblock)
+end
+
+--- The undef shortcaption block to be placed after the table.
+-- It \let's \pandoctableshortcapt to \relax rather than truly undefining it;
+-- presumably the \@ifundefined test in longtable_caption_mod then treats the
+-- macro as undefined again -- confirm against the LaTeX kernel documentation.
+local undefshortcapt = pandoc.RawBlock('tex', "\\let\\pandoctableshortcapt\\relax")
+
+--- Reads the pieces needed for a short caption from a mock "Table Attr".
+-- The Attr of an empty Span in the caption stands in for a Table Attr.
+-- @tparam Attr attr : The Attr of the property Span in the table caption
+-- @treturn ?string : The identifier, or nil when it is empty
+-- @treturn ?string : The non-empty "short-caption" property; always nil for
+--                    unlisted tables
+-- @treturn bool : Whether ".unlisted" appeared in the classes
+local function parse_table_attrs(attr)
+  -- An empty identifier counts as "no label".
+  local id = attr.identifier
+  local label = (id and #id > 0) and id or nil
+
+  -- Normalise the class lookup to a strict boolean.
+  local unlisted = attr.classes:includes("unlisted") and true or false
+
+  -- ".unlisted" wins: the short-caption property is not consulted in that case.
+  local short_caption = nil
+  if not unlisted then
+    local sc = attr.attributes["short-caption"]
+    if sc and #sc > 0 then
+      short_caption = sc
+    end
+  end
+
+  return label, short_caption, unlisted
+end
+
+--- Wraps a table with shortcaption code.
+-- The first empty Span in the caption is treated as the table's property
+-- block: it is removed from the caption, its identifier is re-appended as a
+-- literal `{#label}` string, and the table is surrounded by the blocks that
+-- define and reset \pandoctableshortcapt when the table is unlisted or has a
+-- short caption.
+-- @tparam Table tbl : The table with {}-wrapped properties in the caption
+-- @treturn ?List[Blocks] : The table with {label} in the caption,
+--                          optionally wrapped in shortcaption code; nil
+--                          (table left unchanged) when there is no caption or
+--                          no property span
+function rewrite_longtable_caption(tbl)
+  -- Escape if there is no caption present.
+  if not tbl.caption then
+    return nil
+  end
+
+  -- Try to find the properties block
+  local is_properties_span = function (inl)
+    return (inl.t) and (inl.t == "Span")         -- is span
+      and (inl.content) and (#inl.content == 0)  -- is empty span
+  end
+  local propspan, idx = tbl.caption:find_if(is_properties_span)
+
+  -- If we couldn't find properties, escape.
+  if not propspan then
+    return nil
+  end
+
+  -- Otherwise, parse it all
+  local label, short_caption, unlisted = parse_table_attrs(propspan.attr)
+
+  -- Excise the span from the caption. List:remove shifts the following
+  -- inlines down; assigning nil would leave a hole in the list whenever the
+  -- span is not the last element of the caption.
+  tbl.caption:remove(idx)
+
+  -- Put label back into caption for pandoc-crossref
+  if label then
+    tbl.caption:extend {pandoc.Str("{#"..label.."}")}
+  end
+
+  -- Place new table
+  local result = List:new{}
+  if short_caption or unlisted then
+    result:extend {defshortcapt(short_caption)}
+  end
+  result:extend {tbl}
+  if short_caption or unlisted then
+    result:extend {undefshortcapt}
+  end
+  return result
+end
+
+--- Appends longtable_caption_mod to the document's header-includes.
+-- An existing MetaList is extended in place; any other existing value (or
+-- none at all) is first wrapped in a new MetaList.
+-- @tparam Meta meta : The document metadata
+-- @treturn Meta : The document metadata, with replacement LaTeX macro
+--                 in header_includes
+function add_longtable_caption_mod(meta)
+  local existing = meta['header-includes']
+  local header_includes
+  if existing and existing.t == 'MetaList' then
+    header_includes = existing
+  else
+    header_includes = pandoc.MetaList{existing}
+  end
+
+  local mod_block = pandoc.RawBlock('tex', longtable_caption_mod)
+  header_includes[#header_includes + 1] = pandoc.MetaBlocks{mod_block}
+
+  meta['header-includes'] = header_includes
+  return meta
+end
+
+-- Export a single filter: Meta elements get the LaTeX header code added and
+-- Table elements get their captions rewritten.
+return {
+ {
+ Meta = add_longtable_caption_mod,
+ Table = rewrite_longtable_caption,
+ }
+}
diff --git a/paper/lua-filters/track-changes/.gitignore b/paper/lua-filters/track-changes/.gitignore
new file mode 100644
index 0000000..5fdf006
--- /dev/null
+++ b/paper/lua-filters/track-changes/.gitignore
@@ -0,0 +1,2 @@
+/sample.docx
+/sample.pdf
diff --git a/paper/lua-filters/track-changes/Makefile b/paper/lua-filters/track-changes/Makefile
new file mode 100644
index 0000000..990450e
--- /dev/null
+++ b/paper/lua-filters/track-changes/Makefile
@@ -0,0 +1,26 @@
+.PHONY: test clean
+
+## PENDING: ensure that LaTeX output can be compiled to PDF.
+test: sample.md test-track-changes.sh sample.pdf
+ @pandoc -t markdown --wrap=preserve \
+ --lua-filter=track-changes.lua sample.md | \
+ diff --strip-trailing-cr -u - expected_accept.markdown
+ @pandoc -t markdown --wrap=preserve --track-changes=reject \
+ -M trackChanges:reject --lua-filter=track-changes.lua sample.md | \
+ diff --strip-trailing-cr -u - expected_reject.markdown
+ @pandoc -s -t html --wrap=preserve --track-changes=all \
+ -M trackChanges:all --lua-filter=track-changes.lua sample.md | \
+ diff --strip-trailing-cr -u - expected_draft.html
+ @pandoc -M trackChanges:all --track-changes=all --wrap=preserve \
+ --to=latex --lua-filter=track-changes.lua \
+ --standalone sample.md | \
+ sh test-track-changes.sh
+ @rm sample.pdf
+
+sample.pdf: sample.md track-changes.lua
+ @pandoc -M trackChanges:all --track-changes=all \
+ --lua-filter=track-changes.lua \
+ --output $@ $<
+
+clean:
+ rm sample.pdf || true
diff --git a/paper/lua-filters/track-changes/README.md b/paper/lua-filters/track-changes/README.md
new file mode 100644
index 0000000..379b4ff
--- /dev/null
+++ b/paper/lua-filters/track-changes/README.md
@@ -0,0 +1,18 @@
+# Tracks changes in LaTeX and HTML or removes them in other output formats
+
+The Pandoc Docx reader and writer support the track changes feature of MS Word
+(command line parameter `--track-changes=accept|reject|all`).
+
+If `--track-changes=all` was used to read a docx file, tracked changes
+and/or comments are included in the AST as spans. These spans are written
+to every output format other than docx, where they clutter the output.
+
+This Lua filter addresses this problem by interpreting the parameter
+`--track-changes` (pandoc version >= 2.1.1) or the metadata variable
+`trackChanges: accept|reject|all` (set either in a YAML block or with
+`-M`) and accepts/rejects changes and removes comments for all output
+formats including docx. With `--track-changes=all` and HTML or LaTeX
+output, it converts tracked changes and comments to appropriate
+commands (for LaTeX, those provided by the [changes
+package](https://ctan.org/pkg/changes)) and tries to mimic the
+visualization used in MS Word.
diff --git a/paper/lua-filters/track-changes/TODO.md b/paper/lua-filters/track-changes/TODO.md
new file mode 100644
index 0000000..86290ec
--- /dev/null
+++ b/paper/lua-filters/track-changes/TODO.md
@@ -0,0 +1,18 @@
+# Ideas and ToDos
+
+- [ ] nested comments
+- [x] comments across paragraphs
+- [ ] implement `paragraph-insertion`, `paragraph-deletion`
+- [/] implement multiple classes (see https://github.com/jgm/pandoc/issues/4270#issuecomment-358996343)
+- [x] treat comments with multiple paragraphs (see [#4270](https://github.com/jgm/pandoc/issues/4270))
+- [x] track changes in chapter titles
+- [ ] combine Strs after acceptions/rejections
+- [ ] remove track changes from automatic section identifiers
+- [ ] track changes in captions (figure, table, etc.)
+- [ ] color comments with authors color
+- [x] docx sample
+- [x] `PANDOC_READER_OPTIONS.trackChanges`
+- [x] HTML support with `<ins>`, `<del>` (requested with jgm/pandoc#1560) and `<mark>` with title attribute or spans with CSS
+- [ ] HTML track changes decorations as explained at [Comparing and contrasting ins, del, and s](http://html5doctor.com/ins-del-s) or https://github.com/jgm/pandoc/issues/2884#issuecomment-240263921
+- [ ] auto identifiers, be careful on nested/overlapping comments
+- [ ] citations in comment text
\ No newline at end of file
diff --git a/paper/lua-filters/track-changes/expected_accept.markdown b/paper/lua-filters/track-changes/expected_accept.markdown
new file mode 100644
index 0000000..9207571
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_accept.markdown
@@ -0,0 +1,29 @@
+Track changes in LaTeX and HTML
+===============================
+
+A **simple** comment from me.
+
+This is a text with an *exciting* insertion.
+
+This is/was a text with a deletion.
+
+Here is the text to be moved.
+
+Here is a comment with nested changes.
+
+Here is a multi-line paragraph containing some text and a long deletion wrapping over two lines.
+
+This is a new paragraph.
+
+And so is this.
+
+One more.
+
+A *header* with a comment {#a-header-width-a-notecomment}
+=========================
+
+Some unmodified text ...
+
+\newpage
+
+... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
diff --git a/paper/lua-filters/track-changes/expected_draft.html b/paper/lua-filters/track-changes/expected_draft.html
new file mode 100644
index 0000000..d5c3cc5
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_draft.html
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+ Track changes in LaTeX and HTML
+
+
+
+
+
+
Track changes in LaTeX and HTML
+
+
+
Track changes in LaTeX and HTML
+
A simple comment from me.
+
This is a text with an exciting insertion.
+
This is/was a text with a short deletion.
+
Here is the text to be moved.
+
Here is the text to be moved.
+
Here is a comment with nestted changes.
+
Here is a multi-line paragraph containing some text and a long deletion short insertion wrapping over two lines.
+
This is a new paragraph.
+
And so
is this.
+
One more.
+
A header width a comment
+
Some unmodified text …
+
+
… continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
+
+
diff --git a/paper/lua-filters/track-changes/expected_draft.tex b/paper/lua-filters/track-changes/expected_draft.tex
new file mode 100644
index 0000000..87ea1ac
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_draft.tex
@@ -0,0 +1,159 @@
+\PassOptionsToPackage{unicode=true}{hyperref} % options for packages loaded elsewhere
+\PassOptionsToPackage{hyphens}{url}
+%
+\documentclass[
+]{article}
+\usepackage{lmodern}
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[T1]{fontenc}
+ \usepackage[utf8]{inputenc}
+ \usepackage{textcomp} % provides euro and other symbols
+\else % if luatex or xelatex
+ \usepackage{unicode-math}
+ \defaultfontfeatures{Scale=MatchLowercase}
+ \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
+\fi
+% use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+\IfFileExists{microtype.sty}{% use microtype if available
+ \usepackage[]{microtype}
+ \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+\makeatletter
+\@ifundefined{KOMAClassName}{% if non-KOMA class
+ \IfFileExists{parskip.sty}{%
+ \usepackage{parskip}
+ }{% else
+ \setlength{\parindent}{0pt}
+ \setlength{\parskip}{6pt plus 2pt minus 1pt}}
+}{% if KOMA class
+ \KOMAoptions{parskip=half}}
+\makeatother
+\usepackage{xcolor}
+\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
+\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
+\hypersetup{
+ pdftitle={Track changes in LaTeX and HTML},
+ pdfborder={0 0 0},
+ breaklinks=true}
+\urlstyle{same} % don't use monospace font for urls
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+\setcounter{secnumdepth}{-2}
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+ \let\oldparagraph\paragraph
+ \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+ \let\oldsubparagraph\subparagraph
+ \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+
+% set default figure placement to htbp
+\makeatletter
+\def\fps@figure{htbp}
+\makeatother
+
+\RequirePackage[debrief]{silence}
+\ErrorsOff
+\usepackage{fancyhdr}
+\pagestyle{fancy}
+\fancyhf{}
+\fancyhead[C]{\leftmark}
+\usepackage[markup=underlined,authormarkup=none]{changes}
+\definecolor{auth1}{HTML}{4477AA}
+\definecolor{auth2}{HTML}{117733}
+\definecolor{auth3}{HTML}{999933}
+\definecolor{auth4}{HTML}{CC6677}
+\definecolor{auth5}{HTML}{AA4499}
+\definecolor{auth6}{HTML}{332288}
+\usepackage[textsize=scriptsize]{todonotes}
+\setlength{\marginparwidth}{3cm}
+\makeatletter
+\setremarkmarkup{\todo[color=Changes@Color#1!20]{\sffamily\textbf{#1:}~#2}}
+\makeatother
+\newcommand{\note}[2][]{\added[#1,remark={#2}]{}}
+\newcommand\hlnotesingle{%
+ \bgroup
+ \expandafter\def\csname sout\space\endcsname{\bgroup \ULdepth =-.8ex \ULset}%
+ \markoverwith{\textcolor{yellow}{\rule[-.5ex]{.1pt}{2.5ex}}}%
+ \ULon}
+\newcommand\hlnote[1]{\let\helpcmd\hlnotesingle\parhelp#1\par\relax\relax}
+\long\def\parhelp#1\par#2\relax{%
+ \helpcmd{#1}\ifx\relax#2\else\par\parhelp#2\relax\fi%
+}
+
+\makeatletter
+\newcommand\ifmoving{%
+ \ifx\protect\@unexpandable@protect
+ \expandafter\@firstoftwo
+ \else
+ \expandafter\@secondoftwo
+ \fi
+}
+
+\newcommand{\gobbletwo}[2][]{\@bsphack\@esphack}
+\newcommand{\gobbleone}[1][]{\@bsphack\@esphack}
+
+\let\oldadded\added
+\let\olddeleted\deleted
+\let\oldhlnote\hlnote
+\let\oldnote\note
+\renewcommand{\added}{\ifmoving{\gobbleone}{\oldadded}}
+\renewcommand{\deleted}{\ifmoving{\gobbletwo}{\olddeleted}}
+\renewcommand{\hlnote}{\ifmoving{}{\oldhlnote}}
+\renewcommand{\note}{\ifmoving{\gobbletwo}{\oldnote}}
+\makeatother
+\definechangesauthor[name={FKA}, color=auth1]{FKA}
+\definechangesauthor[name={JFK}, color=auth2]{JFK}
+\definechangesauthor[name={Jesse Rosenthal}, color=auth3]{JR}
+\definechangesauthor[name={MCW}, color=auth4]{MCW}
+\definechangesauthor[name={SWS}, color=auth5]{SWS}
+
+\title{Track changes in LaTeX and HTML}
+\date{}
+
+\begin{document}
+\maketitle
+
+{
+\setcounter{tocdepth}{3}
+\tableofcontents
+}
+\hypertarget{track-changes-in-latex-and-html}{%
+\section{Track changes in LaTeX and HTML}\label{track-changes-in-latex-and-html}}
+
+A \note[id=MCW]{I agree!}\hlnote{\textbf{simple}} comment from me.
+
+This is a text with \added[id=MCW]{an exciting} insertion.
+
+This is/was a text with a \deleted[id=SWS]{short} deletion.
+
+\added[id=FKA]{Here is the text to be moved.}
+
+\deleted[id=JFK]{Here is the text to be moved.}
+
+Here is a \note[id=JFK]{Why?}\hlnote{com\added[id=SWS]{m}ent with nest\deleted[id=FKA]{t}ed changes}.
+
+Here is a multi-line paragraph containing some text and a long deletion \deleted[id=MCW]{short insertion} wrapping over two lines.
+
+This is \note[id=MCW]{A comment across paragraphs.}\hlnote{a new paragraph.
+
+And so} is this.
+
+One \note[id=JR]{This one has multiple paragraphs. \newline \newline See?}\hlnote{more}.
+
+\hypertarget{a-header-width-a-notecomment}{%
+\section{\texorpdfstring{A \emph{header} wi\deleted[id=FKA]{d}th \added[id=JFK]{a} \note[id=FKA]{Note}\hlnote{comment}}{A header with comment}}\label{a-header-width-a-notecomment}}
+
+Some unmodified text \ldots{}
+
+\newpage
+
+\ldots{} continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
+
+\end{document}
diff --git a/paper/lua-filters/track-changes/expected_reject.markdown b/paper/lua-filters/track-changes/expected_reject.markdown
new file mode 100644
index 0000000..5059f34
--- /dev/null
+++ b/paper/lua-filters/track-changes/expected_reject.markdown
@@ -0,0 +1,29 @@
+Track changes in LaTeX and HTML
+===============================
+
+A **simple** comment from me.
+
+This is a text with insertion.
+
+This is/was a text with a *short* deletion.
+
+Here is the text to be moved.
+
+Here is a coment with nestted changes.
+
+Here is a multi-line paragraph containing some text and a long deletion short insertion wrapping over two lines.
+
+This is a new paragraph.
+
+And so is this.
+
+One more.
+
+A *header* width comment {#a-header-width-a-notecomment}
+========================
+
+Some unmodified text ...
+
+\newpage
+
+... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
diff --git a/paper/lua-filters/track-changes/sample.md b/paper/lua-filters/track-changes/sample.md
new file mode 100644
index 0000000..42493ed
--- /dev/null
+++ b/paper/lua-filters/track-changes/sample.md
@@ -0,0 +1,43 @@
+---
+title: Track changes in LaTeX and HTML
+toc: true
+header-includes: |
+ ```{=latex}
+ \RequirePackage[debrief]{silence}
+ \ErrorsOff
+ \usepackage{fancyhdr}
+ \pagestyle{fancy}
+ \fancyhf{}
+ \fancyhead[C]{\leftmark}
+ ```
+...
+
+# Track changes in LaTeX and HTML
+
+A [I agree!]{.comment-start id="1" author="Mathias C. Walter" date="2016-05-21T22:14:00Z"}**simple**[]{.comment-end id="1"} comment from me.
+
+This is a text with [an *exciting*]{.insertion author="MCW" date="2014-06-25T10:40:00Z"} insertion.
+
+This is/was a text with a [*short*]{.deletion author="SWS" date="2014-06-25T10:42:00Z"} deletion.
+
+[Here is the text to be moved.]{.insertion author="FKA" date="2016-04-16T08:20:00Z"}
+
+[Here is the text to be moved.]{.deletion author="John F. Kennedy" date="2016-04-16T08:20:00Z"}
+
+Here is a [Why?]{.comment-start id="2" author="JFK" date="2016-07-29T16:50:00Z"}com[m]{.insertion author="SWS" date="2016-07-29T16:50:00Z"}ent with nest[t]{.deletion author="FKA" date="2016-04-16T08:20:00Z"}ed changes[]{.comment-end id="2"}.
+
+Here is a multi-line paragraph containing some text and a long deletion [short insertion]{.deletion author="MCW" date="2016-04-16T08:20:00Z"} wrapping over two lines.
+
+This is [A comment across paragraphs.]{.comment-start id="4" author="MCW" date="2016-05-09T16:13:00Z"}a new paragraph.
+
+And so[]{.comment-end id="4"} is this.
+
+One [This one has multiple paragraphs. ¶ ¶ See?]{.comment-start id="5" author="Jesse Rosenthal" date="2016-05-09T16:14:00Z"}more[]{.comment-end id="5"}.
+
+# A *header* wi[d]{.deletion author="FKA" date="2018-03-02T23:07:00Z"}th [a]{.insertion author="JFK" date="2018-03-02T23:07:00Z"} [Note]{.comment-start id="3" author="FKA" date="2017-08-24T22:14:00Z"}comment[]{.comment-end id="3"}
+
+Some unmodified text ...
+
+\newpage
+
+... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).
diff --git a/paper/lua-filters/track-changes/test-track-changes.sh b/paper/lua-filters/track-changes/test-track-changes.sh
new file mode 100644
index 0000000..b7074ef
--- /dev/null
+++ b/paper/lua-filters/track-changes/test-track-changes.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+latex_result="$(cat -)"
+
+assert_contains ()
+{
+ printf '%s' "$latex_result" | grep -qF "$1" -
+ if [ $? -ne 0 ]; then
+ printf 'Output does not contain `%s`.\n' "$1" >&2
+ exit 1
+ fi
+}
+
+# check whether we are using the changes package
+assert_contains <'
+ else
+ s = s .. '>' .. pandoc.utils.stringify(elem.content) .. '' .. toHtml[elem.classes[1]] .. '>'
+ end
+ return pandoc.RawInline('html', s)
+ elseif elem.classes:includes("comment-end") then
+ return pandoc.RawInline('html', '')
+ end
+end
+
+--- Span handler for "accept all changes" mode.
+-- Comment markers and deletions are dropped, insertions are replaced by their
+-- content, and any other span is left untouched (nil is returned).
+local function SpanAcceptChanges(elem)
+  local classes = elem.classes
+  if classes:includes("comment-start") or classes:includes("comment-end") then
+    return {}
+  end
+  if classes:includes("insertion") then
+    return elem.content
+  end
+  if classes:includes("deletion") then
+    return {}
+  end
+end
+
+--- Span handler for "reject all changes" mode.
+-- Comment markers and insertions are dropped, deletions are replaced by their
+-- content, and any other span is left untouched (nil is returned).
+local function SpanRejectChanges(elem)
+  local classes = elem.classes
+  if classes:includes("comment-start") or classes:includes("comment-end") then
+    return {}
+  end
+  if classes:includes("insertion") then
+    return {}
+  end
+  if classes:includes("deletion") then
+    return elem.content
+  end
+end
+
+--- Top-level filter: resolves the track-changes mode and rewrites all spans.
+-- The mode is taken from PANDOC_READER_OPTIONS.trackChanges when that is set,
+-- and from the `trackChanges` metadata field ('all', 'accept' or 'reject')
+-- otherwise; any unrecognised value is handled like "accept". The metadata
+-- field is removed from the returned document.
+-- @tparam Pandoc doc : The whole document
+-- @treturn Pandoc : The document with tracking spans converted or resolved
+function Pandoc(doc)
+  local meta = doc.meta
+  local trackChangesOptions = {all = 'AllChanges', accept = 'AcceptChanges', reject = 'RejectChanges' }
+
+  -- A table-valued metadata entry is flattened to a string; a missing (or
+  -- false) entry defaults to 'accept'.
+  local tc = meta['trackChanges']
+  if type(tc) == 'table' then
+    tc = pandoc.utils.stringify(tc)
+  end
+  tc = tc or 'accept'
+
+  local trackChanges = PANDOC_READER_OPTIONS and PANDOC_READER_OPTIONS.trackChanges or trackChangesOptions[tc]
+  meta.trackChanges = nil -- remove it from the metadata
+
+  local M = {}
+  if trackChanges == 'AllChanges' then
+    if is_html(FORMAT) then
+      M[#M + 1] = {
+        Span = TrackingSpanToHtml
+      }
+    elseif is_tex(FORMAT) then
+      M[#M + 1] = {
+        Span = TrackingSpanToTex,
+      }
+    elseif is_wordprocessing(FORMAT) then
+      M[#M + 1] = { Span = SpanReliner }
+    end
+  elseif trackChanges == 'RejectChanges' then
+    M[#M + 1] = { Span = SpanRejectChanges }
+  else -- otherwise assumes AcceptChanges
+    M[#M + 1] = { Span = SpanAcceptChanges }
+  end
+
+  -- M may be empty (AllChanges with an output format that has no handler); the
+  -- loop is then a no-op. The document is rebuilt unconditionally so that the
+  -- removal of `trackChanges` from the metadata always takes effect. (The
+  -- former `if #M then` guard was always true, because 0 is truthy in Lua.)
+  local blocks = doc.blocks
+  for i = 1, #M do
+    blocks = pandoc.walk_block(pandoc.Div(blocks), M[i]).content
+  end
+  if trackChanges == 'AllChanges' and is_tex(FORMAT) then
+    meta = add_track_changes(meta)
+  end
+  return pandoc.Pandoc(blocks, meta)
+end
diff --git a/paper/lua-filters/wordcount/Makefile b/paper/lua-filters/wordcount/Makefile
new file mode 100644
index 0000000..7dfba48
--- /dev/null
+++ b/paper/lua-filters/wordcount/Makefile
@@ -0,0 +1,2 @@
+test:
+ @pandoc --lua-filter=wordcount.lua sample.md | diff --strip-trailing-cr -u expected.txt -
diff --git a/paper/lua-filters/wordcount/README.md b/paper/lua-filters/wordcount/README.md
new file mode 100644
index 0000000..45efc2f
--- /dev/null
+++ b/paper/lua-filters/wordcount/README.md
@@ -0,0 +1,11 @@
+# wordcount
+
+This filter counts the words in the body of a document (omitting
+metadata like titles and abstracts), including words in code.
+It should be more accurate than `wc -w` run directly on a
+Markdown document, since the latter will count markup
+characters, like the `#` in front of an ATX header, or
+tags in HTML documents, as words.
+
+To run it, `pandoc --lua-filter wordcount.lua myfile.md`.
+The word count will be printed to stdout.
diff --git a/paper/lua-filters/wordcount/expected.txt b/paper/lua-filters/wordcount/expected.txt
new file mode 100644
index 0000000..dc608fc
--- /dev/null
+++ b/paper/lua-filters/wordcount/expected.txt
@@ -0,0 +1 @@
+15 words in body
diff --git a/paper/lua-filters/wordcount/sample.md b/paper/lua-filters/wordcount/sample.md
new file mode 100644
index 0000000..240bee0
--- /dev/null
+++ b/paper/lua-filters/wordcount/sample.md
@@ -0,0 +1,12 @@
+---
+title: Metadata words don't count
+abstract: ignored!
+---
+
+# Word count
+
+This document has *a **lot** of [words](url "title")* (15).[^1]
+
+ code is counted
+
+[^1]: Footnotes count.
diff --git a/paper/lua-filters/wordcount/wordcount.lua b/paper/lua-filters/wordcount/wordcount.lua
new file mode 100644
index 0000000..19aec11
--- /dev/null
+++ b/paper/lua-filters/wordcount/wordcount.lua
@@ -0,0 +1,29 @@
+-- counts words in a document
+
+-- Running total; read by the Pandoc function that prints the result.
+words = 0
+
+--- Filter table whose handlers add to `words`.
+-- Str elements count as one word each unless they consist solely of
+-- punctuation; inline code and code blocks contribute one word per
+-- whitespace-separated token.
+wordcount = {
+  Str = function(el)
+    -- we don't count a word if it's entirely punctuation:
+    if el.text:match("%P") then
+      words = words + 1
+    end
+  end,
+
+  Code = function(el)
+    -- gsub's second result is the number of matches; keep both results local
+    -- so they do not leak into the global environment.
+    local _, n = el.text:gsub("%S+","")
+    words = words + n
+  end,
+
+  CodeBlock = function(el)
+    local _, n = el.text:gsub("%S+","")
+    words = words + n
+  end
+}
+
+--- Counts the words of the document body, prints the total and exits.
+-- The process is terminated with status 0 right after printing.
+function Pandoc(el)
+  -- Metadata is skipped on purpose: only the body blocks, wrapped in a Div,
+  -- are walked with the counting filter.
+  local body = pandoc.Div(el.blocks)
+  pandoc.walk_block(body, wordcount)
+  local report = words .. " words in body"
+  print(report)
+  os.exit(0)
+end
diff --git a/paper/media/image1.png b/paper/media/image1.png
new file mode 100644
index 0000000..2a61d5b
Binary files /dev/null and b/paper/media/image1.png differ
diff --git a/paper/media/image2.png b/paper/media/image2.png
new file mode 100644
index 0000000..3902872
Binary files /dev/null and b/paper/media/image2.png differ
diff --git a/paper/media/image3.png b/paper/media/image3.png
new file mode 100644
index 0000000..1045b7d
Binary files /dev/null and b/paper/media/image3.png differ
diff --git a/paper/media/image4.png b/paper/media/image4.png
new file mode 100644
index 0000000..0306b17
Binary files /dev/null and b/paper/media/image4.png differ
diff --git a/paper/media/image5.png b/paper/media/image5.png
new file mode 100644
index 0000000..f16264c
Binary files /dev/null and b/paper/media/image5.png differ
diff --git a/paper/media/image6.png b/paper/media/image6.png
new file mode 100644
index 0000000..7c9cf31
Binary files /dev/null and b/paper/media/image6.png differ
diff --git a/paper/media/image7.png b/paper/media/image7.png
new file mode 100644
index 0000000..8abda7f
Binary files /dev/null and b/paper/media/image7.png differ
diff --git a/paper/media/image8.png b/paper/media/image8.png
new file mode 100644
index 0000000..8e34c5c
Binary files /dev/null and b/paper/media/image8.png differ
diff --git a/paper/mutagenicity.md b/paper/mutagenicity.md
new file mode 100644
index 0000000..c316543
--- /dev/null
+++ b/paper/mutagenicity.md
@@ -0,0 +1,532 @@
+---
+title: A comparison of random forest, support vector machine, deep learning and lazar algorithms for predicting mutagenicity
+#subtitle: Performance comparison with a new expanded dataset
+author:
+ - Christoph Helma:
+ institute: ist
+ email: helma@in-silico.ch
+ correspondence: "yes"
+ - Verena Schöning:
+ institute: zeller
+ - Philipp Boss:
+ institute: zeller
+ - Jürgen Drewe:
+ institute: zeller
+institute:
+ - ist:
+ name: in silico toxicology gmbh
+ address: "Rastatterstrasse 41, 4057 Basel, Switzerland"
+ - zeller:
+ name: Zeller AG
+ address: "Seeblickstrasse 4, 8590 Romanshorn, Switzerland"
+bibliography: bibliography.bib
+keywords: mutagenicity, (Q)SAR, lazar, random forest, support vector machine, deep learning
+documentclass: scrartcl
+...
+
+Abstract
+========
+
+k-nearest neighbor (`lazar`), random forest, support vector machine and deep
+learning algorithms were applied to a new *Salmonella* mutagenicity dataset
+with 8281 unique chemical structures. Algorithm performance was evaluated
+using 5-fold crossvalidation.
+TODO
+- results
+- conclusion
+
+Introduction
+============
+
+TODO: algo history
+
+TODO: dataset history
+
+TODO: open problems
+
+The main objectives of this study were
+
+ - to generate a new training dataset, by combining the most comprehensive public mutagenicity datasets
+ - to compare the performance of global models (RF, SVM, Neural Nets) with local models (`lazar`)
+
+Materials and Methods
+=====================
+
+Data
+----
+
+For all methods, the same training dataset was used. The
+training dataset was compiled from the following sources:
+
+- Kazius/Bursi Dataset (4337 compounds, @Kazius2005):
+
+- Hansen Dataset (6513 compounds, @Hansen2009):
+
+- EFSA Dataset (695 compounds):
+
+Mutagenicity classifications from the Kazius and Hansen datasets were used
+without further processing. To achieve consistency with these
+datasets, EFSA compounds were classified as mutagenic if at least one
+positive result was found for the TA98 or TA100 *Salmonella* strains.
+
+Dataset merges were based on unique SMILES (*Simplified Molecular Input
+Line Entry Specification*) strings of the compound structures.
+Duplicated experimental data with the same outcome was merged into a
+single value, because it is likely that it originated from the same
+experiment. Contradictory results were kept as multiple measurements in
+the database. The combined training dataset contains 8281 unique
+structures.
+
+Source code for all data download, extraction and merge operations is
+publicly available from the git repository
+ under a GPL3 License.
+
+TODO: check/fix git repo
+
+Algorithms
+----------
+
+### `lazar`
+
+`lazar` (*lazy structure activity relationships*) is a modular framework
+for read-across model development and validation. Its basic workflow
+is as follows: for a given chemical structure, `lazar`
+
+- searches in a database for similar structures (neighbours) with
+ experimental data,
+
+- builds a local QSAR model with these neighbours and
+
+- uses this model to predict the unknown activity of the query
+ compound.
+
+This procedure resembles an automated version of read across predictions
+in toxicology, in machine learning terms it would be classified as a
+k-nearest-neighbour algorithm.
+
+Apart from this basic workflow, `lazar` is completely modular and allows
+the researcher to use any algorithm for similarity searches and local
+QSAR (*Quantitative structure--activity relationship*) modelling.
+Algorithms used within this study are described in the following
+sections.
+
+#### Neighbour identification
+
+Similarity calculations were based on MolPrint2D fingerprints (@Bender2004) from the OpenBabel cheminformatics library
+(@OBoyle2011a). The MolPrint2D fingerprint uses
+atom environments as molecular representation, which resembles basically
+the chemical concept of functional groups. For each atom in a molecule,
+it represents the chemical environment using the atom types of connected
+atoms.
+
+MolPrint2D fingerprints are generated dynamically from chemical
+structures and do not rely on predefined lists of fragments (such as
+OpenBabel FP3, FP4 or MACCs fingerprints or lists of
+toxicophores/toxicophobes). This has the advantage that they may capture
+substructures of toxicological relevance that are not included in other
+fingerprints.
+
+From MolPrint2D fingerprints a feature vector with all atom environments
+of a compound can be constructed that can be used to calculate chemical
+similarities.
+
+The chemical similarity between two compounds a and b, with the sets of
+atom environments A and B, is expressed as the proportion between atom
+environments common in both structures A ∩ B and the total number of
+atom environments A ∪ B (Jaccard/Tanimoto index).
+
+$$sim = \frac{\left| A\ \cap B \right|}{\left| A\ \cup B \right|}$$
+
+Threshold selection is a trade-off between prediction accuracy (high
+threshold) and the number of predictable compounds (low threshold). As
+it is in many practical cases desirable to make predictions even in the
+absence of closely related neighbours, we follow a tiered approach:
+
+- First a similarity threshold of 0.5 is used to collect neighbours,
+ to create a local QSAR model and to make a prediction for the query
+ compound.
+
+- If any of these steps fails, the procedure is repeated with a
+ similarity threshold of 0.2 and the prediction is flagged with a
+ warning that it might be out of the applicability domain of the
+ training data.
+
+- Similarity thresholds of 0.5 and 0.2 are the default values chosen
+ by the software developers and remained unchanged during the
+ course of these experiments.
+
+Compounds with the same structure as the query structure are
+automatically eliminated from neighbours to obtain unbiased predictions
+in the presence of duplicates.
+
+#### Local QSAR models and predictions
+
+Only similar compounds (neighbours) above the threshold are used for
+local QSAR models. In this investigation, we are using a weighted
+majority vote from the neighbours' experimental data for mutagenicity
+classifications. Probabilities for both classes
+(mutagenic/non-mutagenic) are calculated according to the following
+formula and the class with the higher probability is used as prediction
+outcome.
+
+$$p_{c} = \frac{\sum \text{sim}_{n,c}}{\sum \text{sim}_{n}}$$
+
+$p_{c}$ Probability of class c (e.g. mutagenic or non-mutagenic)\
+$\sum \text{sim}_{n,c}$ Sum of similarities of neighbours with
+class c\
+$\sum \text{sim}_{n}$ Sum of similarities of all neighbours
+
+#### Applicability domain
+
+The applicability domain (AD) of `lazar` models is determined by the
+structural diversity of the training data. If no similar compounds are
+found in the training data no predictions will be generated. Warnings
+are issued if the similarity threshold had to be lowered from 0.5 to 0.2
+in order to enable predictions. Predictions without warnings can be
+considered as close to the applicability domain and predictions with
+warnings as more distant from the applicability domain. Quantitative
+applicability domain information can be obtained from the similarities
+of individual neighbours.
+
+#### Availability
+
+- `lazar` experiments for this manuscript:
+
+ (source code, GPL3)
+
+- `lazar` framework:
+
+ (source code, GPL3)
+
+- `lazar` GUI:
+
+ (source code, GPL3)
+
+- Public web interface:
+
+
+### Random Forest, Support Vector Machines, and Deep Learning in R-project
+
+For the Random Forest (RF), Support Vector Machines (SVM), and Deep
+Learning (DL) models, molecular descriptors were calculated
+with the PaDEL-Descriptors program ( version 2.21, @Yap2011).
+
+TODO: sentence ??
+
+From these descriptors were
+chosen, which were actually used for the generation of the DL model.
+
+
+In comparison to `lazar`, three other models (Random Forest (RF), Support
+Vector Machines (SVM), and Deep Learning (DL)) were evaluated.
+
+For the generation of these models, molecular 1D and 2D descriptors of
+the training dataset were calculated using PaDEL-Descriptors ( version
+2.21, @Yap2011).
+
+As the training dataset contained over 8280 instances, it was decided to
+delete instances with missing values during data pre-processing.
+Furthermore, substances with equivocal outcome were removed. The final
+training dataset contained 8080 instances with known mutagenic
+potential. The RF, SVM, and DL models were generated using the R
+software (R-project for Statistical Computing,
+*;* version 3.3.1), specific R packages used
+are identified for each step in the description below. During feature
+selection, descriptors with near zero variance were removed using the
+'*NearZeroVar*'-function (package 'caret'). If the percentage of the
+most common value was more than 90% or when the frequency ratio of the
+most common value to the second most common value was greater than 95:5
+(e.g. 95 instances of the most common value and only 5 or less instances
+of the second most common value), a descriptor was classified as having
+a near zero variance. After that, highly correlated descriptors were
+removed using the '*findCorrelation*'-function (package 'caret') with a
+cut-off of 0.9. This resulted in a training dataset with 516
+descriptors. These descriptors were scaled to be in the range between 0
+and 1 using the '*preProcess*'-function (package 'caret'). The scaling
+routine was saved in order to apply the same scaling on the testing
+dataset. As these three steps did not consider the outcome, it was
+decided that they do not need to be included in the cross-validation of
+the model. To further reduce the number of features, a LASSO (*least
+absolute shrinkage and selection operator*) regression was performed
+using the '*glmnet*'-function (package '*glmnet*'). The reduced dataset
+was used for the generation of the pre-trained models.
+
+For the RF model, the '*randomForest*'-function (package
+'*randomForest*') was used. A forest with 1000 trees with maximal
+terminal nodes of 200 was grown for the prediction.
+
+The '*svm*'-function (package 'e1071') with a *radial basis function
+kernel* was used for the SVM model.
+
+The DL model was generated using the '*h2o.deeplearning*'-function
+(package '*h2o*'). The DL contained four hidden layers with 70, 50, 50,
+and 10 neurons, respectively. Other hyperparameters were set as follows:
+l1=1.0E-7, l2=1.0E-11, epsilon = 1.0E-10, rho = 0.8, and quantile\_alpha
+= 0.5. For all other hyperparameters, the default values were used.
+Weights and biases were in a first step determined with an unsupervised
+DL model. These values were then used for the actual, supervised DL
+model.
+
+To validate these models, an internal cross-validation approach was
+chosen. The training dataset was randomly split into training data, which
+contained 95% of the data, and validation data, which contained 5% of the
+data. A feature selection with LASSO on the training data was performed,
+reducing the number of descriptors to approximately 100. This step was
+repeated five times. Based on each of the five different training data,
+the predictive models were trained and the performance tested with the
+validation data. This step was repeated 10 times. Furthermore, a
+y-randomisation using the RF model was performed. During
+y-randomisation, the outcome (y-variable) is randomly permuted. The
+theory is that after randomisation of the outcome, the model should not
+be able to correlate the outcome to the properties (descriptor values)
+of the substances. The performance of the model should therefore
+indicate a by-chance prediction with an accuracy of about 50%. If this
+is true, it can be concluded that correlation between actual outcome and
+properties of the substances is real and not by chance (@Rücker2007).
+
+![](media/image1.png){width="6.26875in" height="5.486111111111111in"}
+
+Figure 1: Flowchart of the generation and validation of the models
+generated in R-project
+
+#### Applicability domain
+
+The AD of the training dataset and the PA dataset was evaluated using
+the Jaccard distance. A Jaccard distance of '0' indicates that the
+substances are similar, whereas a value of '1' shows that the substances
+are different. The Jaccard distance was below 0.2 for all PAs relative
+to the training dataset. Therefore, PA dataset is within the AD of the
+training dataset and the models can be used to predict the genotoxic
+potential of the PA dataset.
+
+#### y-randomisation
+
+After y-randomisation of the outcome, the accuracy and CCR are around
+50%, indicating a chance distribution of the results. This shows
+that the outcome is actually related to the predictors and not due to
+chance.
+
+### Deep Learning in TensorFlow
+
+Alternatively, a DL model was established with Python-based TensorFlow
+program () using the high-level API Keras
+() to build the models.
+
+Data pre-processing was done by rank transformation using the
+'*QuantileTransformer*' procedure. A sequential model has been used.
+Four layers have been used: input layer, two hidden layers (with 12, 8
+and 8 nodes, respectively) and one output layer. For the output layer, a
+sigmoidal activation function and for all other layers the ReLU
+('*Rectified Linear Unit*') activation function was used. Additionally,
+an L^2^-penalty of 0.001 was used for the input layer. For training of
+the model, the ADAM algorithm was used to minimise the cross-entropy
+loss using the default parameters of Keras. Training was performed for
+100 epochs with a batch size of 64. The model was implemented with
+Python 3.6 and Keras. For training of the model, a 6-fold
+cross-validation was used. Accuracy was estimated by ROC-AUC and
+confusion matrix.
+
+Validation
+----------
+
+Results
+=======
+
+`lazar`
+-----
+
+Random Forest
+-------------
+
+The validation showed that the RF model has an accuracy of 64%, a
+sensitivity of 66% and a specificity of 63%. The confusion matrix of the
+model, calculated for 8080 instances, is provided in Table 1.
+
+Table 1: Confusion matrix of the RF model
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2274 1163 3437
+ ***TN*** 1736 2907 4643
+ ***Total*** 4010 4070 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+Support Vector Machines
+-----------------------
+
+The validation showed that the SVM model has an accuracy of 62%, a
+sensitivity of 65% and a specificity of 60%. The confusion matrix of SVM
+model, calculated for 8080 instances, is provided in Table 2.
+
+Table 2: Confusion matrix of the SVM model
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2057 1107 3164
+ ***TN*** 1953 2963 4916
+ ***Total*** 4010 4070 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+Deep Learning (R-project)
+-------------------------
+
+The validation showed that the DL model generated in R has an accuracy
+of 59%, a sensitivity of 89% and a specificity of 30%. The confusion
+matrix of the model, normalised to 8080 instances, is provided in Table
+3.
+
+Table 3: Confusion matrix of the DL model (R-project)
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 3575 435 4010
+ ***TN*** 2853 1217 4070
+ ***Total*** 6428 1652 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+DL model (TensorFlow)
+---------------------
+
+The validation showed that the DL model generated in TensorFlow has an
+accuracy of 68%, a sensitivity of 70% and a specificity of 46%. The
+confusion matrix of the model, normalised to 8080 instances, is provided
+in Table 4.
+
+Table 4: Confusion matrix of the DL model (TensorFlow)
+
+ Predicted genotoxicity
+ ----------------------- ------------------------ ---------- ---------- -------------
+ Measured genotoxicity ***PP*** ***PN*** ***Total***
+ ***TP*** 2851 1227 4078
+ ***TN*** 1825 2177 4002
+ ***Total*** 4676 3404 8080
+
+PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
+True negative
+
+The ROC curves from the 6-fold validation are shown in Figure 7.
+
+![](media/image7.png){width="3.825in"
+height="2.7327045056867894in"}
+
+Figure 7: Six-fold cross-validation of the TensorFlow DL model shows an
+average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68%.
+
+In summary, the validation results of the four methods are presented in
+the following table.
+
+Table 5: Results of the cross-validation of the four models and after
+y-randomisation
+
+ ----------------------------------------------------------------------
+ Accuracy CCR Sensitivity Specificity
+ ----------------------- ---------- ------- ------------- -------------
+ RF model 64.1% 64.4% 66.2% 62.6%
+
+ SVM model 62.1% 62.6% 65.0% 60.3%
+
+ DL model\ 59.3% 59.5% 89.2% 29.9%
+ (R-project)
+
+ DL model (TensorFlow) 68% 62.2% 69.9% 45.6%
+
+ y-randomisation 50.5% 50.4% 50.3% 50.6%
+ ----------------------------------------------------------------------
+
+CCR (correct classification rate)
+
+Discussion
+==========
+
+General model performance
+
+Based on the results of the cross-validation for all models, `lazar`, RF,
+SVM, DL (R-project) and DL (TensorFlow), it can be stated that the
+prediction results are not optimal due to different reasons. The
+accuracy as measured during cross-validation of the four models (RF,
+SVM, DL (R-project and TensorFlow)) was partly low with CCR values
+between 59.3 and 68%, with the R-generated DL model and the
+TensorFlow-generated DL model showing the worst and the best
+performance, respectively. The validation of the R-generated DL model
+revealed a high sensitivity (89.2%) but an unacceptably low specificity
+of 29.9% indicating a high number of false positive estimates. The
+TensorFlow-generated DL model, however, showed an acceptable but not
+optimal accuracy of 68%, a sensitivity of 69.9% and a specificity of
+45.6%. The low specificity indicates that both DL models tend to
+predict too many instances as positive (genotoxic), and therefore have a
+high false positive rate. This allows group statements to be made, at
+least with the TensorFlow-generated DL model, but the confidence for
+estimations of single PAs appears to be insufficient.
+
+Several factors have likely contributed to the low to moderate
+performance of the used methods as shown during the cross-validation:
+
+1. The outcome in the training dataset was based on the results of AMES
+ tests for genotoxicity [ICH 2011](#_ENREF_63)(), an *in vitro* test
+ in different strains of the bacteria *Salmonella typhimurium*. In
+ this test, mutagenicity is evaluated with and without prior
+ metabolic activation of the test substance. Metabolic activation
+ could result in the formation of genotoxic metabolites from
+ non-genotoxic parent compounds. However, no distinction was made in
+ the training dataset between substances that needed metabolic
+ activation before being mutagenic and those that were mutagenic
+ without metabolic activation. `lazar` is able to handle this
+ 'inaccuracy' in the training dataset well due to the way the
+ algorithm works: `lazar` predicts the genotoxic potential based on the
+ neighbours of substances with comparable structural features,
+ considering mutagenic and not mutagenic neighbours. Based on the
+ structural similarity, a probability for mutagenicity and no
+ mutagenicity is calculated independently from each other (meaning
+ that the sum of probabilities does not necessarily add up to 100%).
+ The class with the higher outcome is then the overall outcome for
+ the substance.
+
+> In contrast, the other models need to be trained first to recognise
+> the structural features that are responsible for genotoxicity.
+> Therefore, the mixture of substances being mutagenic with and without
+> metabolic activation in the training dataset may have adversely
+> affected the ability to separate the dataset in two distinct classes
+> and thus explains the relatively low performance of these models.
+
+2. Machine learning algorithms try to find an optimized solution in a
+ high-dimensional (one dimension per each predictor) space. Sometimes
+ these methods do not find the global optimum of estimates but only
+ local (not optimal) solutions. Strategies to find the global
+ solutions are systematic variation (grid search) of the
+ hyperparameters of the methods, which may be very time consuming in
+ particular in large datasets.
+
+
+Conclusions
+===========
+
+In this study, an attempt was made to predict the genotoxic potential of
+PAs using five different machine learning techniques (`lazar`, RF, SVM, DL
+(R-project and TensorFlow)). The results of all models fitted only partly
+to the findings in literature, with best results obtained with the
+TensorFlow DL model. Therefore, modelling allows statements on the
+relative risks of genotoxicity of the different PA groups. Individual
+predictions for selective PAs appear, however, not reliable on the
+current basis of the used training dataset.
+
+This study emphasises the importance of critical assessment of
+predictions by QSAR models. This includes not only extensive literature
+research to assess the plausibility of the predictions, but also a good
+knowledge of the metabolism of the test substances and understanding for
+possible mechanisms of toxicity.
+
+In further studies, additional machine learning techniques or a modified
+(extended) training dataset should be used for an additional attempt to
+predict the genotoxic potential of PAs.
+
+References
+==========
diff --git a/paper/outfile.docx b/paper/outfile.docx
new file mode 100644
index 0000000..dc3070c
Binary files /dev/null and b/paper/outfile.docx differ
diff --git a/paper/outfile.enriched.json b/paper/outfile.enriched.json
new file mode 100644
index 0000000..a547003
--- /dev/null
+++ b/paper/outfile.enriched.json
@@ -0,0 +1 @@
+{"blocks":[{"t":"Header","c":[1,["introduction",[],[]],[{"t":"Str","c":"Introduction"}]]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"algo"},{"t":"Space"},{"t":"Str","c":"history"}]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"history"}]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"open"},{"t":"Space"},{"t":"Str","c":"problems"}]},{"t":"Header","c":[1,["materials-and-methods",[],[]],[{"t":"Str","c":"Materials"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Methods"}]]},{"t":"Header","c":[2,["mutagenicity-data",[],[]],[{"t":"Str","c":"Mutagenicity"},{"t":"Space"},{"t":"Str","c":"data"}]]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"methods,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"compiled"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"Space"},{"t":"Str","c":"sources:"}]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Str","c":"Kazius/Bursi"},{"t":"Space"},{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"(4337"},{"t":"Space"},{"t":"Str","c":"compounds,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Kazius2005","citationHash":0}],[{"t":"Str","c":"@Kazius2005"}]]},{"t":"Str","c":"):"},{"t":"Space"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"http://cheminformatics.org/datasets/bursi/cas_4337.zip"}],["http://cheminform
atics.org/datasets/bursi/cas_4337.zip",""]]}]}],[{"t":"Para","c":[{"t":"Str","c":"Hansen"},{"t":"Space"},{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"(6513"},{"t":"Space"},{"t":"Str","c":"compounds,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Hansen2009","citationHash":0}],[{"t":"Str","c":"@Hansen2009"}]]},{"t":"Str","c":"):"},{"t":"Space"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"}],["http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv",""]]}]}],[{"t":"Para","c":[{"t":"Str","c":"EFSA"},{"t":"Space"},{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"(695"},{"t":"Space"},{"t":"Str","c":"compounds):"},{"t":"Space"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls"}],["https://data.europa.eu/euodp/data/storage/f/2017-0719T142131/GENOTOX%20data%20and%20dictionary.xls",""]]}]}]]},{"t":"Para","c":[{"t":"Str","c":"Mutagenicity"},{"t":"Space"},{"t":"Str","c":"classifications"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"Kazius"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Hansen"},{"t":"Space"},{"t":"Str","c":"datasets"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"SoftBreak"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"further"},{"t":"Space"},{"t":"Str","c":"processing."},{"t":"Space"},{"t":"Str","c":"To"},{"t":"Space"},{"t":"Str","c":"achieve"},{"t":"Space"},{"t":"Str","c":"consistency"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"SoftBreak"},{"t":"Str","c":"datasets,"},{"t":"Space"},{"t":"Str","c":"EFSA"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"classified"},{"t":"Space"},{"t":"S
tr","c":"as"},{"t":"Space"},{"t":"Str","c":"mutagenic,"},{"t":"Space"},{"t":"Str","c":"if"},{"t":"Space"},{"t":"Str","c":"at"},{"t":"Space"},{"t":"Str","c":"least"},{"t":"Space"},{"t":"Str","c":"one"},{"t":"SoftBreak"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"result"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"found"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"TA98"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"T100"},{"t":"Space"},{"t":"Str","c":"Salmonella"},{"t":"Space"},{"t":"Str","c":"strains."}]},{"t":"Para","c":[{"t":"Str","c":"Dataset"},{"t":"Space"},{"t":"Str","c":"merges"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"unique"},{"t":"Space"},{"t":"Str","c":"SMILES"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"Simplified"},{"t":"Space"},{"t":"Str","c":"Molecular"},{"t":"Space"},{"t":"Str","c":"Input"},{"t":"SoftBreak"},{"t":"Str","c":"Line"},{"t":"Space"},{"t":"Str","c":"Entry"},{"t":"Space"},{"t":"Str","c":"Specification"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"strings"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"compound"},{"t":"Space"},{"t":"Str","c":"structures."},{"t":"SoftBreak"},{"t":"Str","c":"Duplicated"},{"t":"Space"},{"t":"Str","c":"experimental"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"merged"},{"t":"Space"},{"t":"Str","c":"into"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"single"},{"t":"Space"},{"t":"Str","c":"value,"},{"t":"Space"},{"t":"Str","c":"because"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"
Str","c":"likely"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"originated"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"SoftBreak"},{"t":"Str","c":"experiment."},{"t":"Space"},{"t":"Str","c":"Contradictory"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"kept"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"multiple"},{"t":"Space"},{"t":"Str","c":"measurements"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"database."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"combined"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"contains"},{"t":"Space"},{"t":"Str","c":"8281"},{"t":"Space"},{"t":"Str","c":"unique"},{"t":"SoftBreak"},{"t":"Str","c":"structures."}]},{"t":"Para","c":[{"t":"Str","c":"Source"},{"t":"Space"},{"t":"Str","c":"code"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"download,"},{"t":"Space"},{"t":"Str","c":"extraction"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"merge"},{"t":"Space"},{"t":"Str","c":"operations"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"SoftBreak"},{"t":"Str","c":"publicly"},{"t":"Space"},{"t":"Str","c":"available"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"git"},{"t":"Space"},{"t":"Str","c":"repository"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/pyrrolizidine"}],["https://git.in-silico.ch/pyrrolizidine",""]]},{"t":"Space"},{"t":"Str","c":"under"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"GPL3"},{"t":"Space"},{"t":"Str","c":"License."}]},{"t":"Para"
,"c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"check/fix"},{"t":"Space"},{"t":"Str","c":"git"},{"t":"Space"},{"t":"Str","c":"repo"}]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"Random"},{"t":"Space"},{"t":"Str","c":"Forest"},{"t":"Space"},{"t":"Str","c":"(RF),"},{"t":"Space"},{"t":"Str","c":"Support"},{"t":"Space"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines"},{"t":"Space"},{"t":"Str","c":"(SVM),"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Deep"},{"t":"SoftBreak"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"(DL)"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Str","c":"molecular"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"SoftBreak"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"PaDEL-Descriptors"},{"t":"Space"},{"t":"Str","c":"program"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"http://www.yapcwsoft.com"}],["http://www.yapcwsoft.com",""]]},{"t":"Space"},{"t":"Str","c":"version"},{"t":"Space"},{"t":"Str","c":"2.21,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Yap2011","citationHash":0}],[{"t":"Str","c":"@Yap2011"}]]},{"t":"Str","c":")."}]},{"t":"Para","c":[{"t":"Str","c":"TODO:"},{"t":"Space"},{"t":"Str","c":"sentence"},{"t":"Space"},{"t":"Str","c":"??"}]},{"t":"Para","c":[{"t":"Str","c":"From"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"SoftBreak"},{"t":"Str","c":"chosen,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"actually"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{
"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model."}]},{"t":"Header","c":[2,["algorithms",[],[]],[{"t":"Str","c":"Algorithms"}]]},{"t":"Header","c":[3,["lazar",[],[]],[{"t":"Code","c":[["",[],[]],"lazar"]}]]},{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"lazy"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Str","c":"activity"},{"t":"Space"},{"t":"Str","c":"relationships"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"modular"},{"t":"Space"},{"t":"Str","c":"framework"},{"t":"SoftBreak"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"read-across"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"development"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"validation."},{"t":"Space"},{"t":"Str","c":"It"},{"t":"Space"},{"t":"Str","c":"follows"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"following"},{"t":"Space"},{"t":"Str","c":"basic"},{"t":"Space"},{"t":"Str","c":"workflow:"},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"given"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":":"}]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Str","c":"searches"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"database"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"similar"},{"t":"Space"},{"t":"Str","c":"structures"},{"t":"Space"},{"t":"Str","c":"(neighbours)"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"experimental"},{"t":"Space"},{"t":"Str","c"
:"data,"}]}],[{"t":"Para","c":[{"t":"Str","c":"builds"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"and"}]}],[{"t":"Para","c":[{"t":"Str","c":"uses"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"unknown"},{"t":"Space"},{"t":"Str","c":"activity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"query"},{"t":"SoftBreak"},{"t":"Str","c":"compound."}]}]]},{"t":"Para","c":[{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"procedure"},{"t":"Space"},{"t":"Str","c":"resembles"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"automated"},{"t":"Space"},{"t":"Str","c":"version"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"read"},{"t":"Space"},{"t":"Str","c":"across"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"toxicology,"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"terms"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"would"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"classified"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"k-nearest-neighbour"},{"t":"Space"},{"t":"Str","c":"algorithm."}]},{"t":"Para","c":[{"t":"Str","c":"Apart"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"basic"},{"t":"Space"},{"t":"Str","c":"workflow,"},{"t":"Space"},{"t":"Cod
e","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"completely"},{"t":"Space"},{"t":"Str","c":"modular"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"allows"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"researcher"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"use"},{"t":"Space"},{"t":"Str","c":"any"},{"t":"Space"},{"t":"Str","c":"algorithm"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"searches"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"local"},{"t":"SoftBreak"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"Quantitative"},{"t":"Space"},{"t":"Str","c":"structure–activity"},{"t":"Space"},{"t":"Str","c":"relationship"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"modelling."},{"t":"SoftBreak"},{"t":"Str","c":"Algorithms"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"within"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"study"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"described"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"SoftBreak"},{"t":"Str","c":"sections."}]},{"t":"Header","c":[4,["neighbour-identification",[],[]],[{"t":"Str","c":"Neighbour"},{"t":"Space"},{"t":"Str","c":"identification"}]]},{"t":"Para","c":[{"t":"Str","c":"Similarity"},{"t":"Space"},{"t":"Str","c":"calculations"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c":"fingerprints"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Bender2004","citationHash":0}],[{"t":"Str","
c":"@Bender2004"}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"OpenBabel"},{"t":"Space"},{"t":"Str","c":"cheminformatics"},{"t":"Space"},{"t":"Str","c":"library"},{"t":"SoftBreak"},{"t":"Str","c":"("},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"OBoyle2011a","citationHash":0}],[{"t":"Str","c":"@OBoyle2011a"}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c":"fingerprint"},{"t":"Space"},{"t":"Str","c":"uses"},{"t":"SoftBreak"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"molecular"},{"t":"Space"},{"t":"Str","c":"representation,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"resembles"},{"t":"Space"},{"t":"Str","c":"basically"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"concept"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"functional"},{"t":"Space"},{"t":"Str","c":"groups."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"molecule,"},{"t":"SoftBreak"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"represents"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"environment"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"types"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"connected"},{"t":"SoftBreak"},{"t":"Str","c":"atoms."}]},{"t":"Para","c":[{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c"
:"fingerprints"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"dynamically"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"SoftBreak"},{"t":"Str","c":"structures"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"do"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"rely"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"predefined"},{"t":"Space"},{"t":"Str","c":"lists"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"fragments"},{"t":"Space"},{"t":"Str","c":"(such"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"SoftBreak"},{"t":"Str","c":"OpenBabel"},{"t":"Space"},{"t":"Str","c":"FP3,"},{"t":"Space"},{"t":"Str","c":"FP4"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"MACCs"},{"t":"Space"},{"t":"Str","c":"fingerprints"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"lists"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"toxicophores/toxicophobes)."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"advantage"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"they"},{"t":"Space"},{"t":"Str","c":"may"},{"t":"Space"},{"t":"Str","c":"capture"},{"t":"SoftBreak"},{"t":"Str","c":"substructures"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"toxicological"},{"t":"Space"},{"t":"Str","c":"relevance"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"included"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"SoftBreak"},{"t":"Str","c":"fingerprints."}]},{"t":"Para","c":[{"t":"Str","c":"From"},{"t":"Space"},{"t":"Str","c":"MolPrint2D"},{"t":"Space"},{"t":"Str","c":"fingerprints"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"
Space"},{"t":"Str","c":"feature"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"compound"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"constructed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"calculate"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"SoftBreak"},{"t":"Str","c":"similarities."}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"two"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"b"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"expressed"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"proportion"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"both"},{"t":"Space"},{"t":"Str","c":"structures"},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"∩"},{"t":"Space"},{"t":"Str","c":"B"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"total"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"atom"},{"t":"Space"},{"t":"Str","c":"environments"},{"t":"Space"},{"t":"Str","c":"A"},{"t":"
Space"},{"t":"Str","c":"U"},{"t":"Space"},{"t":"Str","c":"B"},{"t":"Space"},{"t":"Str","c":"(Jaccard/Tanimoto"},{"t":"SoftBreak"},{"t":"Str","c":"index)."}]},{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"},"sim = \\frac{\\left| A\\ \\cap B \\right|}{\\left| A\\ \\cup B \\right|}"]}]},{"t":"Para","c":[{"t":"Str","c":"Threshold"},{"t":"Space"},{"t":"Str","c":"selection"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"trade-off"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"(high"},{"t":"SoftBreak"},{"t":"Str","c":"threshold)"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"predictable"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"(low"},{"t":"Space"},{"t":"Str","c":"threshold)."},{"t":"Space"},{"t":"Str","c":"As"},{"t":"SoftBreak"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"many"},{"t":"Space"},{"t":"Str","c":"practical"},{"t":"Space"},{"t":"Str","c":"cases"},{"t":"Space"},{"t":"Str","c":"desirable"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"make"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"even"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"absence"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"closely"},{"t":"Space"},{"t":"Str","c":"related"},{"t":"Space"},{"t":"Str","c":"neighbours,"},{"t":"Space"},{"t":"Str","c":"we"},{"t":"Space"},{"t":"Str","c":"follow"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"tiered"},{"t":"Space"},{"t":"Str","c":"approach:"}]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Str","c":"First"},{"t
":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.5"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"collect"},{"t":"Space"},{"t":"Str","c":"neighbours,"},{"t":"SoftBreak"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"create"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"make"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"query"},{"t":"SoftBreak"},{"t":"Str","c":"compound."}]}],[{"t":"Para","c":[{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"any"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"steps"},{"t":"Space"},{"t":"Str","c":"fails,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"procedure"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"repeated"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"flagged"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"warning"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"might"},{"t":"Space"},{"t":"Str","c"
:"be"},{"t":"Space"},{"t":"Str","c":"out"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data."}]}],[{"t":"Para","c":[{"t":"Str","c":"Similarity"},{"t":"Space"},{"t":"Str","c":"thresholds"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.5"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"default"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"chosen"},{"t":"SoftBreak"},{"t":"Str","c":">"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"software"},{"t":"Space"},{"t":"Str","c":"developers"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"remained"},{"t":"Space"},{"t":"Str","c":"unchanged"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":">"},{"t":"Space"},{"t":"Str","c":"course"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"experiments."}]}]]},{"t":"Para","c":[{"t":"Str","c":"Compounds"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"query"},{"t":"Space"},{"t":"Str","c":"structure"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"SoftBreak"},{"t":"Str","c":"automatically"},{"t":"Space"},{"t":"Str","c":"eliminated"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"obtain"},{"t
":"Space"},{"t":"Str","c":"unbiased"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"presence"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"duplicates."}]},{"t":"Header","c":[4,["local-qsar-models-and-predictions",[],[]],[{"t":"Str","c":"Local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"predictions"}]]},{"t":"Para","c":[{"t":"Str","c":"Only"},{"t":"Space"},{"t":"Str","c":"similar"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"(neighbours)"},{"t":"Space"},{"t":"Str","c":"above"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"models."},{"t":"Space"},{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"investigation,"},{"t":"Space"},{"t":"Str","c":"we"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"weighted"},{"t":"SoftBreak"},{"t":"Str","c":"majority"},{"t":"Space"},{"t":"Str","c":"vote"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"neighbour’s"},{"t":"Space"},{"t":"Str","c":"experimental"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"SoftBreak"},{"t":"Str","c":"classifications."},{"t":"Space"},{"t":"Str","c":"Probabilities"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"both"},{"t":"Space"},{"t":"Str","c":"classes"},{"t":"SoftBreak"},{"t":"Str","c":"(mutagenic/non-mutagenic)"},{"t":"Space"},{"t":"St
r","c":"are"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"according"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"SoftBreak"},{"t":"Str","c":"formula"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"class"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"higher"},{"t":"Space"},{"t":"Str","c":"probability"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"SoftBreak"},{"t":"Str","c":"outcome."}]},{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"},"p_{c} = \\ \\frac{\\sum_{}^{}\\text{sim}_{n,c}}{\\sum_{}^{}\\text{sim}_{n}}"]}]},{"t":"Para","c":[{"t":"Math","c":[{"t":"InlineMath"},"p_{c}"]},{"t":"Space"},{"t":"Str","c":"Probability"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"class"},{"t":"Space"},{"t":"Str","c":"c"},{"t":"Space"},{"t":"Str","c":"(e.g. 
mutagenic"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"non-mutagenic)"},{"t":"LineBreak"},{"t":"Math","c":[{"t":"InlineMath"},"\\sum_{}^{}\\text{sim}_{n,c}"]},{"t":"Space"},{"t":"Str","c":"Sum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"similarities"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"class"},{"t":"Space"},{"t":"Str","c":"c"},{"t":"LineBreak"},{"t":"Math","c":[{"t":"InlineMath"},"\\sum_{}^{}\\text{sim}_{n}"]},{"t":"Space"},{"t":"Str","c":"Sum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"neighbours"}]},{"t":"Header","c":[4,["applicability-domain",[],[]],[{"t":"Str","c":"Applicability"},{"t":"Space"},{"t":"Str","c":"domain"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"(AD)"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"determined"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"diversity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data."},{"t":"Space"},{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"Space"},{"t":"Str","c":"similar"},{"t":"Space"},{"t":"Str","c":"compounds"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"SoftBreak"},{"t":"Str","c":"found"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"Space"},{
"t":"Str","c":"will"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"generated."},{"t":"Space"},{"t":"Str","c":"Warnings"},{"t":"SoftBreak"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"issued"},{"t":"Space"},{"t":"Str","c":"if"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"similarity"},{"t":"Space"},{"t":"Str","c":"threshold"},{"t":"Space"},{"t":"Str","c":"had"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"lowered"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"0.5"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"order"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"enable"},{"t":"Space"},{"t":"Str","c":"predictions."},{"t":"Space"},{"t":"Str","c":"Predictions"},{"t":"Space"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"warnings"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"SoftBreak"},{"t":"Str","c":"considered"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"close"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"warnings"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"more"},{"t":"Space"},{"t":"Str","c":"distant"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain."},{"t":"Space"},{"t":"Str","c":"Quantitative"},{"t":"SoftBreak"},{"t":"Str","c":"applicability"},{"t":"Space"},{"t":"Str","c":"domain"},{"t":"Space"},{"t":"Str","c":"information"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":
"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"obtained"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"similarities"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"individual"},{"t":"Space"},{"t":"Str","c":"neighbours."}]},{"t":"Header","c":[4,["availability",[],[]],[{"t":"Str","c":"Availability"}]]},{"t":"BulletList","c":[[{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"experiments"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"manuscript:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/pyrrolizidine"}],["https://git.in-silico.ch/pyrrolizidine",""]]},{"t":"SoftBreak"},{"t":"Str","c":"(source"},{"t":"Space"},{"t":"Str","c":"code,"},{"t":"Space"},{"t":"Str","c":"GPL3)"}]}],[{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"framework:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/lazar"}],["https://git.in-silico.ch/lazar",""]]},{"t":"SoftBreak"},{"t":"Str","c":"(source"},{"t":"Space"},{"t":"Str","c":"code,"},{"t":"Space"},{"t":"Str","c":"GPL3)"}]}],[{"t":"Para","c":[{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"GUI:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://git.in-silico.ch/lazar-gui"}],["https://git.in-silico.ch/lazar-gui",""]]},{"t":"SoftBreak"},{"t":"Str","c":"(source"},{"t":"Space"},{"t":"Str","c":"code,"},{"t":"Space"},{"t":"Str","c":"GPL3)"}]}],[{"t":"Para","c":[{"t":"Str","c":"Public"},{"t":"Space"},{"t":"Str","c":"web"},{"t":"Space"},{"t":"Str","c":"interface:"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://lazar.in-silico.ch"}],["https://lazar.in-silico.ch",""]]}]}]]},{"t":"Header","c":[3,["random-forest-support-vector-machines-and-deep-learning-in-r-project",[],[]],[{"t":
"Str","c":"Random"},{"t":"Space"},{"t":"Str","c":"Forest,"},{"t":"Space"},{"t":"Str","c":"Support"},{"t":"Space"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"R-project"}]]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"comparison"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":","},{"t":"Space"},{"t":"Str","c":"three"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"(Random"},{"t":"Space"},{"t":"Str","c":"Forest"},{"t":"Space"},{"t":"Str","c":"(RF),"},{"t":"Space"},{"t":"Str","c":"Support"},{"t":"SoftBreak"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines"},{"t":"Space"},{"t":"Str","c":"(SVM),"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"(DL))"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"evaluated."}]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Str","c":"molecular"},{"t":"Space"},{"t":"Str","c":"1D"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"2D"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"PaDEL-Descriptors"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"
],[]],[{"t":"Str","c":"http://www.yapcwsoft.com"}],["http://www.yapcwsoft.com",""]]},{"t":"Space"},{"t":"Str","c":"version"},{"t":"SoftBreak"},{"t":"Str","c":"2.21,"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Yap2011","citationHash":0}],[{"t":"Str","c":"@Yap2011"}]]},{"t":"Str","c":")."}]},{"t":"Para","c":[{"t":"Str","c":"As"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"over"},{"t":"Space"},{"t":"Str","c":"8280"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"decided"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"SoftBreak"},{"t":"Str","c":"delete"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"missing"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"pre-processing."},{"t":"SoftBreak"},{"t":"Str","c":"Furthermore,"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"equivocal"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"removed."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"final"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"known"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"SoftBreak"},{"t":"Str","c":"potential."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"RF,"},{"t":"Space"},{"t"
:"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"R"},{"t":"SoftBreak"},{"t":"Str","c":"software"},{"t":"Space"},{"t":"Str","c":"(R-project"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"Statistical"},{"t":"Space"},{"t":"Str","c":"Computing,"},{"t":"SoftBreak"},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://www.r-project.org/"}],["https://www.r-project.org/",""]]},{"t":"Emph","c":[{"t":"Str","c":";"}]},{"t":"Space"},{"t":"Str","c":"version"},{"t":"Space"},{"t":"Str","c":"3.3.1),"},{"t":"Space"},{"t":"Str","c":"specific"},{"t":"Space"},{"t":"Str","c":"R"},{"t":"Space"},{"t":"Str","c":"packages"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"SoftBreak"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"identified"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"description"},{"t":"Space"},{"t":"Str","c":"below."},{"t":"Space"},{"t":"Str","c":"During"},{"t":"Space"},{"t":"Str","c":"feature"},{"t":"SoftBreak"},{"t":"Str","c":"selection,"},{"t":"Space"},{"t":"Str","c":"descriptor"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"near"},{"t":"Space"},{"t":"Str","c":"zero"},{"t":"Space"},{"t":"Str","c":"variance"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"removed"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"NearZeroVar"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"caret"}]]},{"t":"Str","c":")."},{"t
":"Space"},{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"percentage"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"more"},{"t":"Space"},{"t":"Str","c":"than"},{"t":"Space"},{"t":"Str","c":"90%"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"when"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"frequency"},{"t":"Space"},{"t":"Str","c":"ratio"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"second"},{"t":"Space"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"greater"},{"t":"Space"},{"t":"Str","c":"than"},{"t":"Space"},{"t":"Str","c":"95:5"},{"t":"SoftBreak"},{"t":"Str","c":"(e.g. 
95"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"Space"},{"t":"Str","c":"5"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"less"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"second"},{"t":"Space"},{"t":"Str","c":"most"},{"t":"Space"},{"t":"Str","c":"common"},{"t":"Space"},{"t":"Str","c":"value),"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"descriptor"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"classified"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"having"},{"t":"SoftBreak"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"near"},{"t":"Space"},{"t":"Str","c":"zero"},{"t":"Space"},{"t":"Str","c":"variance."},{"t":"Space"},{"t":"Str","c":"After"},{"t":"Space"},{"t":"Str","c":"that,"},{"t":"Space"},{"t":"Str","c":"highly"},{"t":"Space"},{"t":"Str","c":"correlated"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"SoftBreak"},{"t":"Str","c":"removed"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"findCorrelation"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"caret"}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"cut-off"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.9."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"resulted"},{"t":"Space"},{"t":"Str","c":"i
n"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"516"},{"t":"SoftBreak"},{"t":"Str","c":"descriptors."},{"t":"Space"},{"t":"Str","c":"These"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"scaled"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"range"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"0"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"1"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"preProcess"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"caret"}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"scaling"},{"t":"SoftBreak"},{"t":"Str","c":"routine"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"saved"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"order"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"apply"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"same"},{"t":"Space"},{"t":"Str","c":"scaling"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"testing"},{"t":"SoftBreak"},{"t":"Str","c":"dataset."},{"t":"Space"},{"t":"Str","c":"As"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"three"},{"t":"Space"},{"t":"Str","c":"steps"},{"t":"Space"},{"t":"Str","c":"did"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"consider"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t
":"Str","c":"outcome,"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"SoftBreak"},{"t":"Str","c":"decided"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"they"},{"t":"Space"},{"t":"Str","c":"do"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"need"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"included"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model."},{"t":"Space"},{"t":"Str","c":"To"},{"t":"Space"},{"t":"Str","c":"further"},{"t":"Space"},{"t":"Str","c":"reduce"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"features,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"LASSO"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Emph","c":[{"t":"Str","c":"least"},{"t":"SoftBreak"},{"t":"Str","c":"absolute"},{"t":"Space"},{"t":"Str","c":"shrinkage"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"selection"},{"t":"Space"},{"t":"Str","c":"operator"}]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"regression"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed"},{"t":"SoftBreak"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"glmnet"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"glmnet"}]}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"reduced"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"SoftBreak"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Spa
ce"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"pre-trained"},{"t":"Space"},{"t":"Str","c":"models."}]},{"t":"Para","c":[{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"randomForest"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"randomForest"}]}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"forest"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"1000"},{"t":"Space"},{"t":"Str","c":"trees"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"maximal"},{"t":"SoftBreak"},{"t":"Str","c":"terminal"},{"t":"Space"},{"t":"Str","c":"nodes"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"200"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"grown"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"prediction."}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"svm"}]}]]},{"t":"Str","c":"-function"},{"t":"Space"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"e1071"}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"radial"},{"t":"Space"},{"t":"Str","c":"basis"},{"t":"Space"},{"t":"Str","c":"function"},{"t":"SoftBreak"},{"t":"Str","c":"kernel"}]},{"t":
"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model."}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"h2o.deeplearning"}]}]]},{"t":"Str","c":"-function"},{"t":"SoftBreak"},{"t":"Str","c":"(package"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"h2o"}]}]]},{"t":"Str","c":")."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"hidden"},{"t":"Space"},{"t":"Str","c":"layer"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"70,"},{"t":"Space"},{"t":"Str","c":"50,"},{"t":"Space"},{"t":"Str","c":"50,"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"10"},{"t":"Space"},{"t":"Str","c":"neurons,"},{"t":"Space"},{"t":"Str","c":"respectively."},{"t":"Space"},{"t":"Str","c":"Other"},{"t":"Space"},{"t":"Str","c":"hyperparameter"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"set"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"follows:"},{"t":"SoftBreak"},{"t":"Str","c":"l1=1.0E-7,"},{"t":"Space"},{"t":"Str","c":"l2=1.0E-11,"},{"t":"Space"},{"t":"Str","c":"epsilon"},{"t":"Space"},{"t":"Str","c":"="},{"t":"Space"},{"t":"Str","c":"1.0E-10,"},{"t":"Space"},{"t":"Str","c":"rho"},{"t":"Space"},{"t":"Str","c":"="},{"t":"Space"},{"t":"Str","c":"0.8,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"quantile_alpha"},{"t":"SoftBreak"},{"t":"Str","c":"="},{"t":"Space"},
{"t":"Str","c":"0.5."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"hyperparameter,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"default"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"SoftBreak"},{"t":"Str","c":"Weights"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"biases"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"first"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"determined"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"unsupervised"},{"t":"SoftBreak"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model."},{"t":"Space"},{"t":"Str","c":"These"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"then"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"actual,"},{"t":"Space"},{"t":"Str","c":"supervised"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"SoftBreak"},{"t":"Str","c":"model."}]},{"t":"Para","c":[{"t":"Str","c":"To"},{"t":"Space"},{"t":"Str","c":"validate"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"internal"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"approach"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"SoftBreak"},{"t":"Str","c":"chosen."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"randomly"},{"t":"Space"},{"t":"Str","c":"split"},{"t":"Space"},{"t":"Str","c":"in"
},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"SoftBreak"},{"t":"Str","c":"contained"},{"t":"Space"},{"t":"Str","c":"95%"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"contain"},{"t":"Space"},{"t":"Str","c":"5%"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"data."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"feature"},{"t":"Space"},{"t":"Str","c":"selection"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"LASSO"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed,"},{"t":"SoftBreak"},{"t":"Str","c":"reducing"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"descriptors"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"approximately"},{"t":"Space"},{"t":"Str","c":"100."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"SoftBreak"},{"t":"Str","c":"repeated"},{"t":"Space"},{"t":"Str","c":"five"},{"t":"Space"},{"t":"Str","c":"times."},{"t":"Space"},{"t":"Str","c":"Based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"five"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"data,"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{
"t":"Space"},{"t":"Str","c":"predictive"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"trained"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"tested"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"data."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"step"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"repeated"},{"t":"Space"},{"t":"Str","c":"10"},{"t":"Space"},{"t":"Str","c":"times."},{"t":"Space"},{"t":"Str","c":"Furthermore,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"y-randomisation"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed."},{"t":"Space"},{"t":"Str","c":"During"},{"t":"SoftBreak"},{"t":"Str","c":"y-randomisation,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"(y-variable)"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"randomly"},{"t":"Space"},{"t":"Str","c":"permuted."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"theory"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"after"},{"t":"Space"},{"t":"Str","c":"randomisation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"should"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"SoftBreak"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"able"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Spa
ce"},{"t":"Str","c":"correlate"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"properties"},{"t":"Space"},{"t":"Str","c":"(descriptor"},{"t":"Space"},{"t":"Str","c":"values)"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substances."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"should"},{"t":"Space"},{"t":"Str","c":"therefore"},{"t":"SoftBreak"},{"t":"Str","c":"indicate"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"change"},{"t":"Space"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"about"},{"t":"Space"},{"t":"Str","c":"50%."},{"t":"Space"},{"t":"Str","c":"If"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"SoftBreak"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"true,"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"concluded"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"correlation"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"actual"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"SoftBreak"},{"t":"Str","c":"properties"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"real"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"
},{"t":"Str","c":"chance"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"Rücker2007","citationHash":0}],[{"t":"Str","c":"@Rücker2007"}]]},{"t":"Str","c":")."}]},{"t":"Para","c":[{"t":"Image","c":[["",[],[["width","6.26875in"],["height","5.486111111111111in"]]],[],["media/image1.png",""]]}]},{"t":"Para","c":[{"t":"Str","c":"Figure"},{"t":"Space"},{"t":"Str","c":"1:"},{"t":"Space"},{"t":"Str","c":"Flowchart"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"generation"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"SoftBreak"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"R-project"}]},{"t":"Header","c":[4,["applicability-domain-1",[],[]],[{"t":"Str","c":"Applicability"},{"t":"Space"},{"t":"Str","c":"domain"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"AD"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"evaluated"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"Jaccard"},{"t":"Space"},{"t":"Str","c":"distance."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"Jaccard"},{"t":"Space"},{"t":"Str","c":"distance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"0"}]]},{"t":"Space"},{"t":"Str","c":"indicates"},{"t":"Space"},{"t":"Str","c":"that"
},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"similar,"},{"t":"Space"},{"t":"Str","c":"whereas"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"value"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"1"}]]},{"t":"Space"},{"t":"Str","c":"shows"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"SoftBreak"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"different."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"Jaccard"},{"t":"Space"},{"t":"Str","c":"distance"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"below"},{"t":"Space"},{"t":"Str","c":"0.2"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str","c":"relative"},{"t":"SoftBreak"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset."},{"t":"Space"},{"t":"Str","c":"Therefore,"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"within"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"AD"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"SoftBreak"},{"t":"Str","c":"potential"},{"t":"S
pace"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"dataset."}]},{"t":"Header","c":[4,["y-randomisation",[],[]],[{"t":"Str","c":"y-randomisation"}]]},{"t":"Para","c":[{"t":"Str","c":"After"},{"t":"Space"},{"t":"Str","c":"y-randomisation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"CCR"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"around"},{"t":"SoftBreak"},{"t":"Str","c":"50%,"},{"t":"Space"},{"t":"Str","c":"indicating"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"chance"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"distribution"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"results."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"shows,"},{"t":"SoftBreak"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"actually"},{"t":"Space"},{"t":"Str","c":"related"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"predictors"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"SoftBreak"},{"t":"Str","c":"chance."}]},{"t":"Header","c":[3,["deep-learning-in-tensorflow",[],[]],[{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"TensorFlow"}]]},{"t":"Para","c":[{"t":"Str","c":"Alternatively,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str",
"c":"was"},{"t":"Space"},{"t":"Str","c":"established"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"Python-based"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"SoftBreak"},{"t":"Str","c":"program"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://www.tensorflow.org/"}],["https://www.tensorflow.org/",""]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"high-level"},{"t":"Space"},{"t":"Str","c":"API"},{"t":"Space"},{"t":"Str","c":"Keras"},{"t":"SoftBreak"},{"t":"Str","c":"("},{"t":"Link","c":[["",["uri"],[]],[{"t":"Str","c":"https://www.tensorflow.org/guide/keras"}],["https://www.tensorflow.org/guide/keras",""]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"build"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"models."}]},{"t":"Para","c":[{"t":"Str","c":"Data"},{"t":"Space"},{"t":"Str","c":"pre-processing"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"done"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"rank"},{"t":"Space"},{"t":"Str","c":"transformation"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"QuantileTransformer"}]}]]},{"t":"Space"},{"t":"Str","c":"procedure."},{"t":"Space"},{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"sequential"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"been"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"SoftBreak"},{"t":"Str","c":"Four"},{"t":"Space"},{"t":"Str","c":"layers"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str","c":"been"},{"t":"Space"},{"t":"Str","c":"used:"},{"t":"Space"},{"t":"Str","c":"input"},{"t":"Space"},{"t":"Str","c":"layer,"},{"t":"Space"},{"t":"Str","c":"two"},{"t":"Space"},{"t":
"Str","c":"hidden"},{"t":"Space"},{"t":"Str","c":"layers"},{"t":"Space"},{"t":"Str","c":"(with"},{"t":"Space"},{"t":"Str","c":"12,"},{"t":"Space"},{"t":"Str","c":"8"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"8"},{"t":"Space"},{"t":"Str","c":"nodes,"},{"t":"Space"},{"t":"Str","c":"respectively)"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"one"},{"t":"Space"},{"t":"Str","c":"output"},{"t":"Space"},{"t":"Str","c":"layer."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"output"},{"t":"Space"},{"t":"Str","c":"layer,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"sigmoidal"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"function"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"layers"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ReLU"},{"t":"SoftBreak"},{"t":"Str","c":"("},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Emph","c":[{"t":"Str","c":"Rectified"},{"t":"Space"},{"t":"Str","c":"Linear"},{"t":"Space"},{"t":"Str","c":"Unit"}]}]]},{"t":"Str","c":")"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"function"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"Additionally,"},{"t":"SoftBreak"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"L"},{"t":"Superscript","c":[{"t":"Str","c":"2"}]},{"t":"Str","c":"-penalty"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"0.001"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"input"},{"t":"Space"},{"t":"Str","c":"layer."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"S
pace"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ADAM"},{"t":"Space"},{"t":"Str","c":"algorithm"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"minimise"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-entropy"},{"t":"SoftBreak"},{"t":"Str","c":"loss"},{"t":"Space"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"default"},{"t":"Space"},{"t":"Str","c":"parameters"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"Keras."},{"t":"Space"},{"t":"Str","c":"Training"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"performed"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"100"},{"t":"Space"},{"t":"Str","c":"epochs"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"batch"},{"t":"Space"},{"t":"Str","c":"size"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"64."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"implemented"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"SoftBreak"},{"t":"Str","c":"Python"},{"t":"Space"},{"t":"Str","c":"3.6"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"Keras."},{"t":"Space"},{"t":"Str","c":"For"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"6-fold"},{"t":"SoftBreak"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"used."},{"t":"Space"},{"t":"Str","c":"Accuracy"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{
"t":"Str","c":"estimated"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"ROC-AUC"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"SoftBreak"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix."}]},{"t":"Header","c":[2,["validation",[],[]],[{"t":"Str","c":"Validation"}]]},{"t":"Header","c":[1,["results",[],[]],[{"t":"Str","c":"Results"}]]},{"t":"Header","c":[2,["lazar-1",[],[]],[{"t":"Code","c":[["",[],[]],"lazar"]}]]},{"t":"Header","c":[2,["random-forest",[],[]],[{"t":"Str","c":"Random"},{"t":"Space"},{"t":"Str","c":"Forest"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"64%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"66%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"63%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"1."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":
"1:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"2274"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1163"}]}],[{"t":"Plain","c":[{"t":"Str","c":"3437"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"1736"}]}],[{"t":"Plain","c":[{"t":"Str","c":"2907"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4643"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"4010"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4070"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Header","c
":[2,["support-vector-machines",[],[]],[{"t":"Str","c":"Support"},{"t":"Space"},{"t":"Str","c":"Vector"},{"t":"Space"},{"t":"Str","c":"Machines"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"62%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"65%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"60%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"SoftBreak"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"2."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"2:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Pred
icted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"2057"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1107"}]}],[{"t":"Plain","c":[{"t":"Str","c":"3164"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"1953"}]}],[{"t":"Plain","c":[{"t":"Str","c":"2963"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4916"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"4010"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4070"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Header","c":[2,["deep-learning-r-project",[],[]],[{"t":"Str","c":"Deep"},{"t":"Space"},{"t":"Str","c":"Learning"},{"t":"Space"},{"t":"Str","c":"(R-project)"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"mod
el"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"R"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"59%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"89%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"30%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"confusion"},{"t":"SoftBreak"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"normalised"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"SoftBreak"},{"t":"Str","c":"3."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"3:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(R-project)"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"
Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"3575"}]}],[{"t":"Plain","c":[{"t":"Str","c":"435"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4010"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"2853"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1217"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4070"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"6428"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1652"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Header","c":[2,["dl-model-tensorflow",[],[]],[{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"}]]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"generated"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"Space"},{"t":"Str","c":"has"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"SoftBreak"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str
","c":"of"},{"t":"Space"},{"t":"Str","c":"68%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"70%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"46%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"Str","c":"normalised"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"8080"},{"t":"Space"},{"t":"Str","c":"instances,"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"provided"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"4."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"4:"},{"t":"Space"},{"t":"Str","c":"Confusion"},{"t":"Space"},{"t":"Str","c":"matrix"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignDefault"},{"t":"AlignDefault"},{"t":"AlignDefault"}],[0,0,0,0,0],[[],[{"t":"Plain","c":[{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[],[]],[[[{"t":"Plain","c":[{"t":"Str","c":"Measured"},{"t":"Space"},{"t":"Str","c":"genotoxicity"}]}],[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PP"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"PN"}]}]}]}],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TP"}]}]}]}],[{"t
":"Plain","c":[{"t":"Str","c":"2851"}]}],[{"t":"Plain","c":[{"t":"Str","c":"1227"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4078"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"TN"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"1825"}]}],[{"t":"Plain","c":[{"t":"Str","c":"2177"}]}],[{"t":"Plain","c":[{"t":"Str","c":"4002"}]}]],[[],[{"t":"Plain","c":[{"t":"Strong","c":[{"t":"Emph","c":[{"t":"Str","c":"Total"}]}]}]}],[{"t":"Plain","c":[{"t":"Str","c":"4676"}]}],[{"t":"Plain","c":[{"t":"Str","c":"3404"}]}],[{"t":"Plain","c":[{"t":"Str","c":"8080"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"PP:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"positive;"},{"t":"Space"},{"t":"Str","c":"PN:"},{"t":"Space"},{"t":"Str","c":"Predicted"},{"t":"Space"},{"t":"Str","c":"negative,"},{"t":"Space"},{"t":"Str","c":"TP:"},{"t":"Space"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"positive,"},{"t":"Space"},{"t":"Str","c":"TN:"},{"t":"SoftBreak"},{"t":"Str","c":"True"},{"t":"Space"},{"t":"Str","c":"negative"}]},{"t":"Para","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"ROC"},{"t":"Space"},{"t":"Str","c":"curves"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"6-fold"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"shown"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"Figure"},{"t":"Space"},{"t":"Str","c":"7."}]},{"t":"Para","c":[{"t":"Image","c":[["",[],[["width","3.825in"],["height","2.7327045056867894in"]]],[],["media/image7.png",""]]}]},{"t":"Para","c":[{"t":"Str","c":"Figure"},{"t":"Space"},{"t":"Str","c":"7:"},{"t":"Space"},{"t":"Str","c":"Six-fold"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"show"}
,{"t":"Space"},{"t":"Str","c":"an"},{"t":"SoftBreak"},{"t":"Str","c":"average"},{"t":"Space"},{"t":"Str","c":"area"},{"t":"Space"},{"t":"Str","c":"under"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ROC-curve"},{"t":"Space"},{"t":"Str","c":"(ROC-AUC;"},{"t":"Space"},{"t":"Str","c":"measure"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"accuracy)"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"68%."}]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"summary,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"methods"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"presented"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"following"},{"t":"Space"},{"t":"Str","c":"table."}]},{"t":"Para","c":[{"t":"Str","c":"Table"},{"t":"Space"},{"t":"Str","c":"5"},{"t":"Space"},{"t":"Str","c":"Results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"after"},{"t":"SoftBreak"},{"t":"Str","c":"y-randomisation"}]},{"t":"Table","c":[[],[{"t":"AlignDefault"},{"t":"AlignLeft"},{"t":"AlignLeft"},{"t":"AlignLeft"},{"t":"AlignLeft"}],[0.3287671232876712,0.1506849315068493,0.1095890410958904,0.1917808219178082,0.1917808219178082],[[],[{"t":"Plain","c":[{"t":"Str","c":"Accuracy"}]}],[{"t":"Plain","c":[{"t":"Str","c":"CCR"}]}],[{"t":"Plain","c":[{"t":"Str","c":"Sensitivity"}]}],[{"t":"Plain","c":[{"t":"Str","c":"Specificity"}]}]],[[[{"t":"Plain","c":[{"t":"
Str","c":"RF"},{"t":"Space"},{"t":"Str","c":"model"}]}],[{"t":"Plain","c":[{"t":"Str","c":"64.1%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"64.4%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"66.2%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.6%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"SVM"},{"t":"Space"},{"t":"Str","c":"model"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.1%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.6%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"65.0%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"60.3%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"LineBreak"},{"t":"Str","c":"(R-project)"}]}],[{"t":"Plain","c":[{"t":"Str","c":"59.3%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"59.5%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"89.2%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"29.9%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"}]}],[{"t":"Plain","c":[{"t":"Str","c":"68%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"62.2%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"69.9%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"45.6%"}]}]],[[{"t":"Plain","c":[{"t":"Str","c":"y-randomisation"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.5%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.4%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.3%"}]}],[{"t":"Plain","c":[{"t":"Str","c":"50.6%"}]}]]]]},{"t":"Para","c":[{"t":"Str","c":"CCR"},{"t":"Space"},{"t":"Str","c":"(correct"},{"t":"Space"},{"t":"Str","c":"classification"},{"t":"Space"},{"t":"Str","c":"rate)"}]},{"t":"Header","c":[1,["discussion",[],[]],[{"t":"Str","c":"Discussion"}]]},{"t":"Para","c":[{"t":"Str","c":"General"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"performance"}]},{"t":"Para","c":[{"t":"Str","c":"Based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":
"cross-validation"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"models,"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":","},{"t":"Space"},{"t":"Str","c":"RF,"},{"t":"SoftBreak"},{"t":"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"(R-project)"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"(TensorFlow)"},{"t":"Space"},{"t":"Str","c":"it"},{"t":"Space"},{"t":"Str","c":"can"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"state"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"prediction"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"optimal"},{"t":"Space"},{"t":"Str","c":"due"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"reasons."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"measured"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"cross-validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"four"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"(RF,"},{"t":"SoftBreak"},{"t":"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"(R-project"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"TensorFlow))"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"partly"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"CCR"},{"t":"Space"},{"t":"Str","c":"values"},{"t":"SoftBreak"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"59.3"},{"t":"Space"},{"t":"S
tr","c":"and"},{"t":"Space"},{"t":"Str","c":"68%,"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"R-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"TensorFlow-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"showing"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"worst"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"best"},{"t":"SoftBreak"},{"t":"Str","c":"performance,"},{"t":"Space"},{"t":"Str","c":"respectively."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"validation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"R-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"SoftBreak"},{"t":"Str","c":"revealed"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"high"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"(89.2%)"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"unacceptably"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"SoftBreak"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"29.9%"},{"t":"Space"},{"t":"Str","c":"indicating"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"high"},{"t":"Space"},{"t":"Str","c":"number"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"false"},{"t":"Space"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"estimates."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"SoftBreak"},{"t":"Str","c":"TensorFlow-generated"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model,"},{"t":"Space"},{"t":"S
tr","c":"however,"},{"t":"Space"},{"t":"Str","c":"showed"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"acceptable"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"SoftBreak"},{"t":"Str","c":"optimal"},{"t":"Space"},{"t":"Str","c":"accuracy"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"68%,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"sensitivity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"69.9%"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"45.6%."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"specificity"},{"t":"Space"},{"t":"Str","c":"indicates"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"both"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"tends"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"SoftBreak"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"too"},{"t":"Space"},{"t":"Str","c":"many"},{"t":"Space"},{"t":"Str","c":"instances"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"(genotoxic),"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"therefore"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"high"},{"t":"Space"},{"t":"Str","c":"false"},{"t":"Space"},{"t":"Str","c":"positive"},{"t":"Space"},{"t":"Str","c":"rate."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"allows"},{"t":"Space"},{"t":"Str","c":"at"},{"t":"Space"},{"t":"Str","c":"least"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"TensorFlow"},{"t":"SoftBreak"},{"t":"Str","c":"generated"},{"t":"Space"},
{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"make"},{"t":"Space"},{"t":"Str","c":"group"},{"t":"Space"},{"t":"Str","c":"statements,"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"confidence"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"estimations"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"single"},{"t":"Space"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str","c":"appears"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"insufficiently"},{"t":"Space"},{"t":"Str","c":"low."}]},{"t":"Para","c":[{"t":"Str","c":"Several"},{"t":"Space"},{"t":"Str","c":"factors"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str","c":"likely"},{"t":"Space"},{"t":"Str","c":"contributed"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"moderate"},{"t":"SoftBreak"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"methods"},{"t":"Space"},{"t":"Str","c":"as"},{"t":"Space"},{"t":"Str","c":"shown"},{"t":"Space"},{"t":"Str","c":"during"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"cross-validation:"}]},{"t":"OrderedList","c":[[1,{"t":"Decimal"},{"t":"Period"}],[[{"t":"Plain","c":[{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"St
r","c":"of"},{"t":"Space"},{"t":"Str","c":"AMES"},{"t":"SoftBreak"},{"t":"Str","c":"tests"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"genotoxicity"},{"t":"Space"},{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"ICH"},{"t":"Space"},{"t":"Str","c":"2011"}],["#_ENREF_63",""]]},{"t":"Str","c":"(),"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"vitro"}]},{"t":"Space"},{"t":"Str","c":"test"},{"t":"SoftBreak"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"strains"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"bacteria"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"Salmonella"},{"t":"Space"},{"t":"Str","c":"typhimurium"}]},{"t":"Str","c":"."},{"t":"Space"},{"t":"Str","c":"In"},{"t":"SoftBreak"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"test,"},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"evaluated"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"prior"},{"t":"SoftBreak"},{"t":"Str","c":"metabolic"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"test"},{"t":"Space"},{"t":"Str","c":"substance."},{"t":"Space"},{"t":"Str","c":"Metabolic"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"SoftBreak"},{"t":"Str","c":"could"},{"t":"Space"},{"t":"Str","c":"result"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"formation"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"metabolites"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"SoftBreak"},{"t":"Str","c":"non-genotoxic"},{"t":"Space"},{"t":"Str","c":"parent"},{"t":"
Space"},{"t":"Str","c":"compounds."},{"t":"Space"},{"t":"Str","c":"However,"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"Space"},{"t":"Str","c":"distinction"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"made"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"between"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"needed"},{"t":"Space"},{"t":"Str","c":"metabolic"},{"t":"SoftBreak"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"before"},{"t":"Space"},{"t":"Str","c":"being"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"those"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"SoftBreak"},{"t":"Str","c":"without"},{"t":"Space"},{"t":"Str","c":"metabolic"},{"t":"Space"},{"t":"Str","c":"activation."},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"able"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"handle"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"SoftBreak"},{"t":"Quoted","c":[{"t":"SingleQuote"},[{"t":"Str","c":"inaccuracy"}]]},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"well"},{"t":"Space"},{"t":"Str","c":"due"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"way"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"algorithm"},{"t":"Space"},{"t":"Str","c":"works:"},{"t":"Space"},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Space"},{"t":"Str","c":"predicts"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"
Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"potential"},{"t":"Space"},{"t":"Str","c":"based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"neighbours"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"comparable"},{"t":"Space"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"features,"},{"t":"SoftBreak"},{"t":"Str","c":"considering"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"neighbours."},{"t":"Space"},{"t":"Str","c":"Based"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"similarity,"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"probability"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"no"},{"t":"SoftBreak"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"calculated"},{"t":"Space"},{"t":"Str","c":"independently"},{"t":"Space"},{"t":"Str","c":"from"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"(meaning"},{"t":"SoftBreak"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"sum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"probabilities"},{"t":"Space"},{"t":"Str","c":"does"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"necessarily"},{"t":"Space"},{"t":"Str","c":"adds"},{"t":"Space"},{"t":"Str","c":"up"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"100%)."},{"t":"SoftBreak"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"class"
},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"higher"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"is"},{"t":"Space"},{"t":"Str","c":"then"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"overall"},{"t":"Space"},{"t":"Str","c":"outcome"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"substance."}]}]]]},{"t":"BlockQuote","c":[{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"contrast,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"other"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"need"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"trained"},{"t":"Space"},{"t":"Str","c":"first"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"recognise"},{"t":"SoftBreak"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"structural"},{"t":"Space"},{"t":"Str","c":"features"},{"t":"Space"},{"t":"Str","c":"that"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"responsible"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"genotoxicity."},{"t":"SoftBreak"},{"t":"Str","c":"Therefore,"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"mixture"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"being"},{"t":"Space"},{"t":"Str","c":"mutagenic"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"without"},{"t":"SoftBreak"},{"t":"Str","c":"metabolic"},{"t":"Space"},{"t":"Str","c":"activation"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"may"},{"t":"Space"},{"t":"Str","c":"have"},{"t":"Space"},{"t":"Str"
,"c":"adversely"},{"t":"SoftBreak"},{"t":"Str","c":"affected"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"ability"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"separate"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"two"},{"t":"Space"},{"t":"Str","c":"distinct"},{"t":"Space"},{"t":"Str","c":"classes"},{"t":"SoftBreak"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"thus"},{"t":"Space"},{"t":"Str","c":"explains"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"relatively"},{"t":"Space"},{"t":"Str","c":"low"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"models."}]}]},{"t":"OrderedList","c":[[2,{"t":"Decimal"},{"t":"Period"}],[[{"t":"Plain","c":[{"t":"Str","c":"Machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"algorithms"},{"t":"Space"},{"t":"Str","c":"try"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"find"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"optimized"},{"t":"Space"},{"t":"Str","c":"solution"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"SoftBreak"},{"t":"Str","c":"high-dimensional"},{"t":"Space"},{"t":"Str","c":"(one"},{"t":"Space"},{"t":"Str","c":"dimension"},{"t":"Space"},{"t":"Str","c":"per"},{"t":"Space"},{"t":"Str","c":"each"},{"t":"Space"},{"t":"Str","c":"predictor)"},{"t":"Space"},{"t":"Str","c":"space."},{"t":"Space"},{"t":"Str","c":"Sometimes"},{"t":"SoftBreak"},{"t":"Str","c":"these"},{"t":"Space"},{"t":"Str","c":"methods"},{"t":"Space"},{"t":"Str","c":"do"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"find"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"global"},{"t":"Space"},{"t":"Str","c":"optimum"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Spac
e"},{"t":"Str","c":"estimates"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"SoftBreak"},{"t":"Str","c":"local"},{"t":"Space"},{"t":"Str","c":"(not"},{"t":"Space"},{"t":"Str","c":"optimal)"},{"t":"Space"},{"t":"Str","c":"solutions."},{"t":"Space"},{"t":"Str","c":"Strategies"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"find"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"global"},{"t":"SoftBreak"},{"t":"Str","c":"solutions"},{"t":"Space"},{"t":"Str","c":"are"},{"t":"Space"},{"t":"Str","c":"systematic"},{"t":"Space"},{"t":"Str","c":"variation"},{"t":"Space"},{"t":"Str","c":"(grid"},{"t":"Space"},{"t":"Str","c":"search)"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"hyperparameters"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"methods,"},{"t":"Space"},{"t":"Str","c":"which"},{"t":"Space"},{"t":"Str","c":"may"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"very"},{"t":"Space"},{"t":"Str","c":"time"},{"t":"Space"},{"t":"Str","c":"consuming"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"SoftBreak"},{"t":"Str","c":"particular"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"large"},{"t":"Space"},{"t":"Str","c":"datasets."}]}]]]},{"t":"Header","c":[1,["conclusions",[],[]],[{"t":"Str","c":"Conclusions"}]]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"this"},{"t":"Space"},{"t":"Str","c":"study,"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"attempt"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"made"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"potential"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str",
"c":"using"},{"t":"Space"},{"t":"Str","c":"five"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"techniques"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":","},{"t":"Space"},{"t":"Str","c":"RF,"},{"t":"Space"},{"t":"Str","c":"SVM,"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"SoftBreak"},{"t":"Str","c":"(R-project"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"TensorFlow)."},{"t":"Space"},{"t":"Str","c":"The"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"all"},{"t":"Space"},{"t":"Str","c":"models"},{"t":"Space"},{"t":"Str","c":"fitted"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"Space"},{"t":"Str","c":"partly"},{"t":"SoftBreak"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"findings"},{"t":"Space"},{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"literature,"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"best"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"Space"},{"t":"Str","c":"obtained"},{"t":"Space"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"TensorFlow"},{"t":"Space"},{"t":"Str","c":"DL"},{"t":"Space"},{"t":"Str","c":"model."},{"t":"Space"},{"t":"Str","c":"Therefore,"},{"t":"Space"},{"t":"Str","c":"modelling"},{"t":"Space"},{"t":"Str","c":"allows"},{"t":"Space"},{"t":"Str","c":"statements"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"relative"},{"t":"Space"},{"t":"Str","c":"risks"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"genotoxicity"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"different"},{"t":"Space"},{"t":"Str","c":"PA"},{"t":"Space"},{"t":"Str","c":"groups."},{"t":"Space"},{"t":"St
r","c":"Individual"},{"t":"SoftBreak"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"selective"},{"t":"Space"},{"t":"Str","c":"PAs"},{"t":"Space"},{"t":"Str","c":"appear,"},{"t":"Space"},{"t":"Str","c":"however,"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"reliable"},{"t":"Space"},{"t":"Str","c":"on"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"SoftBreak"},{"t":"Str","c":"current"},{"t":"Space"},{"t":"Str","c":"basis"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset."}]},{"t":"Para","c":[{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"study"},{"t":"Space"},{"t":"Str","c":"emphasises"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"importance"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"critical"},{"t":"Space"},{"t":"Str","c":"assessment"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"SoftBreak"},{"t":"Str","c":"predictions"},{"t":"Space"},{"t":"Str","c":"by"},{"t":"Space"},{"t":"Str","c":"QSAR"},{"t":"Space"},{"t":"Str","c":"models."},{"t":"Space"},{"t":"Str","c":"This"},{"t":"Space"},{"t":"Str","c":"includes"},{"t":"Space"},{"t":"Str","c":"not"},{"t":"Space"},{"t":"Str","c":"only"},{"t":"Space"},{"t":"Str","c":"extensive"},{"t":"Space"},{"t":"Str","c":"literature"},{"t":"SoftBreak"},{"t":"Str","c":"research"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"assess"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"plausibility"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"predictions,"},{"t":"Space"},{"t":"Str","c":"but"},{"t":"Space"},{"t":"Str","c":"also"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"good"},{"t":"SoftBreak"},{"t":"Str","c":"knowledge"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str
","c":"the"},{"t":"Space"},{"t":"Str","c":"metabolism"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"test"},{"t":"Space"},{"t":"Str","c":"substances"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"understanding"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"SoftBreak"},{"t":"Str","c":"possible"},{"t":"Space"},{"t":"Str","c":"mechanisms"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"toxicity."}]},{"t":"Para","c":[{"t":"Str","c":"In"},{"t":"Space"},{"t":"Str","c":"further"},{"t":"Space"},{"t":"Str","c":"studies,"},{"t":"Space"},{"t":"Str","c":"additional"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"techniques"},{"t":"Space"},{"t":"Str","c":"or"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"modified"},{"t":"SoftBreak"},{"t":"Str","c":"(extended)"},{"t":"Space"},{"t":"Str","c":"training"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"Space"},{"t":"Str","c":"should"},{"t":"Space"},{"t":"Str","c":"be"},{"t":"Space"},{"t":"Str","c":"used"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"an"},{"t":"Space"},{"t":"Str","c":"additional"},{"t":"Space"},{"t":"Str","c":"attempt"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"SoftBreak"},{"t":"Str","c":"predict"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"genotoxic"},{"t":"Space"},{"t":"Str","c":"potential"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"PAs."}]},{"t":"Header","c":[1,["references",[],[]],[{"t":"Str","c":"References"}]]}],"pandoc-api-version":[1,17,5,4],"meta":{"bibliography":{"t":"MetaInlines","c":[{"t":"Str","c":"bibliography.bib"}]},"documentclass":{"t":"MetaInlines","c":[{"t":"Str","c":"scrartcl"}]},"keywords":{"t":"MetaInlines","c":[{"t":"Str","c":"mutagenicity,"},{"t":"Space"},{"t":"Str","c":"(Q)SAR,"},{"t":"Space"},{"t":"Str","c":"lazar,"},{"t":"Space"},{"t":"Str","c":"random"},
{"t":"Space"},{"t":"Str","c":"forest,"},{"t":"Space"},{"t":"Str","c":"support"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"machine,"},{"t":"Space"},{"t":"Str","c":"deep"},{"t":"Space"},{"t":"Str","c":"learning"}]},"author":{"t":"MetaList","c":[{"t":"MetaMap","c":{"email":{"t":"MetaInlines","c":[{"t":"Str","c":"helma@in-silico.ch"}]},"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Christoph Helma"}]},"correspondence":{"t":"MetaInlines","c":[{"t":"Str","c":"yes"}]},"id":{"t":"MetaString","c":"Christoph Helma"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"1"}]}}},{"t":"MetaMap","c":{"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Verena Schöning"}]},"id":{"t":"MetaString","c":"Verena Schöning"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"2"}]}}},{"t":"MetaMap","c":{"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Philipp Boss"}]},"id":{"t":"MetaString","c":"Philipp Boss"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"2"}]}}},{"t":"MetaMap","c":{"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Jürgen Drewe"}]},"id":{"t":"MetaString","c":"Jürgen 
Drewe"},"institute":{"t":"MetaList","c":[{"t":"MetaString","c":"2"}]}}}]},"abstract":{"t":"MetaBlocks","c":[{"t":"Para","c":[{"t":"Str","c":"k-nearest"},{"t":"Space"},{"t":"Str","c":"neighbor"},{"t":"Space"},{"t":"Str","c":"("},{"t":"Code","c":[["",[],[]],"lazar"]},{"t":"Str","c":"),"},{"t":"Space"},{"t":"Str","c":"random"},{"t":"Space"},{"t":"Str","c":"forest,"},{"t":"Space"},{"t":"Str","c":"support"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"machine"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"deep"},{"t":"SoftBreak"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"algorithms"},{"t":"Space"},{"t":"Str","c":"were"},{"t":"Space"},{"t":"Str","c":"applied"},{"t":"Space"},{"t":"Str","c":"to"},{"t":"Space"},{"t":"Str","c":"a"},{"t":"Space"},{"t":"Str","c":"new"},{"t":"Space"},{"t":"Emph","c":[{"t":"Str","c":"Salmonella"}]},{"t":"Space"},{"t":"Str","c":"mutagenicity"},{"t":"Space"},{"t":"Str","c":"dataset"},{"t":"SoftBreak"},{"t":"Str","c":"with"},{"t":"Space"},{"t":"Str","c":"8281"},{"t":"Space"},{"t":"Str","c":"unique"},{"t":"Space"},{"t":"Str","c":"chemical"},{"t":"Space"},{"t":"Str","c":"structures."},{"t":"Space"},{"t":"Str","c":"Algorithm"},{"t":"Space"},{"t":"Str","c":"performance"},{"t":"Space"},{"t":"Str","c":"was"},{"t":"Space"},{"t":"Str","c":"evaluated"},{"t":"SoftBreak"},{"t":"Str","c":"using"},{"t":"Space"},{"t":"Str","c":"5-fold"},{"t":"Space"},{"t":"Str","c":"crossvalidation."},{"t":"SoftBreak"},{"t":"Str","c":"TODO"},{"t":"SoftBreak"},{"t":"Str","c":"-"},{"t":"Space"},{"t":"Str","c":"results"},{"t":"SoftBreak"},{"t":"Str","c":"-"},{"t":"Space"},{"t":"Str","c":"conclusion"}]}]},"title":{"t":"MetaInlines","c":[{"t":"Str","c":"A"},{"t":"Space"},{"t":"Str","c":"comparison"},{"t":"Space"},{"t":"Str","c":"of"},{"t":"Space"},{"t":"Str","c":"random"},{"t":"Space"},{"t":"Str","c":"forest,"},{"t":"Space"},{"t":"Str","c":"support"},{"t":"Space"},{"t":"Str","c":"vector"},{"t":"Space"},{"t":"Str","c":"machine
,"},{"t":"Space"},{"t":"Str","c":"deep"},{"t":"Space"},{"t":"Str","c":"learning"},{"t":"Space"},{"t":"Str","c":"and"},{"t":"Space"},{"t":"Str","c":"lazar"},{"t":"Space"},{"t":"Str","c":"algorithms"},{"t":"Space"},{"t":"Str","c":"for"},{"t":"Space"},{"t":"Str","c":"predicting"},{"t":"Space"},{"t":"Str","c":"mutagenicity"}]},"cito_cites":{"t":"MetaMap","c":{"citation":{"t":"MetaList","c":[{"t":"MetaString","c":"Kazius2005"},{"t":"MetaString","c":"Hansen2009"},{"t":"MetaString","c":"Yap2011"},{"t":"MetaString","c":"Bender2004"},{"t":"MetaString","c":"OBoyle2011a"},{"t":"MetaString","c":"Yap2011"},{"t":"MetaString","c":"Rücker2007"}]}}},"institute":{"t":"MetaList","c":[{"t":"MetaMap","c":{"address":{"t":"MetaInlines","c":[{"t":"Str","c":"Rastatterstrasse"},{"t":"Space"},{"t":"Str","c":"41,"},{"t":"Space"},{"t":"Str","c":"4057"},{"t":"Space"},{"t":"Str","c":"Basel,"},{"t":"Space"},{"t":"Str","c":"Switzerland"}]},"name":{"t":"MetaInlines","c":[{"t":"Str","c":"in"},{"t":"Space"},{"t":"Str","c":"silico"},{"t":"Space"},{"t":"Str","c":"toxicology"},{"t":"Space"},{"t":"Str","c":"gmbh"}]},"id":{"t":"MetaString","c":"ist"}}},{"t":"MetaMap","c":{"address":{"t":"MetaInlines","c":[{"t":"Str","c":"Seeblickstrasse"},{"t":"Space"},{"t":"Str","c":"4,"},{"t":"Space"},{"t":"Str","c":"8590"},{"t":"Space"},{"t":"Str","c":"Romanshorn,"},{"t":"Space"},{"t":"Str","c":"Switzerland"}]},"name":{"t":"MetaInlines","c":[{"t":"Str","c":"Zeller"},{"t":"Space"},{"t":"Str","c":"AG"}]},"id":{"t":"MetaString","c":"zeller"}}}]}}}
diff --git a/paper/outfile.epub b/paper/outfile.epub
new file mode 100644
index 0000000..b64e7d8
Binary files /dev/null and b/paper/outfile.epub differ
diff --git a/paper/outfile.html b/paper/outfile.html
new file mode 100644
index 0000000..d2a84a9
--- /dev/null
+++ b/paper/outfile.html
@@ -0,0 +1,822 @@
+
+
+
+
+
+
+
+
+
+ A comparison of random forest, support vector machine, deep learning and lazar algorithms for predicting mutagenicity
+
+
+
+
+
+
+
+
+
+
A comparison of random forest, support vector machine, deep learning and lazar algorithms for predicting mutagenicity
+
+
+
+ Christoph Helma✉ 1,
+
+ Verena Schöning2,
+
+ Philipp Boss2,
+
+ Jürgen Drewe2
k-nearest neighbor (lazar), random forest, support vector machine and deep learning algorithms were applied to a new Salmonella mutagenicity dataset with 8281 unique chemical structures. Algorithm performance was evaluated using 5-fold crossvalidation. TODO - results - conclusion
+
+
+
+
+
Introduction
+
TODO: algo history
+
TODO: dataset history
+
TODO: open problems
+
Materials and Methods
+
Mutagenicity data
+
For all methods, the same training dataset was used. The training dataset was compiled from the following sources:
Mutagenicity classifications from Kazius and Hansen datasets were used without further processing. To achieve consistency between these datasets, EFSA compounds were classified as mutagenic if at least one positive result was found for TA98 or TA100 Salmonella strains.
+
Dataset merges were based on unique SMILES (Simplified Molecular Input Line Entry Specification) strings of the compound structures. Duplicated experimental data with the same outcome was merged into a single value, because it is likely that it originated from the same experiment. Contradictory results were kept as multiple measurements in the database. The combined training dataset contains 8281 unique structures.
+
Source code for all data download, extraction and merge operations is publicly available from the git repository https://git.in-silico.ch/pyrrolizidine under a GPL3 License.
+
TODO: check/fix git repo
+
For the Random Forest (RF), Support Vector Machines (SVM), and Deep Learning (DL) models, molecular descriptors were calculated with the PaDEL-Descriptors program (http://www.yapcwsoft.com version 2.21, Yap (2011)).
+
TODO: sentence ??
+
From these descriptors, those that were actually used for the generation of the DL model were chosen.
+
Algorithms
+
lazar
+
lazar (lazy structure activity relationships) is a modular framework for read-across model development and validation. It follows the following basic workflow: For a given chemical structure lazar:
+
+
searches in a database for similar structures (neighbours) with experimental data,
+
builds a local QSAR model with these neighbours and
+
uses this model to predict the unknown activity of the query compound.
+
+
This procedure resembles an automated version of read across predictions in toxicology, in machine learning terms it would be classified as a k-nearest-neighbour algorithm.
+
Apart from this basic workflow, lazar is completely modular and allows the researcher to use any algorithm for similarity searches and local QSAR (Quantitative structure–activity relationship) modelling. Algorithms used within this study are described in the following sections.
+
Neighbour identification
+
Similarity calculations were based on MolPrint2D fingerprints (Bender et al. (2004)) from the OpenBabel cheminformatics library (O’Boyle et al. (2011)). The MolPrint2D fingerprint uses atom environments as molecular representation, which resembles basically the chemical concept of functional groups. For each atom in a molecule, it represents the chemical environment using the atom types of connected atoms.
+
MolPrint2D fingerprints are generated dynamically from chemical structures and do not rely on predefined lists of fragments (such as OpenBabel FP3, FP4 or MACCs fingerprints or lists of toxicophores/toxicophobes). This has the advantage that they may capture substructures of toxicological relevance that are not included in other fingerprints.
+
From MolPrint2D fingerprints a feature vector with all atom environments of a compound can be constructed that can be used to calculate chemical similarities.
+
The chemical similarity between two compounds a and b is expressed as the proportion between atom environments common in both structures A ∩ B and the total number of atom environments A ∪ B (Jaccard/Tanimoto index).
+
\[sim = \frac{\left| A\ \cap B \right|}{\left| A\ \cup B \right|}\]
+
Threshold selection is a trade-off between prediction accuracy (high threshold) and the number of predictable compounds (low threshold). As it is in many practical cases desirable to make predictions even in the absence of closely related neighbours, we follow a tiered approach:
+
+
First a similarity threshold of 0.5 is used to collect neighbours, to create a local QSAR model and to make a prediction for the query compound.
+
If any of these steps fails, the procedure is repeated with a similarity threshold of 0.2 and the prediction is flagged with a warning that it might be out of the applicability domain of the training data.
+
Similarity thresholds of 0.5 and 0.2 are the default values chosen by the software developers and remained unchanged during the course of these experiments.
+
+
Compounds with the same structure as the query structure are automatically eliminated from neighbours to obtain unbiased predictions in the presence of duplicates.
+
Local QSAR models and predictions
+
Only similar compounds (neighbours) above the threshold are used for local QSAR models. In this investigation, we are using a weighted majority vote from the neighbours’ experimental data for mutagenicity classifications. Probabilities for both classes (mutagenic/non-mutagenic) are calculated according to the following formula and the class with the higher probability is used as prediction outcome.
+
\[p_{c} = \frac{\sum_{}^{}\text{sim}_{n,c}}{\sum_{}^{}\text{sim}_{n}}\]
+
\(p_{c}\) Probability of class c (e.g. mutagenic or non-mutagenic)
+\(\sum_{}^{}\text{sim}_{n,c}\) Sum of similarities of neighbours with class c
+\(\sum_{}^{}\text{sim}_{n}\) Sum of similarities of all neighbours
+
Applicability domain
+
The applicability domain (AD) of lazar models is determined by the structural diversity of the training data. If no similar compounds are found in the training data no predictions will be generated. Warnings are issued if the similarity threshold had to be lowered from 0.5 to 0.2 in order to enable predictions. Predictions without warnings can be considered as close to the applicability domain and predictions with warnings as more distant from the applicability domain. Quantitative applicability domain information can be obtained from the similarities of individual neighbours.
Random Forest, Support Vector Machines, and Deep Learning in R-project
+
In comparison to lazar, three other models (Random Forest (RF), Support Vector Machines (SVM), and Deep Learning (DL)) were evaluated.
+
For the generation of these models, molecular 1D and 2D descriptors of the training dataset were calculated using PaDEL-Descriptors (http://www.yapcwsoft.com version 2.21, Yap (2011)).
+
As the training dataset contained over 8280 instances, it was decided to delete instances with missing values during data pre-processing. Furthermore, substances with equivocal outcome were removed. The final training dataset contained 8080 instances with known mutagenic potential. The RF, SVM, and DL models were generated using the R software (R-project for Statistical Computing, https://www.r-project.org/; version 3.3.1); the specific R packages used are identified for each step in the description below. During feature selection, descriptors with near zero variance were removed using the ‘NearZeroVar’-function (package ‘caret’). If the percentage of the most common value was more than 90% or when the frequency ratio of the most common value to the second most common value was greater than 95:5 (e.g. 95 instances of the most common value and only 5 or fewer instances of the second most common value), a descriptor was classified as having a near zero variance. After that, highly correlated descriptors were removed using the ‘findCorrelation’-function (package ‘caret’) with a cut-off of 0.9. This resulted in a training dataset with 516 descriptors. These descriptors were scaled to be in the range between 0 and 1 using the ‘preProcess’-function (package ‘caret’). The scaling routine was saved in order to apply the same scaling on the testing dataset. As these three steps did not consider the outcome, it was decided that they do not need to be included in the cross-validation of the model. To further reduce the number of features, a LASSO (least absolute shrinkage and selection operator) regression was performed using the ‘glmnet’-function (package ‘glmnet’). The reduced dataset was used for the generation of the pre-trained models.
+
For the RF model, the ‘randomForest’-function (package ‘randomForest’) was used. A forest with 1000 trees with maximal terminal nodes of 200 was grown for the prediction.
+
The ‘svm’-function (package ‘e1071’) with a radial basis function kernel was used for the SVM model.
+
The DL model was generated using the ‘h2o.deeplearning’-function (package ‘h2o’). The DL contained four hidden layers with 70, 50, 50, and 10 neurons, respectively. Other hyperparameters were set as follows: l1=1.0E-7, l2=1.0E-11, epsilon = 1.0E-10, rho = 0.8, and quantile_alpha = 0.5. For all other hyperparameters, the default values were used. Weights and biases were in a first step determined with an unsupervised DL model. These values were then used for the actual, supervised DL model.
+
To validate these models, an internal cross-validation approach was chosen. The training dataset was randomly split into training data, which contained 95% of the data, and validation data, which contained 5% of the data. A feature selection with LASSO on the training data was performed, reducing the number of descriptors to approximately 100. This step was repeated five times. Based on each of the five different training data, the predictive models were trained and the performance tested with the validation data. This step was repeated 10 times. Furthermore, a y-randomisation using the RF model was performed. During y-randomisation, the outcome (y-variable) is randomly permuted. The theory is that after randomisation of the outcome, the model should not be able to correlate the outcome to the properties (descriptor values) of the substances. The performance of the model should therefore indicate a by-chance prediction with an accuracy of about 50%. If this is true, it can be concluded that correlation between actual outcome and properties of the substances is real and not by chance (Rücker, Rücker, and Meringer (2007)).
+
+
Figure 1: Flowchart of the generation and validation of the models generated in R-project
+
Applicability domain
+
The AD of the training dataset and the PA dataset was evaluated using the Jaccard distance. A Jaccard distance of ‘0’ indicates that the substances are similar, whereas a value of ‘1’ shows that the substances are different. The Jaccard distance was below 0.2 for all PAs relative to the training dataset. Therefore, the PA dataset is within the AD of the training dataset and the models can be used to predict the genotoxic potential of the PA dataset.
+
y-randomisation
+
After y-randomisation of the outcome, the accuracy and CCR are around 50%, indicating a chance distribution of the results. This shows that the outcome is actually related to the predictors and not by chance.
Data pre-processing was done by rank transformation using the ‘QuantileTransformer’ procedure. A sequential model has been used. Four layers have been used: an input layer and two hidden layers (with 12, 8 and 8 nodes, respectively) and one output layer. For the output layer, a sigmoidal activation function and for all other layers the ReLU (‘Rectified Linear Unit’) activation function was used. Additionally, an L2-penalty of 0.001 was used for the input layer. For training of the model, the ADAM algorithm was used to minimise the cross-entropy loss using the default parameters of Keras. Training was performed for 100 epochs with a batch size of 64. The model was implemented with Python 3.6 and Keras. For training of the model, a 6-fold cross-validation was used. Accuracy was estimated by ROC-AUC and confusion matrix.
+
Validation
+
Results
+
lazar
+
Random Forest
+
The validation showed that the RF model has an accuracy of 64%, a sensitivity of 66% and a specificity of 63%. The confusion matrix of the model, calculated for 8080 instances, is provided in Table 1.
The validation showed that the SVM model has an accuracy of 62%, a sensitivity of 65% and a specificity of 60%. The confusion matrix of SVM model, calculated for 8080 instances, is provided in Table 2.
The validation showed that the DL model generated in R has an accuracy of 59%, a sensitivity of 89% and a specificity of 30%. The confusion matrix of the model, normalised to 8080 instances, is provided in Table 3.
+
Table 3: Confusion matrix of the DL model (R-project)
The validation showed that the DL model generated in TensorFlow has an accuracy of 68%, a sensitivity of 70% and a specificity of 46%. The confusion matrix of the model, normalised to 8080 instances, is provided in Table 4.
+
Table 4: Confusion matrix of the DL model (TensorFlow)
The ROC curves from the 6-fold validation are shown in Figure 7.
+
+
Figure 7: Six-fold cross-validation of the TensorFlow DL model shows an average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68%.
+
In summary, the validation results of the four methods are presented in the following table.
+
Table 5: Results of the cross-validation of the four models and after y-randomisation
+
+
+
+
+
+
+
+
+
+
+
+
Accuracy
+
CCR
+
Sensitivity
+
Specificity
+
+
+
+
+
RF model
+
64.1%
+
64.4%
+
66.2%
+
62.6%
+
+
+
SVM model
+
62.1%
+
62.6%
+
65.0%
+
60.3%
+
+
+
DL model
+(R-project)
+
59.3%
+
59.5%
+
89.2%
+
29.9%
+
+
+
DL model (TensorFlow)
+
68%
+
62.2%
+
69.9%
+
45.6%
+
+
+
y-randomisation
+
50.5%
+
50.4%
+
50.3%
+
50.6%
+
+
+
+
CCR (correct classification rate)
+
Discussion
+
General model performance
+
Based on the results of the cross-validation for all models, lazar, RF, SVM, DL (R-project) and DL (TensorFlow), it can be stated that the prediction results are not optimal due to different reasons. The accuracy as measured during cross-validation of the four models (RF, SVM, DL (R-project and TensorFlow)) was partly low with values between 59.3% and 68%, with the R-generated DL model and the TensorFlow-generated DL model showing the worst and the best performance, respectively. The validation of the R-generated DL model revealed a high sensitivity (89.2%) but an unacceptably low specificity of 29.9% indicating a high number of false positive estimates. The TensorFlow-generated DL model, however, showed an acceptable but not optimal accuracy of 68%, a sensitivity of 69.9% and a specificity of 45.6%. The low specificity indicates that both DL models tend to predict too many instances as positive (genotoxic), and therefore have a high false positive rate. This allows at least with the TensorFlow generated DL model to make group statements, but the confidence for estimations of single PAs appears to be insufficient.
+
Several factors have likely contributed to the low to moderate performance of the used methods as shown during the cross-validation:
+
+
The outcome in the training dataset was based on the results of AMES tests for genotoxicity (ICH 2011), an in vitro test in different strains of the bacteria Salmonella typhimurium. In this test, mutagenicity is evaluated with and without prior metabolic activation of the test substance. Metabolic activation could result in the formation of genotoxic metabolites from non-genotoxic parent compounds. However, no distinction was made in the training dataset between substances that needed metabolic activation before being mutagenic and those that were mutagenic without metabolic activation. lazar is able to handle this ‘inaccuracy’ in the training dataset well due to the way the algorithm works: lazar predicts the genotoxic potential based on the neighbours of substances with comparable structural features, considering mutagenic and not mutagenic neighbours. Based on the structural similarity, a probability for mutagenicity and no mutagenicity is calculated independently from each other (meaning that the sum of probabilities does not necessarily add up to 100%). The class with the higher outcome is then the overall outcome for the substance.
+
+
+
In contrast, the other models need to be trained first to recognise the structural features that are responsible for genotoxicity. Therefore, the mixture of substances being mutagenic with and without metabolic activation in the training dataset may have adversely affected the ability to separate the dataset in two distinct classes and thus explains the relatively low performance of these models.
+
+
+
Machine learning algorithms try to find an optimized solution in a high-dimensional (one dimension per predictor) space. Sometimes these methods do not find the global optimum of estimates but only local (not optimal) solutions. Strategies to find the global solutions are systematic variation (grid search) of the hyperparameters of the methods, which may be very time consuming in particular in large datasets.
+
+
Conclusions
+
In this study, an attempt was made to predict the genotoxic potential of PAs using five different machine learning techniques (lazar, RF, SVM, DL (R-project and TensorFlow)). The results of all models fitted only partly to the findings in literature, with best results obtained with the TensorFlow DL model. Therefore, modelling allows statements on the relative risks of genotoxicity of the different PA groups. Individual predictions for selective PAs appear, however, not reliable on the current basis of the used training dataset.
+
This study emphasises the importance of critical assessment of predictions by QSAR models. This includes not only extensive literature research to assess the plausibility of the predictions, but also a good knowledge of the metabolism of the test substances and understanding for possible mechanisms of toxicity.
+
In further studies, additional machine learning techniques or a modified (extended) training dataset should be used for an additional attempt to predict the genotoxic potential of PAs.
+
References
+
+
+
Bender, Andreas, Hamse Y. Mussa, Robert C. Glen, and Stephan Reiling. 2004. “Molecular Similarity Searching Using Atom Environments, Information-Based Feature Selection, and a Naïve Bayesian Classifier.” Journal of Chemical Information and Computer Sciences 44 (1): 170–78. https://doi.org/10.1021/ci034207y.
+
+
+
Hansen, Katja, Sebastian Mika, Timon Schroeter, Andreas Sutter, Antonius ter Laak, Thomas Steger-Hartmann, Nikolaus Heinrich, and Klaus-Robert Müller. 2009. “Benchmark Data Set for in Silico Prediction of Ames Mutagenicity.” Journal of Chemical Information and Modeling 49 (9): 2077–81. https://doi.org/10.1021/ci900161g.
+
+
+
Kazius, J., R. McGuire, and R. Bursi. 2005. “Derivation and Validation of Toxicophores for Mutagenicity Prediction.” J Med Chem, no. 48: 312–20.
+
+
+
O’Boyle, Noel, Michael Banck, Craig James, Chris Morley, Tim Vandermeersch, and Geoffrey Hutchison. 2011. “Open Babel: An open chemical toolbox.” J. Cheminf. 3 (1): 33. https://doi.org/10.1186/1758-2946-3-33.
+
+
+
Rücker, C, G Rücker, and M. Meringer. 2007. “Y-Randomization and Its Variants in Qspr/Qsar.” J. Chem. Inf. Model., no. 47: 2345–57.
+
+
+
Yap, CW. 2011. “PaDEL-Descriptor: An Open Source Software to Calculate Molecular Descriptors and Fingerprints.” Journal of Computational Chemistry, no. 32: 1466–74.