diff options
Diffstat (limited to 'paper/lua-filters')
109 files changed, 6642 insertions, 0 deletions
diff --git a/paper/lua-filters/.travis.yml b/paper/lua-filters/.travis.yml new file mode 100644 index 0000000..196eb28 --- /dev/null +++ b/paper/lua-filters/.travis.yml @@ -0,0 +1,55 @@ +# Use new container infrastructure to enable caching +sudo: false + +# Do not choose a language; we provide our own build tools. +language: minimal + +addons: + apt: + packages: + - aspell + - aspell-fr + - aspell-en + - imagemagick + - default-jre + - graphviz + - inkscape + - python3 + - python3-pip + - python3-tk + - python3-numpy + - python3-matplotlib + - latex-xcolor + - lmodern + - texlive-bibtex-extra + - texlive-fonts-recommended + - texlive-generic-recommended + - texlive-latex-recommended + - texlive-latex-extra + - pgf + +before_install: +- unset CC +# Download and unpack the pandoc binary +- | + pushd $HOME + RELEASES_URL='https://github.com/jgm/pandoc/releases' + export PANDOCVERSION=$(curl -I "$RELEASES_URL/latest" | sed -ne 's#Location:.*tag/\(.*\)$#\1#p' | tr -d "\n\r") + echo $PANDOCVERSION + wget $RELEASES_URL/download/$PANDOCVERSION/pandoc-$PANDOCVERSION-linux.tar.gz + tar xvzf pandoc-$PANDOCVERSION-linux.tar.gz + popd +# Download plantuml.jar for plantuml filter +- | + pushd $HOME + wget http://sourceforge.net/projects/plantuml/files/plantuml.jar + popd +- export PATH=$HOME/pandoc-$PANDOCVERSION/bin:$PATH +- export PLANTUML=$HOME/plantuml.jar + +install: [] + +script: +- | + make test + diff --git a/paper/lua-filters/LICENSE b/paper/lua-filters/LICENSE new file mode 100644 index 0000000..720865c --- /dev/null +++ b/paper/lua-filters/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017-2019 John MacFarlane and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, 
and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/paper/lua-filters/Makefile b/paper/lua-filters/Makefile new file mode 100644 index 0000000..0de553d --- /dev/null +++ b/paper/lua-filters/Makefile @@ -0,0 +1,5 @@ +FILTERS=$(wildcard $(shell find * -type d | grep -v '[/\\]')) +.PHONY: test + +test: + bash runtests.sh $(FILTERS) diff --git a/paper/lua-filters/README.md b/paper/lua-filters/README.md new file mode 100644 index 0000000..736ed8f --- /dev/null +++ b/paper/lua-filters/README.md @@ -0,0 +1,21 @@ +[![travis build +status](https://img.shields.io/travis/pandoc/lua-filters/master.svg?label=travis+build)](https://travis-ci.org/pandoc/lua-filters) + +# Lua Filters + +This repository collects Lua filters for pandoc. + +To learn about Lua filters, see the +[documentation](http://pandoc.org/lua-filters.html). + +Each filter goes in its own subdirectory. Each subdirectory contains: + +- the filter itself (e.g. `wordcount.lua`) +- a `README.md` describing the use of the filter +- a `Makefile` with a `test` target to test the filter +- some data files used for the tests, which may also serve + as examples + +PRs for new filters are welcome, provided they conform to +these guidelines. 
+ diff --git a/paper/lua-filters/abstract-to-meta/Makefile b/paper/lua-filters/abstract-to-meta/Makefile new file mode 100644 index 0000000..1ea3124 --- /dev/null +++ b/paper/lua-filters/abstract-to-meta/Makefile @@ -0,0 +1,8 @@ +test: sample.md abstract-to-meta.lua + @pandoc --lua-filter=abstract-to-meta.lua --standalone --to=markdown $< \ + | diff -u expected.md - + +expected.md: sample.md abstract-to-meta.lua + pandoc --lua-filter=abstract-to-meta.lua --standalone --output $@ $< + +.PHONY: test diff --git a/paper/lua-filters/abstract-to-meta/README.md b/paper/lua-filters/abstract-to-meta/README.md new file mode 100644 index 0000000..42c0d29 --- /dev/null +++ b/paper/lua-filters/abstract-to-meta/README.md @@ -0,0 +1,37 @@ +# abstract-to-meta + +This moves a document's abstract from the main text into the +metadata. Metadata elements usually allow for finer placement +control in the final output, but writing body text is easier and +more natural. + +## Defining an Abstract + +A document abstract can either be put directly in the document +metadata, for example by inserting an *abstract* attribute into a +YAML block. + + --- + abstract: | + Place abstract here. + + Multiple paragraphs are possible. + --- + +The additional indentation and formatting requirements in YAML +headers can be confusing or annoying for authors. It is hence +preferable to allow abstracts to be written as normal sections. + + # Abstract + + Place abstract here. + + Multiple paragraphs are possible. + +This filter turns the latter into the former by looking for a +top-level header whose ID is `abstract`. Pandoc auto-creates IDs +based on header contents, so a header titled *Abstract* will +satisfy this condition.[^1] + +[^1]: This requires the `auto_identifiers` extension. It is + enabled by default.
diff --git a/paper/lua-filters/abstract-to-meta/abstract-to-meta.lua b/paper/lua-filters/abstract-to-meta/abstract-to-meta.lua new file mode 100644 index 0000000..dbf9b03 --- /dev/null +++ b/paper/lua-filters/abstract-to-meta/abstract-to-meta.lua @@ -0,0 +1,23 @@ +local looking_at_abstract = false +local abstract = {} + +function Block (elem) + if looking_at_abstract then + abstract[#abstract + 1] = elem + return {} + end +end + +function Header (elem) + if elem.level == 1 and elem.identifier == 'abstract' then + looking_at_abstract = true + return {} + else + looking_at_abstract = looking_at_abstract and elem.level ~= 1 + end +end + +function Meta (meta) + meta.abstract = meta.abstract or pandoc.MetaBlocks(abstract) + return meta +end diff --git a/paper/lua-filters/abstract-to-meta/expected.md b/paper/lua-filters/abstract-to-meta/expected.md new file mode 100644 index 0000000..638e268 --- /dev/null +++ b/paper/lua-filters/abstract-to-meta/expected.md @@ -0,0 +1,19 @@ +--- +abstract: | + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod + tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim + veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea + commodo consequat. Duis aute irure dolor in reprehenderit in voluptate + velit esse cillum dolore eu fugiat nulla pariatur. + + Excepteur sint occaecat cupidatat non proident, sunt in culpa qui + officia deserunt mollit anim id est laborum. +--- + +Lorem Ipsum +=========== + +Quo dolore molestiae et laboriosam occaecati explicabo corrupti. Earum +expedita ducimus quaerat est quam ut molestiae. Illum deleniti vel +labore facilis et cum est. Est nemo est vel ad. Assumenda consequatur +rerum officiis atque officia. Est nihil iste cumque ad qui. 
diff --git a/paper/lua-filters/abstract-to-meta/sample.md b/paper/lua-filters/abstract-to-meta/sample.md new file mode 100644 index 0000000..27aabcd --- /dev/null +++ b/paper/lua-filters/abstract-to-meta/sample.md @@ -0,0 +1,17 @@ +# Abstract + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu +fugiat nulla pariatur. + +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia +deserunt mollit anim id est laborum. + +# Lorem Ipsum + +Quo dolore molestiae et laboriosam occaecati explicabo corrupti. Earum expedita +ducimus quaerat est quam ut molestiae. Illum deleniti vel labore facilis et cum +est. Est nemo est vel ad. Assumenda consequatur rerum officiis atque officia. +Est nihil iste cumque ad qui. diff --git a/paper/lua-filters/author-info-blocks/Makefile b/paper/lua-filters/author-info-blocks/Makefile new file mode 100644 index 0000000..341c44d --- /dev/null +++ b/paper/lua-filters/author-info-blocks/Makefile @@ -0,0 +1,8 @@ +test: sample.md author-info-blocks.lua + @pandoc --lua-filter=author-info-blocks.lua --standalone --to=native $< \ + | diff -u expected.native - + +expected.native: sample.md author-info-blocks.lua + pandoc --lua-filter=author-info-blocks.lua --standalone --output $@ $< + +.PHONY: test diff --git a/paper/lua-filters/author-info-blocks/README.md b/paper/lua-filters/author-info-blocks/README.md new file mode 100644 index 0000000..3386bac --- /dev/null +++ b/paper/lua-filters/author-info-blocks/README.md @@ -0,0 +1,59 @@ +# author-info-blocks + +This filter adds author-related header blocks usually included in +scholarly articles, such as a list of author affiliations, +correspondence information, and notes on equal contributors.
+ + +## Dependencies + +This filter assumes metadata in the canonical format generated by +the [scholarly-metadata filter](../scholarly-metadata). + +## Usage + +The filter should be run after *scholarly-metadata.lua*: + + pandoc --lua-filter=scholarly-metadata/scholarly-metadata.lua \ + --lua-filter=author-info-blocks/author-info-blocks.lua \ + --output=outfile.pdf --pdf-engine=xelatex \ + article.md + +The way in which affiliation data should be given is described +in the docs for *scholarly-metadata.lua*. Additionally, authors +who contributed equally to an article can be marked by adding +`equal_contributor: yes` to the respective YAML objects. +Similarly, corresponding authors should be marked with +`correspondence: yes` and have an `email` listed. + +### Example + +Take the following example YAML block: + +``` yaml +--- +title: Affiliation Blocks Example +author: + - Jane Doe: + institute: + - federation + equal_contributor: "yes" + correspondence: "yes" + email: jane.doe@example.com + - John Q. Doe: + institute: [federation, acme] + equal_contributor: "yes" + - Juan Pérez: + institute: acme +institute: + - federation: Federation of Planets + - acme: + name: Acme Corporation +--- +``` + +This will mark Jane Doe and John Q. Doe as equal contributors and +Jane Doe as the sole corresponding author. Below is a screenshot +of a document header created from this metadata.
+ +![example document screenshot](document-screenshot.jpg) diff --git a/paper/lua-filters/author-info-blocks/author-info-blocks.lua b/paper/lua-filters/author-info-blocks/author-info-blocks.lua new file mode 100644 index 0000000..27e32bc --- /dev/null +++ b/paper/lua-filters/author-info-blocks/author-info-blocks.lua @@ -0,0 +1,176 @@ +--[[ +affiliation-blocks – generate title components + +Copyright © 2017–2019 Albert Krewinkel + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. 
+]] +local List = require 'pandoc.List' +local utils = require 'pandoc.utils' +local stringify = utils.stringify + +local default_marks +local default_marks = { + corresponding_author = FORMAT == 'latex' + and {pandoc.RawInline('latex', '*')} + or {pandoc.Str '✉'}, + equal_contributor = FORMAT == 'latex' + and {pandoc.RawInline('latex', '$\\dagger{}$')} + or {pandoc.Str '*'}, +} + +local function intercalate(lists, elem) + local result = List:new{} + for i = 1, (#lists - 1) do + result:extend(lists[i]) + result:extend(elem) + end + if #lists > 0 then + result:extend(lists[#lists]) + end + return result +end + +--- Check whether the given author is a corresponding author +local function is_corresponding_author(author) + return author.correspondence and author.email +end + +--- Create inlines for a single author (includes all author notes) +local function author_inline_generator (get_mark) + return function (author) + local author_marks = List:new{} + if author.equal_contributor then + author_marks[#author_marks + 1] = get_mark 'equal_contributor' + end + local idx_str + for _, idx in ipairs(author.institute) do + if type(idx) ~= 'table' then + idx_str = tostring(idx) + else + idx_str = stringify(idx) + end + author_marks[#author_marks + 1] = {pandoc.Str(idx_str)} + end + if is_corresponding_author(author) then + author_marks[#author_marks + 1] = get_mark 'corresponding_author' + end + local res = List.clone(author.name) + res[#res + 1] = pandoc.Superscript(intercalate(author_marks, {pandoc.Str ','})) + return res + end +end + +local function is_equal_contributor (author) + return author.equal_contributor +end + +--- Create equal contributors note. 
-- @param authors list of author metadata objects
-- @param mark    function mapping a mark name to the inlines used as the mark
-- @return a List holding a single Para, or nil if no author is flagged as an
--         equal contributor
local function create_equal_contributors_block(authors, mark)
  local has_equal_contribs = List:new(authors):find_if(is_equal_contributor)
  if not has_equal_contribs then
    return nil
  end
  local contributors = {
    pandoc.Superscript(mark'equal_contributor'),
    pandoc.Space(),
    pandoc.Str 'These authors contributed equally to this work.'
  }
  return List:new{pandoc.Para(contributors)}
end

--- Generate a block listing all affiliations, numbered with arabic numerals.
-- Each affiliation is rendered as its superscript index followed by its
-- `name`; entries are separated by line breaks inside a single paragraph.
-- @param affiliations list of affiliation objects (each with a `name` field)
-- @return a table holding a single Para, or nil when there are no
--         affiliations, so that the caller's `or {}` fallback applies and no
--         empty paragraph is added to the document
local function create_affiliations_blocks(affiliations)
  local affil_lines = List:new(affiliations):map(
    function (affil, i)
      local num_inlines = List:new{
        pandoc.Superscript{pandoc.Str(tostring(i))},
        pandoc.Space()
      }
      return num_inlines .. affil.name
    end
  )
  -- Without this guard an empty Para would be emitted for documents that
  -- define no institutes.
  if #affil_lines == 0 then
    return nil
  end
  return {pandoc.Para(intercalate(affil_lines, {pandoc.LineBreak()}))}
end

--- Generate a block element containing the correspondence information.
-- Every corresponding author is rendered as a `mailto:` link whose text is
-- "Name <email>"; the links are joined with ", " and prefixed with the
-- corresponding-author mark and the word "Correspondence:".
-- @param authors list of author metadata objects
-- @param mark    function mapping a mark name to the inlines used as the mark
-- @return a table holding a single Para, or nil if there is no corresponding
--         author
local function create_correspondence_blocks(authors, mark)
  local corresponding_authors = List:new{}
  for _, author in ipairs(authors) do
    if is_corresponding_author(author) then
      local mailto = 'mailto:' .. pandoc.utils.stringify(author.email)
      local author_with_mail = List:new(
        author.name .. List:new{pandoc.Space(), pandoc.Str '<'} ..
        author.email .. List:new{pandoc.Str '>'}
      )
      local link = pandoc.Link(author_with_mail, mailto)
      -- wrapped in a table because `intercalate` expects a list of lists
      table.insert(corresponding_authors, {link})
    end
  end
  if #corresponding_authors == 0 then
    return nil
  end
  local correspondence = List:new{
    pandoc.Superscript(mark'corresponding_author'),
    pandoc.Space(),
    pandoc.Str'Correspondence:',
    pandoc.Space()
  }
  local sep = List:new{pandoc.Str',', pandoc.Space()}
  return {
    pandoc.Para(correspondence .. intercalate(corresponding_authors, sep))
  }
end

--- Generate a list of inlines containing all authors.
+local function create_authors_inlines(authors, mark) + local inlines_generator = author_inline_generator(mark) + local inlines = List:new(authors):map(inlines_generator) + local and_str = List:new{pandoc.Space(), pandoc.Str'and', pandoc.Space()} + + local last_author = inlines[#inlines] + inlines[#inlines] = nil + local result = intercalate(inlines, {pandoc.Str ',', pandoc.Space()}) + if #authors > 1 then + result:extend(List:new{pandoc.Str ","} .. and_str) + end + result:extend(last_author) + return result +end + +return { + { + Pandoc = function (doc) + local meta = doc.meta + local body = List:new{} + + local mark = function (mark_name) return default_marks[mark_name] end + + body:extend(create_equal_contributors_block(doc.meta.author, mark) or {}) + body:extend(create_affiliations_blocks(doc.meta.institute) or {}) + body:extend(create_correspondence_blocks(doc.meta.author, mark) or {}) + body:extend(doc.blocks) + + -- Overwrite authors with formatted values. We use a single, formatted + -- string for most formats. LaTeX output, however, looks nicer if we + -- provide a authors as a list. + meta.author = FORMAT:match 'latex' + and pandoc.MetaList(doc.meta.author):map(author_inline_generator(mark)) + or pandoc.MetaInlines(create_authors_inlines(doc.meta.author, mark)) + -- Institute info is now baked into the affiliations block. 
+ meta.institute = nil + + return pandoc.Pandoc(body, meta) + end + } +} diff --git a/paper/lua-filters/author-info-blocks/document-screenshot.jpg b/paper/lua-filters/author-info-blocks/document-screenshot.jpg Binary files differnew file mode 100644 index 0000000..9e30e9f --- /dev/null +++ b/paper/lua-filters/author-info-blocks/document-screenshot.jpg diff --git a/paper/lua-filters/author-info-blocks/expected.native b/paper/lua-filters/author-info-blocks/expected.native new file mode 100644 index 0000000..6608de7 --- /dev/null +++ b/paper/lua-filters/author-info-blocks/expected.native @@ -0,0 +1,5 @@ +Pandoc (Meta {unMeta = fromList [("author",MetaInlines [Str "Jane",Space,Str "Doe",Superscript [Str "*",Str ",",Str "1",Str ",",Str "\9993"],Str ",",Space,Str "John",Space,Str "Q.",Space,Str "Doe",Superscript [Str "*",Str ",",Str "1",Str ",",Str "2"],Str ",",Space,Str "and",Space,Str "Juan",Space,Str "P\233rez",Superscript [Str "2"]]),("title",MetaInlines [Str "Affiliation",Space,Str "Blocks",Space,Str "Example"])]}) +[Para [Superscript [Str "*"],Space,Str "These authors contributed equally to this work."] +,Para [Superscript [Str "1"],Space,Str "Federation",Space,Str "of",Space,Str "Planets",LineBreak,Superscript [Str "2"],Space,Str "Acme",Space,Str "Corporation"] +,Para [Superscript [Str "\9993"],Space,Str "Correspondence:",Space,Link ("",[],[]) [Str "Jane",Space,Str "Doe",Space,Str "<",Str "jane.doe@example.com",Str ">"] ("mailto:jane.doe@example.com","")] +,Para [Str "Lorem",Space,Str "ipsum",Space,Str "dolor",Space,Str "sit",Space,Str "amet."]] diff --git a/paper/lua-filters/author-info-blocks/sample.md b/paper/lua-filters/author-info-blocks/sample.md new file mode 100644 index 0000000..541aa26 --- /dev/null +++ b/paper/lua-filters/author-info-blocks/sample.md @@ -0,0 +1,28 @@ +--- +author: +- correspondence: yes + email: 'jane.doe\@example.com' + equal_contributor: yes + id: Jane Doe + institute: + - 1 + name: Jane Doe +- equal_contributor: yes + id: 'John Q. 
Doe' + institute: + - 1 + - 2 + name: 'John Q. Doe' +- id: Juan Pérez + institute: + - 2 + name: Juan Pérez +institute: +- id: federation + name: Federation of Planets +- id: acme + name: Acme Corporation +title: Affiliation Blocks Example +--- + +Lorem ipsum dolor sit amet. diff --git a/paper/lua-filters/bibexport/Makefile b/paper/lua-filters/bibexport/Makefile new file mode 100644 index 0000000..67081b0 --- /dev/null +++ b/paper/lua-filters/bibexport/Makefile @@ -0,0 +1,5 @@ +test: + @pandoc --lua-filter=bibexport.lua sample.md > /dev/null + @diff --strip-trailing-cr -u bibexport.aux expected.bibexport.aux + @diff --strip-trailing-cr -u bibexport.bib expected.bibexport.bib + @rm -f bibexport.aux bibexport.bib diff --git a/paper/lua-filters/bibexport/README.md b/paper/lua-filters/bibexport/README.md new file mode 100644 index 0000000..ea1b54d --- /dev/null +++ b/paper/lua-filters/bibexport/README.md @@ -0,0 +1,31 @@ +# bibexport + +Export all cited references into a single bibtex file. This is +most useful when writing collaboratively while using a large, +private bibtex collection. Using the bibexport filter makes it +possible to create a reduced bibtex file suitable for sharing with +collaborators. + +## Prerequisites + +This filter expects the `bibexport` executable to be installed +and in the user's PATH. + +## Usage + +The filter runs `bibexport` on a temporary *aux* file, creating +the file *bibexport.bib* on success. The name of the temporary +*.aux* file can be set via the `auxfile` meta value; if no value +is specified, *bibexport.aux* will be used as the filename. + +Please note that `bibexport` prints messages to stdout. Pandoc +should be called with the `-o` or `--output` option instead of +redirecting stdout to a file. E.g.
+ + pandoc --lua-filter=bibexport.lua article.md -o article.html + +or, when the filter is called in a one-off fashion + + pandoc --lua-filter=bibexport.lua article.md -o /dev/null + + diff --git a/paper/lua-filters/bibexport/bibexport.lua b/paper/lua-filters/bibexport/bibexport.lua new file mode 100644 index 0000000..9174e69 --- /dev/null +++ b/paper/lua-filters/bibexport/bibexport.lua @@ -0,0 +1,82 @@ +local utils = require 'pandoc.utils' +local List = require 'pandoc.List' + +local citation_id_set = {} + +-- Collect all citation IDs. +function Cite (c) + local cs = c.citations + for i = 1, #cs do + citation_id_set[cs[i].id or cs[i].citationId] = true + end +end + +--- Return a list of citation IDs +function citation_ids () + local citations = {}; + for cid, _ in pairs(citation_id_set) do + citations[#citations + 1] = cid + end + return citations +end + +function bibdata (bibliography) + function bibname (bibitem) + if type(bibitem) == 'string' then + return bibitem:gsub('%.bib$', '') + else + -- bibitem is assumed to be a list of inlines + return utils.stringify(pandoc.Span(bibitem)):gsub('%.bib$', '') + end + end + + local bibs = bibliography.t == 'MetaList' + and List.map(bibliography, bibname) + or {bibname(bibliography)} + return table.concat(bibs, ',') +end + +function aux_content(bibliography) + local cites = citation_ids() + table.sort(cites) + local citations = table.concat(cites, ',') + return table.concat( + { + '\\bibstyle{alpha}', + '\\bibdata{' .. bibdata(bibliography) .. '}', + '\\citation{' .. citations .. '}', + '', + }, + '\n' + ) +end + +function write_dummy_aux (bibliography, auxfile) + local filename + if type(auxfile) == 'string' then + filename = auxfile + elseif type(auxfile) == 'table' then + -- assume list of inlines + filename = utils.stringify(pandoc.Span(auxfile)) + else + filename = 'bibexport.aux' + end + local fh = io.open(filename, 'w') + fh:write(aux_content(bibliography)) + fh:close() + io.stdout:write('Aux written to ' .. 
filename .. '\n') + return filename +end + +function Pandoc (doc) + local meta = doc.meta + if not meta.bibliography then + return nil + else + -- create a dummy .aux file + local auxfile_name = write_dummy_aux(meta.bibliography, meta.auxfile) + os.execute('bibexport ' .. auxfile_name) + io.stdout:write('Output written to bibexport.bib\n') + return nil + end +end diff --git a/paper/lua-filters/bibexport/coffee.bib b/paper/lua-filters/bibexport/coffee.bib new file mode 100644 index 0000000..80b723c --- /dev/null +++ b/paper/lua-filters/bibexport/coffee.bib @@ -0,0 +1,48 @@ +@article{BrSm02, + author = {C. F. Brice and A. P. Smith}, + title = {Effects of caffeine on mood and performance: a study on + realistic consumption}, + journal = {Psychopharmacology (Berlin)}, + year = 2002, + volume = 164, + pages = {188--192} +} + +@article{LoSnMuKo95, + author = {M. M. Lorist and J. Snel and G. Mulder and A. Kok}, + title = {Aging, caffeine, and information processing: an + event-related potential analysis}, + journal = {Electroencephalogr Clin Neurophysiol}, + year = 1995, + volume = 96, + pages = {453--467} +} + +@article{Li95, + author = {L. 
Linde}, + title = {Mental effects of caffeine in fatigued and non-fatigued + female and male subjects}, + journal = {Ergonomics}, + year = 1995, + volume = 38, + pages = {864--885} +} + +@article{KjOs07, + author = {Dannie Kjeldgaard and Jacob Ostberg}, + title = {Coffee grounds and the global cup: glocal consumer culture + in scandinavia}, + journal = {Consumption, Markets and Culture}, + year = 2007, + pages = {175--187}, + volume = 10, + issue = 2 +} + +@book{De92, + author = {G Debry}, + title = {Coffee and Health: Composition, Consumption and Effect upon Health}, + publisher = {John Libbey Eurotext}, + year = 1992, + ISBN = {2-7420-0037-2} +} diff --git a/paper/lua-filters/bibexport/expected.bibexport.aux b/paper/lua-filters/bibexport/expected.bibexport.aux new file mode 100644 index 0000000..d5f3144 --- /dev/null +++ b/paper/lua-filters/bibexport/expected.bibexport.aux @@ -0,0 +1,3 @@ +\bibstyle{alpha} +\bibdata{sample,coffee} +\citation{Li95,Upper_writers_1974} diff --git a/paper/lua-filters/bibexport/expected.bibexport.bib b/paper/lua-filters/bibexport/expected.bibexport.bib new file mode 100644 index 0000000..57448ed --- /dev/null +++ b/paper/lua-filters/bibexport/expected.bibexport.bib @@ -0,0 +1,27 @@ + + +@article{Li95, + author = {L. 
Linde}, + journal = {Ergonomics}, + pages = {864--885}, + title = {Mental effects of caffeine in fatigued and + non-fatigued female and male subjects}, + volume = {38}, + year = {1995}, +} + +@article{Upper_writers_1974, + author = {Upper, Dennis}, + journal = {Journal of Applied Behavior Analysis}, + number = {3}, + pages = {497--497}, + publisher = {Blackwell Publishing Ltd}, + title = {The unsuccessful self-treatment of a case of + “writer's block”}, + volume = {7}, + year = {1974}, + doi = {10.1901/jaba.1974.7-497a}, + issn = {1938-3703}, + url = {http://dx.doi.org/10.1901/jaba.1974.7-497a}, +} + diff --git a/paper/lua-filters/bibexport/sample.bib b/paper/lua-filters/bibexport/sample.bib new file mode 100644 index 0000000..876eea1 --- /dev/null +++ b/paper/lua-filters/bibexport/sample.bib @@ -0,0 +1,13 @@ +@article {Upper_writers_1974, + author = {Upper, Dennis}, + title = {The unsuccessful self-treatment of a case of “writer's block”}, + journal = {Journal of Applied Behavior Analysis}, + volume = {7}, + number = {3}, + publisher = {Blackwell Publishing Ltd}, + issn = {1938-3703}, + url = {http://dx.doi.org/10.1901/jaba.1974.7-497a}, + doi = {10.1901/jaba.1974.7-497a}, + pages = {497--497}, + year = {1974}, +} diff --git a/paper/lua-filters/bibexport/sample.md b/paper/lua-filters/bibexport/sample.md new file mode 100644 index 0000000..d458b37 --- /dev/null +++ b/paper/lua-filters/bibexport/sample.md @@ -0,0 +1,18 @@ +--- +bibliography: +- sample.bib +- coffee.bib +... + +# Abstract + +This is an example article. It was written under the influence of +coffee, which acts to counter fatigue [@Li95]. + + +# Further reading + +Authors struggling to fill their document with content are referred to +@Upper_writers_1974. 
+ +# References diff --git a/paper/lua-filters/cito/Makefile b/paper/lua-filters/cito/Makefile new file mode 100644 index 0000000..1997818 --- /dev/null +++ b/paper/lua-filters/cito/Makefile @@ -0,0 +1,6 @@ +test: + @pandoc --lua-filter=cito.lua --output=output.md --standalone sample.md + @diff -u expected.md output.md + @rm -f output.md + +.PHONY: test diff --git a/paper/lua-filters/cito/README.md b/paper/lua-filters/cito/README.md new file mode 100644 index 0000000..b0e2cd3 --- /dev/null +++ b/paper/lua-filters/cito/README.md @@ -0,0 +1,76 @@ +# cito + +This filter extracts optional CiTO (Citation Typing Ontology) +information from citations and stores the information in the +document's metadata. The extracted info is intended to be used in +combination with other filters, templates, or custom writers. It +is mandatory to run pandoc-citeproc *after* this filter if CiTO +data is embedded in the document; otherwise pandoc-citeproc will +interpret CiTO properties as part of the citation ID. + +## Using the Citation Typing Ontology + +The [citation typing ontology] (CiTO) allows authors to specify the +reason a citation is given. This is helpful for the authors and +their co-authors, and furthermore adds data that can be used by +readers to search and navigate relevant publications. + +A CiTO annotation must come before the citation key and be +followed by a colon. E.g., `@method_in:towbin_1979` signifies +that the citation with ID *towbin_1979* is cited because the +method described in that paper has been used in the paper at +hand. + +[citation typing ontology]: http://purl.org/spar/cito + +## Recognized CiTO properties + +Below is the list of CiTO properties recognized by the filter, +together with the aliases that can be used as shorthands. 
+ +- agrees_with + - agree_with +- citation +- cites +- cites_as_authority + - as_authority + - authority +- cites_as_data_source +- cites_as_evidence + - as_evidence + - evidence +- cites_as_metadata_document + - as_metadata_document + - metadata_document + - metadata +- cites_as_recommended_reading + - as_recommended_reading + - recommended_reading +- disagrees_with + - disagree + - disagrees +- disputes +- documents +- extends +- includes_excerpt_from + - excerpt + - excerpt_from +- includes_quotation_from + - quotation + - quotation_from +- obtains_background_from + - background + - background_from +- refutes +- replies_to +- updates +- uses_data_from + - data + - data_from +- uses_method_in + - method + - method_in + +## References + +This approach was described in <https://doi.org/10.7717/peerj-cs.112>. diff --git a/paper/lua-filters/cito/cito.lua b/paper/lua-filters/cito/cito.lua new file mode 100644 index 0000000..a1a3421 --- /dev/null +++ b/paper/lua-filters/cito/cito.lua @@ -0,0 +1,138 @@ +-- Copyright © 2017–2019 Albert Krewinkel, Robert Winkler +-- +-- This library is free software; you can redistribute it and/or modify it +-- under the terms of the MIT license. See LICENSE for details. 
local _version = '1.0.0'

--- CiTO properties recognized by the filter. Each property maps to the list
--- of aliases that can be used as shorthands for it in a citation.
local properties_and_aliases = {
  agrees_with = {
    'agree_with'
  },
  citation = {
  },
  cites = {
  },
  cites_as_authority = {
    'as_authority',
    'authority'
  },
  cites_as_data_source = {
    "as_data_source",
    "data_source"
  },
  cites_as_evidence = {
    'as_evidence',
    'evidence'
  },
  cites_as_metadata_document = {
    'as_metadata_document',
    'metadata_document',
    'metadata'
  },
  cites_as_recommended_reading = {
    'as_recommended_reading',
    'recommended_reading'
  },
  disagrees_with = {
    'disagree',
    'disagrees'
  },
  disputes = {
  },
  documents = {
  },
  extends = {
  },
  includes_excerpt_from = {
    'excerpt',
    'excerpt_from'
  },
  includes_quotation_from = {
    'quotation',
    'quotation_from'
  },
  obtains_background_from = {
    'background',
    'background_from'
  },
  refutes = {
  },
  replies_to = {
  },
  updates = {
  },
  uses_data_from = {
    'data',
    'data_from'
  },
  uses_method_in = {
    'method',
    'method_in'
  },
}

--- Property assigned to citations that carry no recognized CiTO prefix.
local default_cito_property = 'citation'

--- Map from cito aliases to the actual cito property.
local properties_by_alias = {}
for property, aliases in pairs(properties_and_aliases) do
  -- every property is an alias for itself
  properties_by_alias[property] = property
  for _, alias in ipairs(aliases) do
    properties_by_alias[alias] = property
  end
end

--- Split citation ID into cito property and the actual citation ID. If
--- the ID does not seem to contain a CiTO property, the
--- `default_cito_property` will be returned, together with the
--- unchanged input ID.
-- @param citation_id string, e.g. `method_in:towbin_1979`
-- @return the canonical CiTO property name
-- @return the citation ID with a recognized CiTO prefix removed
local function split_cito_from_id (citation_id)
  -- The candidate alias is everything before the FIRST colon. The remainder
  -- may itself contain colons (e.g. `evidence:smith:2010`); a greedy first
  -- capture would split at the last colon and miss the prefix.
  local prop_alias, split_citation_id = citation_id:match('^([^:]+):(.+)$')
  local property = prop_alias and properties_by_alias[prop_alias]

  if property then
    return property, split_citation_id
  end

  return default_cito_property, citation_id
end

--- Citations by CiTO properties.
+local function store_cito (cito_cites, prop, cite_id) + if not prop then + return + end + if not cito_cites[prop] then + cito_cites[prop] = {} + end + table.insert(cito_cites[prop], cite_id) +end + +--- Returns a Cite filter function which extracts CiTO information and +--- add it to the given collection table. +local function extract_cito (cito_cites) + return function (cite) + for k, citation in pairs(cite.citations) do + local cito_prop, cite_id = split_cito_from_id(citation.id) + store_cito(cito_cites, cito_prop, cite_id) + citation.id = cite_id + end + return cite + end +end + +--- Lists of citation IDs, indexed by CiTO properties. +local citations_by_property = {} + +return { + { + Cite = extract_cito(citations_by_property) + }, + { + Meta = function (meta) + meta.cito_cites = citations_by_property + return meta + end + } +} diff --git a/paper/lua-filters/cito/expected.md b/paper/lua-filters/cito/expected.md new file mode 100644 index 0000000..707ff74 --- /dev/null +++ b/paper/lua-filters/cito/expected.md @@ -0,0 +1,19 @@ +--- +cito_cites: + cites_as_evidence: + - Li95 + cites_as_recommended_reading: + - 'Upper\_writers\_1974' +--- + +Abstract +======== + +This is an example article. It was written under the influence of +coffee, which acts to counter fatigue [@Li95]. + +Further reading +=============== + +Authors struggling to fill their document with content are referred to +@Upper_writers_1974. diff --git a/paper/lua-filters/cito/sample.bib b/paper/lua-filters/cito/sample.bib new file mode 100644 index 0000000..4a4ff62 --- /dev/null +++ b/paper/lua-filters/cito/sample.bib @@ -0,0 +1,24 @@ +@article{Li95, + author = {L. 
Linde}, + journal = {Ergonomics}, + pages = {864--885}, + title = {Mental effects of caffeine in fatigued and + non-fatigued female and male subjects}, + volume = {38}, + year = {1995}, +} + +@article{Upper_writers_1974, + author = {Upper, Dennis}, + journal = {Journal of Applied Behavior Analysis}, + number = {3}, + pages = {497--497}, + publisher = {Blackwell Publishing Ltd}, + title = {The unsuccessful self-treatment of a case of + “writer's block”}, + volume = {7}, + year = {1974}, + doi = {10.1901/jaba.1974.7-497a}, + issn = {1938-3703}, + url = {http://dx.doi.org/10.1901/jaba.1974.7-497a}, +} diff --git a/paper/lua-filters/cito/sample.md b/paper/lua-filters/cito/sample.md new file mode 100644 index 0000000..a9989cb --- /dev/null +++ b/paper/lua-filters/cito/sample.md @@ -0,0 +1,10 @@ +# Abstract + +This is an example article. It was written under the influence of +coffee, which acts to counter fatigue [@cites_as_evidence:Li95]. + + +# Further reading + +Authors struggling to fill their document with content are referred to +@recommended_reading:Upper_writers_1974. 
diff --git a/paper/lua-filters/diagram-generator/.gitignore b/paper/lua-filters/diagram-generator/.gitignore new file mode 100644 index 0000000..01e67c0 --- /dev/null +++ b/paper/lua-filters/diagram-generator/.gitignore @@ -0,0 +1,2 @@ +sample.html +tmp-latex diff --git a/paper/lua-filters/diagram-generator/Makefile b/paper/lua-filters/diagram-generator/Makefile new file mode 100644 index 0000000..ccd79c8 --- /dev/null +++ b/paper/lua-filters/diagram-generator/Makefile @@ -0,0 +1,13 @@ +.PHONY: test +test: sample.html + +sample.html: sample.md + @pandoc --self-contained \ + --lua-filter=diagram-generator.lua \ + --metadata=pythonPath:"python3" \ + --metadata=title:"README" \ + --output=$@ $< + +clean: + rm -f sample.html + rm -rf tmp-latex diff --git a/paper/lua-filters/diagram-generator/README.md b/paper/lua-filters/diagram-generator/README.md new file mode 100644 index 0000000..d04e204 --- /dev/null +++ b/paper/lua-filters/diagram-generator/README.md @@ -0,0 +1,252 @@ +# Diagram Generator Lua Filter + +## Introduction +This Lua filter is used to create images with or without captions from code +blocks. Currently PlantUML, Graphviz, Ti*k*Z and Python can be processed. +This document also serves as a test document, which is why the subsequent +test diagrams are integrated in every supported language. + +## Prerequisites +To be able to use this Lua filter, the respective external tools must be +installed. However, it is sufficient if the tools to be used are installed. +If you only want to use PlantUML, you don't need LaTeX or Python, etc. + +### PlantUML +To use PlantUML, you must install PlantUML itself. See the +[PlantUML website](http://plantuml.com/) for more details. It should be +noted that PlantUML is a Java program and therefore Java must also +be installed. + +By default, this filter expects the plantuml.jar file to be in the +working directory. Alternatively, the environment variable +`PLANTUML` can be set with a path. 
If, for example, a specific +PlantUML version is to be used per pandoc document, the +`plantumlPath` meta variable can be set. + +Furthermore, this filter assumes that Java is located in the +system or user path. This means that from any place of the system +the `java` command is understood. Alternatively, the `JAVA_HOME` +environment variable gets used. To use a specific Java version per +pandoc document, use the `javaPath` meta variable. Please notice +that `JAVA_HOME` must be set to the java's home directory e.g. +`c:\Program Files\Java\jre1.8.0_201\` whereas `javaPath` must be +set to the absolute path of `java.exe` e.g. +`c:\Program Files\Java\jre1.8.0_201\bin\java.exe`. + +Example usage: + +~~~~~~~~~~~~~~~~ +```{.plantuml caption="This is an image, created by **PlantUML**."} +@startuml +Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response +Alice -> Bob: Another authentication Request Alice <-- Bob: another Response +@enduml +``` +~~~~~~~~~~~~~~~~ + +### Graphviz +To use Graphviz you only need to install Graphviz, as you can read +on its [website](http://www.graphviz.org/). There are no other +dependencies. + +This filter assumes that the `dot` command is located in the path +and therefore can be used from any location. Alternatively, you can +set the environment variable `DOT` or use the pandoc's meta variable +`dotPath`. 
+ +Example usage from [the Graphviz +gallery](https://graphviz.gitlab.io/_pages/Gallery/directed/fsm.html): + +~~~~~~~~~~~~~~~~ +```{.graphviz caption="This is an image, created by **Graphviz**'s dot."} +digraph finite_state_machine { + rankdir=LR; + size="8,5" + node [shape = doublecircle]; LR_0 LR_3 LR_4 LR_8; + node [shape = circle]; + LR_0 -> LR_2 [ label = "SS(B)" ]; + LR_0 -> LR_1 [ label = "SS(S)" ]; + LR_1 -> LR_3 [ label = "S($end)" ]; + LR_2 -> LR_6 [ label = "SS(b)" ]; + LR_2 -> LR_5 [ label = "SS(a)" ]; + LR_2 -> LR_4 [ label = "S(A)" ]; + LR_5 -> LR_7 [ label = "S(b)" ]; + LR_5 -> LR_5 [ label = "S(a)" ]; + LR_6 -> LR_6 [ label = "S(b)" ]; + LR_6 -> LR_5 [ label = "S(a)" ]; + LR_7 -> LR_8 [ label = "S(b)" ]; + LR_7 -> LR_5 [ label = "S(a)" ]; + LR_8 -> LR_6 [ label = "S(b)" ]; + LR_8 -> LR_5 [ label = "S(a)" ]; +} +``` +~~~~~~~~~~~~~~~~ + +### Ti*k*Z +Ti*k*Z (cf. [Wikipedia](https://en.wikipedia.org/wiki/PGF/TikZ)) is a +description language for graphics of any kind that can be used within +LaTeX (cf. [Wikipedia](https://en.wikipedia.org/wiki/LaTeX)). + +Therefore a LaTeX system must be installed on the system. The Ti*k*Z code is +embedded into a dynamic LaTeX document. This temporary document gets +translated into a PDF document using LaTeX (`pdflatex`). Finally, +Inkscape is used to convert the PDF file to the desired format. + +Note: We are using Inkscape here to use a stable solution for the +conversion. Formerly ImageMagick was used instead. ImageMagick is +not able to convert PDF files. Hence, it uses Ghostscript to do +so, cf. [1](https://stackoverflow.com/a/6599718/2258393). +Unfortunately, Ghostscript behaves unpredictably during Windows and +Linux test cases, cf. 
[2](https://stackoverflow.com/questions/21774561/some-pdfs-are-converted-improperly-using-imagemagick), +[3](https://stackoverflow.com/questions/9064706/imagemagic-convert-command-pdf-convertion-with-bad-size-orientation), [4](https://stackoverflow.com/questions/18837093/imagemagic-renders-image-with-black-background), +[5](https://stackoverflow.com/questions/37392798/pdf-to-svg-is-not-perfect), +[6](https://stackoverflow.com/q/10288065/2258393), etc. By using Inkscape, +we need one dependency less and get rid of unexpected Ghostscript issues. + +Due to this more complicated process, the use of Ti*k*Z is also more +complicated overall. The process is error-prone: An insufficiently +configured LaTeX installation or an insufficiently configured +Inkscape installation can lead to errors. Overall, this results in +the following dependencies: + +- Any LaTeX installation. This should be configured so that +missing packages are installed automatically. This filter uses the +`pdflatex` command which is available by the system's path. Alternatively, +you can set the `PDFLATEX` environment variable. In case you have to use +a specific LaTeX version on a pandoc document basis, you might set the +`pdflatexPath` meta variable. + +- An installation of [Inkscape](https://inkscape.org/). +It is assumed that the `inkscape` command is in the path and can be +executed from any location. Alternatively, the environment +variable `INKSCAPE` can be set with a path. If a specific +version per pandoc document is to be used, the `inkscapePath` +meta-variable can be set. + +In order to use additional LaTeX packages, use the optional +`additionalPackages` attribute in your document, as in the +example below. + +Example usage from [TikZ +examples](http://www.texample.net/tikz/examples/parallelepiped/) by +[Kjell Magne Fauske](http://www.texample.net/tikz/examples/nav1d/): + +~~~~~~~~~~~~~~~~ +```{.tikz caption="This is an image, created by **TikZ i.e. LaTeX**." 
+ additionalPackages="\usepackage{adjustbox}"} +\usetikzlibrary{arrows} +\tikzstyle{int}=[draw, fill=blue!20, minimum size=2em] +\tikzstyle{init} = [pin edge={to-,thin,black}] + +\resizebox{16cm}{!}{% + \trimbox{3.5cm 0cm 0cm 0cm}{ + \begin{tikzpicture}[node distance=2.5cm,auto,>=latex'] + \node [int, pin={[init]above:$v_0$}] (a) {$\frac{1}{s}$}; + \node (b) [left of=a,node distance=2cm, coordinate] {a}; + \node [int, pin={[init]above:$p_0$}] at (0,0) (c) + [right of=a] {$\frac{1}{s}$}; + \node [coordinate] (end) [right of=c, node distance=2cm]{}; + \path[->] (b) edge node {$a$} (a); + \path[->] (a) edge node {$v$} (c); + \draw[->] (c) edge node {$p$} (end) ; + \end{tikzpicture} + } +} +``` +~~~~~~~~~~~~~~~~ + +### Python +In order to use Python to generate a diagram, your Python code must store the +final image data in a temporary file with the correct format. In case you use +matplotlib for a diagram, add the following line to do so: + +```python +plt.savefig("$DESTINATION$", dpi=300, format="$FORMAT$") +``` + +The placeholder `$FORMAT$` gets replaced by the necessary format. Most of the +time, this will be `png` or `svg`. The second placeholder, `$DESTINATION$` +gets replaced by the path and file name of the destination. Both placeholders +can be used as many times as you want. 
Example usage from the [Matplotlib +examples](https://matplotlib.org/gallery/lines_bars_and_markers/cohere.html#sphx-glr-gallery-lines-bars-and-markers-cohere-py): + +~~~~~~~~~~~~~~~~ +```{.py2image caption="This is an image, created by **Python**."} +import matplotlib +matplotlib.use('Agg') + +import sys +import numpy as np +import matplotlib.pyplot as plt + +# Fixing random state for reproducibility +np.random.seed(19680801) + +dt = 0.01 +t = np.arange(0, 30, dt) +nse1 = np.random.randn(len(t)) # white noise 1 +nse2 = np.random.randn(len(t)) # white noise 2 + +# Two signals with a coherent part at 10Hz and a random part +s1 = np.sin(2 * np.pi * 10 * t) + nse1 +s2 = np.sin(2 * np.pi * 10 * t) + nse2 + +fig, axs = plt.subplots(2, 1) +axs[0].plot(t, s1, t, s2) +axs[0].set_xlim(0, 2) +axs[0].set_xlabel('time') +axs[0].set_ylabel('s1 and s2') +axs[0].grid(True) + +cxy, f = axs[1].cohere(s1, s2, 256, 1. / dt) +axs[1].set_ylabel('coherence') + +fig.tight_layout() +plt.savefig("$DESTINATION$", dpi=300, format="$FORMAT$") +``` +~~~~~~~~~~~~~~~~ + +A precondition for using Python is a Python environment which contains all +necessary libraries you want to use. To use, for example, the standard +[Anaconda Python](https://www.anaconda.com/distribution/) environment +on a Microsoft Windows system ... + +- set the environment variable `PYTHON` or the meta key `pythonPath` +to `c:\ProgramData\Anaconda3\python.exe` + +- set the environment variable `PYTHON_ACTIVATE` or the meta +key `activatePythonPath` to `c:\ProgramData\Anaconda3\Scripts\activate.bat`. + +Pandoc will activate this Python environment and start Python with your code. + +## How to run pandoc +This section shows how to call pandoc in order to use this filter with +meta keys. The following command assumes that the filters are stored in the +subdirectory `filters`. Further, this is an example for a Microsoft Windows +system. 
+ +Command to use PlantUML (a single line): + +``` +pandoc.exe README.md -f markdown -t docx --self-contained --standalone --lua-filter=filters\diagram-generator.lua --metadata=plantumlPath:"c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar" --metadata=javaPath:"c:\Program Files\Java\jre1.8.0_201\bin\java.exe" -o README.docx +``` + +All available environment variables: + +- `PLANTUML` e.g. `c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar`; Default: `plantuml.jar` +- `INKSCAPE` e.g. `c:\Program Files\Inkscape\inkscape.exe`; Default: `inkscape` +- `PYTHON` e.g. `c:\ProgramData\Anaconda3\python.exe`; Default: n/a +- `PYTHON_ACTIVATE` e.g. `c:\ProgramData\Anaconda3\Scripts\activate.bat`; Default: n/a +- `JAVA_HOME` e.g. `c:\Program Files\Java\jre1.8.0_201`; Default: n/a +- `DOT` e.g. `c:\ProgramData\chocolatey\bin\dot.exe`; Default: `dot` +- `PDFLATEX` e.g. `c:\Program Files\MiKTeX 2.9\miktex\bin\x64\pdflatex.exe`; Default: `pdflatex` + +All available meta keys: + +- `plantumlPath` +- `inkscapePath` +- `pythonPath` +- `activatePythonPath` +- `javaPath` +- `dotPath` +- `pdflatexPath` diff --git a/paper/lua-filters/diagram-generator/diagram-generator.lua b/paper/lua-filters/diagram-generator/diagram-generator.lua new file mode 100644 index 0000000..aef2ea4 --- /dev/null +++ b/paper/lua-filters/diagram-generator/diagram-generator.lua @@ -0,0 +1,295 @@ +--[[ + This Lua filter is used to create images with or without captions from + code blocks. Currently PlantUML, GraphViz, Tikz, and Python can be + processed. For further details, see README.md. + + Thanks to @floriandd2ba and @jgm for the initial implementation of + the PlantUML filter, which I used as a template. Thanks also @muxueqz + for the code to generate a GraphViz image. +]] + +-- The PlantUML path. If set, uses the environment variable PLANTUML or the +-- value "plantuml.jar" (local PlantUML version). 
In order to define a +-- PlantUML version per pandoc document, use the meta data to define the key +-- "plantumlPath". +local plantumlPath = os.getenv("PLANTUML") or "plantuml.jar" + +-- The Inkscape path. In order to define an Inkscape version per pandoc +-- document, use the meta data to define the key "inkscapePath". +local inkscapePath = os.getenv("INKSCAPE") or "inkscape" + +-- The Python path. In order to define a Python version per pandoc document, +-- use the meta data to define the key "pythonPath". There is no default: +-- this stays nil when the PYTHON environment variable is not set. +local pythonPath = os.getenv("PYTHON") + +-- The Python environment's activate script. Can be set on a per document +-- basis by using the meta data key "activatePythonPath". Optional (may be nil). +local pythonActivatePath = os.getenv("PYTHON_ACTIVATE") + +-- The Java path. In order to define a Java version per pandoc document, +-- use the meta data to define the key "javaPath". When JAVA_HOME is set, the +-- executable is taken to be <JAVA_HOME>/bin/java, joined with the directory +-- separator from package.config; otherwise plain "java" is used. +local javaPath = os.getenv("JAVA_HOME") +if javaPath then + javaPath = javaPath .. package.config:sub(1,1) .. "bin" + .. package.config:sub(1,1) .. "java" +else + javaPath = "java" +end + +-- The dot (Graphviz) path. In order to define a dot version per pandoc +-- document, use the meta data to define the key "dotPath". +local dotPath = os.getenv("DOT") or "dot" + +-- The pdflatex path. In order to define a pdflatex version per pandoc +-- document, use the meta data to define the key "pdflatexPath". +local pdflatexPath = os.getenv("PDFLATEX") or "pdflatex" + +-- The default format is SVG i.e. vector graphics: +local filetype = "svg" +local mimetype = "image/svg+xml" + +-- Check for output formats that potentially cannot use SVG +-- vector graphics. In these cases (docx, pptx, rtf), we use PNG +-- instead: +if FORMAT == "docx" then + filetype = "png" + mimetype = "image/png" +elseif FORMAT == "pptx" then + filetype = "png" + mimetype = "image/png" +elseif FORMAT == "rtf" then + filetype = "png" + mimetype = "image/png" +end + +-- Execute the meta data table to determine the paths. 
This function +-- must be called first to get the desired path. If one of these +-- meta options was set, it gets used instead of the corresponding +-- environment variable. +-- NOTE(review): the values are taken from `meta` as-is; metadata given in a +-- YAML block may not arrive as plain strings -- confirm whether they need +-- pandoc.utils.stringify before being used as command paths. +function Meta(meta) + plantumlPath = meta.plantumlPath or plantumlPath + inkscapePath = meta.inkscapePath or inkscapePath + pythonPath = meta.pythonPath or pythonPath + pythonActivatePath = meta.activatePythonPath or pythonActivatePath + javaPath = meta.javaPath or javaPath + dotPath = meta.dotPath or dotPath + pdflatexPath = meta.pdflatexPath or pdflatexPath +end + +-- Call plantuml.jar with some parameters (cf. PlantUML help). The diagram +-- source is piped in and the result of pandoc.pipe is returned: +local function plantuml(puml, filetype) + local final = pandoc.pipe(javaPath, {"-jar", plantumlPath, "-t" .. filetype, "-pipe", "-charset", "UTF8"}, puml) + return final +end + +-- Call dot (GraphViz) in order to generate the image +-- (thanks @muxueqz for this code): +local function graphviz(code, filetype) + local final = pandoc.pipe(dotPath, {"-T" .. filetype}, code) + return final +end + +-- Compile LaTeX with Tikz code to an image. Returns the content of the +-- exported image file, or nil when the file type is neither "png" nor "svg" +-- or the exported file cannot be opened: +local function tikz2image(src, filetype, additionalPackages) + + -- Define file names: + local outfile = string.format("./tmp-latex/file.%s", filetype) + local tmp = "./tmp-latex/file" + local tmpDir = "./tmp-latex/" + + -- Ensure that the tmp directory exists: + os.execute("mkdir -p tmp-latex") + + -- Build and write the LaTeX document (the result of io.open is not + -- checked before it is written to): + local f = io.open(tmp .. ".tex", 'w') + f:write("\\documentclass{standalone}\n\\usepackage{tikz}\n") + + -- Add any additional package(s) requested for this block: + if additionalPackages then + f:write(additionalPackages) + end + + f:write("\\begin{document}\n") + f:write(src) + f:write("\n\\end{document}\n") + f:close() + + -- Execute the LaTeX compiler: + pandoc.pipe(pdflatexPath, {'-output-directory', tmpDir, tmp}, '') + + -- Build the basic Inkscape command for the conversion: + local baseCommand = " --without-gui --file=" .. tmp .. 
".pdf" + local knownFormat = false + + if filetype == "png" then + + -- Append the subcommands to convert into a PNG file: + baseCommand = baseCommand .. " --export-png=" + .. tmp .. ".png --export-dpi=300" + knownFormat = true + + elseif filetype == "svg" then + + -- Append the subcommands to convert into an SVG file: + baseCommand = baseCommand .. " --export-plain-svg=" .. tmp .. ".svg" + knownFormat = true + + end + + -- We can only continue if the requested format is one we know how to + -- export: + local imgData = nil + if knownFormat then + + -- We know the desired format. Thus, execute Inkscape: + os.execute("\"" .. inkscapePath .. "\"" .. baseCommand) + + -- Try to open the image: + local r = io.open(tmp .. "." .. filetype, 'rb') + + -- Read the image, if available: + if r then + imgData = r:read("*all") + r:close() + end + + -- Delete the image tmp file: + os.remove(outfile) + end + + -- Remove the temporary files: + os.remove(tmp .. ".tex") + os.remove(tmp .. ".pdf") + os.remove(tmp .. ".log") + os.remove(tmp .. ".aux") + + return imgData +end + +-- Run Python to generate an image: +local function py2image(code, filetype) + + -- Define the temp files: + local outfile = string.format('%s.%s', os.tmpname(), filetype) + local pyfile = os.tmpname() + + -- Replace the desired destination's file type in the Python code: + local extendedCode = string.gsub(code, "%$FORMAT%$", filetype) + + -- Replace the desired destination's path in the Python code. + -- NOTE(review): outfile is used as a gsub replacement string, where "%" + -- is special -- confirm os.tmpname() never yields one. + extendedCode = string.gsub(extendedCode, "%$DESTINATION%$", outfile) + + -- Write the Python code: + local f = io.open(pyfile, 'w') + f:write(extendedCode) + f:close() + + -- Execute Python in the desired environment. + -- NOTE(review): pythonPath has no default; if neither PYTHON nor the + -- "pythonPath" meta key is set, the concatenation below raises an error. + local pycmd = pythonPath .. ' ' .. pyfile + local command = pythonActivatePath + and pythonActivatePath .. ' && ' .. 
pycmd + or pycmd + os.execute(command) + + -- Try to open the written image: + local r = io.open(outfile, 'rb') + local imgData = nil + + -- When the image exists, read it: + if r then + imgData = r:read("*all") + r:close() + else + io.stderr:write(string.format("File '%s' could not be opened", outfile)) + end + + -- Delete the tmp files: + os.remove(pyfile) + os.remove(outfile) + + return imgData +end + +-- Runs on every code block; a block whose first class names a known +-- converter is replaced by the generated image: +function CodeBlock(block) + + -- Predefine a potential image: + local fname = nil + + -- Using a table with all known generators i.e. converters: + local converters = { + plantuml = plantuml, + graphviz = graphviz, + tikz = tikz2image, + py2image = py2image, + } + + -- Check if a converter exists for this block (only the first class is + -- consulted). If not, return nil, which leaves the block unchanged. + local img_converter = converters[block.classes[1]] + if not img_converter then + return nil + end + + -- Call the correct converter which belongs to the used class. pcall keeps + -- a failing converter from aborting the whole filter run: + local success, img = pcall(img_converter, block.text, + filetype, block.attributes["additionalPackages"] or nil) + + -- Was ok? + if success and img then + -- Name the file after the SHA-1 hash of the image data: + fname = pandoc.sha1(img) .. "." .. filetype + + -- Store the data in the media bag: + pandoc.mediabag.insert(fname, mimetype, img) + + else + + -- an error occurred (img then holds the error message), or the + -- converter returned no image data (img is nil) + io.stderr:write(tostring(img)) + io.stderr:write('\n') + + end + + -- Case: This code block was an image e.g. PlantUML or dot/Graphviz, etc.: + if fname then + + -- Define the default caption: + local caption = {} + local enableCaption = nil + + -- If the user defines a caption, use it (the content of the first + -- block of the parsed caption text): + if block.attributes["caption"] then + caption = pandoc.read(block.attributes.caption).blocks[1].content + + -- This is pandoc's current hack to enforce a caption: + enableCaption = "fig:" + end + + -- Create a new image for the document's structure. Attach the user's + -- caption. 
Also use a hack (fig:) to make pandoc create a + -- figure i.e. attach a caption to the image. + local imgObj = pandoc.Image(caption, fname, enableCaption) + + -- Now, transfer the attribute "name" from the code block to the new + -- image block. It may be used by the figure numbering Lua filter. + -- If figure numbering is not used, this additional attribute + -- is simply ignored. + if block.attributes["name"] then + imgObj.attributes["name"] = block.attributes["name"] + end + + -- Finally, put the image inside an empty paragraph. By returning the + -- resulting paragraph object, the source code block gets replaced by + -- the image: + return pandoc.Para{ imgObj } + end +end + +-- Normally, pandoc will run the function in the built-in order Inlines -> +-- Blocks -> Meta -> Pandoc. We instead want Meta -> Blocks. Thus, we must +-- define our custom order: +return { + {Meta = Meta}, + {CodeBlock = CodeBlock}, +} diff --git a/paper/lua-filters/diagram-generator/sample.md b/paper/lua-filters/diagram-generator/sample.md new file mode 100644 index 0000000..231ccdc --- /dev/null +++ b/paper/lua-filters/diagram-generator/sample.md @@ -0,0 +1,244 @@ +# Diagram Generator Lua Filter + +## Introduction +This Lua filter is used to create images with or without captions from code +blocks. Currently PlantUML, Graphviz, Ti*k*Z and Python can be processed. +This document also serves as a test document, which is why the subsequent +test diagrams are integrated in every supported language. + +## Prerequisites +To be able to use this Lua filter, the respective external tools must be +installed. However, it is sufficient if the tools to be used are installed. +If you only want to use PlantUML, you don't need LaTeX or Python, etc. + +### PlantUML +To use PlantUML, you must install PlantUML itself. See the +[PlantUML website](http://plantuml.com/) for more details. It should be +noted that PlantUML is a Java program and therefore Java must also +be installed. 
+ +By default, this filter expects the plantuml.jar file to be in the +working directory. Alternatively, the environment variable +`PLANTUML` can be set with a path. If, for example, a specific +PlantUML version is to be used per pandoc document, the +`plantumlPath` meta variable can be set. + +Furthermore, this filter assumes that Java is located in the +system or user path. This means that from any place of the system +the `java` command is understood. Alternatively, the `JAVA_HOME` +environment variable gets used. To use a specific Java version per +pandoc document, use the `javaPath` meta variable. Please notice +that `JAVA_HOME` must be set to the java's home directory e.g. +`c:\Program Files\Java\jre1.8.0_201\` whereas `javaPath` must be +set to the absolute path of `java.exe` e.g. +`c:\Program Files\Java\jre1.8.0_201\bin\java.exe`. + +Example usage: + +```{.plantuml caption="This is an image, created by **PlantUML**."} +@startuml +Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response +Alice -> Bob: Another authentication Request Alice <-- Bob: another Response +@enduml +``` + +### Graphviz +To use Graphviz you only need to install Graphviz, as you can read +on its [website](http://www.graphviz.org/). There are no other +dependencies. + +This filter assumes that the `dot` command is located in the path +and therefore can be used from any location. Alternatively, you can +set the environment variable `DOT` or use the pandoc's meta variable +`dotPath`. 
+ +Example usage from [the Graphviz +gallery](https://graphviz.gitlab.io/_pages/Gallery/directed/fsm.html): + +```{.graphviz caption="This is an image, created by **Graphviz**'s dot."} +digraph finite_state_machine { + rankdir=LR; + size="8,5" + node [shape = doublecircle]; LR_0 LR_3 LR_4 LR_8; + node [shape = circle]; + LR_0 -> LR_2 [ label = "SS(B)" ]; + LR_0 -> LR_1 [ label = "SS(S)" ]; + LR_1 -> LR_3 [ label = "S($end)" ]; + LR_2 -> LR_6 [ label = "SS(b)" ]; + LR_2 -> LR_5 [ label = "SS(a)" ]; + LR_2 -> LR_4 [ label = "S(A)" ]; + LR_5 -> LR_7 [ label = "S(b)" ]; + LR_5 -> LR_5 [ label = "S(a)" ]; + LR_6 -> LR_6 [ label = "S(b)" ]; + LR_6 -> LR_5 [ label = "S(a)" ]; + LR_7 -> LR_8 [ label = "S(b)" ]; + LR_7 -> LR_5 [ label = "S(a)" ]; + LR_8 -> LR_6 [ label = "S(b)" ]; + LR_8 -> LR_5 [ label = "S(a)" ]; +} +``` + +### Ti*k*Z +Ti*k*Z (cf. [Wikipedia](https://en.wikipedia.org/wiki/PGF/TikZ)) is a +description language for graphics of any kind that can be used within +LaTeX (cf. [Wikipedia](https://en.wikipedia.org/wiki/LaTeX)). + +Therefore a LaTeX system must be installed on the system. The Ti*k*Z code is +embedded into a dynamic LaTeX document. This temporary document gets +translated into a PDF document using LaTeX (`pdflatex`). Finally, +Inkscape is used to convert the PDF file to the desired format. + +Note: We are using Inkscape here to use a stable solution for the +convertion. Formerly ImageMagick was used instead. ImageMagick is +not able to convert PDF files. Hence, it uses Ghostscript to do +so, cf. [1](https://stackoverflow.com/a/6599718/2258393). +Unfortunately, Ghostscript behaves unpredictable during Windows and +Linux tests cases, cf. 
[2](https://stackoverflow.com/questions/21774561/some-pdfs-are-converted-improperly-using-imagemagick), +[3](https://stackoverflow.com/questions/9064706/imagemagic-convert-command-pdf-convertion-with-bad-size-orientation), [4](https://stackoverflow.com/questions/18837093/imagemagic-renders-image-with-black-background), +[5](https://stackoverflow.com/questions/37392798/pdf-to-svg-is-not-perfect), +[6](https://stackoverflow.com/q/10288065/2258393), etc. By using Inkscape, +we need one dependency less and get rid of unexpected Ghostscript issues. + +Due to this more complicated process, the use of Ti*k*Z is also more +complicated overall. The process is error-prone: An insufficiently +configured LaTeX installation or an insufficiently configured +Inkscape installation can lead to errors. Overall, this results in +the following dependencies: + +- Any LaTeX installation. This should be configured so that +missing packages are installed automatically. This filter uses the +`pdflatex` command which is available by the system's path. Alternatively, +you can set the `PDFLATEX` environment variable. In case you have to use +a specific LaTeX version on a pandoc document basis, you might set the +`pdflatexPath` meta variable. + +- An installation of [Inkscape](https://inkscape.org/). +It is assumed that the `inkscape` command is in the path and can be +executed from any location. Alternatively, the environment +variable `INKSCAPE` can be set with a path. If a specific +version per pandoc document is to be used, the `inkscapePath` +meta-variable can be set. + +In order to use additional LaTeX packages, use the optional +`additionalPackages` attribute in your document, as in the +example below. + +Example usage from [TikZ +examples](http://www.texample.net/tikz/examples/parallelepiped/) by +[Kjell Magne Fauske](http://www.texample.net/tikz/examples/nav1d/): + +```{.tikz caption="This is an image, created by **TikZ i.e. LaTeX**." 
+ additionalPackages="\usepackage{adjustbox}"} +\usetikzlibrary{arrows} +\tikzstyle{int}=[draw, fill=blue!20, minimum size=2em] +\tikzstyle{init} = [pin edge={to-,thin,black}] + +\resizebox{16cm}{!}{% + \trimbox{3.5cm 0cm 0cm 0cm}{ + \begin{tikzpicture}[node distance=2.5cm,auto,>=latex'] + \node [int, pin={[init]above:$v_0$}] (a) {$\frac{1}{s}$}; + \node (b) [left of=a,node distance=2cm, coordinate] {a}; + \node [int, pin={[init]above:$p_0$}] at (0,0) (c) + [right of=a] {$\frac{1}{s}$}; + \node [coordinate] (end) [right of=c, node distance=2cm]{}; + \path[->] (b) edge node {$a$} (a); + \path[->] (a) edge node {$v$} (c); + \draw[->] (c) edge node {$p$} (end) ; + \end{tikzpicture} + } +} +``` + +### Python +In order to use Python to generate an diagram, your Python code must store the +final image data in a temporary file with the correct format. In case you use +matplotlib for a diagram, add the following line to do so: + +```python +plt.savefig("$DESTINATION$", dpi=300, fomat="$FORMAT$") +``` + +The placeholder `$FORMAT$` gets replace by the necessary format. Most of the +time, this will be `png` or `svg`. The second placeholder, `$DESTINATION$` +gets replaced by the path and file name of the destination. Both placeholders +can be used as many times as you want. 
Example usage from the [Matplotlib +examples](https://matplotlib.org/gallery/lines_bars_and_markers/cohere.html#sphx-glr-gallery-lines-bars-and-markers-cohere-py): + +```{.py2image caption="This is an image, created by **Python**."} +import matplotlib +matplotlib.use('Agg') + +import sys +import numpy as np +import matplotlib.pyplot as plt + +# Fixing random state for reproducibility +np.random.seed(19680801) + +dt = 0.01 +t = np.arange(0, 30, dt) +nse1 = np.random.randn(len(t)) # white noise 1 +nse2 = np.random.randn(len(t)) # white noise 2 + +# Two signals with a coherent part at 10Hz and a random part +s1 = np.sin(2 * np.pi * 10 * t) + nse1 +s2 = np.sin(2 * np.pi * 10 * t) + nse2 + +fig, axs = plt.subplots(2, 1) +axs[0].plot(t, s1, t, s2) +axs[0].set_xlim(0, 2) +axs[0].set_xlabel('time') +axs[0].set_ylabel('s1 and s2') +axs[0].grid(True) + +cxy, f = axs[1].cohere(s1, s2, 256, 1. / dt) +axs[1].set_ylabel('coherence') + +fig.tight_layout() +plt.savefig("$DESTINATION$", dpi=300, format="$FORMAT$") +``` + +A precondition for using Python is a Python environment which contains all +necessary libraries you want to use. To use, for example, the standard +[Anaconda Python](https://www.anaconda.com/distribution/) environment +on a Microsoft Windows system ... + +- set the environment variable `PYTHON` or the meta key `pythonPath` +to `c:\ProgramData\Anaconda3\python.exe` + +- set the environment variable `PYTHON_ACTIVATE` or the meta +key `activatePythonPath` to `c:\ProgramData\Anaconda3\Scripts\activate.bat`. + +Pandoc will activate this Python environment and start Python with your code. + +## How to run pandoc +This section shows how to call pandoc in order to use this filter with +meta keys. The following command assumes that the filters are stored in the +subdirectory `filters`. Further, this is an example for a Microsoft Windows +system. 
+ +Command to use PlantUML (a single line): + +``` +pandoc.exe README.md -f markdown -t docx --self-contained --standalone --lua-filter=filters\diagram-generator.lua --metadata=plantumlPath:"c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar" --metadata=javaPath:"c:\Program Files\Java\jre1.8.0_201\bin\java.exe" -o README.docx +``` + +All available environment variables: + +- `PLANTUML` e.g. `c:\ProgramData\chocolatey\lib\plantuml\tools\plantuml.jar`; Default: `plantuml.jar` +- `INKSCAPE` e.g. `c:\Program Files\Inkscape\inkscape.exe`; Default: `inkscape` +- `PYTHON` e.g. `c:\ProgramData\Anaconda3\python.exe`; Default: n/a +- `PYTHON_ACTIVATE` e.g. `c:\ProgramData\Anaconda3\Scripts\activate.bat`; Default: n/a +- `JAVA_HOME` e.g. `c:\Program Files\Java\jre1.8.0_201`; Default: n/a +- `DOT` e.g. `c:\ProgramData\chocolatey\bin\dot.exe`; Default: `dot` +- `PDFLATEX` e.g. `c:\Program Files\MiKTeX 2.9\miktex\bin\x64\pdflatex.exe`; Default: `pdflatex` + +All available meta keys: + +- `plantumlPath` +- `inkscapePath` +- `pythonPath` +- `activatePythonPath` +- `javaPath` +- `dotPath` +- `pdflatexPath`
\ No newline at end of file diff --git a/paper/lua-filters/minted/Makefile b/paper/lua-filters/minted/Makefile new file mode 100644 index 0000000..5d509f9 --- /dev/null +++ b/paper/lua-filters/minted/Makefile @@ -0,0 +1,65 @@ +.PHONY: all +all: sample_beamer.pdf sample_latex.pdf sample.html + +# NOTE: `pandoc_inputs` can have multiple filenames if you want to send `pandoc` +# more than one input file at once. In the commands for the targets that depend +# on `pandoc_inputs` you will see a pattern `$^ > $@`. It's less magic than it +# seems, but useful to point out if you have not seen these before. They are +# called "Automatic Variables", and more documentation can be found here: +# +# https://www.gnu.org/software/make/manual/html_node/Automatic-Variables.html +# +# So by depending on $(pandoc_inputs) and using $^ as the input files to +# `pandoc`, $^ will expand to all filenames in `pandoc_inputs` and the target +# will re-run when the timestamp of _any_ file listed in `pandoc_inputs` is +# updated. By redirecting the output to $@, it will send the `pandoc` output to +# the target name. In the examples below, $@ expands to either +# `sample_beamer.tex`, `sample_latex.tex`, or `sample.html` (depending on the +# target name). +# +# TL;DR: You should be able to copy-paste the commands below and just rename the +# target names to match whatever output filenames you want. +pandoc_inputs := sample.md + +# Sample beamer presentation. +sample_beamer.tex: $(pandoc_inputs) + pandoc -s -t beamer --no-highlight --lua-filter=minted.lua $^ > $@ + +sample_beamer.pdf: sample_beamer.tex + latexmk -pdf -shell-escape -jobname=sample_beamer sample_beamer + +# Sample latex document. +sample_latex.tex: $(pandoc_inputs) + pandoc -s -t latex --no-highlight --lua-filter=minted.lua $^ > $@ + +sample_latex.pdf: sample_latex.tex + latexmk -pdf -shell-escape -jobname=sample_latex sample_latex + +# Sample html5 document. 
+sample.html: $(pandoc_inputs) + pandoc -s -t html5 --lua-filter=minted.lua $^ > $@ + +# --- + +.PHONY: clean realclean +clean: + @# latexmk errors if no auxiliary files exist to cleanup. Using `|| true` + @# just makes it so that the subsequent commands will also execute. + latexmk -c sample_beamer >/dev/null 2>&1 || true + @# latexmk does not clean all beamer files + rm -f sample_beamer.{nav,snm,vrb} + rm -rf _minted-sample_beamer/ + latexmk -c sample_latex >/dev/null 2>&1 || true + rm -rf _minted-sample_latex/ + +realclean: clean + rm -f sample_beamer.{tex,pdf} + rm -f sample_latex.{tex,pdf} + rm -f sample.html + +.PHONY: test lint +lint: + flake8 --max-line-length=80 run_minted_tests.py background_color.py + +test: + @./run_minted_tests.py diff --git a/paper/lua-filters/minted/README.md b/paper/lua-filters/minted/README.md new file mode 100644 index 0000000..b26491d --- /dev/null +++ b/paper/lua-filters/minted/README.md @@ -0,0 +1,316 @@ +# minted + +This filter enables users to use the [`minted`][minted] package with the +`beamer` and `latex` writers. Users may attach any desired `minted` specific +styling / attributes to their code-blocks (or via document metadata). These +`minted` specific attributes will be _removed_ for any writers that are not +`beamer` or `latex`, since many of the `minted` options require using `latex` +specific syntax that can cause problems in other output formats. For example, +if the `fontsize=\footnotesize` attribute were applied to a code block, an +`html` export would include `data-fontsize="\footnotesize"`, which may produce +errors or more commonly be entirely meaningless for non-latex writers. + +The `minted` package will be used as a _replacement_ for the existing `pandoc` +inline code and code block elements. Behind the scenes, `minted` builds on top +of the `fancyvrb` latex package, using [pygments][pygments] to perform the +highlighting. 
The `minted` package contains _many_ options for customizing +output; users are encouraged to read / review section 5.3 of the +[minted documentation][minted_docs]. **This filter does not make any attempts +to validate arguments supplied to the `minted` package**. Invalid / conflicting +arguments are a usage error. + +**Contents** + +- [Setup](#setup) + - [LaTeX Preamble Configuration](#latex-preamble-configuration) + - [PDF Compilation](#pdf-compilation) +- [Minted Filter Settings](#minted-filter-settings) + - [Default Settings](#default-settings) + - [All Metadata Settings](#all-metadata-settings) + - [`no_default_autogobble`](#no_default_autogobble-boolean) + - [`no_mintinline`](#no_mintinline-boolean) + - [`default_block_language`](#default_block_language-string) + - [`default_inline_language`](#default_inline_language-string) + - [`block_attributes`](#block_attributes-list-of-strings) + - [`inline_attributes`](#inline_attributes-list-of-strings) +- [Important Usage Notes](#important-usage-notes) +- [Bonus](#bonus) + +# Setup + +## LaTeX Preamble Configuration + +Since this filter will emit `\mintinline` commands for inline code, and +`\begin{minted} ... \end{minted}` environments for code blocks, you must ensure +that your document includes the `minted` package in the preamble of your +`beamer` or `latex` document. The filter cannot accomplish this for you. + +**Option 1** + +Use the `header-includes` feature of `pandoc` (`-H` / `--include-in-header`). +This will be injected into the preamble section of your `beamer` or `latex` +document. The bare minimum you need in this file is + +```latex +\usepackage{minted} +``` + +However, there are many other things you can set here (related or unrelated to +this filter), and this is a good opportunity to perform some global setup on the +`minted` package. Some examples: + +```latex +\usepackage{minted} + +% Set the `style=tango` attribute for all minted blocks.
Can still be overriden +% per block (e.g., you want to change just one). Run `pygmentize -L` to see +% all available options. +\usemintedstyle{tango} + +% Depending on which pygments style you choose, comments and preprocessor +% directives may be italic. The `tango` style is one of these. This disables +% all italics in the `minted` environment. +\AtBeginEnvironment{minted}{\let\itshape\relax} + +% This disables italics for the `\mintinline` commands. +% Credit: https://tex.stackexchange.com/a/469702/113687 +\usepackage{xpatch} +\xpatchcmd{\mintinline}{\begingroup}{\begingroup\let\itshape\relax}{}{} +``` + +The `minted` package has many options, see the +[minted documentation][minted_docs] for more information. For example, see the +`bgcolor` option for the `minted` package. In this "header-include" file would +be an excellent location to `\definecolor`s that you want to use with `bgcolor`. + +**Option 1.5** + +You can also set `header-includes` in the metadata of your document. The above +example could be set as (noting the escaped backslashes): + +```yaml +colorlinks: true +header-includes: + # Include the minted package, set global style, define colors, etc. + - "\\usepackage{minted}" + - "\\usemintedstyle{tango}" + # Prevent italics in the `minted` environment. + - "\\AtBeginEnvironment{minted}{\\let\\itshape\\relax}" + # Prevent italics in the `\mintinline` command. + - "\\usepackage{xpatch}" + - "`\\xpatchcmd{\\mintinline}{\\begingroup}{\\begingroup\\let\\itshape\\relax}{}{}`{=latex}" +``` + +Note on the last line calling `\xpatchcmd`, we escape the backslashes and +additionally force `pandoc` to treat this as `latex` code by making it an inline +`latex` code element. See [pandoc issue 2139 (comment)][pandoc_issue_2139] for +more information. + +Formally, you may want to apply the ``-"`\\raw_tex`{=latex}"`` trick to all +metadata to indicate it is `latex` specific code. 
However, since `pandoc` +strips out any raw `latex` when converting to other writers, it isn't necessary. + +**Option 2** + +You can also create your own custom `beamer` or `latex` template to have much +finer control over what is / is not included in your document. You may obtain +a copy of the template that `pandoc` uses by default by running +`pandoc -D beamer` or `pandoc -D latex` depending on your document type. + +After you have modified the template to suit your needs (including at the very +least a `\usepackage{minted}`), specify your template file to `pandoc` using +the `--template <path/to/template/file>` command line argument. + +## PDF Compilation + +To compile a PDF, there are two things that the `minted` package requires be +available: an escaped shell to be able to run external commands (the +`-shell-escape` command line flag), and the ability to create and later read +auxiliary files (`minted` runs `pygmentize` for the highlighting). + +At the time of writing this, only one of these is accessible using `pandoc` +directly. One may pass `--pdf-engine-opt=-shell-escape` to forward the +`-shell-escape` flag to the latex engine being used. Unfortunately, though, +the second component (related to temporary files being created) is not supported +by `pandoc`. See [pandoc issue 4271][pandoc_issue_4271]. + +**However**, in reality this is a minor issue that can easily be worked around. +Instead of generating `md => pdf`, you just use `pandoc` to generate `md => tex` +and then compile `tex => pdf` yourself. See the [sample Makefile](Makefile) for +examples of how to execute both stages. **Furthermore**, you will notice a +significant advantage of managing the `pdf` compilation yourself: the generated +`minted` files are cached and unless you `make clean` (or remove them manually), +unchanged code listings will be reused.
That is, you will have faster +compilation times :slightly_smiling_face: + +# Minted Filter Settings + +Direct control over the settings of this filter are performed by setting +sub-keys of a `minted` metadata key for your document. + +## Default Settings + +By default, this filter + +1. Transforms all inline `Code` elements to `\mintinline`. This can be disabled + globally by setting `no_mintinline: true`. + +2. Transforms all `CodeBlock` elements to `\begin{minted} ... \end{minted}` raw + latex code. This cannot be disabled. + +3. Both (1) and (2) default to the `"text"` pygments lexer, meaning that inline + code or code blocks without a specific code class applied will receive no + syntax highlighting. This can be changed globally by setting + `default_block_language: "lexer"` or `default_inline_language: "lexer"`. + +4. All `CodeBlock` elements have the `autogobble` attribute applied to them, + which informs `minted` to trim all common preceding whitespace. This can be + disabled globally by setting `no_default_autogobble: true`. However, doing + this is **strongly discouraged**. Consider a code block nested underneath + a list item. Pandoc will (correctly) generate indented code, meaning you + will need to manually inform `minted` to `gobble=indent` where `indent` is + the number of spaces to trim. Note that `pandoc` may not reproduce the same + indentation level of the original document. + +## All Metadata Settings + +Each of the following are nested under the `minted` metadata key. + +### `no_default_autogobble` (boolean) + +By default this filter will always use `autogobble` with minted, which will +automatically trim common preceding whitespace. This is important because +code blocks nested under a list or other block elements _will_ have common +preceding whitespace that you _will_ want trimmed. + +### `no_mintinline` (boolean) + +Globally prevent this filter from emitting `\mintinline` calls for inline +Code elements, emitting `\texttt` instead. 
Possibly useful in saving +compile time for large documents that do not seek to have syntax +highlighting on inline code elements. + +### `default_block_language` (string) + +The default pygments lexer class to use for code blocks. By default this +is `"text"`, meaning no syntax highlighting. This is a fallback value, code +blocks that explicitly specify a lexer will not use it. + +### `default_inline_language` (string) + +Same as `default_block_language`, only for inline code (typed in single +backticks). The default is also `"text"`, and changing is discouraged. + +### `block_attributes` (list of strings) + +Any default attributes to apply to _all_ code blocks. These may be +overriden on a per-code-block basis. See section 5.3 of the +[minted documentation][minted_docs] for available options. + +### `inline_attributes` (list of strings) + +Any default attributes to apply to _all_ inline code. These may be +overriden on a per-code basis. See section 5.3 of the +[minted documentation][minted_docs] for available options. + +[minted_docs]: http://mirrors.ctan.org/macros/latex/contrib/minted/minted.pdf +[minted]: https://ctan.org/pkg/minted?lang=en +[pygments]: http://pygments.org/ +[pandoc_issue_2139]: https://github.com/jgm/pandoc/issues/2139#issuecomment-310522113 +[pandoc_issue_4271]: https://github.com/jgm/pandoc/issues/4721 + +# Important Usage Notes + +Refer to the [`sample.md`](sample.md) file for some live examples of how to use +this filter. If you execute `make` in this directory, `sample_beamer.pdf`, +`sample_latex.pdf`, and `sample.html` will all be generated to demonstrate the +filter in action. + +`pandoc` allows you to specify additional attributes on either the closing +backtick of an inline code element, or after the third backtick of a fenced +code block. 
This is done using `{curly braces}`, an example: + +```md +`#include <type_traits>`{.cpp .showspaces style=bw} +``` + +or + + ```{.cpp .showspaces style=bw} + #include <type_traits> + ``` + +In order, these are + +- `.cpp`: specify the language lexer class. +- `.showspaces`: a `minted` boolean attribute. +- `style=bw`: a `minted` attribute that takes an argument (`bw` is a pygments + style, black-white, just an example). + +There are two rules that must not be violated: + +1. Any time you want to supply extra arguments to `minted` to a specific inline + code or code block element, **the lexer class must always be first, and + always be present**. + + This is a limitation of the implementation of this filter. + +2. Observe the difference between specifying boolean attributes vs attributes + that take an argument. Boolean `minted` attributes **must** have a leading + `.`, and `minted` attributes that take an argument **may not** have a leading + `.`. + + - **Yes**: `{.cpp .showspaces}`, **No**: `{.cpp showspaces}` + - **Yes**: `{.cpp style=bw}`, **No**: `{.cpp .style=bw}` + + If you violate this, then `pandoc` will likely not produce an actual inline + `Code` or `CodeBlock` element, but instead something else (undefined). + +Last, but not least, you will see that the `--no-highlight` flag is used in the +`Makefile` for the latex targets. This is added in the spirit of the filter +being a "full replacement" for `pandoc` highlighting with `minted`. This only +affects inline code elements that meet the following criteria: + +1. The inline code element has a lexer, e.g., `{.cpp}`. +2. The inline code element can actually be parsed for that language by `pandoc`. + +If these two conditions are met, and you do **not** specify `--no-highlight`, +the `pandoc` highlighting engine will take over. Users are encouraged to build +the samples (`make` in this directory) and look at the end of the +`Special Characters are Supported` section. 
If you remove `--no-highlight`, +`make realclean`, and then `make` again, you will see that the pandoc +highlighting engine will colorize the `auto foo = [](){};`. + +Simply put: if you do not want any pandoc highlighting in your LaTeX, **make +sure you add `--no-highlight`** and it will not happen. + +It is advantageous for this filter to rely on this behavior, because it means +that the filter does not need to worry about escaping special characters for +LaTeX -- `pandoc` will do that for us. Inspect the generated `sample_*.tex` +files (near the end) to see the difference. `--no-highlight` will produce +`\texttt` commands, but omitting this flag will result in some `\VERB` commands +from `pandoc`. + +# Bonus + +Included here is a simple python script to help you get the right color +definitions for `bgcolor` with minted. Just run +[`background_color.py`](background_color.py) with a single argument that is the +name of the pygments style you want the `latex` background color definition for: + +```console +$ ./background_color.py monokai +Options for monokai (choose *one*): + + (*) \definecolor{monokai_bg}{HTML}{272822} + (*) \definecolor{monokai_bg}{RGB}{39,40,34} + (*) \definecolor{monokai_bg}{rgb}{0.1529,0.1569,0.1333} + |--------/ + | + +--> You can rename this too :) +``` + +See the contents of [`sample.md`](sample.md) (click on "View Raw" to see the +comments in the metadata section). Notably, in order to use `\definecolor` you +should make sure that the `xcolor` package is actually included. Comments in +the file explain the options. diff --git a/paper/lua-filters/minted/background_color.py b/paper/lua-filters/minted/background_color.py new file mode 100755 index 0000000..e830db4 --- /dev/null +++ b/paper/lua-filters/minted/background_color.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +r""" +A simple script to print out the RGB ``\definecolor`` command for the background +color of a specified pygments style name. 
+""" + +import sys +try: + from pygments.styles import get_style_by_name +except ImportError as ie: + sys.stderr.write("Please install the Pygments package:\n{0}\n".format(ie)) + sys.exit(1) + + +if __name__ == "__main__": + # Make sure we have a style name provided. + if len(sys.argv) != 2: + sys.stderr.write("Usage: {0} <style_name>\n\n".format(sys.argv[0])) + sys.stderr.write(" Tip: run `pygmentize -L` to see style names.\n") + sys.exit(1) + + # Grab the style object, error out if invalid name. + style_name = sys.argv[1] + try: + style = get_style_by_name(style_name) + except Exception as e: + sys.stderr.write("Unable to find {0}:\n{1}\n".format(style_name, e)) + sys.exit(1) + + # Convert the hexadecimal string into rgb. + background_hex = style.background_color.replace("#", "") + if len(background_hex) != 6: + sys.stderr.write("Unknown hex color: {0}\n".format(background_hex)) + sys.exit(1) + + try: + r = int(background_hex[0:2], 16) + g = int(background_hex[2:4], 16) + b = int(background_hex[4:6], 16) + except Exception as e: + sys.stderr.write("Unable to convert to integers:\n{0}\n".format(e)) + sys.exit(1) + + # Build out the various options for \definecolor + # All should be equivalent, but users may have a preference of one format + # over another :p + tex_color_name = "{0}_bg".format(style_name) + def_HTML = r"\definecolor{{{0}}}{{HTML}}{{{1}}}".format( + tex_color_name, background_hex.upper() + ) + def_RGB = r"\definecolor{{{0}}}{{RGB}}{{{1}}}".format( + tex_color_name, "{0},{1},{2}".format(r, g, b) + ) + def_rgb = r"\definecolor{{{0}}}{{rgb}}{{{1}}}".format( + tex_color_name, + ",".join(["{0:.4}".format(float(c) / 255.0) for c in [r, g, b]]) + ) + + # Enumerate the options + print("Options for {0} (choose *one*):\n".format(style_name)) + print(" (*) {0}".format(def_HTML)) + print(" (*) {0}".format(def_RGB)) + print(" (*) {0}".format(def_rgb)) + + # Make sure they know that `{style_name}_bg` can be changed to whatever + # they want to be using in their 
document. + notice = "{0}|{1}/".format( + len(r" (*) \definecolor{") * " ", + (len(tex_color_name) - 2) * "-" + ) + vline = notice[0:notice.find("|")+1] + can_change = vline.replace("|", "+--> You can rename this too :)") + print(notice) + print(vline) + print(can_change) diff --git a/paper/lua-filters/minted/minted.lua b/paper/lua-filters/minted/minted.lua new file mode 100644 index 0000000..19f608e --- /dev/null +++ b/paper/lua-filters/minted/minted.lua @@ -0,0 +1,456 @@ +--[[ +minted -- enable the minted environment for code listings in beamer and latex. + +MIT License + +Copyright (c) 2019 Stephen McDowell + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +]] + +-------------------------------------------------------------------------------- +-- Quick documentation. 
See full documentation here: -- +-- https://github.com/pandoc/lua-filters/blob/master/minted -- +-------------------------------------------------------------------------------- +--[[ +Brief overview of metadata keys that you can use in your document: + +minted: + no_default_autogobble: <boolean>, *DISCOURAGED* + no_mintinline: <boolean> + default_block_language: <string> + default_inline_language: <string> + block_attributes: <list of strings> + - attr_1 + - attr_2 + - ... + inline_attributes: <list of strings> + - attr_1 + - attr_2 + - ... + +In words, underneath the `minted` metadata key, you have the following options: + +### `no_default_autogobble` (boolean) + +By default this filter will always use `autogobble` with minted, which will +automatically trim common preceding whitespace. This is important because +code blocks nested under a list or other block elements _will_ have common +preceding whitespace that you _will_ want trimmed. + +### `no_mintinline` (boolean) + +Globally prevent this filter from emitting `\mintinline` calls for inline +Code elements, emitting `\texttt` instead. Possibly useful in saving +compile time for large documents that do not seek to have syntax +highlighting on inline code elements. + +### `default_block_language` (string) + +The default pygments lexer class to use for code blocks. By default this +is `"text"`, meaning no syntax highlighting. This is a fallback value, code +blocks that explicitly specify a lexer will not use it. + +### `default_inline_language` (string) + +Same as `default_block_language`, only for inline code (typed in single +backticks). The default is also `"text"`, and changing is discouraged. + +### `block_attributes` (list of strings) + +Any default attributes to apply to _all_ code blocks. These may be +overriden on a per-code-block basis. See section 5.3 of the +[minted documentation][minted_docs] for available options. 
--
-- ### `inline_attributes` (list of strings)
--
-- Any default attributes to apply to _all_ inline code.  These may be
-- overridden on a per-code basis.  See section 5.3 of the
-- [minted documentation][minted_docs] for available options.
--
-- [minted_docs]: http://mirrors.ctan.org/macros/latex/contrib/minted/minted.pdf
--]]

local List = require('pandoc.List')

--------------------------------------------------------------------------------
-- Potential metadata elements to override.                                   --
--------------------------------------------------------------------------------
-- When true, inline Code elements are left alone (no \mintinline is emitted).
local minted_no_mintinline = false
-- Fallback lexers used when a code element carries no class of its own.
local minted_default_block_language = "text"
local minted_default_inline_language = "text"
-- Document-wide default minted options, as "key" or "key=value" strings.
local minted_block_attributes = {}
local minted_inline_attributes = {}

--------------------------------------------------------------------------------
-- Constants used to differentiate Code and CodeBlock elements.               --
--------------------------------------------------------------------------------
local MintedInline = 0
local MintedBlock = 1

--------------------------------------------------------------------------------
-- Utility functions.                                                         --
--------------------------------------------------------------------------------
-- Return the string lexer class to be used with minted.  `elem` should be
-- either a Code or CodeBlock element (whose `classes` list will be inspected
-- first).  `kind` is assumed to be either `MintedInline` or `MintedBlock` in
-- order to choose the appropriate fallback lexer when unspecified.
local function minted_language(elem, kind)
  -- If the code [block] attached classes, we assume the first one is the
  -- lexer class to use.
  if #elem.classes > 0 then
    return elem.classes[1]
  end
  -- Allow user-level metadata to override the inline language.
  if kind == MintedInline then
    return minted_default_inline_language
  end
  -- Allow user-level metadata to override the block language.
  if kind == MintedBlock then
    return minted_default_block_language
  end

  -- Failsafe, should not hit here unless function called incorrectly.
  return "text"
end

-- Section 5.3 Available Options of Minted documentation.  Note that many of
-- these do not apply to \mintinline (inline Code).  Users are responsible
-- for supplying valid arguments to minted.  For example, specifying
-- `autogobble` and `gobble` at the same time is a usage error.
--
-- http://mirrors.ctan.org/macros/latex/contrib/minted/minted.pdf
--
-- Built once at load time rather than on every `is_minted_class` call.
local all_minted_options = List:new{
  "autogobble", "baselinestretch", "beameroverlays", "breakafter",
  "breakaftergroup", "breakaftersymbolpre", "breakaftersymbolpost",
  "breakanywhere", "breakanywheresymbolpre", "breakanywheresymbolpost",
  "breakautoindent", "breakbefore", "breakbeforegroup",
  "breakbeforesymbolpre", "breakbeforesymbolpost", "breakbytoken",
  "breakbytokenanywhere", "breakindent", "breakindentnchars", "breaklines",
  "breaksymbol", "breaksymbolleft", "breaksymbolright", "breaksymbolindent",
  "breaksymbolindentnchars", "breaksymbolindentleft",
  "breaksymbolindentleftnchars", "breaksymbolindentright",
  "breaksymbolindentrightnchars", "breaksymbolsep", "breaksymbolsepnchars",
  "breaksymbolsepleft", "breaksymbolsepleftnchars", "breaksymbolsepright",
  "breaksymbolseprightnchars", "bgcolor", "codetagify", "curlyquotes",
  "encoding", "escapeinside", "firstline", "firstnumber", "fontfamily",
  "fontseries", "fontsize", "fontshape", "formatcom", "frame", "framerule",
  "framesep", "funcnamehighlighting", "gobble", "highlightcolor",
  "highlightlines", "keywordcase", "label", "labelposition", "lastline",
  "linenos", "numberfirstline", "numbers", "mathescape", "numberblanklines",
  "numbersep", "obeytabs", "outencoding", "python3", "resetmargins",
  "rulecolor", "samepage", "showspaces", "showtabs", "space", "spacecolor",
  "startinline", "style", "stepnumber", "stepnumberfromfirst",
  "stepnumberoffsetvalues", "stripall", "stripnl", "tab", "tabcolor",
  "tabsize", "texcl", "texcomments", "xleftmargin", "xrightmargin"
}

-- Returns a boolean specifying whether or not the specified string `cls` is an
-- option that is supported by the minted package.
local function is_minted_class(cls)
  return all_minted_options:includes(cls)
end

-- Return a string for the minted attributes `\begin{minted}[attributes]` or
-- `\mintinline[attributes]`.  Attributes are acquired by inspecting the
-- specified element's `classes` and `attributes` fields.  Any global
-- attributes provided in the document metadata will be included _only_ if
-- they do not override the element-level attributes.
--
-- `elem` should either be a Code or CodeBlock element, and `kind` is assumed to
-- be either `MintedInline` or `MintedBlock`.  The `kind` determines which
-- global default attribute list to use; any other value means that no global
-- defaults are applied.
local function minted_attributes(elem, kind)
  -- The full listing of attributes that will be joined and returned.
  local collected = {}

  -- Book-keeping: set of option names already supplied by the element itself,
  -- used to decide whether a global default may still be added.
  local element_keys = {}

  -- Boolean style options for minted (e.g., ```{.bash .autogobble}) will appear
  -- in the list of classes.
  for _, cls in ipairs(elem.classes) do
    if is_minted_class(cls) then
      collected[#collected + 1] = cls
      element_keys[cls] = true
    end
  end

  -- Value options using key=value (e.g., ```{.bash fontsize=\scriptsize}) show
  -- up in the list of attributes.  `key` / `value` must be local: a bare
  -- assignment here would create globals visible to every other filter.
  for _, attr in ipairs(elem.attributes) do
    local key, value = attr[1], attr[2]
    if is_minted_class(key) then
      collected[#collected + 1] = key .. "=" .. value
      element_keys[key] = true
    end
  end

  -- Add any global defaults _only_ if they do not conflict.  Note that conflict
  -- is only in the literal sense.  If a user has `autogobble` and `gobble=2`
  -- specified, these do conflict in the minted sense, but this filter makes no
  -- checks on validity ;)
  local global_defaults
  if kind == MintedInline then
    global_defaults = minted_inline_attributes
  elseif kind == MintedBlock then
    global_defaults = minted_block_attributes
  else
    -- Unknown kind: nothing to merge (avoids calling ipairs on nil).
    global_defaults = {}
  end

  for _, global_attr in ipairs(global_defaults) do
    -- Either use the index of `=` minus one, or -1 if no `=` present.  Fallback
    -- on -1 means that the substring is the original string.
    local end_idx = (string.find(global_attr, "=", 1, true) or 0) - 1
    local global_key = string.sub(global_attr, 1, end_idx)
    if not element_keys[global_key] then
      collected[#collected + 1] = global_attr
    end
  end

  -- Return a comma delimited string for specifying the attributes to minted.
  return table.concat(collected, ",")
end

-- Return the specified `elem` with any minted data removed from the `classes`
-- and `attributes`.  Otherwise writers such as the HTML writer might produce
-- invalid code since latex makes heavy use of the \backslash.
local function remove_minted_attibutes(elem)
  -- Remove any minted items (and the `no_minted` bypass marker) from the
  -- classes.
  local kept_classes = {}
  for _, cls in ipairs(elem.classes) do
    if not is_minted_class(cls) and cls ~= "no_minted" then
      kept_classes[#kept_classes + 1] = cls
    end
  end
  elem.classes = kept_classes

  -- Remove any minted items from the attributes.
  local kept_attributes = {}
  for _, attr in ipairs(elem.attributes) do
    local key, value = attr[1], attr[2]
    if not is_minted_class(key) then
      kept_attributes[#kept_attributes + 1] = {key, value}
    end
  end
  elem.attributes = kept_attributes

  -- Return the (potentially modified) element for pandoc to take over.
  return elem
end

-- Return a `start_delim` and `end_delim` that can safely wrap around the
-- specified `text` when used inline.
-- If no special characters occur in `text`, then a pair of braces is returned.
-- Otherwise, the first candidate delimiter character that does not occur in
-- `text` is returned as both the start and the end delimiter.  If no delimiter
-- could be found, an error is raised.
local function minted_inline_delims(text)
  -- Braces are the preferred delimiters whenever the code contains none.
  if not text:find('[{}]') then
    return '{', '}'
  end

  -- Try some other delimiter (the alphanumeric digits are in Python's
  -- string.digits + string.ascii_letters order).  Kept local so the candidate
  -- list does not leak into the global environment.
  local possible_delims = ('|!@#^&*-=+' .. '0123456789' ..
                           'abcdefghijklmnopqrstuvwxyz' ..
                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  for char in possible_delims:gmatch('.') do
    -- Plain (non-pattern) search: several candidates are pattern magic chars.
    if not text:find(char, 1, true) then
      return char, char
    end
  end

  local msg = 'Unable to determine delimiter to use around inline code %q'
  error(msg:format(text))
end

--------------------------------------------------------------------------------
-- Pandoc overrides.                                                          --
--------------------------------------------------------------------------------
-- Override the pandoc Meta function so that we can parse the metadata for the
-- document and store the necessary variables locally to use in other functions
-- such as Code and CodeBlock (helper methods).  The metadata itself is
-- returned unchanged.
function Meta(m)
  -- Grab the `minted` metadata; every setting below is optional.
  local minted = m["minted"]
  local found_autogobble = false
  local always_autogobble = true
  if minted ~= nil then
    -- Parse and set the global bypass to turn off all \mintinline calls.
    local no_mintinline = minted["no_mintinline"]
    if no_mintinline ~= nil then
      minted_no_mintinline = no_mintinline
    end

    -- Parse and set the default block language.
    local default_block_language = minted.default_block_language
      and pandoc.utils.stringify(minted.default_block_language)
    if default_block_language ~= nil then
      minted_default_block_language = default_block_language
    end

    -- Parse and set the default inline language.
    local default_inline_language = minted.default_inline_language
      and pandoc.utils.stringify(minted.default_inline_language)
    if default_inline_language ~= nil then
      minted_default_inline_language = default_inline_language
    end

    -- Parse the global default minted attributes to use on every block.
    local block_attributes = minted["block_attributes"]
    if block_attributes ~= nil then
      for _, attr in ipairs(block_attributes) do
        -- Each list entry is a metadata element, not a string: compare its
        -- text.  Comparing `attr` itself to "autogobble" is never true and
        -- caused `autogobble` to be appended a second time below.
        local attr_text = attr[1].text
        if attr_text == "autogobble" then
          found_autogobble = true
        end
        table.insert(minted_block_attributes, attr_text)
      end
    end

    -- Allow users to turn off autogobble for blocks, but really they should not
    -- ever seek to do this (indented code blocks under list for example).
    local no_default_autogobble = minted["no_default_autogobble"]
    if no_default_autogobble ~= nil then
      always_autogobble = not no_default_autogobble
    end

    -- Parse the global default minted attributes to use on every inline.
    local inline_attributes = minted["inline_attributes"]
    if inline_attributes ~= nil then
      for _, attr in ipairs(inline_attributes) do
        table.insert(minted_inline_attributes, attr[1].text)
      end
    end
  end

  -- Make sure autogobble is turned on by default, unless the user disabled it
  -- or already listed it explicitly in `block_attributes`.
  if always_autogobble and not found_autogobble then
    table.insert(minted_block_attributes, "autogobble")
  end

  -- Return the metadata to pandoc (unchanged).
  return m
end

-- Override inline code elements to use \mintinline for beamer / latex writers.
-- Other writers have all minted attributes removed.
+function Code(elem)
+  -- Writers other than beamer / latex only get the minted data stripped.
+  if FORMAT ~= "beamer" and FORMAT ~= "latex" then
+    return remove_minted_attibutes(elem)
+  end
+
+  -- A `.no_minted` class on the element is the local bypass for \mintinline.
+  local bypass_requested = false
+  for _, class_name in ipairs(elem.classes) do
+    if class_name == "no_minted" then
+      bypass_requested = true
+      break
+    end
+  end
+
+  -- Local or global bypass: returning `nil` signals to `pandoc` that the
+  -- element is not changed.
+  if minted_no_mintinline or bypass_requested then
+    return nil
+  end
+
+  local open_delim, close_delim = minted_inline_delims(elem.text)
+  local lang = minted_language(elem, MintedInline)
+  local attrs = minted_attributes(elem, MintedInline)
+  -- NOTE: prior to pandoc commit 24a0d61, `beamer` cannot be used as the
+  -- RawBlock format. Using `latex` should not cause any problems.
+  return pandoc.RawInline(
+    "latex",
+    string.format(
+      "\\mintinline[%s]{%s}%s%s%s",
+      attrs, lang, open_delim, elem.text, close_delim
+    )
+  )
+end
+
+-- Override code blocks to use \begin{minted}...\end{minted} for beamer / latex
+-- writers. Other writers have all minted attributes removed.
+function CodeBlock(block)
+  -- Writers other than beamer / latex only get the minted data stripped.
+  if FORMAT ~= "beamer" and FORMAT ~= "latex" then
+    return remove_minted_attibutes(block)
+  end
+
+  local lang = minted_language(block, MintedBlock)
+  local attrs = minted_attributes(block, MintedBlock)
+  -- NOTE: prior to pandoc commit 24a0d61, `beamer` cannot be used as the
+  -- RawBlock format. Using `latex` should not cause any problems.
+  return pandoc.RawBlock(
+    "latex",
+    string.format(
+      "\\begin{minted}[%s]{%s}\n%s\n\\end{minted}",
+      attrs, lang, block.text
+    )
+  )
+end
+
+-- Override headers to make all beamer frames fragile, since any minted
+-- environments or \mintinline invocations will halt compilation if the frame
+-- is not marked as fragile.
+function Header(elem) + if FORMAT == 'beamer' then + -- Check first that 'fragile' is not already present. + local has_fragile = false + for _, val in ipairs(elem.classes) do + if val == 'fragile' then + has_fragile = true + break + end + end + + -- If not found, add fragile to the list of classes. + if not has_fragile then + table.insert(elem.classes, 'fragile') + end + + -- NOTE: pass the remaining work to pandoc, noting that 2.5 and below + -- may duplicate the 'fragile' specifier. Duplicated fragile does *not* + -- cause compile errors. + return elem + end +end + +-- NOTE: order of return matters, Meta needs to be first otherwise the metadata +-- from the document will not be loaded _first_. +return { + {Meta = Meta}, + {Code = Code}, + {CodeBlock = CodeBlock}, + {Header = Header} +} diff --git a/paper/lua-filters/minted/run_minted_tests.py b/paper/lua-filters/minted/run_minted_tests.py new file mode 100755 index 0000000..15803da --- /dev/null +++ b/paper/lua-filters/minted/run_minted_tests.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python + +""" +Unit tests for the pandoc minted.lua filter. +""" + +# Lint this file with: flake8 --max-line-length=80 +import os +import string +import subprocess +import sys +import textwrap + +code_block = textwrap.dedent(''' + ## A Code Block + + ```{.cpp} + auto mult = []<typename T, typename U>(T const & x, U const & y) { + return x * y; + }; + ``` +''') +""" +The base CodeBlock code. {.cpp} is used as a replacement marker in most tests! +""" + +inline_delims = '|!@#^&*-=+' + string.digits + string.ascii_letters +inline_code = textwrap.dedent(''' + ## Inline Code + + `#include <type_traits>`{.cpp} + C and C++ use `{` and `}` to delimit scopes. + Some other special characters: + These check bypass: `~!@#$%^&*()-=_+[]\\{}|;\':",./<>?` + These check regular inline: ''' + ' '.join( + '`{' + inline_delims[:i] + '`' for i in range(len(inline_delims)) +)) +""" +The base Code code. {.cpp} is used as a replacement marker in most tests! 
+""" + + +def run_pandoc(pandoc_args, stdin): + """Run pandoc with the specified arguments, returning the output.""" + # The input / output should be small enough for these tests that buffer + # overflows should not happen. + pandoc_proc = subprocess.Popen( + ["pandoc"] + pandoc_args, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE + ) + + # Python 3.x and later require communicating with bytes. + if sys.version_info[0] >= 3: + stdin = bytes(stdin, "utf-8") + + stdout, stderr = pandoc_proc.communicate(input=stdin) + if pandoc_proc.returncode != 0: + sys.stderr.write("Non-zero exit code of {ret} from pandoc!\n".format( + ret=pandoc_proc.returncode + )) + sys.stderr.write("pandoc stderr: {stderr}".format( + stderr=stderr.decode("utf-8") + )) + sys.exit(1) + + return stdout.decode("utf-8") + + +def fail_test(test_name, messages, ansi_color_code="31"): + """ + Print failure message and ``sys.exit(1)``. + + ``test_name`` (str) + The name of the test (to make finding in code easier). + + ``messages`` (list of str -- or -- str) + A single string, or list of strings, to print out to ``stderr`` that + explain the reason for the test failure. + + ``ansi_color_code`` (str) + A an ANSI color code to use to colorize the failure message :) Default + is ``"31"``, which is red. + """ + sys.stderr.write( + "\033[0;{ansi_color_code}mTest {test_name} FAILED\033[0m\n".format( + ansi_color_code=ansi_color_code, test_name=test_name + ) + ) + if isinstance(messages, list): + for m in messages: + sys.stderr.write("--> {m}\n".format(m=m)) + else: + sys.stderr.write("--> {messages}\n".format(messages=messages)) + sys.exit(1) + + +def ensure_fragile(test_name, pandoc_output): + r""" + Ensure that every \begin{frame} has (at least one) fragile. + + ``test_name`` (str) + The name of the test (forwards to ``fail_test``). + + ``pandoc_output`` (str) + The pandoc output for the test case. 
+ """ + for line in pandoc_output.splitlines(): + if r"\begin{frame}" in line: + if "fragile" not in line: + fail_test( + test_name, + r"\begin{frame} without 'fragile': {line}".format(line=line) + ) + + +def ensure_present(test_name, string, pandoc_output): + """ + Assert that ``string`` is found in ``pandoc_output``. + + ``test_name`` (str) + The name of the test (forwards to ``fail_test``). + + ``string`` (str) + The string to check verbatim ``string in pandoc_output``. + + ``pandoc_output`` (str) + The pandoc output for the test case. + """ + if string not in pandoc_output: + fail_test( + test_name, + "The requested string '{string}' was not found in:\n{pout}".format( + string=string, pout=pandoc_output + ) + ) + + +def ensure_not_present(test_name, string, pandoc_output): + """ + Assert that ``string`` is **not** found in ``pandoc_output``. + + ``test_name`` (str) + The name of the test (forwards to ``fail_test``). + + ``string`` (str) + The string to check verbatim ``string not in pandoc_output``. + + ``pandoc_output`` (str) + The pandoc output for the test case. + """ + if string in pandoc_output: + fail_test( + test_name, + "The forbidden string '{string}' was found in:\n{pout}".format( + string=string, pout=pandoc_output + ) + ) + + +def run_tex_tests(pandoc_args, fmt): + """ + Run same tests for latex writers. + + ``pandoc_args`` (list of str) + The base list of arguments to forward to pandoc. Some tests may remove + the ``--no-highlight`` flag to validate whether or not pandoc + highlighting macros appear as expected (or not at all). + + ``fmt`` (str) + The format is assumed to be either 'latex' or 'beamer'. 
+ """ + def verify(test_name, args, md, *strings): + """Run pandoc, ensure fragile, and string in output.""" + output = run_pandoc(args + ["-t", fmt], md) + if fmt == "beamer": + ensure_fragile(test_name, output) + else: # latex writer + ensure_not_present(test_name, "fragile", output) + for s in strings: + ensure_present(test_name, s, output) + # Make sure the pandoc highlighting is not being used + if "--no-highlight" in args: + ensure_not_present(test_name, r"\VERB", output) + # if `nil` is present, that likely means a problem parsing the metadata + ensure_not_present(test_name, "nil", output) + + ############################################################################ + # CodeBlock tests. # + ############################################################################ + begin_minted = r"\begin{{minted}}[{attrs}]{{{lang}}}" + verify( + "[code-block] default", + pandoc_args, + code_block, + begin_minted.format(attrs="autogobble", lang="cpp") + ) + verify( + "[code-block] no_default_autogobble", + pandoc_args, + textwrap.dedent(''' + --- + minted: + no_default_autogobble: true + --- + {code_block} + ''').format(code_block=code_block), + begin_minted.format(attrs="", lang="cpp") + ) + verify( + "[code-block] default block language is 'text'", + pandoc_args, + code_block.replace("{.cpp}", ""), + begin_minted.format(attrs="autogobble", lang="text") + ) + verify( + "[code-block] user provided default_block_language", + pandoc_args, + textwrap.dedent(''' + --- + minted: + default_block_language: "haskell" + --- + {code_block} + ''').format(code_block=code_block.replace("{.cpp}", "")), + begin_minted.format(attrs="autogobble", lang="haskell") + ) + verify( + "[code-block] user provided block_attributes", + pandoc_args, + textwrap.dedent(''' + --- + minted: + block_attributes: + - "showspaces" + - "space=." 
+ --- + {code_block} + ''').format(code_block=code_block), + begin_minted.format( + attrs=",".join(["showspaces", "space=.", "autogobble"]), + lang="cpp" + ) + ) + verify( + "[code-block] user provided block_attributes and no_default_autogobble", + pandoc_args, + textwrap.dedent(''' + --- + minted: + no_default_autogobble: true + block_attributes: + - "style=monokai" + - "bgcolor=monokai_bg" + --- + {code_block} + ''').format(code_block=code_block), + begin_minted.format( + attrs=",".join(["style=monokai", "bgcolor=monokai_bg"]), lang="cpp" + ) + ) + verify( + "[code-block] attributes on code block", + pandoc_args, + code_block.replace( + "{.cpp}", "{.cpp .showspaces bgcolor=tango_bg style=tango}" + ), + begin_minted.format( + attrs=",".join([ + "showspaces", "bgcolor=tango_bg", "style=tango", "autogobble" + ]), + lang="cpp" + ) + ) + verify( + "[code-block] attributes on code block + user block_attributes", + pandoc_args, + textwrap.dedent(''' + --- + minted: + block_attributes: + - "showspaces" + - "space=." + --- + {code_block} + ''').format( + code_block=code_block.replace( + "{.cpp}", "{.cpp bgcolor=tango_bg style=tango}" + ) + ), + begin_minted.format( + attrs=",".join([ + "bgcolor=tango_bg", + "style=tango", + "showspaces", + "space=.", + "autogobble" + ]), + lang="cpp" + ) + ) + verify( + "[code-block] traditional fenced code block", + pandoc_args, + code_block.replace("{.cpp}", "cpp"), + begin_minted.format(attrs="autogobble", lang="cpp") + ) + verify( + "[code-block] non-minted attributes not forwarded", + pandoc_args, + code_block.replace("{.cpp}", "{.cpp .showspaces .hello}"), + begin_minted.format( + attrs=",".join(["showspaces", "autogobble"]), lang="cpp" + ) + ) + + ############################################################################ + # Inline Code tests. 
# + ############################################################################ + mintinline = r"\mintinline[{attrs}]{{{lang}}}" + verify( + "[inline-code] default", + pandoc_args, + inline_code, + mintinline.format(attrs="", lang="cpp"), + "|{|", + "|}|", + *[ + delim + '{' + inline_delims[:i] + delim + for i, delim in enumerate(inline_delims) + ] + ) + verify( + "[inline-code] default language is text", + pandoc_args, + inline_code, + mintinline.format(attrs="", lang="text"), + "|{|", + "|}|" + ) + # begin: global no_mintinline shared testing with / without --no-highlight + inline_no_mintinline_globally_md = textwrap.dedent(''' + --- + minted: + no_mintinline: true + --- + {inline_code} + ''').format(inline_code=inline_code) + inline_no_mintinline_globally_strings = [ + r"\texttt{\{}", + r"\texttt{\}}", + (r"\texttt{" + + r"\textasciitilde{}!@\#\$\%\^{}\&*()-=\_+{[}{]}\textbackslash{}\{\}" + + r"""\textbar{};\textquotesingle{}:",./\textless{}\textgreater{}?}""") + ] + verify( + "[inline-code] no_mintinline off globally", + pandoc_args, + inline_no_mintinline_globally_md, + r"\texttt{\#include\ \textless{}type\_traits\textgreater{}}", + *inline_no_mintinline_globally_strings + ) + verify( + "[inline-code] no_mintinline off globally, remove --no-highlight", + [arg for arg in pandoc_args if arg != "--no-highlight"], + inline_no_mintinline_globally_md, + r"\VERB|\PreprocessorTok{#include }\ImportTok{<type_traits>}|", + *inline_no_mintinline_globally_strings + ) + # end: global no_mintinline shared testing with / without --no-highlight + # begin: no_minted shared testing with / without --no-highlight + inline_no_minted_md = inline_code.replace("{.cpp}", "{.cpp .no_minted}") + inline_no_minted_strings = ["|{|", "|}|"] + verify( + "[inline-code] .no_minted on single inline Code", + pandoc_args, + inline_no_minted_md, + r"texttt{\#include\ \textless{}type\_traits\textgreater{}}", + *inline_no_minted_strings + ) + verify( + "[inline-code] .no_minted on single inline 
Code, remove --no-highlight", + [arg for arg in pandoc_args if arg != "--no-highlight"], + inline_no_minted_md, + r"\VERB|\PreprocessorTok{#include }\ImportTok{<type_traits>}|", + *inline_no_minted_strings + ) + # end: no_minted shared testing with / without --no-highlight + verify( + "[inline-code] user provided default_inline_language", + pandoc_args, + textwrap.dedent(''' + --- + minted: + default_inline_language: "haskell" + --- + {inline_code} + ''').format(inline_code=inline_code), + mintinline.format(attrs="", lang="haskell") + ) + verify( + "[inline-code] user provided inline_attributes", + pandoc_args, + textwrap.dedent(''' + --- + minted: + inline_attributes: + - "showspaces" + - "space=." + --- + {inline_code} + ''').format(inline_code=inline_code), + mintinline.format( + attrs=",".join(["showspaces", "space=."]), lang="cpp" + ), + mintinline.format( + attrs=",".join(["showspaces", "space=."]), lang="text" + ) + ) + verify( + "[inline-code] attributes on inline code", + pandoc_args, + inline_code.replace( + "{.cpp}", "{.cpp .showspaces bgcolor=tango_bg style=tango}" + ), + mintinline.format( + attrs=",".join(["showspaces", "bgcolor=tango_bg", "style=tango"]), + lang="cpp" + ) + ) + verify( + "[inline-code] attributes on inline code + user inline_attributes", + pandoc_args, + textwrap.dedent(''' + --- + minted: + inline_attributes: + - "showspaces" + - "space=." + --- + {inline_code} + ''').format( + inline_code=inline_code.replace( + "{.cpp}", "{.cpp bgcolor=tango_bg style=tango}" + ) + ), + mintinline.format( + attrs=",".join([ + "bgcolor=tango_bg", + "style=tango", + "showspaces", + "space=." + ]), + lang="cpp" + ) + ) + verify( + "[inline-code] non-minted attributes not forwarded", + pandoc_args, + inline_code.replace("{.cpp}", "{.cpp .showspaces .hello}"), + mintinline.format(attrs="showspaces", lang="cpp") + ) + + +def run_html_tests(args): + """ + Run tests with an html5 writer to make sure minted commands are not used. 
+ Also make sure minted specific attributes are indeed stripped. + + ``args`` (list of str) + The base list of arguments to forward to pandoc. + """ + def verify(test_name, md, attrs=[]): + """Verify minted and any strings in attrs not produced""" + output = run_pandoc(args + ["-t", "html5"], md) + ensure_not_present(test_name, "mint", output) + ensure_not_present(test_name, "fragile", output) + if attrs: + for a in attrs: + ensure_not_present(test_name, a, output) + # if `nil` is present, that likely means a problem parsing the metadata + ensure_not_present(test_name, "nil", output) + + verify(r"[html] no \begin{minted}", code_block) + verify(r"[html] no \mintinline", inline_code) + verify( + r"[html] no \begin{minted} or \mintinline", + "{code_block}\n\n{inline_code}".format( + code_block=code_block, inline_code=inline_code + ) + ) + verify( + "[html] code block minted specific attributes stripped", + code_block.replace( + "{.cpp}", + "{.cpp .showspaces space=. bgcolor=minted_bg style=minted}" + ), + ["showspaces", "space", "bgcolor", "style"] + ) + verify( + "[html] inline code minted specific attributes stripped", + inline_code.replace( + "{.cpp}", + "{.cpp .showspaces space=. 
bgcolor=minted_bg style=minted}" + ), + ["showspaces", "space", "bgcolor", "style"] + ) + + +if __name__ == "__main__": + # Initial path setup for input tests and lua filter + this_file_dir = os.path.abspath(os.path.dirname(__file__)) + minted_lua = os.path.join(this_file_dir, "minted.lua") + if not os.path.isfile(minted_lua): + sys.stderr.write("Cannot find '{minted_lua}'...".format( + minted_lua=minted_lua + )) + sys.exit(1) + + args = ["--fail-if-warnings", "--no-highlight", "--lua-filter", minted_lua] + run_tex_tests(args, "beamer") + run_tex_tests(args, "latex") + run_html_tests(args) diff --git a/paper/lua-filters/minted/sample.md b/paper/lua-filters/minted/sample.md new file mode 100644 index 0000000..7197047 --- /dev/null +++ b/paper/lua-filters/minted/sample.md @@ -0,0 +1,135 @@ +--- +title: Pandoc Minted Sample +# NOTE: If you want to use `\definecolor` commands in your `header-includes` +# section, setting `colorlinks: true` will `\usepackage{xcolor}` which is needed +# for `\definecolor`. You can alternatively `\usepackage{xcolor}` explicitly in +# in the `header-includes` section if you do not want everything else that +# `colorlinks: true` will bring in. See `pandoc -D latex` output to see +# everything that `colorlinks: true` will do _in addition_ to including xcolor. +colorlinks: true +header-includes: + # Include the minted package, set global style, define colors, etc. + - "\\usepackage{minted}" + - "\\usemintedstyle{tango}" + - "\\definecolor{tango_bg}{rgb}{0.9725,0.9725,0.9725}" + - "\\definecolor{monokai_bg}{rgb}{0.1529,0.1569,0.1333}" + # NOTE: comment out these last three and recompile to see the italics used + # by default for the `tango` style. + # Prevent italics in the `minted` environment. + - "\\AtBeginEnvironment{minted}{\\let\\itshape\\relax}" + # Prevent italics in the `\mintinline` command. 
+ - "\\usepackage{xpatch}" + - "`\\xpatchcmd{\\mintinline}{\\begingroup}{\\begingroup\\let\\itshape\\relax}{}{}`{=latex}" +minted: + block_attributes: + - "bgcolor=tango_bg" +--- + +## Inline Code in Pandoc + +- Raw inline code: + + ```md + `#include <type_traits>` + ``` + + \vspace*{-3ex} produces: `#include <type_traits>` + +- Apply just a lexer: + + ```md + `#include <type_traits>`{.cpp} + ``` + + \vspace*{-3ex} produces: `#include <type_traits>`{.cpp} + +- Change the background color and highlighting style: + + ```{.md fontsize=\scriptsize} + <!-- Note: we defined monokai_bg in the metadata! --> + `#include <type_traits>`{.cpp bgcolor=monokai_bg style=monokai} + ``` + + \vspace*{-3ex} produces: + `#include <type_traits>`{.cpp bgcolor=monokai_bg style=monokai} + + - Must **always** include language (`.cpp` here) **first**, always! + +## Inline Code Bypasses + +- Want the regular teletype text? Specify **both** the lexer class name and one + additional class `.no_minted`. + + ```{.md} + <!-- The "text lexer" --> + `no minted`{.text .no_minted} + ``` + + \vspace*{-3ex} produces: `no mintinline`{.text .no_minted} vs `with mintinline` + + - Inspect generated code, the PDF output is indistinguishable. + +- Alternatively, you can set `no_mintinline: true`{.yaml style=paraiso-light} to prevent the filter + from emitting _any_ `\mintinline`{.latex} calls. + - If you don't need syntax highlighting on your inline code elements, this may + greatly improve compile times for large documents. + + +## Code Blocks + +- Use the defaults, but still supply the lexer: + + ```bash + echo "Hi there" # How are you? + ``` + + \vspace*{-3ex} produces + + ```bash + echo "Hi there" # How are you? + ``` + + \vspace*{-3ex} + +- As with inline code, you can change whatever you want: + + ```{.bash bgcolor=monokai_bg style=monokai} + echo "Hi there" # How are you? + ``` + + \vspace*{-3ex} produces + + ```{.bash bgcolor=monokai_bg style=monokai} + echo "Hi there" # How are you? 
+ ``` + + \vspace*{-3ex} + + - Must **always** include language (`.bash` here) **first**, always! + + +## Special Characters are Supported + +- Code blocks: + + ```md + `~!@#$%^&*()-=_+[]}{|;':",.\/<>? + ``` + + \vspace*{-3ex} + +- Inline code + + ``with mintinline `~!@#$%^&*()-=_+[]}{|;':",.\/<>?`` + + Note: If you use almost all special characters *and* all alphanumeric + characters in a single inline code fragment, minted may not be able to find a + suitable delimiter to place around the \LaTeX\ inline command. + +- Inline code with bypass + + ``no mintinline `~!@#$%^&*()-=_+[]}{|;':",.\/<>?``{.text .no_minted} + +- Specific lexer with mintinline: `auto foo = [](){};`{.cpp} +- Without mintinline: `auto foo = [](){};`{.cpp .no_minted} + - Output color depends on `--no-highlight` flag for `pandoc`. diff --git a/paper/lua-filters/multiple-bibliographies/Makefile b/paper/lua-filters/multiple-bibliographies/Makefile new file mode 100644 index 0000000..a42ce28 --- /dev/null +++ b/paper/lua-filters/multiple-bibliographies/Makefile @@ -0,0 +1,6 @@ +test: sample.md multiple-bibliographies.lua + @pandoc --lua-filter=multiple-bibliographies.lua \ + --standalone --to=native $< 2>/dev/null \ + | diff -u - expected.native + +.PHONY: test diff --git a/paper/lua-filters/multiple-bibliographies/README.md b/paper/lua-filters/multiple-bibliographies/README.md new file mode 100644 index 0000000..0111ee6 --- /dev/null +++ b/paper/lua-filters/multiple-bibliographies/README.md @@ -0,0 +1,33 @@ +# multiple-bibliographies + +This filter allows to create multiple bibliographies using +`pandoc-citeproc`. The content of each bibliography is controlled +via YAML values and the file in which a bibliographic entry is +specified. + +## Usage + +Instead of using the usual *bibliography* metadata field, all +bibliographies must be defined via a separate field of the scheme +*bibliographyX*, e.g. 
+ + --- + bibliography_main: main-bibliography.bib + bibliography_software: software.bib + --- + +The placement of bibliographies is controlled via special divs. + + # References + + ::: {#refs_main} + ::: + + # Software + + ::: {#refs_software} + ::: + +Each refsX div should have a matching bibliographyX entry in the +header. These divs are filled with citations from the respective +bib-file. diff --git a/paper/lua-filters/multiple-bibliographies/expected.native b/paper/lua-filters/multiple-bibliographies/expected.native new file mode 100644 index 0000000..a7ead12 --- /dev/null +++ b/paper/lua-filters/multiple-bibliographies/expected.native @@ -0,0 +1,14 @@ +Pandoc (Meta {unMeta = fromList [("bibliography_recommended_reading",MetaInlines [Str "secondary.bib"]),("bibliography_sources",MetaInlines [Str "primary.bib"]),("nocite",MetaInlines [Cite [Citation {citationId = "Knu86", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@Knu86"],Str ",",Space,Cite [Citation {citationId = "Bae", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@Bae"]]),("title",MetaInlines [Str "Multiple",Space,Str "Bibliographies",Space,Str "Demo"])]}) +[Para [Cite [Citation {citationId = "Nie72", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 1}] [Str "Nietzsche",Space,Str "(1872)"],Str ",",Space,Cite [Citation {citationId = "Bel", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 2}] [Str "Bellori",Space,Str "(1672)"]] +,Header 1 ("references",[],[]) [Str "References"] +,Div ("refs_sources",[],[]) + [Div ("ref-Bel",[],[]) + [Para [Str "Bellori.",Space,Str "1672.",Space,Emph [Str "Le",Space,Str "Vite",Space,Str "de\8217",Space,Str "Pittori,",Space,Str "Scultori",Space,Str "E",Space,Str "Architetti",Space,Str "Moderni"],Str "."]] + ,Div 
("ref-Nie72",[],[]) + [Para [Str "Nietzsche,",Space,Str "Friedrich.",Space,Str "1872.",Space,Emph [Str "Die",Space,Str "Geburt",Space,Str "Der",Space,Str "Trag\246die",Space,Str "Aus",Space,Str "Dem",Space,Str "Geiste",Space,Str "Der",Space,Str "Musik"],Str "."]]] +,Header 1 ("recommended-reading",[],[]) [Str "Recommended",Space,Str "Reading"] +,Div ("refs_recommended_reading",[],[]) + [Div ("ref-Bae",[],[]) + [Para [Str "B\228tschmann,",Space,Str "Oskar.",Space,Str "1985.",Space,Emph [Str "Pygmalion",Space,Str "Als",Space,Str "Betrachter"],Str "."]] + ,Div ("ref-Knu86",[],[]) + [Para [Str "Knuth,",Space,Str "Donald",Space,Str "E.",Space,Str "1986.",Space,Emph [Str "The",Space,Str "Texbook"],Str "."]]]] diff --git a/paper/lua-filters/multiple-bibliographies/multiple-bibliographies.lua b/paper/lua-filters/multiple-bibliographies/multiple-bibliographies.lua new file mode 100644 index 0000000..934e6ea --- /dev/null +++ b/paper/lua-filters/multiple-bibliographies/multiple-bibliographies.lua @@ -0,0 +1,110 @@ +--[[ +multiple-bibliographies – create multiple bibliographies + +Copyright © 2018-2019 Albert Krewinkel + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+]]
+local List = require 'pandoc.List'
+local utils = require 'pandoc.utils'
+local stringify = utils.stringify
+local run_json_filter = utils.run_json_filter
+
+--- Collection of all cites in the document
+local all_cites = {}
+--- Document meta value
+local doc_meta = pandoc.Meta{}
+
+--- Div used by pandoc-citeproc to insert the bibliography.
+local refs_div = pandoc.Div({}, pandoc.Attr('refs'))
+
+--- Whether `-q` may be passed to pandoc-citeproc. The check below accepts
+-- every version from 0.14.5 onwards.
+-- The version is parsed from the output of `pandoc-citeproc --version`. A
+-- missing patch component counts as 0, and a version string that cannot be
+-- parsed yields `false` instead of raising a nil comparison error.
+local supports_quiet_flag = (function ()
+  local version = pandoc.pipe('pandoc-citeproc', {'--version'}, '')
+  local major, minor, patch = version:match 'pandoc%-citeproc (%d+)%.(%d+)%.?(%d*)'
+  -- `tonumber('')` is nil, so default the optional patch capture to 0.
+  major, minor, patch = tonumber(major), tonumber(minor), tonumber(patch) or 0
+  if not (major and minor) then
+    return false
+  end
+  return major > 0
+    or minor > 14
+    or (minor == 14 and patch >= 5)
+end)()
+
+--- Resolve citations in the document by combining all bibliographies
+-- before running pandoc-citeproc on the full document.
+local function resolve_doc_citations (doc)
+  -- combine all bibliographies
+  local meta = doc.meta
+  local orig_bib = meta.bibliography
+  meta.bibliography = pandoc.MetaList{orig_bib}
+  for name, value in pairs(meta) do
+    if name:match('^bibliography_') then
+      table.insert(meta.bibliography, value)
+    end
+  end
+  -- add dummy div to catch the created bibliography
+  table.insert(doc.blocks, refs_div)
+  -- resolve all citations
+  doc = run_json_filter(doc, 'pandoc-citeproc')
+  -- remove catch-all bibliography
+  table.remove(doc.blocks)
+  -- restore bibliography to original value
+  doc.meta.bibliography = orig_bib
+  return doc
+end
+
+--- Explicitly create a new meta object with all fields relevant for
+--- pandoc-citeproc.
+local function meta_for_pandoc_citeproc (bibliography)
+  -- We could just indiscriminately copy all meta fields, but let's be
+  -- explicit about what's important.
+ local fields = { + 'bibliography', 'references', 'csl', 'citation-style', + 'link-citations', 'citation-abbreviations', 'lang', + 'suppress-bibliography', 'reference-section-title', + 'notes-after-punctuation', 'nocite' + } + local new_meta = pandoc.Meta{} + for _, field in ipairs(fields) do + new_meta[field] = doc_meta[field] + end + new_meta.bibliography = bibliography + return new_meta +end + +--- Create a bibliography for a given topic. This acts on all divs whose +-- ID starts with "refs", followed by nothing but underscores and +-- alphanumeric characters. +local function create_topic_bibliography (div) + local name = div.identifier:match('^refs([_%w]*)$') + local bibfile = name and doc_meta['bibliography' .. name] + if not bibfile then + return nil + end + local tmp_blocks = {pandoc.Para(all_cites), refs_div} + local tmp_meta = meta_for_pandoc_citeproc(bibfile) + local tmp_doc = pandoc.Pandoc(tmp_blocks, tmp_meta) + local filter_args = {FORMAT, supports_quiet_flag and '-q' or nil} + local res = run_json_filter(tmp_doc, 'pandoc-citeproc', filter_args) + -- First block of the result contains the dummy paragraph, second is + -- the refs Div filled by pandoc-citeproc. + div.content = res.blocks[2].content + return div +end + +return { + { + -- Collect all citations and the doc's Meta value for other filters. + Cite = function (c) all_cites[#all_cites + 1] = c end, + Meta = function (m) doc_meta = m end, + }, + { Pandoc = resolve_doc_citations }, + { Div = create_topic_bibliography }, +} diff --git a/paper/lua-filters/multiple-bibliographies/primary.bib b/paper/lua-filters/multiple-bibliographies/primary.bib new file mode 100644 index 0000000..8c9decc --- /dev/null +++ b/paper/lua-filters/multiple-bibliographies/primary.bib @@ -0,0 +1,10 @@ +@book{Bel,
+ author = {Bellori},
+ title = {Le vite de' pittori, scultori e architetti moderni},
+ year = {1672},
+}
+@book{Nie72,
+ author = {Nietzsche, Friedrich},
+ title = {Die Geburt der Tragödie aus dem Geiste der Musik},
+ year = {1872},
+}
diff --git a/paper/lua-filters/multiple-bibliographies/sample.md b/paper/lua-filters/multiple-bibliographies/sample.md new file mode 100644 index 0000000..d4ab2eb --- /dev/null +++ b/paper/lua-filters/multiple-bibliographies/sample.md @@ -0,0 +1,17 @@ +--- +title: Multiple Bibliographies Demo +bibliography_sources: primary.bib +bibliography_recommended_reading: secondary.bib +nocite: '@Knu86, @Bae' +--- +@Nie72, @Bel + +# References + +::: {#refs_sources} +::: + +# Recommended Reading + +::: {#refs_recommended_reading} +::: diff --git a/paper/lua-filters/multiple-bibliographies/secondary.bib b/paper/lua-filters/multiple-bibliographies/secondary.bib new file mode 100644 index 0000000..45e6306 --- /dev/null +++ b/paper/lua-filters/multiple-bibliographies/secondary.bib @@ -0,0 +1,10 @@ +@book{Bae,
+ author = {Bätschmann, Oskar},
+ title = {Pygmalion als Betrachter},
+ year = {1985}
+}
+@book{Knu86,
+ author = {Knuth, Donald E.},
+ year = {1986},
+ title = {The \TeX book},
+}
diff --git a/paper/lua-filters/pagebreak/Makefile b/paper/lua-filters/pagebreak/Makefile new file mode 100644 index 0000000..c8786b0 --- /dev/null +++ b/paper/lua-filters/pagebreak/Makefile @@ -0,0 +1,4 @@ +test: + @pandoc --lua-filter=pagebreak.lua sample.md | diff -u expected.html - + +.PHONY: test diff --git a/paper/lua-filters/pagebreak/README.md b/paper/lua-filters/pagebreak/README.md new file mode 100644 index 0000000..b9a5e04 --- /dev/null +++ b/paper/lua-filters/pagebreak/README.md @@ -0,0 +1,68 @@ +pagebreak +========= + +This filter converts paragraphs containing only the LaTeX +`\newpage` or `\pagebreak` command into appropriate pagebreak +markup for other formats. The command must be the only contents +of a raw TeX block in order to be recognized. I.e., for Markdown +the following is sufficient: + + Paragraph before page break + + \newpage + + Paragraph after page break + + +Usage +----- + +Fully supported output formats are: + +- Docx, +- LaTeX, +- HTML, and +- EPUB. + +ODT is supported, but requires additional settings in the +reference document (see below). + +In all other formats, the page break is represented using the +form feed character. + + +### Usage with HTML +If you want to use an HTML class rather than an inline style set +the value of the metadata key `newpage_html_class` or the +environment variable `PANDOC_NEWPAGE_HTML_CLASS` (the metadata +'wins' if both are defined) to the name of the class and use CSS +like this: + + @media all { + .page-break { display: none; } + } + @media print { + .page-break { display: block; page-break-after: always; } + } + + +### Usage with ODT + +To use with ODT you must create a reference ODT with a named +paragraph style called `Pagebreak` (or whatever you set the +metadata field `newpage_odt_style` or the environment variable +`PANDOC_NEWPAGE_ODT_STYLE` to) and define it as having no extra +space before or after but set it to have a pagebreak after it +<https://help.libreoffice.org/Writer/Text_Flow>. 
+ +(There will be an empty dummy paragraph, which means some extra +vertical space, and you probably want that space to go at the +bottom of the page before the break rather than at the top of +the page after the break!) + + +Alternative syntax +------------------ + +The form feed character as the only element in a paragraph is +supported as an alternative to the LaTeX syntax described above. diff --git a/paper/lua-filters/pagebreak/expected.html b/paper/lua-filters/pagebreak/expected.html new file mode 100644 index 0000000..7998826 --- /dev/null +++ b/paper/lua-filters/pagebreak/expected.html @@ -0,0 +1,6 @@ +<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec hendrerit tempor tellus. Donec pretium posuere tellus.</p> +<div style="page-break-after: always;"></div> +<p>Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nulla posuere. Donec vitae dolor.</p> +<div style="page-break-after: always;"></div> +<p>Pellentesque dapibus suscipit ligula. Donec posuere augue in quam. Suspendisse potenti.</p> +<p>Final paragraph without a preceding pagebreak.</p> diff --git a/paper/lua-filters/pagebreak/pagebreak.lua b/paper/lua-filters/pagebreak/pagebreak.lua new file mode 100644 index 0000000..4c00698 --- /dev/null +++ b/paper/lua-filters/pagebreak/pagebreak.lua @@ -0,0 +1,97 @@ +--[[ +pagebreak – convert raw LaTeX page breaks to other formats + +Copyright © 2017-2019 Benct Philip Jonsson, Albert Krewinkel + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +]] +local stringify_orig = (require 'pandoc.utils').stringify + +local function stringify(x) + return type(x) == 'string' and x or stringify_orig(x) +end + +--- configs – these are populated in the Meta filter. +local pagebreak = { + epub = '<p style="page-break-after: always;"> </p>', + html = '<div style="page-break-after: always;"></div>', + latex = '\\newpage{}', + ooxml = '<w:p><w:r><w:br w:type="page"/></w:r></w:p>', +} + +local function pagebreaks_from_config (meta) + local html_class = + (meta.newpage_html_class and stringify(meta.newpage_html_class)) + or os.getenv 'PANDOC_NEWPAGE_HTML_CLASS' + if html_class and html_class ~= '' then + pagebreak.html = string.format('<div class="%s"></div>', html_class) + end + + local odt_style = + (meta.newpage_odt_style and stringify(meta.newpage_odt_style)) + or os.getenv 'PANDOC_NEWPAGE_ODT_STYLE' + if odt_style and odt_style ~= '' then + pagebreak.odt = string.format('<text:p text:style-name="%s"/>', odt_style) + end +end + +--- Return a block element causing a page break in the given format. 
+local function newpage(format) + if format == 'docx' then + return pandoc.RawBlock('openxml', pagebreak.ooxml) + elseif format:match 'latex' then + return pandoc.RawBlock('tex', pagebreak.latex) + elseif format:match 'odt' then + -- `pagebreak.odt` is only set when a style name was configured; fall + -- back to the `Pagebreak` paragraph style otherwise. + return pandoc.RawBlock('opendocument', pagebreak.odt or '<text:p text:style-name="Pagebreak"/>') + elseif format:match 'html.*' then + return pandoc.RawBlock('html', pagebreak.html) + elseif format:match 'epub' then + return pandoc.RawBlock('html', pagebreak.epub) + else + -- fall back to insert a form feed character + return pandoc.Para{pandoc.Str '\f'} + end +end + +local function is_newpage_command(command) + return command:match '^\\newpage%{?%}?$' + or command:match '^\\pagebreak%{?%}?$' +end + +-- Filter function called on each RawBlock element. +function RawBlock (el) + -- Don't do anything if the output is TeX + if FORMAT:match 'tex$' then + return nil + end + -- check that the block is TeX or LaTeX and contains only + -- \newpage or \pagebreak. + if el.format:match 'tex' and is_newpage_command(el.text) then + -- use format-specific pagebreak marker. FORMAT is set by pandoc to + -- the targeted output format. + return newpage(FORMAT) + end + -- otherwise, leave the block unchanged + return nil +end + +-- Turn paragraphs which contain nothing but a form feed +-- character into page breaks. +function Para (el) + if #el.content == 1 and el.content[1].text == '\f' then + return newpage(FORMAT) + end +end + +return { + {Meta = pagebreaks_from_config}, + {RawBlock = RawBlock, Para = Para} +} diff --git a/paper/lua-filters/pagebreak/sample.md b/paper/lua-filters/pagebreak/sample.md new file mode 100644 index 0000000..dc49ce1 --- /dev/null +++ b/paper/lua-filters/pagebreak/sample.md @@ -0,0 +1,14 @@ +Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec +hendrerit tempor tellus. Donec pretium posuere tellus. + +\newpage + +Cum sociis natoque penatibus et magnis dis parturient montes, +nascetur ridiculus mus. Nulla posuere. Donec vitae dolor. + + + +Pellentesque dapibus suscipit ligula. Donec posuere augue in +quam. Suspendisse potenti. 
+ +Final paragraph without a preceding pagebreak. diff --git a/paper/lua-filters/plantuml/Makefile b/paper/lua-filters/plantuml/Makefile new file mode 100644 index 0000000..feff6f4 --- /dev/null +++ b/paper/lua-filters/plantuml/Makefile @@ -0,0 +1,3 @@ +test: + @pandoc --self-contained --lua-filter=plantuml.lua --metadata title=README readme.md -o output.html + diff --git a/paper/lua-filters/plantuml/output.html b/paper/lua-filters/plantuml/output.html new file mode 100644 index 0000000..67c4b58 --- /dev/null +++ b/paper/lua-filters/plantuml/output.html @@ -0,0 +1,45 @@ +<!DOCTYPE html> +<html xmlns="http://www.w3.org/1999/xhtml" lang xml:lang> +<head> + <meta charset="utf-8" /> + <meta name="generator" content="pandoc" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" /> + <title>readme</title> + <style type="text/css"> + code{white-space: pre-wrap;} + span.smallcaps{font-variant: small-caps;} + span.underline{text-decoration: underline;} + div.column{display: inline-block; vertical-align: top; width: 50%;} + </style> + <!--[if lt IE 9]> + <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script> + <![endif]--> +</head> +<body> +<h1 id="plantuml-pandoc-filter">PlantUML Pandoc filter</h1> +<p>PlantUML Pandoc filter to process code blocks with class “plantuml” containing PlantUML notation into images.</p> +<ul> +<li>For textual output formats, use –extract-media=DIR</li> +<li>For HTML formats, you may alternatively use –self-contained</li> +</ul> +<h2 id="example-in-markdown-file">Example in markdown-file</h2> +<p><img 
src="data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiBjb250ZW50U2NyaXB0VHlwZT0iYXBwbGljYXRpb24vZWNtYXNjcmlwdCIgY29udGVudFN0eWxlVHlwZT0idGV4dC9jc3MiIGhlaWdodD0iMTYxcHgiIHByZXNlcnZlQXNwZWN0UmF0aW89Im5vbmUiIHN0eWxlPSJ3aWR0aDo1NDRweDtoZWlnaHQ6MTYxcHg7IiB2ZXJzaW9uPSIxLjEiIHZpZXdCb3g9IjAgMCA1NDQgMTYxIiB3aWR0aD0iNTQ0cHgiIHpvb21BbmRQYW49Im1hZ25pZnkiPjxkZWZzPjxmaWx0ZXIgaGVpZ2h0PSIzMDAlIiBpZD0iZnE3c29pNjk5dWp3ayIgd2lkdGg9IjMwMCUiIHg9Ii0xIiB5PSItMSI+PGZlR2F1c3NpYW5CbHVyIHJlc3VsdD0iYmx1ck91dCIgc3RkRGV2aWF0aW9uPSIyLjAiLz48ZmVDb2xvck1hdHJpeCBpbj0iYmx1ck91dCIgcmVzdWx0PSJibHVyT3V0MiIgdHlwZT0ibWF0cml4IiB2YWx1ZXM9IjAgMCAwIDAgMCAwIDAgMCAwIDAgMCAwIDAgMCAwIDAgMCAwIC40IDAiLz48ZmVPZmZzZXQgZHg9IjQuMCIgZHk9IjQuMCIgaW49ImJsdXJPdXQyIiByZXN1bHQ9ImJsdXJPdXQzIi8+PGZlQmxlbmQgaW49IlNvdXJjZUdyYXBoaWMiIGluMj0iYmx1ck91dDMiIG1vZGU9Im5vcm1hbCIvPjwvZmlsdGVyPjwvZGVmcz48Zz48bGluZSBzdHlsZT0ic3Ryb2tlOiAjQTgwMDM2OyBzdHJva2Utd2lkdGg6IDEuMDsgc3Ryb2tlLWRhc2hhcnJheTogNS4wLDUuMDsiIHgxPSIzMiIgeDI9IjMyIiB5MT0iMzkuNjA5NCIgeTI9IjEyMC4zMTI1Ii8+PGxpbmUgc3R5bGU9InN0cm9rZTogI0E4MDAzNjsgc3Ryb2tlLXdpZHRoOiAxLjA7IHN0cm9rZS1kYXNoYXJyYXk6IDUuMCw1LjA7IiB4MT0iNTE1LjUiIHgyPSI1MTUuNSIgeTE9IjM5LjYwOTQiIHkyPSIxMjAuMzEyNSIvPjxyZWN0IGZpbGw9IiNGRUZFQ0UiIGZpbHRlcj0idXJsKCNmcTdzb2k2OTl1andrKSIgaGVpZ2h0PSIzMS42MDk0IiBzdHlsZT0ic3Ryb2tlOiAjQTgwMDM2OyBzdHJva2Utd2lkdGg6IDEuNTsiIHdpZHRoPSI0NCIgeD0iOCIgeT0iMyIvPjx0ZXh0IGZpbGw9IiMwMDAwMDAiIGZvbnQtZmFtaWx5PSJzYW5zLXNlcmlmIiBmb250LXNpemU9IjE0IiBsZW5ndGhBZGp1c3Q9InNwYWNpbmdBbmRHbHlwaHMiIHRleHRMZW5ndGg9IjMwIiB4PSIxNSIgeT0iMjQuNTMzMiI+QWxpY2U8L3RleHQ+PHJlY3QgZmlsbD0iI0ZFRkVDRSIgZmlsdGVyPSJ1cmwoI2ZxN3NvaTY5OXVqd2spIiBoZWlnaHQ9IjMxLjYwOTQiIHN0eWxlPSJzdHJva2U6ICNBODAwMzY7IHN0cm9rZS13aWR0aDogMS41OyIgd2lkdGg9IjQ0IiB4PSI4IiB5PSIxMTkuMzEyNSIvPjx0ZXh0IGZpbGw9IiMwMDAwMDAiIGZvbnQtZmFtaWx5PSJzYW5zLXNlcmlmIiBmb250LXNpemU9IjE0IiBsZW5ndGhBZGp1c3Q9InNwYWNpb
mdBbmRHbHlwaHMiIHRleHRMZW5ndGg9IjMwIiB4PSIxNSIgeT0iMTQwLjg0NTciPkFsaWNlPC90ZXh0PjxyZWN0IGZpbGw9IiNGRUZFQ0UiIGZpbHRlcj0idXJsKCNmcTdzb2k2OTl1andrKSIgaGVpZ2h0PSIzMS42MDk0IiBzdHlsZT0ic3Ryb2tlOiAjQTgwMDM2OyBzdHJva2Utd2lkdGg6IDEuNTsiIHdpZHRoPSIzOSIgeD0iNDk0LjUiIHk9IjMiLz48dGV4dCBmaWxsPSIjMDAwMDAwIiBmb250LWZhbWlseT0ic2Fucy1zZXJpZiIgZm9udC1zaXplPSIxNCIgbGVuZ3RoQWRqdXN0PSJzcGFjaW5nQW5kR2x5cGhzIiB0ZXh0TGVuZ3RoPSIyNSIgeD0iNTAxLjUiIHk9IjI0LjUzMzIiPkJvYjwvdGV4dD48cmVjdCBmaWxsPSIjRkVGRUNFIiBmaWx0ZXI9InVybCgjZnE3c29pNjk5dWp3aykiIGhlaWdodD0iMzEuNjA5NCIgc3R5bGU9InN0cm9rZTogI0E4MDAzNjsgc3Ryb2tlLXdpZHRoOiAxLjU7IiB3aWR0aD0iMzkiIHg9IjQ5NC41IiB5PSIxMTkuMzEyNSIvPjx0ZXh0IGZpbGw9IiMwMDAwMDAiIGZvbnQtZmFtaWx5PSJzYW5zLXNlcmlmIiBmb250LXNpemU9IjE0IiBsZW5ndGhBZGp1c3Q9InNwYWNpbmdBbmRHbHlwaHMiIHRleHRMZW5ndGg9IjI1IiB4PSI1MDEuNSIgeT0iMTQwLjg0NTciPkJvYjwvdGV4dD48cG9seWdvbiBmaWxsPSIjQTgwMDM2IiBwb2ludHM9IjUwNCw2Ny45NjA5LDUxNCw3MS45NjA5LDUwNCw3NS45NjA5LDUwOCw3MS45NjA5IiBzdHlsZT0ic3Ryb2tlOiAjQTgwMDM2OyBzdHJva2Utd2lkdGg6IDEuMDsiLz48bGluZSBzdHlsZT0ic3Ryb2tlOiAjQTgwMDM2OyBzdHJva2Utd2lkdGg6IDEuMDsiIHgxPSIzMiIgeDI9IjUxMCIgeTE9IjcxLjk2MDkiIHkyPSI3MS45NjA5Ii8+PHRleHQgZmlsbD0iIzAwMDAwMCIgZm9udC1mYW1pbHk9InNhbnMtc2VyaWYiIGZvbnQtc2l6ZT0iMTMiIGxlbmd0aEFkanVzdD0ic3BhY2luZ0FuZEdseXBocyIgdGV4dExlbmd0aD0iMzY4IiB4PSIzOSIgeT0iNjcuMTA0NSI+QXV0aGVudGljYXRpb24gUmVxdWVzdCBCb2IgLS0mZ3Q7IEFsaWNlOiBBdXRoZW50aWNhdGlvbiBSZXNwb25zZTwvdGV4dD48cG9seWdvbiBmaWxsPSIjQTgwMDM2IiBwb2ludHM9IjUwNCw5OC4zMTI1LDUxNCwxMDIuMzEyNSw1MDQsMTA2LjMxMjUsNTA4LDEwMi4zMTI1IiBzdHlsZT0ic3Ryb2tlOiAjQTgwMDM2OyBzdHJva2Utd2lkdGg6IDEuMDsiLz48bGluZSBzdHlsZT0ic3Ryb2tlOiAjQTgwMDM2OyBzdHJva2Utd2lkdGg6IDEuMDsiIHgxPSIzMiIgeDI9IjUxMCIgeTE9IjEwMi4zMTI1IiB5Mj0iMTAyLjMxMjUiLz48dGV4dCBmaWxsPSIjMDAwMDAwIiBmb250LWZhbWlseT0ic2Fucy1zZXJpZiIgZm9udC1zaXplPSIxMyIgbGVuZ3RoQWRqdXN0PSJzcGFjaW5nQW5kR2x5cGhzIiB0ZXh0TGVuZ3RoPSI0NjAiIHg9IjM5IiB5PSI5Ny40NTYxIj5Bbm90aGVyIGF1dGhlbnRpY2F0aW9uIFJlcXVlc3QgQWxpY2UgJmx0Oy0tIEJvYjogYW5vdGhlciBhdXRoZW50aWNhdGlvbiBSZXNwb25zZTwvdGV4dD48IS0tCkBzdGFyd
HVtbA0KQWxpY2UgLT4gQm9iOiBBdXRoZW50aWNhdGlvbiBSZXF1ZXN0IEJvYiAtIC0+IEFsaWNlOiBBdXRoZW50aWNhdGlvbiBSZXNwb25zZQ0KQWxpY2UgLT4gQm9iOiBBbm90aGVyIGF1dGhlbnRpY2F0aW9uIFJlcXVlc3QgQWxpY2UgPC0gLSBCb2I6IGFub3RoZXIgYXV0aGVudGljYXRpb24gUmVzcG9uc2UNCkBlbmR1bWwNCgpQbGFudFVNTCB2ZXJzaW9uIDEuMjAxOC4wOChTdW4gSnVuIDI0IDE0OjMxOjAwIENFU1QgMjAxOCkKKEdQTCBzb3VyY2UgZGlzdHJpYnV0aW9uKQpKYXZhIFJ1bnRpbWU6IEphdmEoVE0pIFNFIFJ1bnRpbWUgRW52aXJvbm1lbnQKSlZNOiBKYXZhIEhvdFNwb3QoVE0pIDY0LUJpdCBTZXJ2ZXIgVk0KSmF2YSBWZXJzaW9uOiAxLjguMF8xNzItYjExCk9wZXJhdGluZyBTeXN0ZW06IFdpbmRvd3MgMTAKT1MgVmVyc2lvbjogMTAuMApEZWZhdWx0IEVuY29kaW5nOiBDcDEyNTIKTGFuZ3VhZ2U6IGRlCkNvdW50cnk6IERFCi0tPjwvZz48L3N2Zz4=" alt="PlantUML Diagramm" /></p> +<h2 id="run-pandoc">Run pandoc</h2> +<pre><code>pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm</code></pre> +<h2 id="prerequisites">Prerequisites</h2> +<ul> +<li>download PlantUML from http://plantuml.com (needs JAVA)</li> +<li>3 ways to set the environment +<ol type="1"> +<li>plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder</li> +<li>set a Environment Variable PLANTUML with the path to plantuml.jar +<ul> +<li>Windows - with powershell: Set-Item env:PLANTUML “c:.jar”</li> +</ul></li> +<li>change path to plantuml.jar in plantuml.lua</li> +</ol></li> +</ul> +<p>This script based on the example “Converting ABC code to music notation” from https://pandoc.org/lua-filters.html</p> +<p>This script was only tested with markdown to html on a windows environment!</p> +</body> +</html> diff --git a/paper/lua-filters/plantuml/plantuml.lua b/paper/lua-filters/plantuml/plantuml.lua new file mode 100644 index 0000000..7903e6a --- /dev/null +++ b/paper/lua-filters/plantuml/plantuml.lua @@ -0,0 +1,56 @@ +--[[ +# PlantUML Pandoc filter +PlantUML Pandoc filter to process code blocks with class "plantuml" containing PlantUML notation into images. 
+ +* For textual output formats, use --extract-media=DIR +* For HTML formats, you may alternatively use --self-contained + +## Example in markdown-file +```plantuml +@startuml +Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response +Alice -> Bob: Another authentication Request Alice <-- Bob: another authentication Response @enduml +``` +## Run pandoc +``` +pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm +``` + +## Prerequisites +* download PlantUML from http://plantuml.com (needs JAVA) +* 3 ways to set the environment + 1. plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder + 2. set a Environment Variable PLANTUML with the path to plantuml.jar + * Windows - with powershell: Set-Item env:PLANTUML "c:\bin\plantuml.jar" + 3. change path to plantuml.jar in plantuml.lua + +This script based on the example "Converting ABC code to music notation" from https://pandoc.org/lua-filters.html +**This script was only tested with markdown to html on a windows environment!** +]] + +-- Path to PlantUML.jar +-- if you use option 3 change the path to plantuml.jar like this: +-- local plantumlPath = os.getenv("PLANTUML") or "c:\\bin\\plantuml.jar" +local plantumlPath = os.getenv("PLANTUML") or "plantuml.jar" + +-- SVG has a much better quality +-- local filetype = "png" +-- local mimetype = "image/png" +local filetype = "svg" +local mimetype = "image/svg+xml" + +-- call plantuml.jar with some parameters (see plantuml help) +local function plantuml(puml, filetype, plantumlPath) + local final = pandoc.pipe("java", {"-jar", plantumlPath, "-t" .. filetype, "-pipe", "-charset", "UTF8"}, puml) + return final +end + +-- search for class "plantuml" and replace with image +function CodeBlock(block) + if block.classes[1] == "plantuml" then + local img = plantuml(block.text, filetype, plantumlPath) + local fname = pandoc.sha1(img) .. "." .. 
filetype + pandoc.mediabag.insert(fname, mimetype, img) + return pandoc.Para{ pandoc.Image({pandoc.Str("PlantUML Diagramm")}, fname) } + end +end diff --git a/paper/lua-filters/plantuml/readme.md b/paper/lua-filters/plantuml/readme.md new file mode 100644 index 0000000..de5ba74 --- /dev/null +++ b/paper/lua-filters/plantuml/readme.md @@ -0,0 +1,30 @@ +# PlantUML Pandoc filter +PlantUML Pandoc filter to process code blocks with class "plantuml" containing PlantUML notation into images. + +* For textual output formats, use --extract-media=DIR +* For HTML formats, you may alternatively use --self-contained + +## Example in markdown-file +```plantuml +@startuml +Alice -> Bob: Authentication Request Bob --> Alice: Authentication Response +Alice -> Bob: Another authentication Request Alice <-- Bob: another authentication Response +@enduml +``` +## Run pandoc +``` +pandoc --self-contained --lua-filter=plantuml.lua readme.md -o output.htm +``` + +## Prerequisites +* download PlantUML from http://plantuml.com (needs JAVA) +* 3 ways to set the environment + 1. plantuml.lua and plantuml.jar in the same folder and start pandoc always from this folder + 2. set a Environment Variable PLANTUML with the path to plantuml.jar + * Windows - with powershell: Set-Item env:PLANTUML "c:\bin\plantuml.jar" + 3. change path to plantuml.jar in plantuml.lua + + +This script based on the example "Converting ABC code to music notation" from https://pandoc.org/lua-filters.html + +This script was only tested with markdown to html on a windows environment! diff --git a/paper/lua-filters/runtests.sh b/paper/lua-filters/runtests.sh new file mode 100755 index 0000000..bbcbbba --- /dev/null +++ b/paper/lua-filters/runtests.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# argument is list of filters + +FILTERS=$* +let err=0 +for d in $FILTERS ; do + make --no-print-directory -C $d test + if [ $? 
-eq 0 ]; then + echo "PASS $d" + else + echo "FAIL $d" + err=1 + fi +done +exit $err + diff --git a/paper/lua-filters/scholarly-metadata/Makefile b/paper/lua-filters/scholarly-metadata/Makefile new file mode 100644 index 0000000..fadf7dd --- /dev/null +++ b/paper/lua-filters/scholarly-metadata/Makefile @@ -0,0 +1,8 @@ +test: sample.md scholarly-metadata.lua + @pandoc --lua-filter=scholarly-metadata.lua --standalone --to=markdown $< \ + | diff -u expected.md - + +expected.md: sample.md scholarly-metadata.lua + pandoc --lua-filter=scholarly-metadata.lua --standalone --output $@ $< + +.PHONY: test diff --git a/paper/lua-filters/scholarly-metadata/README.md b/paper/lua-filters/scholarly-metadata/README.md new file mode 100644 index 0000000..7fb1d2a --- /dev/null +++ b/paper/lua-filters/scholarly-metadata/README.md @@ -0,0 +1,91 @@ +# scholarly-metadata + +The filter turns metadata entries for authors and their +affiliations into a canonical form. This allows users to +conveniently declare document authors and their affiliations, +while making it possible to rely on default object metadata +structures when using the data in other filters or when accessing +the data from custom templates. + + +## Canonical format for authors and affiliations + +Authors and affiliations entries are treated as *named objects*. +All named objects will have an ID and a name, i.e. they are +metadata objects with *at least* those two keys: + + - id: namedObjectExample + name: Example for a named object. + +The filter converts the *author* and *institute* metadata fields +into lists of named objects. + +E.g., the following YAML data + + author: + - Jane Doe: + email: 'jane.doe@example.edu' + - John Q. Doe + + +will be transformed into + + author: + - email: 'jane.doe\@example.edu' + id: Jane Doe + name: Jane Doe + - id: 'John Q. Doe' + name: 'John Q. Doe' + +Internally, `id` will be a simple string, while `name` is of type +`MetaInlines`. 
+ + +## Referencing affiliations + +Author affiliations are a common feature of scholarly +publications. It is possible to add institutes to each author +object. Three methods of doing this are supported. + +1. **Referencing institutes by list index**: affiliations can be + listed in the *institute* metadata field and then referenced + by using the numerical index: + + institute: + - Acme Corporation + - Federation of Planets + author: + - Jane Doe: + institute: [1, 2] + - John Q. Doe: + institute: [2] + + This is also the canonical representation used to keep track + of author affiliations. + +2. **Referencing institutes by ID**: using numerical indices is + error prone and difficult to maintain when adding or removing + authors or affiliations. It is hence possible to use IDs + instead: + + institute: + - acme: Acme Corporation + - federation: Federation of Planets + author: + - Jane Doe: + institute: [acme, federation] + - John Q. Doe: + institute: [federation] + +3. **Adding institute as an attribute**: sometimes it might be + more convenient to give an affiliation directly in the + author's YAML object. Those objects can still be referenced + by ID from authors listed below such entry. + + author: + - Jane Doe: + institute: + - Acme Corporation + - federation: Federation of Planets + - John Q. Doe: + institute: [federation] diff --git a/paper/lua-filters/scholarly-metadata/expected.md b/paper/lua-filters/scholarly-metadata/expected.md new file mode 100644 index 0000000..353436f --- /dev/null +++ b/paper/lua-filters/scholarly-metadata/expected.md @@ -0,0 +1,41 @@ +--- +author: +- id: Jane Doe + institute: + - 1 + - 2 + name: Jane Doe +- id: 'John Q. Doe' + institute: + - 1 + name: 'John Q. 
Doe' +- id: Peder Ås + institute: + - 1 + name: Peder Ås +- id: Juan Pérez + institute: + - 3 + name: Juan Pérez +- id: Max Mustermann + name: Max Mustermann +institute: +- address: '23 Science Street, Eureka, Mississippi, USA' + id: fosg + name: Formatting Open Science Group +- id: fop + name: Federation of Planets +- id: Acme Corporation + name: Acme Corporation +--- + +Abstract +======== + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod +tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim +veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea +commodo consequat. Duis aute irure dolor in reprehenderit in voluptate +velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint +occaecat cupidatat non proident, sunt in culpa qui officia deserunt +mollit anim id est laborum. diff --git a/paper/lua-filters/scholarly-metadata/sample.md b/paper/lua-filters/scholarly-metadata/sample.md new file mode 100644 index 0000000..855272e --- /dev/null +++ b/paper/lua-filters/scholarly-metadata/sample.md @@ -0,0 +1,30 @@ +--- +author: + - Jane Doe: + institute: + - fosg + - fop + - John Q. Doe: + institute: fosg + - Peder Ås: + institute: fosg + - Juan Pérez: + institute: + - name: Acme Corporation + - Max Mustermann +institute: + - fosg: + name: Formatting Open Science Group + address: 23 Science Street, Eureka, Mississippi, USA + - fop: Federation of Planets +... + +# Abstract + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut +enim ad minim veniam, quis nostrud exercitation ullamco laboris +nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in +reprehenderit in voluptate velit esse cillum dolore eu fugiat +nulla pariatur. Excepteur sint occaecat cupidatat non proident, +sunt in culpa qui officia deserunt mollit anim id est laborum. 
diff --git a/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua b/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua new file mode 100644 index 0000000..3ec529c --- /dev/null +++ b/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua @@ -0,0 +1,180 @@ +--[[ +ScholarlyMeta – normalize author/affiliation meta variables + +Copyright (c) 2017-2019 Albert Krewinkel, Robert Winkler + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +]] +local List = require 'pandoc.List' + +-- Split a string at commas. +local function comma_separated_values(str) + local acc = List:new{} + for substr in str:gmatch('([^,]*)') do + acc[#acc + 1] = substr:gsub('^%s*', ''):gsub('%s*$', '') -- trim + end + return acc +end + +--- Ensure the return value is a list. +local function ensure_list (val) + if type(val) ~= 'table' then + -- create singleton list (or empty list if val == nil). 
+ return List:new{val} + elseif val.t == 'MetaInlines' then + -- check if this is really a comma-separated list + local csv = comma_separated_values(pandoc.utils.stringify(val)) + if #csv >= 2 then + return csv + end + return List:new{val} + elseif val.t == 'MetaList' then + return List:new(val) + else + -- MetaBlocks or MetaMap, use as a singleton + return List:new{val} + end +end + +--- Returns a function which checks whether an object has the given ID. +local function has_id (id) + return function(x) return x.id == id end +end + +--- Copy all key-value pairs of the first table into the second iff there is no +-- such key yet in the second table. +-- @returns the second argument +function add_missing_entries(a, b) + for k, v in pairs(a) do + b[k] = b[k] or v + end + return b +end + +--- Create an object with a name. Non-table values and inlines are used as the +-- name directly. Otherwise the name is taken from the `name` field, or from the +-- *only* field name (i.e., key) if the object is a dictionary with just one +-- entry. If neither exists, an error is raised. +function to_named_object (obj) + local named = {} + if type(obj) ~= 'table' then + -- if the object isn't a table, just use its value as a name. + named.name = pandoc.MetaInlines{pandoc.Str(tostring(obj))} + named.id = tostring(obj) + elseif obj.t == 'MetaInlines' then + -- Treat inlines as the name + named.name = obj + named.id = pandoc.utils.stringify(obj) + elseif obj.name ~= nil then + -- object has name attribute → just create a copy of the object + add_missing_entries(obj, named) + named.id = pandoc.utils.stringify(named.id or named.name) + elseif next(obj) and next(obj, next(obj)) == nil then + -- the entry's key is taken as the name, the value contains the + -- attributes.
+ local key, attribs = next(obj) + if type(attribs) == "string" or attribs.t == 'MetaInlines' then + named.name = attribs + else + add_missing_entries(attribs, named) + named.name = named.name or pandoc.MetaInlines{pandoc.Str(tostring(key))} + end + named.id = named.id and pandoc.utils.stringify(named.id) or key + else + -- this is not a named object adhering to the usual conventions. + error('not a named object: ' .. tostring(obj)) + end + return named +end + +--- Resolve institute placeholders to full named objects +local function resolve_institutes (institute, known_institutes) + local unresolved_institutes + if institute == nil then + unresolved_institutes = {} + elseif type(institute) == "string" or type(institute) == "number" then + unresolved_institutes = {institute} + else + unresolved_institutes = institute + end + + local result = List:new{} + for i, inst in ipairs(unresolved_institutes) do + result[i] = + known_institutes[tonumber(inst)] or + known_institutes:find_if(has_id(pandoc.utils.stringify(inst))) or + to_named_object(inst) + end + return result +end + +--- Insert a named object into a list; if an object of the same name exists +-- already, add all properties only present in the new object to the existing +-- item. +function merge_on_id (list, namedObj) + local elem, idx = list:find_if(has_id(namedObj.id)) + local res = elem and add_missing_entries(namedObj, elem) or namedObj + local obj_idx = idx or (#list + 1) + -- return res, obj_idx + list[obj_idx] = res + return res, #list +end + +--- Flatten a list of lists. 
+local function flatten (lists) + local result = List:new{} + for _, lst in ipairs(lists) do + result:extend(lst) + end + return result +end + +--- Canonicalize authors and institutes +local function canonicalize(raw_author, raw_institute) + local institutes = ensure_list(raw_institute):map(to_named_object) + local authors = ensure_list(raw_author):map(to_named_object) + + for _, author in ipairs(authors) do + author.institute = resolve_institutes( + ensure_list(author.institute), + institutes + ) + end + + -- Merge institutes defined in author objects with those defined in the + -- top-level list. + local author_insts = flatten(authors:map(function(x) return x.institute end)) + for _, inst in ipairs(author_insts) do + merge_on_id(institutes, inst) + end + + -- replace institutes with their indices + local to_index = function (inst) + return tostring(select(2, institutes:find_if(has_id(inst.id)))) + end + for _, author in ipairs(authors) do + author.institute = pandoc.MetaList(author.institute:map(to_index)) + end + + return authors, institutes +end + + +return { + { + Meta = function(meta) + meta.author, meta.institute = canonicalize(meta.author, meta.institute) + return meta + end + } +} diff --git a/paper/lua-filters/scrlttr2/Makefile b/paper/lua-filters/scrlttr2/Makefile new file mode 100644 index 0000000..acd4c7e --- /dev/null +++ b/paper/lua-filters/scrlttr2/Makefile @@ -0,0 +1,9 @@ +test: sample.md scrlttr2.lua sample.pdf + @pandoc --to=latex --lua-filter=scrlttr2.lua -s sample.md | \ + sh expected-strings.sh + @rm sample.pdf + +%.pdf: %.md scrlttr2.lua + @pandoc --lua-filter=scrlttr2.lua --output=$@ $< + +.PHONY: test diff --git a/paper/lua-filters/scrlttr2/README.md b/paper/lua-filters/scrlttr2/README.md new file mode 100644 index 0000000..2a4e440 --- /dev/null +++ b/paper/lua-filters/scrlttr2/README.md @@ -0,0 +1,60 @@ +# scrlttr2 + +This filter allows to write DIN 5008 letter using the [scrlttr2] +LaTeX document class from KOMA script. 
It converts metadata to +the appropriate KOMA variables and allows using the default LaTeX +template shipped with pandoc. + +[scrlttr2]: https://www.ctan.org/pkg/scrlttr2 + +## Base variables + + - `opening`: phrase used as an opening; + defaults to "Dear Sir/Madam," + - `closing`: closing phrase; defaults to "Sincerely," + - `address`: recipient's street address; + defaults to "no address given" + - `date`: the date of the letter; defaults to the current day. + +## KOMA Variables + +Currently, the following metadata fields are translated to KOMA +variables: + +- `fromaddress` (alias: `return-address`): address of the sender +- `fromfax` (alias: `fax`): sender's fax number +- `fromemail` (alias: `email`): sender's email +- `fromlogo` (alias: `logo`): image to be used as the sender's logo +- `fromname` (alias: `author`): sender name +- `fromphone` (alias: `phone`): sender's phone number +- `fromurl` (alias: `url`): sender's URL +- `customer`: customer number +- `invoice`: invoice number +- `myref`: sender's reference +- `place`: sender's place used near date +- `signature`: sender's signature +- `subject`: letter's subject +- `title`: letter title +- `yourref`: addressee's reference + +The values of these variables are converted to MetaInlines. If a +list is given, then each list item is used as a line, e.g., + + fromaddress: + - 35 Industry Way + - Springfield + +The `KOMAoptions` value is inferred from the given variables, but +can be overwritten by specifying it explicitly. + +See the scrlttr2 documentation for details. + +## Intended Usage + +Many sender variables don't change, so it is sensible to provide +default values for these. Authors using Markdown to draft letters +can use a separate YAML file for this. E.g., if there is a file +`default.yml` which contains the sender's details, then only the +addressee's data must be specified. 
+ + pandoc --lua-filter=scrlttr2 letter.md default.yml -o out.pdf diff --git a/paper/lua-filters/scrlttr2/expected-strings.sh b/paper/lua-filters/scrlttr2/expected-strings.sh new file mode 100644 index 0000000..f2b54c2 --- /dev/null +++ b/paper/lua-filters/scrlttr2/expected-strings.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +latex_result="$(cat -)" + +assert_contains () +{ + printf '%s' "$latex_result" | grep -qF "$1" - + if [ $? -ne 0 ]; then + printf 'Output does not contain `%s`.\n' "$1" >&2 + exit 1 + fi +} + +# whether we are using the scrlttr2 class +assert_contains '{scrlttr2}' + +assert_contains '\setkomavar{fromname}{Jane Doe}' +assert_contains '\setkomavar{fromaddress}{35 Industry Way\\ Springfield}' +assert_contains '\setkomavar{subject}{Letter of Reference}' +assert_contains '\setkomavar{date}{February 29, 2020}' + +# Custom opening and default closing +assert_contains '\opening{To Whom It May Concern,}' +assert_contains '\closing{Sincerely,}' + +# Author and date +assert_contains '\author{Jane Doe}' +assert_contains '\date{February 29, 2020}' + +# Recipient address +assert_contains '\begin{letter}{Fireworks Inc.\\ 123 Fake St\\ 58008 Springfield}' diff --git a/paper/lua-filters/scrlttr2/sample.md b/paper/lua-filters/scrlttr2/sample.md new file mode 100644 index 0000000..5f13554 --- /dev/null +++ b/paper/lua-filters/scrlttr2/sample.md @@ -0,0 +1,16 @@ +--- +author: Jane Doe +fromaddress: + - 35 Industry Way + - Springfield +opening: To Whom It May Concern, +subject: Letter of Reference +date: February 29, 2020 +address: + - Fireworks Inc. + - 123 Fake St + - 58008 Springfield +... + +I strongly recommend to embiggen your team by giving John Doe the position of a +yak shaver. He has shown cromulent performance as a bike shedder. 
diff --git a/paper/lua-filters/scrlttr2/scrlttr2.lua b/paper/lua-filters/scrlttr2/scrlttr2.lua new file mode 100644 index 0000000..78f38fd --- /dev/null +++ b/paper/lua-filters/scrlttr2/scrlttr2.lua @@ -0,0 +1,161 @@ +-- Ensure unpack also works if pandoc was compiled against Lua 5.1 +local unpack = unpack or table.unpack +local List = require 'pandoc.List' +local stringify = (require 'pandoc.utils')['stringify'] + +--- Set some default options +local default = { + opening = 'Dear Sir/Madam,', + closing = 'Sincerely,', + address = 'no address given' +} + +--- Return a list of inlines representing a call to a latex command. +local function latex_command (command, ...) + local entry = { + pandoc.RawInline('latex', '\\' .. command), + } + for _, arg in ipairs{...} do + entry[#entry + 1] = pandoc.RawInline('latex', '{') + if type(arg) ~= 'table' then + entry[#entry + 1] = pandoc.RawInline('latex', tostring(arg)) + else + List.extend(entry, arg) + end + entry[#entry + 1] = pandoc.RawInline('latex', '}') + end + return entry +end + +--- Convert the given meta-value to a list of inlines +local function ensure_inlines (val) + if not val or type(val) == 'string' or type(val) == 'boolean' then + return pandoc.MetaInlines{pandoc.Str(tostring(val))} + elseif type(val) == 'table' and val.t == 'MetaInlines' then + return val + elseif type(val) == 'table' then + local res = List:new{} + for i = 1, #val do + res:extend(val[i]) + res[#res + 1] = pandoc.RawInline('latex', '\\\\ ') + end + res[#res] = nil -- drop last linebreak + return pandoc.MetaInlines(res) + else + return pandoc.MetaInlines{pandoc.Str(pandoc.utils.stringify(val))} + end +end + +--- Convert the given value to a MetaList +local function ensure_meta_list (val) + if not val or val.t ~= 'MetaList' then + return pandoc.MetaList{} + else + return val + end +end + +--- Set supported variables as KOMA variables. 
+function setkomavar_commands (meta) + local set_vars = {} + local res = {} + local function set_koma_var (name, value, enable) + if value ~= nil then + res[#res + 1] = latex_command('setkomavar', name, ensure_inlines(value)) + if enable then + set_vars[#set_vars + 1] = name + end + end + end + + set_koma_var('fromname', meta.fromname or meta.author) + set_koma_var('fromaddress', meta.fromaddress or meta['return-address']) + set_koma_var('subject', meta.subject) + set_koma_var('title', meta.title) + set_koma_var('signature', meta.signature) + set_koma_var('customer', meta.customer) + set_koma_var('yourref', meta.yourref) + set_koma_var('myref', meta.myref) + set_koma_var('invoice', meta.invoice) + set_koma_var('place', meta.place) + + set_koma_var('fromfax', meta.fromfax or meta.fax, true) + set_koma_var('fromurl', meta.fromurl or meta.url, true) + set_koma_var('fromlogo', meta.fromlogo or meta.logo, true) + set_koma_var('fromemail', meta.fromemail or meta.email, true) + set_koma_var('fromphone', meta.fromphone or meta.phone, true) + + -- don't set date if date is set to `false` + if meta.date == nil or meta.date == true then + if meta['date-format'] then + set_koma_var('date', os.date(stringify(date_format))) + else + set_koma_var('date', pandoc.MetaInlines{pandoc.RawInline('latex', '\\today')}) + end + elseif meta.date then + set_koma_var('date', meta.date) + end + + if meta['KOMAoptions'] or #set_vars >= 1 then + res[#res + 1] = latex_command( + 'KOMAoptions', + meta['KOMAoptions'] + or table.concat(set_vars, '=true,') .. 
'=true' + ) + end + + return res +end + +--- Bring Metadata in a form suitable for the scrlttr KOMA class +local function make_koma_metadata(meta) + local header_includes = ensure_meta_list(meta['header-includes']) + List.extend(header_includes, setkomavar_commands(meta)) + + local include_before = ensure_meta_list(meta['include-before']) + List.extend( + include_before, + { + pandoc.MetaInlines( + latex_command( + 'begin', + 'letter', + ensure_inlines(meta.address or default.address) + ) + ), + + pandoc.MetaInlines( + latex_command('opening', meta.opening or default.opening) + ), + } + ) + + local include_after = ensure_meta_list(meta['include-after']) + List.extend( + include_after, + { + pandoc.MetaInlines( + latex_command('closing', meta.closing or default.closing) + ), + pandoc.MetaInlines(latex_command('end', 'letter')), + } + ) + + -- unset or reset some unwanted vars + meta.data = nil -- set via komavar 'date' + meta.title = nil -- set via komavar 'subject' + meta.indent = true -- disable parskib + -- set documentclass to scrlttr2 if it's unset + meta.documentclass = meta.documentclass or pandoc.MetaString'scrlttr2' + + + meta['header-includes'] = header_includes + meta['include-before'] = include_before + meta['include-after'] = include_after + + return meta +end + +return { + {Meta = make_koma_metadata} +} diff --git a/paper/lua-filters/section-refs/Makefile b/paper/lua-filters/section-refs/Makefile new file mode 100644 index 0000000..dcceb70 --- /dev/null +++ b/paper/lua-filters/section-refs/Makefile @@ -0,0 +1,26 @@ +OPTIONS_test_default := -t native \ + -M bibliography=bibliography.bib \ + -F pandoc-citeproc \ + --lua-filter=section-refs.lua + +OPTIONS_test_no_citeproc := -t native \ + --lua-filter=section-refs.lua + +OPTIONS_test_refs_name := -t native \ + -M bibliography=bibliography.bib \ + -M reference-section-title="Works Cited" \ + -F pandoc-citeproc \ + --lua-filter=section-refs.lua + +OPTIONS_test_section_level := -t native \ + -M 
bibliography=bibliography.bib \ + -M reference-section-title="Works Cited" \ + -M section-refs-level=2\ + -F pandoc-citeproc \ + --lua-filter=section-refs.lua + +.PHONY: test +test: test_default test_no_citeproc test_refs_name test_section_level + +test_%: expected_%.native sample.md bibliography.bib + @pandoc sample.md $(OPTIONS_$@) | diff --strip-trailing-cr -u $< - diff --git a/paper/lua-filters/section-refs/README.md b/paper/lua-filters/section-refs/README.md new file mode 100644 index 0000000..941bc36 --- /dev/null +++ b/paper/lua-filters/section-refs/README.md @@ -0,0 +1,19 @@ +# section-refs + +This filter allows the user to put bibliographies at the end of each +section, containing only those references in the section. It works on +the output of `pandoc-citeproc`, and so must be run after +`pandoc-citeproc`. For example: + +~~~ +pandoc input.md -F pandoc-citeproc --lua-filter section-refs.lua +~~~ + +It allows customization through two metadata fields: +`reference-section-title` and `section-refs-level` (default 1). The +`section-refs-level` variable controls what level the bibliography will +occur at the end of. The header of the generated references section will +be one level higher than `section-refs-level` (so if it occurs at the +end of a level-1 section, it will receive a level-2 header, and so on). + +This filter requires pandoc version >= 2.1. diff --git a/paper/lua-filters/section-refs/bibliography.bib b/paper/lua-filters/section-refs/bibliography.bib new file mode 100644 index 0000000..7ce54d5 --- /dev/null +++ b/paper/lua-filters/section-refs/bibliography.bib @@ -0,0 +1,70 @@ +@BOOK{ainsworth:sheppard, + title = {Jack Sheppard: A Romance}, + author = {William Harrison Ainsworth}, + address = {London}, + publisher = {George Routledge \& Sons}, + year = {1900}, + shorttitle = {Jack Sheppard}, +} + +@Article{altick:aldine, + author = {Richard D.
Altick}, + title = {From Aldine to Everyman: Cheap Reprint Series of the + English Classics 1830--1906}, + journal = {Studies in Bibliography}, + year = 1958, + volume = 11, + pages = {3--24} +} + +@BOOK{cohen:jokes, + title = {Jokes: Philosophical Thoughts on Joking Matters}, + publisher = {University of Chicago Press}, + year = 1999, + author = {Ted Cohen}, + address = {Chicago}, + shorttitle = {Jokes} +} + +@Book{dames:physiology, + author = {Nicholas Dames}, + title = {The Physiology of the Novel: Reading, Neural + Science, and the Form of Victorian Fiction}, + publisher = {Oxford University Press}, + year = 2007, + address = {Oxford}, + shorttitle = {Physiology} +} + +@Book{kant:critique2, + author = {Immanuel Kant}, + editor = {Mary Gregor}, + translator = {Mary Gregor}, + title = {Critique of Practical Reason}, + publisher = {Cambridge University Press}, + year = 2001, + address = {Cambridge, UK}, + shorttitle = {Practical} +} + +@Book{lukacs:european, + author = {Georg Luk{\'a}cs}, + title = {Studies in European Realism: A Sociological Survey + of the Writings of Balzac, Stendhal, Zola, Tolstoy, + Gorki, and Others}, + publisher = {The Merlin Press}, + year = 1989, + translator = {Edith Bone}, + address = {London}, + shorttitle = {Studies} +} + +@Book{trollope:autobiography, + author = {Anthony Trollope}, + editor = {Michael Sadleir and Frederick Page}, + title = {An Autobiography}, + publisher = {Oxford University Press}, + year = 1999, + address = {Oxford}, + origdate = 1883 +} diff --git a/paper/lua-filters/section-refs/expected_default.native b/paper/lua-filters/section-refs/expected_default.native new file mode 100644 index 0000000..b1c6945 --- /dev/null +++ b/paper/lua-filters/section-refs/expected_default.native @@ -0,0 +1,25 @@ +[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"] +,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"] +,Para [Str "Here",Space,Str "is",Space,Str 
"something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."] +,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."] +,Div ("refs-1",["references"],[]) + [Div ("ref-ainsworth:sheppard",[],[]) + [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str 
"Routledge",Space,Str "&",Space,Str "Sons."]] + ,Div ("ref-altick:aldine",[],[]) + [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]] + ,Div ("ref-dames:physiology",[],[]) + [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]] + ,Div ("ref-dames:physiology",[],[]) + [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]] + ,Div ("ref-kant:critique2",[],[]) + [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]] +,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite 
[Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."] +,Div ("refs-2",["references"],[]) + [Div ("ref-cohen:jokes",[],[]) + [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]] + ,Div ("ref-lukacs:european",[],[]) + [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str 
"Merlin",Space,Str "Press."]] + ,Div ("ref-trollope:autobiography",[],[]) + [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]] diff --git a/paper/lua-filters/section-refs/expected_no_citeproc.native b/paper/lua-filters/section-refs/expected_no_citeproc.native new file mode 100644 index 0000000..2219b6e --- /dev/null +++ b/paper/lua-filters/section-refs/expected_no_citeproc.native @@ -0,0 +1,7 @@ +[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"] +,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@ainsworth:sheppard",Space,Str "27]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@dames:physiology]"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@kant:critique2",Space,Str "29]"],Str "."] +,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."] 
+,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@altick:aldine",Space,Str "20]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@dames:physiology]"],Str "."] +,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@lukacs:european",Space,Str "125]"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@cohen:jokes",Space,Str "3]"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@trollope:autobiography",Space,Str "392]"],Str "."]] diff --git a/paper/lua-filters/section-refs/expected_refs_name.native b/paper/lua-filters/section-refs/expected_refs_name.native new file mode 100644 index 0000000..0d026ed --- /dev/null +++ 
b/paper/lua-filters/section-refs/expected_refs_name.native @@ -0,0 +1,27 @@ +[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"] +,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."] +,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."] +,Div 
("refs-1",["references"],[]) + [Header 2 ("bibliography-1",["unnumbered"],[]) [Str "Works",Space,Str "Cited"] + ,Div ("ref-ainsworth:sheppard",[],[]) + [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]] + ,Div ("ref-altick:aldine",[],[]) + [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]] + ,Div ("ref-dames:physiology",[],[]) + [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]] + ,Div ("ref-dames:physiology",[],[]) + [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]] + ,Div ("ref-kant:critique2",[],[]) + [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str 
"Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]] +,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."] +,Div ("refs-2",["references"],[]) + [Header 2 ("bibliography-2",["unnumbered"],[]) [Str "Works",Space,Str "Cited"] + ,Div ("ref-cohen:jokes",[],[]) + [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]] + ,Div ("ref-lukacs:european",[],[]) + [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str 
"1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]] + ,Div ("ref-trollope:autobiography",[],[]) + [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str "An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]] diff --git a/paper/lua-filters/section-refs/expected_section_level.native b/paper/lua-filters/section-refs/expected_section_level.native new file mode 100644 index 0000000..1d3c89e --- /dev/null +++ b/paper/lua-filters/section-refs/expected_section_level.native @@ -0,0 +1,31 @@ +[Header 1 ("here-is-one-section",[],[]) [Str "Here",Space,Str "is",Space,Str "one",Space,Str "section"] +,Div ("refs-1",["references"],[]) + [Header 3 ("bibliography-1",["unnumbered"],[]) [Str "Works",Space,Str "Cited"]] +,Header 2 ("a-subsection",[],[]) [Str "A",Space,Str "subsection"] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "ainsworth:sheppard", citationPrefix = [], citationSuffix = [Space,Str "27"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 1}] [Str "(Ainsworth",Space,Str "1900,",Space,Str "27)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, 
citationNoteNum = 0, citationHash = 2}] [Str "(Dames",Space,Str "2007)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "kant:critique2", citationPrefix = [], citationSuffix = [Space,Str "29"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 3}] [Str "(Kant",Space,Str "2001,",Space,Str "29)"],Str "."] +,Div ("refs-2",["references"],[]) + [Header 3 ("bibliography-2",["unnumbered"],[]) [Str "Works",Space,Str "Cited"] + ,Div ("ref-ainsworth:sheppard",[],[]) + [Para [Str "Ainsworth,",Space,Str "William",Space,Str "Harrison.",Space,Str "1900.",Space,Emph [Str "Jack",Space,Str "Sheppard:",Space,Str "A",Space,Str "Romance"],Str ".",Space,Str "London:",Space,Str "George",Space,Str "Routledge",Space,Str "&",Space,Str "Sons."]] + ,Div ("ref-dames:physiology",[],[]) + [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]] + ,Div ("ref-kant:critique2",[],[]) + [Para [Str "Kant,",Space,Str "Immanuel.",Space,Str "2001.",Space,Emph [Str "Critique",Space,Str "of",Space,Str "Practical",Space,Str "Reason"],Str ".",Space,Str "Edited",Space,Str "and",Space,Str "translated",Space,Str "by",Space,Str "Mary",Space,Str "Gregor.",Space,Str "Cambridge,",Space,Str "UK:",Space,Str "Cambridge",Space,Str "University",Space,Str "Press."]]] +,Header 2 ("another-subsection.",[],[]) [Str "Another",Space,Str "subsection."] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = 
"altick:aldine", citationPrefix = [], citationSuffix = [Space,Str "20"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 4}] [Str "(Altick",Space,Str "1958,",Space,Str "20)"],Str ".",Space,Str "And",Space,Str "here",Space,Str "is",Space,Str "something",Space,Str "repeated",SoftBreak,Cite [Citation {citationId = "dames:physiology", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 5}] [Str "(Dames",Space,Str "2007)"],Str "."] +,Div ("refs-3",["references"],[]) + [Header 3 ("bibliography-3",["unnumbered"],[]) [Str "Works",Space,Str "Cited"] + ,Div ("ref-altick:aldine",[],[]) + [Para [Str "Altick,",Space,Str "Richard",Space,Str "D.",Space,Str "1958.",Space,Str "\8220From",Space,Str "Aldine",Space,Str "to",Space,Str "Everyman:",Space,Str "Cheap",Space,Str "Reprint",Space,Str "Series",Space,Str "of",Space,Str "the",Space,Str "English",Space,Str "Classics",Space,Str "1830\8211\&1906.\8221",Space,Emph [Str "Studies",Space,Str "in",Space,Str "Bibliography"],Space,Str "11:",Space,Str "3\8211\&24."]] + ,Div ("ref-dames:physiology",[],[]) + [Para [Str "Dames,",Space,Str "Nicholas.",Space,Str "2007.",Space,Emph [Str "The",Space,Str "Physiology",Space,Str "of",Space,Str "the",Space,Str "Novel:",Space,Str "Reading,",Space,Str "Neural",Space,Str "Science,",Space,Str "and",Space,Str "the",Space,Str "Form",Space,Str "of",Space,Str "Victorian",Space,Str "Fiction"],Str ".",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]] +,Header 1 ("here-is-another-section",[],[]) [Str "Here",Space,Str "is",Space,Str "another",Space,Str "section"] +,Para [Str "Here",Space,Str "is",Space,Str "something",Space,Cite [Citation {citationId = "lukacs:european", citationPrefix = [], citationSuffix = [Space,Str "125"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 6}] [Str "(Luk",Str "\225cs",Space,Str "1989,",Space,Str "125)"],Str ".",Space,Str "And",Space,Str 
"here",Space,Str "is",Space,Str "something",Space,Str "else",SoftBreak,Cite [Citation {citationId = "cohen:jokes", citationPrefix = [], citationSuffix = [Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 7}] [Str "(Cohen",Space,Str "1999,",Space,Str "3)"],Str ".",Space,Str "Finally,",Space,Str "we",Space,Str "want",Space,Str "to",Space,Str "make",Space,Str "sure",Space,Str "that",Space,Str "we",Space,Str "have",Space,Str "one",Space,Str "last",SoftBreak,Str "citation",Space,Str "here",Space,Cite [Citation {citationId = "trollope:autobiography", citationPrefix = [], citationSuffix = [Space,Str "392"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 8}] [Str "(Trollope",Space,Str "[1883]",Space,Str "1999,",Space,Str "392)"],Str "."] +,Div ("refs-4",["references"],[]) + [Header 3 ("bibliography-4",["unnumbered"],[]) [Str "Works",Space,Str "Cited"] + ,Div ("ref-cohen:jokes",[],[]) + [Para [Str "Cohen,",Space,Str "Ted.",Space,Str "1999.",Space,Emph [Str "Jokes:",Space,Str "Philosophical",Space,Str "Thoughts",Space,Str "on",Space,Str "Joking",Space,Str "Matters"],Str ".",Space,Str "Chicago:",Space,Str "University",Space,Str "of",Space,Str "Chicago",Space,Str "Press."]] + ,Div ("ref-lukacs:european",[],[]) + [Para [Str "Luk",Str "\225cs,",Space,Str "Georg.",Space,Str "1989.",Space,Emph [Str "Studies",Space,Str "in",Space,Str "European",Space,Str "Realism:",Space,Str "A",Space,Str "Sociological",Space,Str "Survey",Space,Str "of",Space,Str "the",Space,Str "Writings",Space,Str "of",Space,Str "Balzac,",Space,Str "Stendhal,",Space,Str "Zola,",Space,Str "Tolstoy,",Space,Str "Gorki,",Space,Str "and",Space,Str "Others"],Str ".",Space,Str "Translated",Space,Str "by",Space,Str "Edith",Space,Str "Bone.",Space,Str "London:",Space,Str "The",Space,Str "Merlin",Space,Str "Press."]] + ,Div ("ref-trollope:autobiography",[],[]) + [Para [Str "Trollope,",Space,Str "Anthony.",Space,Str "(1883)",Space,Str "1999.",Space,Emph [Str 
"An",Space,Str "Autobiography"],Str ".",Space,Str "Edited",Space,Str "by",Space,Str "Michael",Space,Str "Sadleir",Space,Str "and",Space,Str "Frederick",Space,Str "Page.",Space,Str "Oxford:",Space,Str "Oxford",Space,Str "University",Space,Str "Press."]]]] diff --git a/paper/lua-filters/section-refs/sample.md b/paper/lua-filters/section-refs/sample.md new file mode 100644 index 0000000..00ddb86 --- /dev/null +++ b/paper/lua-filters/section-refs/sample.md @@ -0,0 +1,18 @@ +# Here is one section + +## A subsection + +Here is something [@ainsworth:sheppard 27]. And here is something else +[@dames:physiology]. Finally, we want to make sure that we have one last +citation here [@kant:critique2 29]. + +## Another subsection. + +Here is something [@altick:aldine 20]. And here is something repeated +[@dames:physiology]. + +# Here is another section + +Here is something [@lukacs:european 125]. And here is something else +[@cohen:jokes 3]. Finally, we want to make sure that we have one last +citation here [@trollope:autobiography 392]. diff --git a/paper/lua-filters/section-refs/section-refs.lua b/paper/lua-filters/section-refs/section-refs.lua new file mode 100644 index 0000000..68e61d0 --- /dev/null +++ b/paper/lua-filters/section-refs/section-refs.lua @@ -0,0 +1,138 @@ +function is_ref_div (blk) + return (blk.t == "Div" and blk.identifier == "refs") +end + +function is_ref_header (blk) + return (blk.t == "Header" and blk.identifier == "bibliography") +end + +function get_all_refs (blks) + for _, b in pairs(blks) do + if is_ref_div(b) then + return b.content + end + end +end + +function remove_all_refs (blks) + local out = {} + for _, b in pairs(blks) do + if not (is_ref_div(b) or is_ref_header(b)) then + table.insert(out, b) + end + end + return out +end + +-- We return a {number, ref} pair so we can sort in the individual +-- bibliographies. +function citation_to_numbered_ref (citation, all_refs) + local div_id = "ref-" .. 
citation.id + for i, d in ipairs(all_refs) do + if d.t == "Div" and d.identifier == div_id then + return {i, d} + end + end +end + + +function get_partial_refs (blocks, all_refs) + local cites = {} + local citegetter = { + Cite = function (el) + for _, c in pairs(el.citations) do + table.insert(cites, c) + end + end + } + + for _, b in pairs(blocks) do + pandoc.walk_block(b, citegetter) + end + + + -- first we make a list of the {number, ref} pairs so we can sort + -- them. Then after sorting, we're going to make a new list with + -- only the second element. + local numbered_refs = {} + for _, c in pairs(cites) do + local r = citation_to_numbered_ref(c, all_refs) + if r then + table.insert(numbered_refs, r) + end + end + + table.sort(numbered_refs, function(x, y) return x[1] < y[1] end) + + local refs = {} + for _, nr in pairs(numbered_refs) do + table.insert(refs, nr[2]) + end + + return refs +end + +function add_section_refs (blks, lvl, refs_title, all_refs) + local output_blks = {} + local section = {} + local refs_num = 0 + + local go = function () + refs_num = refs_num + 1 + local section_refs = get_partial_refs(section, all_refs) + if refs_title then + local hdr = pandoc.Header(lvl + 1, + refs_title, + pandoc.Attr("bibliography-" .. tostring(refs_num), + {"unnumbered"})) + table.insert(section_refs, 1, hdr) + end + local refs_div = pandoc.Div(section_refs, + pandoc.Attr("refs-" .. tostring(refs_num), + {"references"})) + table.insert(section, refs_div) + for _, x in pairs(section) do + table.insert(output_blks, x) + end + end + + -- to avoid putting a bib after an intro paragraph. 
+ local seen_hdr_before = false + for _, b in pairs(blks) do + if b.t == "Header" and b.level <= lvl then + if seen_hdr_before then + go() + section = {b} + else + seen_hdr_before = true + table.insert(section, b) + end + else + table.insert(section, b) + end + end + go() + return output_blks +end + +function Pandoc(doc) + if PANDOC_VERSION == nil then -- if pandoc_version < 2.1 + io.stderr:write("WARNING: pandoc >= 2.1 required for section-refs filter\n") + return doc + end + local refs_title = doc.meta["reference-section-title"] + -- if we get it from a command-line field, read it in as md. + if type(refs_title) == "string" then + refs_title = pandoc.read(refs_title, "markdown").blocks[1].content + end + local lvl = tonumber(doc.meta["section-refs-level"]) or 1 + local all_refs = get_all_refs(doc.blocks) + -- we only want to do something if there are refs to work + -- with. This way, if this is run without pandoc-citeproc, it will + -- just return the same document. + if all_refs then + local unreffed = remove_all_refs(doc.blocks) + local output = add_section_refs(unreffed, lvl, refs_title, all_refs) + return pandoc.Pandoc(output, doc.meta) + end +end diff --git a/paper/lua-filters/short-captions/Makefile b/paper/lua-filters/short-captions/Makefile new file mode 100644 index 0000000..756cf2b --- /dev/null +++ b/paper/lua-filters/short-captions/Makefile @@ -0,0 +1,18 @@ +LF = --lua-filter=short-captions.lua +F = -F pandoc-crossref + +test: + @pandoc $(LF) README.md -w latex | diff --strip-trailing-cr expected-1.tex - + +crossref: + @echo testing short-captions.lua after pandoc-crossref + @pandoc $(F) $(LF) README.md -w latex | diff --strip-trailing-cr expected-2.tex - + @echo testing short-captions.lua before pandoc-crossref + @pandoc $(LF) $(F) README.md -w latex | diff --strip-trailing-cr expected-2.tex - + +readme: + @pandoc -s $(F) $(LF) README.md -o README.pdf + +latex: + @pandoc -s $(F) $(LF) README.md -o README.tex + diff --git 
a/paper/lua-filters/short-captions/README.md b/paper/lua-filters/short-captions/README.md new file mode 100644 index 0000000..2721004 --- /dev/null +++ b/paper/lua-filters/short-captions/README.md @@ -0,0 +1,52 @@ +--- +title: "short-captions.lua" +lof: true +--- + +# Short captions in \LaTeX\ output + +For latex output, this filter uses the attribute `short-caption` for +figures so that the attribute value appears in the List of Figures, if +one is desired. + +# Usage + +Where you would have a figure in, say, markdown as + + ![The caption](foo.png ) + +You can now specify the figure as + + ![The long caption](foo.png){short-caption="a short caption"} + +If the document metadata includes `lof:true`, then the List of Figures +will use the short caption. This is particularly useful for students +writing dissertations, who often have to include a List of Figures in +the front matter, but where figure captions themselves can be quite +lengthy. + + pandoc --lua-filter=short-captions.lua article.md -o article.tex + + pandoc --lua-filter=short-captions.lua article.md -o article.pdf + + + +# Example + +@Fig:shortcap is an interesting figure with a long caption, but a short +caption in the List of Figures. + +![This is an *extremely* interesting figure that has a lot of detail I +will need to describe in a few sentences. This figure has a short +caption that will appear in the list of figures. Other attributes are +preserved](fig.pdf){#fig:shortcap short-caption="A short caption with +math $x^n + y^n = z^n$" width="50%"} + + +# Limitations + +- The filter will process the `short-caption` attribute value as pandoc + markdown, regardless of the input format. +- It does not work for tables and listings yet. +- But it works with pandoc-crossref, regardless of the order of + application. 
diff --git a/paper/lua-filters/short-captions/expected-1.tex b/paper/lua-filters/short-captions/expected-1.tex new file mode 100644 index 0000000..a28bb7e --- /dev/null +++ b/paper/lua-filters/short-captions/expected-1.tex @@ -0,0 +1,67 @@ +\hypertarget{short-captions-in-output}{% +\section{\texorpdfstring{Short captions in +\LaTeX~output}{Short captions in ~output}}\label{short-captions-in-output}} + +For latex output, this filter uses the attribute \texttt{short-caption} +for figures so that the attribute value appears in the List of Figures, +if one is desired. + +\hypertarget{usage}{% +\section{Usage}\label{usage}} + +Where you would have a figure in, say, markdown as + +\begin{verbatim} +![The caption](foo.png ) +\end{verbatim} + +You can now specify the figure as + +\begin{verbatim} +![The long caption](foo.png){short-caption="a short caption"} +\end{verbatim} + +If the document metadata includes \texttt{lof:true}, then the List of +Figures will use the short caption. This is particularly useful for +students writing dissertations, who often have to include a List of +Figures in the front matter, but where figure captions themselves can be +quite lengthy. + +\begin{verbatim} +pandoc --lua-filter=short-captions.lua article.md -o article.tex + +pandoc --lua-filter=short-captions.lua article.md -o article.pdf +\end{verbatim} + +\hypertarget{example}{% +\section{Example}\label{example}} + +@Fig:shortcap is an interesting figure with a long caption, but a short +caption in the List of Figures. + +\hypertarget{fig:shortcap}{% +\begin{figure} +\centering +\includegraphics[width=0.5\textwidth,height=\textheight]{fig.pdf} +\caption[{A short caption with math \(x^n + y^n = z^n\)}]{This is an +\emph{extremely} interesting figure that has a lot of detail I will need +to describe in a few sentences. This figure has a short caption that +will appear in the list of figures. 
Other attributes are preserved} +\label{fig:shortcap} +\end{figure} +} + +\hypertarget{limitations}{% +\section{Limitations}\label{limitations}} + +\begin{itemize} +\tightlist +\item + The filter will process the \texttt{short-caption} attribute value as + pandoc markdown, regardless of the input format. +\item + It does not work for tables and listings yet. +\item + But it works with pandoc-crossref, regardless of the order of + application. +\end{itemize} diff --git a/paper/lua-filters/short-captions/expected-2.tex b/paper/lua-filters/short-captions/expected-2.tex new file mode 100644 index 0000000..95444b3 --- /dev/null +++ b/paper/lua-filters/short-captions/expected-2.tex @@ -0,0 +1,67 @@ +\hypertarget{short-captions-in-output}{% +\section{\texorpdfstring{Short captions in +\LaTeX~output}{Short captions in ~output}}\label{short-captions-in-output}} + +For latex output, this filter uses the attribute \texttt{short-caption} +for figures so that the attribute value appears in the List of Figures, +if one is desired. + +\hypertarget{usage}{% +\section{Usage}\label{usage}} + +Where you would have a figure in, say, markdown as + +\begin{verbatim} +![The caption](foo.png ) +\end{verbatim} + +You can now specify the figure as + +\begin{verbatim} +![The long caption](foo.png){short-caption="a short caption"} +\end{verbatim} + +If the document metadata includes \texttt{lof:true}, then the List of +Figures will use the short caption. This is particularly useful for +students writing dissertations, who often have to include a List of +Figures in the front matter, but where figure captions themselves can be +quite lengthy. + +\begin{verbatim} +pandoc --lua-filter=short-captions.lua article.md -o article.tex + +pandoc --lua-filter=short-captions.lua article.md -o article.pdf +\end{verbatim} + +\hypertarget{example}{% +\section{Example}\label{example}} + +Fig.~\ref{fig:shortcap} is an interesting figure with a long caption, +but a short caption in the List of Figures. 
+ +\hypertarget{fig:shortcap}{% +\begin{figure} +\centering +\includegraphics[width=0.5\textwidth,height=\textheight]{fig.pdf} +\caption[{A short caption with math \(x^n + y^n = z^n\)}]{This is an +\emph{extremely} interesting figure that has a lot of detail I will need +to describe in a few sentences. This figure has a short caption that +will appear in the list of figures. Other attributes are preserved} +\label{fig:shortcap} +\end{figure} +} + +\hypertarget{limitations}{% +\section{Limitations}\label{limitations}} + +\begin{itemize} +\tightlist +\item + The filter will process the \texttt{short-caption} attribute value as + pandoc markdown, regardless of the input format. +\item + It does not work for tables and listings yet. +\item + But it works with pandoc-crossref, regardless of the order of + application. +\end{itemize} diff --git a/paper/lua-filters/short-captions/fig.pdf b/paper/lua-filters/short-captions/fig.pdf Binary files differnew file mode 100644 index 0000000..cac7f39 --- /dev/null +++ b/paper/lua-filters/short-captions/fig.pdf diff --git a/paper/lua-filters/short-captions/short-captions.lua b/paper/lua-filters/short-captions/short-captions.lua new file mode 100644 index 0000000..9aaf309 --- /dev/null +++ b/paper/lua-filters/short-captions/short-captions.lua @@ -0,0 +1,37 @@ +if FORMAT ~= "latex" then + return +end + +local function latex(str) + return pandoc.RawInline('latex', str) +end + +function figure_image (elem) + local image = elem.content and elem.content[1] + return (image.t == 'Image' and image.title == 'fig:') + and image + or nil +end + +function Para (para) + local img = figure_image(para) + if not img or not img.caption or not img.attributes['short-caption'] then + return nil + end + + local short_caption = pandoc.Span( + pandoc.read(img.attributes['short-caption']).blocks[1].c + ) + local hypertarget = "{%%\n" + local label = "\n" + if img.identifier ~= img.title then + hypertarget = 
string.format("\\hypertarget{%s}{%%\n",img.identifier) + label = string.format("\n\\label{%s}",img.identifier) + end + return pandoc.Para { + latex(hypertarget .. "\\begin{figure}\n\\centering\n"), + img, + latex("\n\\caption["), short_caption, latex("]"), pandoc.Span(img.caption), + latex(label .."\n\\end{figure}\n}\n") + } +end diff --git a/paper/lua-filters/spellcheck/Makefile b/paper/lua-filters/spellcheck/Makefile new file mode 100644 index 0000000..9d51bff --- /dev/null +++ b/paper/lua-filters/spellcheck/Makefile @@ -0,0 +1,2 @@ +test: + @pandoc --lua-filter=spellcheck.lua sample.md | sort | diff --strip-trailing-cr -u expected.txt - diff --git a/paper/lua-filters/spellcheck/README.md b/paper/lua-filters/spellcheck/README.md new file mode 100644 index 0000000..5f5d6a3 --- /dev/null +++ b/paper/lua-filters/spellcheck/README.md @@ -0,0 +1,42 @@ +# spellcheck + +This filter checks the spelling of words in the body of the +document (omitting metadata). The external program `aspell` is +used for the checking, and must be present in the path. + +Why use this instead of just running `aspell` on the +document's source? Because this filter is sensitive to +the semantics of the document in ways that `aspell` is +not: + +- Material in code spans, raw HTML, URLs in links, + and math is not spell-checked, eliminating a big + class of false positives. + +- The filter is sensitive to the `lang` specified in + the document's metadata; this will be treated as the + default language for the document. + +- It is also sensitive to `lang` attributes on native + divs and spans. Thus, for example, in an English + document, `[chevaux]{lang=fr}` will not be registered + as a spelling error. + +To run it, + + pandoc --lua-filter spellcheck.lua sample.md + +A list of misspelled words (or at any rate, words not +in the appropriate dictionary) will be printed to stdout. 
+If the word is in a div or span with a non-default `lang` +attribute, the relevant language will be indicated in +brackets after the word, separated by a tab. + +To add words to the list for a language, you can add files +with names `.aspell.LANG.pws` in your home directory. Example: + +``` +% cat ~/.aspell.en.pws +personal_ws-1.1 en 0 +goopy +``` diff --git a/paper/lua-filters/spellcheck/expected.txt b/paper/lua-filters/spellcheck/expected.txt new file mode 100644 index 0000000..dd973c8 --- /dev/null +++ b/paper/lua-filters/spellcheck/expected.txt @@ -0,0 +1,2 @@ +missspeling [en] +summer diff --git a/paper/lua-filters/spellcheck/sample.md b/paper/lua-filters/spellcheck/sample.md new file mode 100644 index 0000000..31f7834 --- /dev/null +++ b/paper/lua-filters/spellcheck/sample.md @@ -0,0 +1,15 @@ +--- +lang: fr-FR +... + +Ces sont des mots français. +Mais pas summer. + +[This is a sentence in English, +with one missspeling.]{lang=en} + +::: {lang=en} +Here's a div in English. +Code is ignored: `baoeuthasoe`{.nolang}. +So are [URLs](http://example.com/notaword). +::: diff --git a/paper/lua-filters/spellcheck/spellcheck.lua b/paper/lua-filters/spellcheck/spellcheck.lua new file mode 100644 index 0000000..85ae281 --- /dev/null +++ b/paper/lua-filters/spellcheck/spellcheck.lua @@ -0,0 +1,70 @@ +-- lua filter for spell checking: requires 'aspell'. +-- Copyright (C) 2017-2019 John MacFarlane, released under MIT license + +local text = require('text') +local words = {} +local deflang + +local function add_to_dict(lang, t) + if not words[lang] then + words[lang] = {} + end + if not words[lang][t] then + words[lang][t] = (words[lang][t] or 0) + 1 + end +end + +local function get_deflang(meta) + deflang = (meta.lang and meta.lang[1] and meta.lang[1].c) or 'en' + -- the following is better but won't work in pandoc 2.0.6. 
+ -- it requires pandoc commit ecc46e229fde934f163d1f646383d24bfe2039e1: + -- deflang = (meta.lang and pandoc.utils.stringify(meta.lang)) or 'en' + return {} -- eliminate meta so it doesn't get spellchecked +end + +local function run_spellcheck(lang) + local keys = {} + local wordlist = words[lang] + for k,_ in pairs(wordlist) do + keys[#keys + 1] = k + end + local inp = table.concat(keys, '\n') + local outp = pandoc.pipe('aspell', {'list','-l',lang}, inp) + for w in string.gmatch(outp, "(%a*)\n") do + io.write(w) + if lang ~= deflang then + io.write("\t[" .. lang .. "]") + end + io.write("\n") + end +end + +local function results(el) + pandoc.walk_block(pandoc.Div(el.blocks), {Str = function(e) add_to_dict(deflang, e.text) end}) + for lang,v in pairs(words) do + run_spellcheck(lang) + end + os.exit(0) +end + +local function checkstr(el) + add_to_dict(deflang, el.text) +end + +local function checkspan(el) + local lang = el.attributes.lang + if not lang then return nil end + pandoc.walk_inline(el, {Str = function(e) add_to_dict(lang, e.text) end}) + return {} -- remove span, so it doesn't get checked again +end + +local function checkdiv(el) + local lang = el.attributes.lang + if not lang then return nil end + pandoc.walk_block(el, {Str = function(e) add_to_dict(lang, e.text) end}) + return {} -- remove div, so it doesn't get checked again +end + +return {{Meta = get_deflang}, + {Div = checkdiv, Span = checkspan}, + {Str = function(e) add_to_dict(deflang, e.text) end, Pandoc = results}} diff --git a/paper/lua-filters/table-short-captions/Makefile b/paper/lua-filters/table-short-captions/Makefile new file mode 100644 index 0000000..3df47b1 --- /dev/null +++ b/paper/lua-filters/table-short-captions/Makefile @@ -0,0 +1,24 @@ +LF = --lua-filter=table-short-captions.lua +F = -F pandoc-crossref + +test: sample.md + @pandoc -s $(LF) -t native $< | \ + diff -u expected-sample.native - + +test-with-crossref: sample.md + @pandoc -s $(LF) $(F) -t latex $< | \ + diff -u 
expected-sample.tex - + +README.pdf: README.md + @pandoc $(LF) $(F) $< -o $@ + +sample.tex: sample.md + @pandoc -s $(LF) $(F) -t latex $< -o $@ + +sample.pdf: sample.md + @pandoc -s $(LF) $(F) -t latex $< -o $@ + +clean: + rm -v *.aux *.dvi *.fdb_latexmk *.fls *.log *.lot *.ps *.pdf sample.tex | true + +.PHONY: test test-with-crossref clean diff --git a/paper/lua-filters/table-short-captions/README.md b/paper/lua-filters/table-short-captions/README.md new file mode 100644 index 0000000..853b809 --- /dev/null +++ b/paper/lua-filters/table-short-captions/README.md @@ -0,0 +1,66 @@ +--- +title: "table-short-captions.lua" +--- + +# Short captions in \LaTeX\ tables output + +For LaTeX output, this filter enables use of the attribute +`short-caption` for tables. The attribute value will appear in the List +of Tables. + +This filter also enables the class `.unlisted` for tables. This will +prevent the table caption from appearing in the List of Tables. + +# Usage + +In Pandoc Markdown, you can add a caption to a table with + + Table: This is the *italicised long caption* of my table, which has + a very long caption. + +If the document metadata includes `lot:true`, then the List of Tables +will be inserted at the beginning of the document. + +The [pandoc-crossref](http://lierdakil.github.io/pandoc-crossref/) +filter extends this, and enables you to specify a custom label for the +table. + + Table: This is the *italicised long caption* of my table, which has + a very long caption. {#tbl:full-of-juicy-data} + +This filter, when run _before_ pandoc-crossref, allows you to add short +captions to the table as a `short-caption` attribute. What is between +the quotes will be parsed as Markdown. + +**Important!:** You _must_ use empty square brackets before the +attributes tag. + + Table: This is the *italicised long caption* of my table, which has + a very long caption. 
+ []{#tbl:full-of-juicy-data short-caption="Short caption for *juicy* data table."} + +Alternatively, if you wish to create a table which is unlisted in the +List of Tables, you can use the `.unlisted` class in the attributes tag. + + Table: This is the *italicised long caption* of my table, which will + not appear in the List of Tables. []{#tbl:full-of-juicy-data .unlisted} + +This filter should prove useful for students writing dissertations, who +often have to include a List of Tables in the front matter, but where +table captions themselves can be quite lengthy. + + pandoc --lua-filter=table-short-captions.lua \ + --filter pandoc-crossref \ + article.md -o article.tex + + pandoc --lua-filter=table-short-captions.lua \ + --filter pandoc-crossref \ + article.md -o article.pdf + + +# Limitations + +- The filter will process the `short-caption` attribute value as pandoc + markdown, regardless of the input format. +- pandoc-crossref should be run after it. +- I have only tested this from a Markdown source. 
diff --git a/paper/lua-filters/table-short-captions/expected-sample.native b/paper/lua-filters/table-short-captions/expected-sample.native new file mode 100644 index 0000000..c387ebe --- /dev/null +++ b/paper/lua-filters/table-short-captions/expected-sample.native @@ -0,0 +1,68 @@ +Pandoc (Meta {unMeta = fromList [("lot",MetaBool True),("title",MetaInlines [Str "Tests",Space,Str "for",Space,Str "table-short-captions.lua"])]}) +[Para [Str "These",Space,Str "tests",Space,Str "are",Space,Str "written",Space,Str "so",Space,Str "that",Space,Str "if",Space,Strong [Str "bold",Space,Str "font"],Space,Str "appears",Space,Str "in",Space,Str "the",Space,Str "LOT,",Space,Str "something",Space,Str "is",Space,Str "wrong."] +,Para [Str "The",Space,Str "tests",Space,Str "are",Space,Str "split",Space,Str "into",Space,Str "two:",Space,Str "expected",Space,Str "uses,",Space,Str "and",Space,Str "non-standard",Space,Str "uses/errors.",LineBreak,Str "The",Space,Str "non-standard",Space,Str "uses",Space,Str "are",Space,Str "presented",Space,Str "in",Space,Str "this",Space,Str "document",Space,Str "for",Space,Str "troubleshooting",Space,Str "purposes,",Space,Str "and",Space,Str "to",Space,Str "ensure",Space,Str "the",Space,Str "filter",Space,Str "doesn\8217t",Space,Str "crash",Space,Str "in",Space,Str "corner",Space,Str "cases."] +,Header 1 ("standard-usage",[],[]) [Str "Standard",Space,Str "usage"] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl1,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label."] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl2,",Space,Str 
"in",Space,Str "standard",Space,Code ("",[],[]) "pandoc-crossref",Space,Str "form.",Space,Str "{#tbl:tbl-label2}"] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl3,",Space,Str "which",Space,Str "is",Space,Strong [Str "unlisted"],Str ".",Space,Span ("tbl:tbl-label3",["unlisted"],[]) []] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl4,",Space,Str "which",Space,Str "has",Space,Str "an",Space,Strong [Str "overriding"],Space,Str "short-caption.",Space,Str "This",Space,Str "is",Space,Str "the",Space,Str "expected",Space,Str "usage.",Space,Span ("tbl:tbl-label4",[],[("short-caption","Table 4 *short* capt.")]) []] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]] +,Header 1 ("non-standard-usageerrors",[],[]) [Str "Non-standard",Space,Str "usage/errors"] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl5,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label,",Space,Str "but",Space,Str "does",Space,Str "have",Space,Str "empty",Space,Str "braces",Space,Str "at",Space,Str "the",Space,Str "end.",Space,Str "{}"] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str 
"a2"]] + ,[Plain [Str "b2"]]]] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl6,",Space,Str "which",Space,Str "does",Space,Str "not",Space,Str "have",Space,Str "a",Space,Str "label,",Space,Str "but",Space,Str "does",Space,Str "have",Space,Str "an",Space,Str "empty",Space,Str "span",Space,Str "at",Space,Str "the",Space,Str "end.",Space,Span ("",[],[]) []] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl7,",Space,Str "which",Space,Str "is",Space,Str "improperly",Space,Str "formatted,",Space,Str "and",Space,Str "will",Space,Str "appear",Space,Str "in",Space,Str "the",Space,Str "list",Space,Str "of",Space,Str "tables.",Space,Str "This",Space,Str "filter",Space,Str "requires",Space,Str "that",Space,Code ("",[],[]) ".unlisted",Space,Str "is",Space,Str "placed",Space,Str "in",Space,Str "a",Space,Str "span.",Space,Str "{#tbl:tbl-label7",Space,Str ".unlisted}"] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl8,",Space,Str "which",Space,Str "has",Space,Str "an",Space,Str "empty",Space,Str "short-caption.",Space,Str "An",Space,Str "empty",Space,Str "short-caption",Space,Str "does",Space,Str "nothing.",Space,Str "The",Space,Str "long",Space,Str "caption",Space,Str "will",Space,Str "still",Space,Str "be",Space,Str "used.",Space,Span ("tbl:tbl-label8",[],[("short-caption","")]) []] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str 
"cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]] +,Table [Str "This",Space,Str "is",Space,Str "the",Space,Emph [Str "italicised",Space,Str "long",Space,Str "caption"],Space,Str "of",Space,Str "tbl9,",Space,Str "which",Space,Str "is",Space,Strong [Str "unlisted"],Str ",",Space,Str "yet",Space,Str "has",Space,Str "a",Space,Str "short-caption.",Space,Span ("tbl:tbl-label9",["unlisted"],[("short-caption","Table 9 **unlisted** *short* capt.")]) []] [AlignDefault,AlignDefault] [0.0,0.0] + [[Plain [Str "cola"]] + ,[Plain [Str "colb"]]] + [[[Plain [Str "a1"]] + ,[Plain [Str "b1"]]] + ,[[Plain [Str "a2"]] + ,[Plain [Str "b2"]]]]] diff --git a/paper/lua-filters/table-short-captions/expected-sample.tex b/paper/lua-filters/table-short-captions/expected-sample.tex new file mode 100644 index 0000000..65a2073 --- /dev/null +++ b/paper/lua-filters/table-short-captions/expected-sample.tex @@ -0,0 +1,291 @@ +% Options for packages loaded elsewhere +\PassOptionsToPackage{unicode=true}{hyperref} +\PassOptionsToPackage{hyphens}{url} +% +\documentclass[ +]{article} +\usepackage{lmodern} +\usepackage{amssymb,amsmath} +\usepackage{ifxetex,ifluatex} +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provides euro and other symbols +\else % if luatex or xelatex + \usepackage{unicode-math} + \defaultfontfeatures{Scale=MatchLowercase} + \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +% Use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +\IfFileExists{microtype.sty}{% use microtype if available + \usepackage[]{microtype} + \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts +}{} +\makeatletter +\@ifundefined{KOMAClassName}{% if non-KOMA class + \IfFileExists{parskip.sty}{% + \usepackage{parskip} + }{% else + 
\setlength{\parindent}{0pt} + \setlength{\parskip}{6pt plus 2pt minus 1pt}} +}{% if KOMA class + \KOMAoptions{parskip=half}} +\makeatother +\usepackage{xcolor} +\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available +\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}} +\hypersetup{ + pdftitle={Tests for table-short-captions.lua}, + hidelinks, +} +\urlstyle{same} % disable monospaced font for URLs +\usepackage{longtable,booktabs} +% Allow footnotes in longtable head/foot +\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}} +\makesavenoteenv{longtable} +\setlength{\emergencystretch}{3em} % prevent overfull lines +\providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +\setcounter{secnumdepth}{-\maxdimen} % remove section numbering +% Redefines (sub)paragraphs to behave more like sections +\ifx\paragraph\undefined\else + \let\oldparagraph\paragraph + \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} +\fi +\ifx\subparagraph\undefined\else + \let\oldsubparagraph\subparagraph + \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} +\fi + +% Set default figure placement to htbp +\makeatletter +\def\fps@figure{htbp} +\makeatother + +% -- begin:latex-table-short-captions -- +\makeatletter\AtBeginDocument{% +\def\LT@c@ption#1[#2]#3{% % Overwrite the workhorse macro used in formatting a longtable caption. + \LT@makecaption#1\fnum@table{#3}% + \ifdefined\pandoctableshortcapt % If pandoctableshortcapt is defined (even if blank), we should override default behaviour. + \let\@tempa\pandoctableshortcapt% % (Use let, we don't want to expand pandoctableshortcapt!) + \else % If not, fall back to default behaviour + \def\@tempa{#2}% % (Use the argument in square brackets) + \fi + \ifx\@tempa\@empty\else % If @tempa is blank, no lot entry! Otherwise, @tempa becomes the lot title. 
+ {\let\\\space + \addcontentsline{lot}{table}{\protect\numberline{\thetable}{\@tempa}}}% + \fi} +}\makeatother +% -- end:latex-table-short-captions -- +\makeatletter +\@ifpackageloaded{subfig}{}{\usepackage{subfig}} +\@ifpackageloaded{caption}{}{\usepackage{caption}} +\captionsetup[subfloat]{margin=0.5em} +\AtBeginDocument{% +\renewcommand*\figurename{Figure} +\renewcommand*\tablename{Table} +} +\AtBeginDocument{% +\renewcommand*\listfigurename{List of Figures} +\renewcommand*\listtablename{List of Tables} +} +\@ifpackageloaded{float}{}{\usepackage{float}} +\floatstyle{ruled} +\@ifundefined{c@chapter}{\newfloat{codelisting}{h}{lop}}{\newfloat{codelisting}{h}{lop}[chapter]} +\floatname{codelisting}{Listing} +\newcommand*\listoflistings{\listof{codelisting}{List of Listings}} +\makeatother + +\title{Tests for table-short-captions.lua} +\date{} + +\begin{document} +\maketitle + +\listoftables +These tests are written so that if \textbf{bold font} appears in the +LOT, something is wrong. + +The tests are split into two: expected uses, and non-standard +uses/errors.\\ +The non-standard uses are presented in this document for troubleshooting +purposes, and to ensure the filter doesn't crash in corner cases. 
+ +\hypertarget{standard-usage}{% +\section{Standard usage}\label{standard-usage}} + +\begin{longtable}[]{@{}ll@{}} +\caption{This is the \emph{italicised long caption} of tbl1, which does +not have a label.}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\hypertarget{tbl:tbl-label2}{} +\begin{longtable}[]{@{}ll@{}} +\caption{\label{tbl:tbl-label2}This is the \emph{italicised long +caption} of tbl2, in standard \texttt{pandoc-crossref} +form.}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\def\pandoctableshortcapt{} % .unlisted + +\hypertarget{tbl:tbl-label3}{} +\begin{longtable}[]{@{}ll@{}} +\caption{\label{tbl:tbl-label3}This is the \emph{italicised long +caption} of tbl3, which is \textbf{unlisted}.}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\undef\pandoctableshortcapt + +\def\pandoctableshortcapt{Table 4 \emph{short} capt.} + +\hypertarget{tbl:tbl-label4}{} +\begin{longtable}[]{@{}ll@{}} +\caption{\label{tbl:tbl-label4}This is the \emph{italicised long +caption} of tbl4, which has an \textbf{overriding} short-caption. 
This +is the expected usage.}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\undef\pandoctableshortcapt + +\hypertarget{non-standard-usageerrors}{% +\section{Non-standard usage/errors}\label{non-standard-usageerrors}} + +\begin{longtable}[]{@{}ll@{}} +\caption{This is the \emph{italicised long caption} of tbl5, which does +not have a label, but does have empty braces at the end. +\{\}}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\begin{longtable}[]{@{}ll@{}} +\caption{This is the \emph{italicised long caption} of tbl6, which does +not have a label, but does have an empty span at the end. +}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\begin{longtable}[]{@{}ll@{}} +\caption{This is the \emph{italicised long caption} of tbl7, which is +improperly formatted, and will appear in the list of tables. This filter +requires that \texttt{.unlisted} is placed in a span. \{\#tbl:tbl-label7 +.unlisted\}}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\hypertarget{tbl:tbl-label8}{} +\begin{longtable}[]{@{}ll@{}} +\caption{\label{tbl:tbl-label8}This is the \emph{italicised long +caption} of tbl8, which has an empty short-caption. An empty +short-caption does nothing. 
The long caption will still be +used.}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\def\pandoctableshortcapt{} % .unlisted + +\hypertarget{tbl:tbl-label9}{} +\begin{longtable}[]{@{}ll@{}} +\caption{\label{tbl:tbl-label9}This is the \emph{italicised long +caption} of tbl9, which is \textbf{unlisted}, yet has a +short-caption.}\tabularnewline +\toprule +cola & colb\tabularnewline +\midrule +\endfirsthead +\toprule +cola & colb\tabularnewline +\midrule +\endhead +a1 & b1\tabularnewline +a2 & b2\tabularnewline +\bottomrule +\end{longtable} + +\undef\pandoctableshortcapt + +\end{document} diff --git a/paper/lua-filters/table-short-captions/sample.md b/paper/lua-filters/table-short-captions/sample.md new file mode 100644 index 0000000..74c27fb --- /dev/null +++ b/paper/lua-filters/table-short-captions/sample.md @@ -0,0 +1,84 @@ +--- +title: "Tests for table-short-captions.lua" +lot: true +--- + +These tests are written so that if **bold font** appears in the LOT, something is wrong. + +The tests are split into two: expected uses, and non-standard uses/errors. +The non-standard uses are presented in this document for troubleshooting purposes, and to ensure the filter doesn't crash in corner cases. + +# Standard usage + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl1, which does not have a label. + + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl2, in standard `pandoc-crossref` form. {#tbl:tbl-label2} + + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl3, which is **unlisted**. 
[]{#tbl:tbl-label3 .unlisted} + + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl4, which has an **overriding** short-caption. This is the expected usage. []{#tbl:tbl-label4 short-caption="Table 4 *short* capt."} + + +# Non-standard usage/errors + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl5, which does not have a label, but does have empty braces at the end. {} + + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl6, which does not have a label, but does have an empty span at the end. []{} + + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl7, which is improperly formatted, and will appear in the list of tables. This filter requires that `.unlisted` is placed in a span. {#tbl:tbl-label7 .unlisted} + + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl8, which has an empty short-caption. An empty short-caption does nothing. The long caption will still be used. []{#tbl:tbl-label8 short-caption=""} + + +| cola | colb | +| ---- | ---- | +| a1 | b1 | +| a2 | b2 | + +Table: This is the *italicised long caption* of tbl9, which is **unlisted**, yet has a short-caption. 
[]{#tbl:tbl-label9 .unlisted short-caption="Table 9 **unlisted** *short* capt."} diff --git a/paper/lua-filters/table-short-captions/table-short-captions.lua b/paper/lua-filters/table-short-captions/table-short-captions.lua new file mode 100644 index 0000000..6f4970b --- /dev/null +++ b/paper/lua-filters/table-short-captions/table-short-captions.lua @@ -0,0 +1,160 @@ +---LaTeXTableShortCapts – enable `.unlisted` and `short-caption=""` properties +-- for Pandoc conversion to LaTeX + +--[[ +Copyright (c) 2019 Blake Riley + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +]] +local List = require 'pandoc.List' + +-- don't do anything unless we target latex +if FORMAT ~= "latex" then + return {} +end + +--- Code for injection into the LaTeX header, +-- to overwrite a macro in longtable captions. +longtable_caption_mod = [[ +% -- begin:latex-table-short-captions -- +\makeatletter\AtBeginDocument{% +\def\LT@c@ption#1[#2]#3{% % Overwrite the workhorse macro used in formatting a longtable caption. + \LT@makecaption#1\fnum@table{#3}% + \@ifundefined{pandoctableshortcapt} + {\def\@tempa{#2}} % Use default behaviour: argument in square brackets + {\let\@tempa\pandoctableshortcapt} % If defined (even if blank), use to override + \ifx\@tempa\@empty\else % If @tempa is blank, no lot entry! Otherwise, @tempa becomes the lot title. 
+ {\let\\\space + \addcontentsline{lot}{table}{\protect\numberline{\thetable}{\@tempa}}}% + \fi} +}\makeatother +% -- end:latex-table-short-captions -- +]] + +--- Creates a def shortcaption block to be placed before the table +-- @tparam ?string sc : The short-caption property value +-- @treturn Plain : The def shortcaption block +local function defshortcapt(sc) + local scblock = List:new{} + scblock:extend {pandoc.RawInline('tex', "\\def\\pandoctableshortcapt{")} + if sc then + scblock:extend (pandoc.read(sc).blocks[1].c) + end + scblock:extend {pandoc.RawInline('tex', "}")} + if not sc then + scblock:extend {pandoc.RawInline('tex', " % .unlisted")} + end + return pandoc.Plain(scblock) +end + +--- The undef shortcaption block to be placed after the table +local undefshortcapt = pandoc.RawBlock('tex', "\\let\\pandoctableshortcapt\\relax") + +--- Parses a mock "Table Attr". +-- We use the Attr of an empty Span as if it were Table Attr. +-- This function extracts what is needed to build a short-caption. +-- @tparam Attr attr : The Attr of the property Span in the table caption +-- @treturn ?string : The identifier +-- @treturn ?string : The "short-caption" property, if present. +-- @treturn bool : Whether ".unlisted" appeared in the classes +local function parse_table_attrs(attr) + -- Find label + local label = nil + if attr.identifier and (#attr.identifier > 0) then + label = attr.identifier + end + + -- Look for ".unlisted" in classes + local unlisted = false + if attr.classes:includes("unlisted") then + unlisted = true + end + + -- If not unlisted, then find the property short-caption. 
+ local short_caption = nil + if not unlisted then + if (attr.attributes["short-caption"]) and + (#attr.attributes["short-caption"] > 0) then + short_caption = attr.attributes['short-caption'] + end + end + + return label, short_caption, unlisted +end + +--- Wraps a table with shortcaption code +-- @tparam Table tbl : The table with {}-wrapped properties in the caption +-- @treturn List[Blocks] : The table with {label} in the caption, +-- optionally wrapped in shortcaption code +function rewrite_longtable_caption(tbl) + -- Escape if there is no caption present. + if not tbl.caption then + return nil + end + + -- Try find the properties block + local is_properties_span = function (inl) + return (inl.t) and (inl.t == "Span") -- is span + and (inl.content) and (#inl.content == 0) -- is empty span + end + local propspan, idx = tbl.caption:find_if(is_properties_span) + + -- If we couldn't find properties, escape. + if not propspan then + return nil + end + + -- Otherwise, parse it all + local label, short_caption, unlisted = parse_table_attrs(propspan.attr) + + -- Excise the span from the caption + tbl.caption[idx] = nil + + -- Put label back into caption for pandoc-crossref + if label then + tbl.caption:extend {pandoc.Str("{#"..label.."}")} + end + + -- Place new table + local result = List:new{} + if short_caption or unlisted then + result:extend {defshortcapt(short_caption)} + end + result:extend {tbl} + if short_caption or unlisted then + result:extend {undefshortcapt} + end + return result +end + +--- Inserts longtable_caption_mod into the header_includes +-- @tparam Meta meta : The document metadata +-- @treturn Meta : The document metadata, with replacement LaTeX macro +-- in header_includes +function add_longtable_caption_mod(meta) + local header_includes = -- test ? 
a : b + (meta['header-includes'] and meta['header-includes'].t == 'MetaList') + and meta['header-includes'] + or pandoc.MetaList{meta['header-includes']} + header_includes[#header_includes + 1] = + pandoc.MetaBlocks{pandoc.RawBlock('tex', longtable_caption_mod)} + meta['header-includes'] = header_includes + return meta +end + +return { + { + Meta = add_longtable_caption_mod, + Table = rewrite_longtable_caption, + } +} diff --git a/paper/lua-filters/track-changes/.gitignore b/paper/lua-filters/track-changes/.gitignore new file mode 100644 index 0000000..5fdf006 --- /dev/null +++ b/paper/lua-filters/track-changes/.gitignore @@ -0,0 +1,2 @@ +/sample.docx +/sample.pdf diff --git a/paper/lua-filters/track-changes/Makefile b/paper/lua-filters/track-changes/Makefile new file mode 100644 index 0000000..990450e --- /dev/null +++ b/paper/lua-filters/track-changes/Makefile @@ -0,0 +1,26 @@ +.PHONY: test clean + +## PENDING: ensure that LaTeX output can be compiled to PDF. +test: sample.md test-track-changes.sh sample.pdf + @pandoc -t markdown --wrap=preserve \ + --lua-filter=track-changes.lua sample.md | \ + diff --strip-trailing-cr -u - expected_accept.markdown + @pandoc -t markdown --wrap=preserve --track-changes=reject \ + -M trackChanges:reject --lua-filter=track-changes.lua sample.md | \ + diff --strip-trailing-cr -u - expected_reject.markdown + @pandoc -s -t html --wrap=preserve --track-changes=all \ + -M trackChanges:all --lua-filter=track-changes.lua sample.md | \ + diff --strip-trailing-cr -u - expected_draft.html + @pandoc -M trackChanges:all --track-changes=all --wrap=preserve \ + --to=latex --lua-filter=track-changes.lua \ + --standalone sample.md | \ + sh test-track-changes.sh + @rm sample.pdf + +sample.pdf: sample.md track-changes.lua + @pandoc -M trackChanges:all --track-changes=all \ + --lua-filter=track-changes.lua \ + --output $@ $< + +clean: + rm sample.pdf || true diff --git a/paper/lua-filters/track-changes/README.md 
b/paper/lua-filters/track-changes/README.md new file mode 100644 index 0000000..379b4ff --- /dev/null +++ b/paper/lua-filters/track-changes/README.md @@ -0,0 +1,18 @@ +# Tracks changes in LaTeX and HTML or removes them in other output formats + +The Pandoc Docx reader and writer support track changes of MS Word +(command line parameter `--track-changes=accept|reject|all`). + +If `--track-changes=all` was used to read a docx file, track changes +and/or comments are included in the AST as spans and are written to any +other output formats than docx and clutter the output. + +This Lua filter addresses this problem by interpreting the parameter +`--track-changes` (pandoc version >= 2.1.1) or the metadata variable +`trackChanges: accept|reject|all` (set either in a YAML block or with +`-M`) and accepts/rejects changes and removes comments for all output +formats including docx. In case of `--track-changes=all` and for html +and latex, it converts tracked changes and comments to appropriate +commands (for LaTeX provided by the [changes +package](https://ctan.org/pkg/changes)) and tries to mimic the +visualization as in MS Word. diff --git a/paper/lua-filters/track-changes/TODO.md b/paper/lua-filters/track-changes/TODO.md new file mode 100644 index 0000000..86290ec --- /dev/null +++ b/paper/lua-filters/track-changes/TODO.md @@ -0,0 +1,18 @@ +# Ideas and ToDos
+
+- [ ] nested comments
+- [x] comments across paragraphs
+- [ ] implement `paragraph-insertion`, `paragraph-deletion`
+- [/] implement multiple classes (see https://github.com/jgm/pandoc/issues/4270#issuecomment-358996343)
+- [x] treat comments with multiple paragraphs (see [#4270](https://github.com/jgm/pandoc/issues/4270))
+- [x] track changes in chapter titles
+- [ ] combine adjacent Strs after acceptances/rejections
+- [ ] remove track changes from automatic section identifiers
+- [ ] track changes in captions (figure, table, etc.)
+- [ ] color comments with the author's color
+- [x] docx sample
+- [x] `PANDOC_READER_OPTIONS.trackChanges`
+- [x] HTML support with `<ins>`, `<del>` (requested with jgm/pandoc#1560) and `<mark>` with title attribute or spans with CSS
+- [ ] HTML track changes decorations as explained at [Comparing and contrasting ins, del, and s](http://html5doctor.com/ins-del-s) or https://github.com/jgm/pandoc/issues/2884#issuecomment-240263921
+- [ ] auto identifiers; be careful with nested/overlapping comments
+- [ ] citations in comment text
\ No newline at end of file diff --git a/paper/lua-filters/track-changes/expected_accept.markdown b/paper/lua-filters/track-changes/expected_accept.markdown new file mode 100644 index 0000000..9207571 --- /dev/null +++ b/paper/lua-filters/track-changes/expected_accept.markdown @@ -0,0 +1,29 @@ +Track changes in LaTeX and HTML +=============================== + +A **simple** comment from me. + +This is a text with an *exciting* insertion. + +This is/was a text with a deletion. + +Here is the text to be moved. + +Here is a comment with nested changes. + +Here is a multi-line paragraph containing some text and a long deletion wrapping over two lines. + +This is a new paragraph. + +And so is this. + +One more. + +A *header* with a comment {#a-header-width-a-notecomment} +========================= + +Some unmodified text ... + +\newpage + +... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.). diff --git a/paper/lua-filters/track-changes/expected_draft.html b/paper/lua-filters/track-changes/expected_draft.html new file mode 100644 index 0000000..d5c3cc5 --- /dev/null +++ b/paper/lua-filters/track-changes/expected_draft.html @@ -0,0 +1,41 @@ +<!DOCTYPE html> +<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang=""> +<head> + <meta charset="utf-8" /> + <meta name="generator" content="pandoc" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" /> + <title>Track changes in LaTeX and HTML</title> + <style> + code{white-space: pre-wrap;} + span.smallcaps{font-variant: small-caps;} + span.underline{text-decoration: underline;} + div.column{display: inline-block; vertical-align: top; width: 50%;} + </style> + <!--[if lt IE 9]> + <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script> + <![endif]--> +</head> +<body> +<header id="title-block-header"> +<h1 class="title">Track changes in LaTeX and HTML</h1> +</header> +<nav id="TOC" 
role="doc-toc"> + +</nav> +<h1 id="track-changes-in-latex-and-html">Track changes in LaTeX and HTML</h1> +<p>A <mark data-author="Mathias C. Walter" date="2016-05-21T22:14:00Z" data-id="1" title="I agree!"><strong>simple</strong></mark> comment from me.</p> +<p>This is a text with <ins data-author="MCW" date="2014-06-25T10:40:00Z">an exciting</ins> insertion.</p> +<p>This is/was a text with a <del data-author="SWS" date="2014-06-25T10:42:00Z">short</del> deletion.</p> +<p><ins data-author="FKA" date="2016-04-16T08:20:00Z">Here is the text to be moved.</ins></p> +<p><del data-author="John F. Kennedy" date="2016-04-16T08:20:00Z">Here is the text to be moved.</del></p> +<p>Here is a <mark data-author="JFK" date="2016-07-29T16:50:00Z" data-id="2" title="Why?">com<ins data-author="SWS" date="2016-07-29T16:50:00Z">m</ins>ent with nest<del data-author="FKA" date="2016-04-16T08:20:00Z">t</del>ed changes</mark>.</p> +<p>Here is a multi-line paragraph containing some text and a long deletion <del data-author="MCW" date="2016-04-16T08:20:00Z">short insertion</del> wrapping over two lines.</p> +<p>This is <mark data-author="MCW" date="2016-05-09T16:13:00Z" data-id="4" title="A comment across paragraphs.">a new paragraph.</p> +<p>And so</mark> is this.</p> +<p>One <mark data-author="Jesse Rosenthal" date="2016-05-09T16:14:00Z" data-id="5" title="This one has multiple paragraphs. 
See?">more</mark>.</p> +<h1 id="a-header-width-a-notecomment">A <em>header</em> wi<del data-author="FKA" date="2018-03-02T23:07:00Z">d</del>th <ins data-author="JFK" date="2018-03-02T23:07:00Z">a</ins> <mark data-author="FKA" date="2017-08-24T22:14:00Z" data-id="3" title="Note">comment</mark></h1> +<p>Some unmodified text …</p> + +<p>… continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.).</p> +</body> +</html> diff --git a/paper/lua-filters/track-changes/expected_draft.tex b/paper/lua-filters/track-changes/expected_draft.tex new file mode 100644 index 0000000..87ea1ac --- /dev/null +++ b/paper/lua-filters/track-changes/expected_draft.tex @@ -0,0 +1,159 @@ +\PassOptionsToPackage{unicode=true}{hyperref} % options for packages loaded elsewhere +\PassOptionsToPackage{hyphens}{url} +% +\documentclass[ +]{article} +\usepackage{lmodern} +\usepackage{amssymb,amsmath} +\usepackage{ifxetex,ifluatex} +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provides euro and other symbols +\else % if luatex or xelatex + \usepackage{unicode-math} + \defaultfontfeatures{Scale=MatchLowercase} + \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +\IfFileExists{microtype.sty}{% use microtype if available + \usepackage[]{microtype} + \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts +}{} +\makeatletter +\@ifundefined{KOMAClassName}{% if non-KOMA class + \IfFileExists{parskip.sty}{% + \usepackage{parskip} + }{% else + \setlength{\parindent}{0pt} + \setlength{\parskip}{6pt plus 2pt minus 1pt}} +}{% if KOMA class + \KOMAoptions{parskip=half}} +\makeatother +\usepackage{xcolor} +\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available 
+\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}} +\hypersetup{ + pdftitle={Track changes in LaTeX and HTML}, + pdfborder={0 0 0}, + breaklinks=true} +\urlstyle{same} % don't use monospace font for urls +\setlength{\emergencystretch}{3em} % prevent overfull lines +\providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +\setcounter{secnumdepth}{-2} +% Redefines (sub)paragraphs to behave more like sections +\ifx\paragraph\undefined\else + \let\oldparagraph\paragraph + \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} +\fi +\ifx\subparagraph\undefined\else + \let\oldsubparagraph\subparagraph + \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} +\fi + +% set default figure placement to htbp +\makeatletter +\def\fps@figure{htbp} +\makeatother + +\RequirePackage[debrief]{silence} +\ErrorsOff +\usepackage{fancyhdr} +\pagestyle{fancy} +\fancyhf{} +\fancyhead[C]{\leftmark} +\usepackage[markup=underlined,authormarkup=none]{changes} +\definecolor{auth1}{HTML}{4477AA} +\definecolor{auth2}{HTML}{117733} +\definecolor{auth3}{HTML}{999933} +\definecolor{auth4}{HTML}{CC6677} +\definecolor{auth5}{HTML}{AA4499} +\definecolor{auth6}{HTML}{332288} +\usepackage[textsize=scriptsize]{todonotes} +\setlength{\marginparwidth}{3cm} +\makeatletter +\setremarkmarkup{\todo[color=Changes@Color#1!20]{\sffamily\textbf{#1:}~#2}} +\makeatother +\newcommand{\note}[2][]{\added[#1,remark={#2}]{}} +\newcommand\hlnotesingle{% + \bgroup + \expandafter\def\csname sout\space\endcsname{\bgroup \ULdepth =-.8ex \ULset}% + \markoverwith{\textcolor{yellow}{\rule[-.5ex]{.1pt}{2.5ex}}}% + \ULon} +\newcommand\hlnote[1]{\let\helpcmd\hlnotesingle\parhelp#1\par\relax\relax} +\long\def\parhelp#1\par#2\relax{% + \helpcmd{#1}\ifx\relax#2\else\par\parhelp#2\relax\fi% +} + +\makeatletter +\newcommand\ifmoving{% + \ifx\protect\@unexpandable@protect + \expandafter\@firstoftwo + \else + \expandafter\@secondoftwo + \fi +} + 
+\newcommand{\gobbletwo}[2][]{\@bsphack\@esphack} +\newcommand{\gobbleone}[1][]{\@bsphack\@esphack} + +\let\oldadded\added +\let\olddeleted\deleted +\let\oldhlnote\hlnote +\let\oldnote\note +\renewcommand{\added}{\ifmoving{\gobbleone}{\oldadded}} +\renewcommand{\deleted}{\ifmoving{\gobbletwo}{\olddeleted}} +\renewcommand{\hlnote}{\ifmoving{}{\oldhlnote}} +\renewcommand{\note}{\ifmoving{\gobbletwo}{\oldnote}} +\makeatother +\definechangesauthor[name={FKA}, color=auth1]{FKA} +\definechangesauthor[name={JFK}, color=auth2]{JFK} +\definechangesauthor[name={Jesse Rosenthal}, color=auth3]{JR} +\definechangesauthor[name={MCW}, color=auth4]{MCW} +\definechangesauthor[name={SWS}, color=auth5]{SWS} + +\title{Track changes in LaTeX and HTML} +\date{} + +\begin{document} +\maketitle + +{ +\setcounter{tocdepth}{3} +\tableofcontents +} +\hypertarget{track-changes-in-latex-and-html}{% +\section{Track changes in LaTeX and HTML}\label{track-changes-in-latex-and-html}} + +A \note[id=MCW]{I agree!}\hlnote{\textbf{simple}} comment from me. + +This is a text with \added[id=MCW]{an exciting} insertion. + +This is/was a text with a \deleted[id=SWS]{short} deletion. + +\added[id=FKA]{Here is the text to be moved.} + +\deleted[id=JFK]{Here is the text to be moved.} + +Here is a \note[id=JFK]{Why?}\hlnote{com\added[id=SWS]{m}ent with nest\deleted[id=FKA]{t}ed changes}. + +Here is a multi-line paragraph containing some text and a long deletion \deleted[id=MCW]{short insertion} wrapping over two lines. + +This is \note[id=MCW]{A comment across paragraphs.}\hlnote{a new paragraph. + +And so} is this. + +One \note[id=JR]{This one has multiple paragraphs. \newline \newline See?}\hlnote{more}. 
+ +\hypertarget{a-header-width-a-notecomment}{% +\section{\texorpdfstring{A \emph{header} wi\deleted[id=FKA]{d}th \added[id=JFK]{a} \note[id=FKA]{Note}\hlnote{comment}}{A header with comment}}\label{a-header-width-a-notecomment}} + +Some unmodified text \ldots{} + +\newpage + +\ldots{} continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.). + +\end{document} diff --git a/paper/lua-filters/track-changes/expected_reject.markdown b/paper/lua-filters/track-changes/expected_reject.markdown new file mode 100644 index 0000000..5059f34 --- /dev/null +++ b/paper/lua-filters/track-changes/expected_reject.markdown @@ -0,0 +1,29 @@ +Track changes in LaTeX and HTML +=============================== + +A **simple** comment from me. + +This is a text with insertion. + +This is/was a text with a *short* deletion. + +Here is the text to be moved. + +Here is a coment with nestted changes. + +Here is a multi-line paragraph containing some text and a long deletion short insertion wrapping over two lines. + +This is a new paragraph. + +And so is this. + +One more. + +A *header* width comment {#a-header-width-a-notecomment} +======================== + +Some unmodified text ... + +\newpage + +... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.). diff --git a/paper/lua-filters/track-changes/sample.md b/paper/lua-filters/track-changes/sample.md new file mode 100644 index 0000000..42493ed --- /dev/null +++ b/paper/lua-filters/track-changes/sample.md @@ -0,0 +1,43 @@ +--- +title: Track changes in LaTeX and HTML +toc: true +header-includes: | + ```{=latex} + \RequirePackage[debrief]{silence} + \ErrorsOff + \usepackage{fancyhdr} + \pagestyle{fancy} + \fancyhf{} + \fancyhead[C]{\leftmark} + ``` +... + +# Track changes in LaTeX and HTML + +A [I agree!]{.comment-start id="1" author="Mathias C. Walter" date="2016-05-21T22:14:00Z"}**simple**[]{.comment-end id="1"} comment from me. 
+ +This is a text with [an *exciting*]{.insertion author="MCW" date="2014-06-25T10:40:00Z"} insertion. + +This is/was a text with a [*short*]{.deletion author="SWS" date="2014-06-25T10:42:00Z"} deletion. + +[Here is the text to be moved.]{.insertion author="FKA" date="2016-04-16T08:20:00Z"} + +[Here is the text to be moved.]{.deletion author="John F. Kennedy" date="2016-04-16T08:20:00Z"} + +Here is a [Why?]{.comment-start id="2" author="JFK" date="2016-07-29T16:50:00Z"}com[m]{.insertion author="SWS" date="2016-07-29T16:50:00Z"}ent with nest[t]{.deletion author="FKA" date="2016-04-16T08:20:00Z"}ed changes[]{.comment-end id="2"}. + +Here is a multi-line paragraph containing some text and a long deletion [short insertion]{.deletion author="MCW" date="2016-04-16T08:20:00Z"} wrapping over two lines. + +This is [A comment across paragraphs.]{.comment-start id="4" author="MCW" date="2016-05-09T16:13:00Z"}a new paragraph. + +And so[]{.comment-end id="4"} is this. + +One [This one has multiple paragraphs. ¶ ¶ See?]{.comment-start id="5" author="Jesse Rosenthal" date="2016-05-09T16:14:00Z"}more[]{.comment-end id="5"}. + +# A *header* wi[d]{.deletion author="FKA" date="2018-03-02T23:07:00Z"}th [a]{.insertion author="JFK" date="2018-03-02T23:07:00Z"} [Note]{.comment-start id="3" author="FKA" date="2017-08-24T22:14:00Z"}comment[]{.comment-end id="3"} + +Some unmodified text ... + +\newpage + +... continued from previous page just to test page headers in supporting formats (LaTeX, DOCX, etc.). diff --git a/paper/lua-filters/track-changes/test-track-changes.sh b/paper/lua-filters/track-changes/test-track-changes.sh new file mode 100644 index 0000000..b7074ef --- /dev/null +++ b/paper/lua-filters/track-changes/test-track-changes.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +latex_result="$(cat -)" + +assert_contains () +{ + printf '%s' "$latex_result" | grep -qF "$1" - + if [ $? 
-ne 0 ]; then + printf 'Output does not contain `%s`.\n' "$1" >&2 + exit 1 + fi +} + +# whether we are using the change package +assert_contains <<EOF +\usepackage{changes} +EOF + +# Author colors +assert_contains '\definechangesauthor[name={JFK}, color=auth2]{JFK}' + +# Additions, notes, and deletions +assert_contains <<EOF +Here is a \note[id=JFK]{Why?}\hlnote{com\added[id=SWS]{m}ent with nest\deleted[id=FKA]{t}ed changes}. +EOF diff --git a/paper/lua-filters/track-changes/track-changes.lua b/paper/lua-filters/track-changes/track-changes.lua new file mode 100644 index 0000000..4c447ea --- /dev/null +++ b/paper/lua-filters/track-changes/track-changes.lua @@ -0,0 +1,247 @@ +local authors = {} + +local function is_tex(format) + return format == 'latex' or format == 'tex' or format == 'context' +end + +local function is_html (format) + return format == 'html' or format == 'html4' or format == 'html5' +end + +local function is_wordprocessing (format) + return format == 'docx' or format == 'odt' +end + +header_track_changes = [[ + +\makeatletter +\PassOptionsToPackage{textsize=scriptsize}{todonotes} +\PassOptionsToPackage{markup=underlined,authormarkup=none,commentmarkup=todo}{changes} +\usepackage{changes} +\@ifpackagelater{changes}{2018/11/03}{% +}{% + \usepackage{todonotes} + \setremarkmarkup{\todo[color=Changes@Color#1!20]{\sffamily\textbf{#1:}~#2}} +}% +\makeatother +\definecolor{auth1}{HTML}{4477AA} +\definecolor{auth2}{HTML}{117733} +\definecolor{auth3}{HTML}{999933} +\definecolor{auth4}{HTML}{CC6677} +\definecolor{auth5}{HTML}{AA4499} +\definecolor{auth6}{HTML}{332288} +\setlength{\marginparwidth}{3cm} +\newcommand{\note}[2][]{\added[#1,remark={#2}]{}} +\newcommand\hlnotesingle{% + \bgroup + \expandafter\def\csname sout\space\endcsname{\bgroup \ULdepth =-.8ex \ULset}% + \markoverwith{\textcolor{yellow}{\rule[-.5ex]{.1pt}{2.5ex}}}% + \ULon} +\newcommand\hlnote[1]{\let\helpcmd\hlnotesingle\parhelp#1\par\relax\relax} +\long\def\parhelp#1\par#2\relax{% + 
\helpcmd{#1}\ifx\relax#2\else\par\parhelp#2\relax\fi% +} + +\makeatletter +\newcommand\ifmoving{% + \ifx\protect\@unexpandable@protect + \expandafter\@firstoftwo + \else + \expandafter\@secondoftwo + \fi +} + +\newcommand{\gobbletwo}[2][]{\@bsphack\@esphack} +\newcommand{\gobbleone}[1][]{\@bsphack\@esphack} + +\let\oldadded\added +\let\olddeleted\deleted +\let\oldhlnote\hlnote +\let\oldnote\note +\renewcommand{\added}{\ifmoving{\gobbleone}{\oldadded}} +\renewcommand{\deleted}{\ifmoving{\gobbletwo}{\olddeleted}} +\renewcommand{\hlnote}{\ifmoving{}{\oldhlnote}} +\renewcommand{\note}{\ifmoving{\gobbletwo}{\oldnote}} +\makeatother +]] + +local function initials(s) + local ignore = { -- list of words to ignore + ['dr'] = true, ['mr'] = true, ['ms'] = true, ['mrs'] = true, ['prof'] = true, + ['mx'] = true, ['sir'] = true, + } + + local ans = {} + for w in s:gmatch '[%w\']+' do + if not ignore[w:lower()] then ans[#ans+1] = w:sub(1,1):upper() end + end + return table.concat(ans) +end + +relinerHtml = { + Str = function (s) + if s.text == "¶" then + return pandoc.Str(' ') + end + end +} + +relinerTex = { + Str = function (s) + if s.text == "¶" then + return pandoc.Str('\\newline') + end + end +} + +reliner = { + Str = function (s) + if s.text == "¶" then + return pandoc.LineBreak() + end + end +} + +function SpanReliner(elem) + local classes = elem.classes or elem.attr.classes + if classes:includes("comment-start") then + return pandoc.walk_inline(elem, reliner) + end +end + +local toTex = {["comment-start"] = "\\note", insertion = "\\added", deletion = "\\deleted"} + +local function TrackingSpanToTex(elem) + if toTex[elem.classes[1]] ~= nil then + local author = elem.attributes.author + local inits = author:find' ' and initials(author) or author + authors[inits] = author + local s = toTex[elem.classes[1]] .. '[id=' .. inits .. ']{' + if elem.classes:includes("comment-start") then + s = s .. pandoc.utils.stringify(pandoc.walk_inline(elem, relinerTex)) .. 
'}\\hlnote{' + else + s = s .. pandoc.utils.stringify(elem.content) .. '}' + end + return pandoc.RawInline('latex', s) + elseif elem.classes:includes("comment-end") then + return pandoc.RawInline('latex', '}') + end +end + +local function pairsByKeys(t, f) + local a = {} + for n in pairs(t) do table.insert(a, n) end + table.sort(a, f) + local i = 0 + local iter = function () + i = i + 1 + return a[i], t[a[i]] + end + return iter +end + +--- Add packages to the header includes. +local function add_track_changes(meta) + local header_includes + if meta['header-includes'] and meta['header-includes'].t == 'MetaList' then + header_includes = meta['header-includes'] + else + header_includes = pandoc.MetaList{meta['header-includes']} + end + header_includes[#header_includes + 1] = + pandoc.MetaBlocks{pandoc.RawBlock('latex', header_track_changes)} + local a = 1 + for key,value in pairsByKeys(authors) do -- sorted author list; otherwise make test may fail + header_includes[#header_includes + 1] = + pandoc.MetaBlocks{pandoc.RawBlock('latex', '\\definechangesauthor[name={' .. value .. '}, color=auth' .. a .. ']{' .. key .. '}')} + a = a + 1 + end + meta['header-includes'] = header_includes + return meta +end + +local toHtml = {["comment-start"] = "mark", insertion = "ins", deletion = "del"} + +local function TrackingSpanToHtml(elem) + if toHtml[elem.classes[1]] ~= nil then + local author = elem.attributes.author + local inits = author:find' ' and initials(author) or author + authors[inits] = author + local s = '<' .. toHtml[elem.classes[1]] + for k,v in pairs(elem.attributes) do + local hattr = k + if hattr ~= 'date' then hattr = 'data-' .. hattr end + s = s .. ' ' .. hattr .. '="' .. v .. '"' + end + if elem.classes:includes("comment-start") then + if elem.identifier then + s = s .. ' data-id="' .. elem.identifier .. '"' + end + s = s .. ' title="' .. pandoc.utils.stringify(pandoc.walk_inline(elem, relinerHtml)) .. '">' + else + s = s .. '>' .. 
pandoc.utils.stringify(elem.content) .. '</' .. toHtml[elem.classes[1]] .. '>' + end + return pandoc.RawInline('html', s) + elseif elem.classes:includes("comment-end") then + return pandoc.RawInline('html', '</mark>') + end +end + +local function SpanAcceptChanges(elem) + if elem.classes:includes("comment-start") or elem.classes:includes("comment-end") then + return {} + elseif elem.classes:includes("insertion") then + return elem.content + elseif elem.classes:includes("deletion") then + return {} + end +end + +local function SpanRejectChanges(elem) + if elem.classes:includes("comment-start") or elem.classes:includes("comment-end") then + return {} + elseif elem.classes:includes("insertion") then + return {} + elseif elem.classes:includes("deletion") then + return elem.content + end +end + +function Pandoc(doc) + local meta = doc.meta + local trackChangesOptions = {all = 'AllChanges', accept = 'AcceptChanges', reject = 'RejectChanges' } + local tc = meta and meta['trackChanges'] + tc = type(meta['trackChanges']) == 'table' and pandoc.utils.stringify(meta['trackChanges']) or meta['trackChanges'] or 'accept' + local trackChanges = PANDOC_READER_OPTIONS and PANDOC_READER_OPTIONS.trackChanges or trackChangesOptions[tc] + meta.trackChanges = nil -- remove it from the metadata + + local M = {} + if trackChanges == 'AllChanges' then + if is_html(FORMAT) then + M[#M + 1] = { + Span = TrackingSpanToHtml + } + elseif is_tex(FORMAT) then + M[#M + 1] = { + Span = TrackingSpanToTex, + } + elseif is_wordprocessing(FORMAT) then + M[#M + 1] = { Span = SpanReliner } + end + elseif trackChanges == 'RejectChanges' then + M[#M + 1] = { Span = SpanRejectChanges } + else -- otherwise assumes AcceptChanges + M[#M + 1] = { Span = SpanAcceptChanges } + end + + if #M then + local blocks = doc.blocks + for i = 1, #M do + blocks = pandoc.walk_block(pandoc.Div(blocks), M[i]).content + end + if trackChanges == 'AllChanges' and is_tex(FORMAT) then + meta = add_track_changes(meta) + end + return 
pandoc.Pandoc(blocks, meta) + end +end diff --git a/paper/lua-filters/wordcount/Makefile b/paper/lua-filters/wordcount/Makefile new file mode 100644 index 0000000..7dfba48 --- /dev/null +++ b/paper/lua-filters/wordcount/Makefile @@ -0,0 +1,2 @@ +test: + @pandoc --lua-filter=wordcount.lua sample.md | diff --strip-trailing-cr -u expected.txt - diff --git a/paper/lua-filters/wordcount/README.md b/paper/lua-filters/wordcount/README.md new file mode 100644 index 0000000..45efc2f --- /dev/null +++ b/paper/lua-filters/wordcount/README.md @@ -0,0 +1,11 @@ +# wordcount + +This filter counts the words in the body of a document (omitting +metadata like titles and abstracts), including words in code. +It should be more accurate than `wc -w` run directly on a +Markdown document, since the latter will count markup +characters, like the `#` in front of an ATX header, or +tags in HTML documents, as words. + +To run it, `pandoc --lua-filter wordcount.lua myfile.md`. +The word count will be printed to stdout. diff --git a/paper/lua-filters/wordcount/expected.txt b/paper/lua-filters/wordcount/expected.txt new file mode 100644 index 0000000..dc608fc --- /dev/null +++ b/paper/lua-filters/wordcount/expected.txt @@ -0,0 +1 @@ +15 words in body diff --git a/paper/lua-filters/wordcount/sample.md b/paper/lua-filters/wordcount/sample.md new file mode 100644 index 0000000..240bee0 --- /dev/null +++ b/paper/lua-filters/wordcount/sample.md @@ -0,0 +1,12 @@ +--- +title: Metadata words don't count +abstract: ignored! +--- + +# Word count + +This document has *a **lot** of [words](url "title")* (15).[^1] + + code is counted + +[^1]: Footnotes count. 
-- paper/lua-filters/wordcount/wordcount.lua
-- counts words in a document
--
-- Pandoc Lua filter: walks the document body, tallies words found in
-- Str, Code and CodeBlock elements, prints the total and exits.

-- Running total of counted words. Deliberately global, together with
-- `wordcount` and `Pandoc`, so the filter's module-level names are unchanged.
words = 0

--- Add the number of whitespace-separated tokens in `el.text` to `words`.
-- Shared by the Code and CodeBlock handlers, which count identically.
-- @param el element with a string `text` field
local function count_code_words(el)
  -- gsub's second return value is the number of matches. Both results are
  -- declared local so that `_` and `n` no longer leak into the global
  -- environment (the original assigned them as globals).
  local _, n = el.text:gsub("%S+", "")
  words = words + n
end

-- Element handlers applied while walking the document body.
wordcount = {
  Str = function(el)
    -- we don't count a word if it's entirely punctuation:
    if el.text:match("%P") then
      words = words + 1
    end
  end,

  Code = count_code_words,

  CodeBlock = count_code_words,
}

--- Filter entry point: count the words of the body and report them.
-- Wraps the document's blocks in a Div so they can be walked as a single
-- block, prints "<count> words in body" to stdout, then terminates the
-- process with exit status 0.
-- @param el the document passed in by pandoc; only `el.blocks` is read
function Pandoc(el)
  -- skip metadata, just count body:
  pandoc.walk_block(pandoc.Div(el.blocks), wordcount)
  print(words .. " words in body")
  os.exit(0)
end