diff options
Diffstat (limited to 'paper/lua-filters/spellcheck')
-rw-r--r-- | paper/lua-filters/spellcheck/Makefile | 2 | ||||
-rw-r--r-- | paper/lua-filters/spellcheck/README.md | 42 | ||||
-rw-r--r-- | paper/lua-filters/spellcheck/expected.txt | 2 | ||||
-rw-r--r-- | paper/lua-filters/spellcheck/sample.md | 15 | ||||
-rw-r--r-- | paper/lua-filters/spellcheck/spellcheck.lua | 70 |
5 files changed, 131 insertions, 0 deletions
diff --git a/paper/lua-filters/spellcheck/Makefile b/paper/lua-filters/spellcheck/Makefile new file mode 100644 index 0000000..9d51bff --- /dev/null +++ b/paper/lua-filters/spellcheck/Makefile @@ -0,0 +1,2 @@ +test: + @pandoc --lua-filter=spellcheck.lua sample.md | sort | diff --strip-trailing-cr -u expected.txt - diff --git a/paper/lua-filters/spellcheck/README.md b/paper/lua-filters/spellcheck/README.md new file mode 100644 index 0000000..5f5d6a3 --- /dev/null +++ b/paper/lua-filters/spellcheck/README.md @@ -0,0 +1,42 @@ +# spellcheck + +This filter checks the spelling of words in the body of the +document (omitting metadata). The external program `aspell` is +used for the checking, and must be present in the path. + +Why use this instead of just running `aspell` on the +document's source? Because this filter is sensitive to +the semantics of the document in ways that `aspell` is +not: + +- Material in code spans, raw HTML, URLs in links, + and math is not spell-checked, eliminating a big + class of false positives. + +- The filter is sensitive to the `lang` specified in + the document's metadata; this will be treated as the + default language for the document. + +- It is also sensitive to `lang` attributes on native + divs and spans. Thus, for example, in an English + document, `[chevaux]{lang=fr}` will not be registered + as a spelling error. + +To run it, + + pandoc --lua-filter spellcheck.lua sample.md + +A list of misspelled words (or at any rate, words not +in the appropriate dictionary) will be printed to stdout. +If the word is in a div or span with a non-default `lang` +attribute, the relevant language will be indicated in +brackets after the word, separated by a tab. + +To add words to the list for a language, you can add files +with names `.aspell.LANG.pws` in your home directory. 
Example: + +``` +% cat ~/.aspell.en.pws +personal_ws-1.1 en 0 +goopy +``` diff --git a/paper/lua-filters/spellcheck/expected.txt b/paper/lua-filters/spellcheck/expected.txt new file mode 100644 index 0000000..dd973c8 --- /dev/null +++ b/paper/lua-filters/spellcheck/expected.txt @@ -0,0 +1,2 @@ +missspeling [en] +summer diff --git a/paper/lua-filters/spellcheck/sample.md b/paper/lua-filters/spellcheck/sample.md new file mode 100644 index 0000000..31f7834 --- /dev/null +++ b/paper/lua-filters/spellcheck/sample.md @@ -0,0 +1,15 @@ +--- +lang: fr-FR +... + +Ces sont des mots français. +Mais pas summer. + +[This is a sentence in English, +with one missspeling.]{lang=en} + +::: {lang=en} +Here's a div in English. +Code is ignored: `baoeuthasoe`{.nolang}. +So are [URLs](http://example.com/notaword). +::: diff --git a/paper/lua-filters/spellcheck/spellcheck.lua b/paper/lua-filters/spellcheck/spellcheck.lua new file mode 100644 index 0000000..85ae281 --- /dev/null +++ b/paper/lua-filters/spellcheck/spellcheck.lua @@ -0,0 +1,70 @@ +-- lua filter for spell checking: requires 'aspell'. +-- Copyright (C) 2017-2019 John MacFarlane, released under MIT license + +local text = require('text') +local words = {} +local deflang + +local function add_to_dict(lang, t) + if not words[lang] then + words[lang] = {} + end + if not words[lang][t] then + words[lang][t] = (words[lang][t] or 0) + 1 + end +end + +local function get_deflang(meta) + deflang = (meta.lang and meta.lang[1] and meta.lang[1].c) or 'en' + -- the following is better but won't work in pandoc 2.0.6. 
+  -- it requires pandoc commit ecc46e229fde934f163d1f646383d24bfe2039e1:
+  -- deflang = (meta.lang and pandoc.utils.stringify(meta.lang)) or 'en'
+  return {} -- eliminate meta so it doesn't get spellchecked
+end
+
+-- Pipe every word collected for `lang` through `aspell list -l <lang>`
+-- and write each word aspell reports to stdout, one per line.  When
+-- `lang` differs from the document default (`deflang`), the word is
+-- followed by a tab and the language in square brackets.
+local function run_spellcheck(lang)
+  local keys = {}
+  for word, _ in pairs(words[lang] or {}) do
+    keys[#keys + 1] = word
+  end
+  -- Nothing collected for this language: don't spawn aspell at all.
+  if #keys == 0 then
+    return
+  end
+  -- pairs() visits keys in an unspecified order; sort so that the input
+  -- handed to aspell is the same on every run.
+  table.sort(keys)
+  local inp = table.concat(keys, '\n')
+  local outp = pandoc.pipe('aspell', {'list','-l',lang}, inp)
+  -- Take each non-empty output line as one reported word.  The previous
+  -- pattern "(%a*)\n" only matched ASCII letters, so words containing
+  -- accented (multi-byte) characters, apostrophes or hyphens were cut
+  -- down to their trailing ASCII run, and a last line without a newline
+  -- was dropped.  Excluding \r also copes with CRLF line endings.
+  for w in string.gmatch(outp, "([^\r\n]+)") do
+    io.write(w)
+    if lang ~= deflang then
+      io.write("\t[" .. lang .. "]")
+    end
+    io.write("\n")
+  end
+end
+
+-- Pandoc-level handler, run after the Div/Span pass: adds every Str
+-- still present in the document to the default-language word list,
+-- spell-checks each language that has collected words, and then exits
+-- the process so that no converted document is written.
+local function results(el)
+  pandoc.walk_block(pandoc.Div(el.blocks), {Str = function(e) add_to_dict(deflang, e.text) end})
+  -- Visit languages in sorted order so the report order is reproducible.
+  local langs = {}
+  for lang, _ in pairs(words) do
+    langs[#langs + 1] = lang
+  end
+  table.sort(langs)
+  for _, lang in ipairs(langs) do
+    run_spellcheck(lang)
+  end
+  os.exit(0)
+end
+
+-- Str handler: record the string's text under the default language.
+local function checkstr(el)
+  add_to_dict(deflang, el.text)
+end
+
+-- Span handler: if the span has a `lang` attribute, record every Str
+-- inside it under that language.  Spans without `lang` are left alone.
+local function checkspan(el)
+  local lang = el.attributes.lang
+  if not lang then return nil end
+  pandoc.walk_inline(el, {Str = function(e) add_to_dict(lang, e.text) end})
+  return {} -- remove span, so it doesn't get checked again
+end
+
+-- Div handler: same as checkspan, but for block-level divs.
+local function checkdiv(el)
+  local lang = el.attributes.lang
+  if not lang then return nil end
+  pandoc.walk_block(el, {Str = function(e) add_to_dict(lang, e.text) end})
+  return {} -- remove div, so it doesn't get checked again
+end
+
+-- Three passes, in order: read the default language from the metadata,
+-- collect (and remove) language-tagged divs and spans, then collect the
+-- remaining strings and print the report.
+return {{Meta = get_deflang},
+        {Div = checkdiv, Span = checkspan},
+        {Str = checkstr, Pandoc = results}} |