summaryrefslogtreecommitdiff
path: root/paper/lua-filters/spellcheck
diff options
context:
space:
mode:
Diffstat (limited to 'paper/lua-filters/spellcheck')
-rw-r--r--paper/lua-filters/spellcheck/Makefile2
-rw-r--r--paper/lua-filters/spellcheck/README.md42
-rw-r--r--paper/lua-filters/spellcheck/expected.txt2
-rw-r--r--paper/lua-filters/spellcheck/sample.md15
-rw-r--r--paper/lua-filters/spellcheck/spellcheck.lua70
5 files changed, 131 insertions, 0 deletions
diff --git a/paper/lua-filters/spellcheck/Makefile b/paper/lua-filters/spellcheck/Makefile
new file mode 100644
index 0000000..9d51bff
--- /dev/null
+++ b/paper/lua-filters/spellcheck/Makefile
@@ -0,0 +1,2 @@
+test:
+ @pandoc --lua-filter=spellcheck.lua sample.md | sort | diff --strip-trailing-cr -u expected.txt -
diff --git a/paper/lua-filters/spellcheck/README.md b/paper/lua-filters/spellcheck/README.md
new file mode 100644
index 0000000..5f5d6a3
--- /dev/null
+++ b/paper/lua-filters/spellcheck/README.md
@@ -0,0 +1,42 @@
+# spellcheck
+
+This filter checks the spelling of words in the body of the
+document (omitting metadata). The external program `aspell` is
+used for the checking, and must be present in the path.
+
+Why use this instead of just running `aspell` on the
+document's source? Because this filter is sensitive to
+the semantics of the document in ways that `aspell` is
+not:
+
+- Material in code spans, raw HTML, URLs in links,
+ and math is not spell-checked, eliminating a big
+ class of false positives.
+
+- The filter is sensitive to the `lang` specified in
+ the document's metadata; this will be treated as the
+ default language for the document.
+
+- It is also sensitive to `lang` attributes on native
+ divs and spans. Thus, for example, in an English
+ document, `[chevaux]{lang=fr}` will not be registered
+ as a spelling error.
+
+To run it,
+
+ pandoc --lua-filter spellcheck.lua sample.md
+
+A list of misspelled words (or at any rate, words not
+in the appropriate dictionary) will be printed to stdout.
+If the word is in a div or span with a non-default `lang`
+attribute, the relevant language will be indicated in
+brackets after the word, separated by a tab.
+
+To add words to the list for a language, you can add files
+with names `.aspell.LANG.pws` in your home directory. Example:
+
+```
+% cat ~/.aspell.en.pws
+personal_ws-1.1 en 0
+goopy
+```
diff --git a/paper/lua-filters/spellcheck/expected.txt b/paper/lua-filters/spellcheck/expected.txt
new file mode 100644
index 0000000..dd973c8
--- /dev/null
+++ b/paper/lua-filters/spellcheck/expected.txt
@@ -0,0 +1,2 @@
+missspeling [en]
+summer
diff --git a/paper/lua-filters/spellcheck/sample.md b/paper/lua-filters/spellcheck/sample.md
new file mode 100644
index 0000000..31f7834
--- /dev/null
+++ b/paper/lua-filters/spellcheck/sample.md
@@ -0,0 +1,15 @@
+---
+lang: fr-FR
+...
+
+Ces sont des mots français.
+Mais pas summer.
+
+[This is a sentence in English,
+with one missspeling.]{lang=en}
+
+::: {lang=en}
+Here's a div in English.
+Code is ignored: `baoeuthasoe`{.nolang}.
+So are [URLs](http://example.com/notaword).
+:::
diff --git a/paper/lua-filters/spellcheck/spellcheck.lua b/paper/lua-filters/spellcheck/spellcheck.lua
new file mode 100644
index 0000000..85ae281
--- /dev/null
+++ b/paper/lua-filters/spellcheck/spellcheck.lua
@@ -0,0 +1,70 @@
+-- lua filter for spell checking: requires 'aspell'.
+-- Copyright (C) 2017-2019 John MacFarlane, released under MIT license
+
+local text = require('text')
+local words = {}
+local deflang
+
+--- Remember a word under a language so it can be spell-checked later.
+-- `words[lang]` is created on first use and acts as a set: the first time a
+-- word is seen it is stored with the value 1, and later sightings leave the
+-- entry untouched.
+-- @param lang language code the word belongs to
+-- @param t the word text
+local function add_to_dict(lang, t)
+  local seen = words[lang]
+  if seen == nil then
+    seen = {}
+    words[lang] = seen
+  end
+  if not seen[t] then
+    seen[t] = 1
+  end
+end
+
+--- Meta handler: determine the document's default language.
+-- Stores the result in the file-level `deflang` (falling back to 'en') and
+-- returns an empty table in place of the metadata.
+-- @param meta the document metadata
+-- @return an empty table, so the metadata does not get spell-checked
+local function get_deflang(meta)
+  local lang = meta.lang
+  local code
+  if type(lang) == 'string' then
+    -- metadata value delivered as a plain Lua string
+    code = lang
+  elseif type(lang) == 'table' and lang[1] then
+    -- The rest of this filter reads Str content through `.text`; `.c` is
+    -- kept as a fallback for elements that only expose the legacy field.
+    code = lang[1].text or lang[1].c
+  end
+  deflang = code or 'en'
+  -- the following is better but won't work in pandoc 2.0.6.
+  -- it requires pandoc commit ecc46e229fde934f163d1f646383d24bfe2039e1:
+  -- deflang = (meta.lang and pandoc.utils.stringify(meta.lang)) or 'en'
+  return {} -- eliminate meta so it doesn't get spellchecked
+end
+
+--- Pipe every word collected for `lang` through aspell and print the rejects.
+-- Each word aspell reports is written to stdout on its own line. When `lang`
+-- is not the document default, a tab and the language in brackets follow the
+-- word.
+-- @param lang language code whose collected words should be checked
+local function run_spellcheck(lang)
+  local keys = {}
+  for word, _ in pairs(words[lang]) do
+    keys[#keys + 1] = word
+  end
+  -- pairs() order is unspecified; sort so the list handed to aspell is
+  -- reproducible from run to run.
+  table.sort(keys)
+  local inp = table.concat(keys, '\n')
+  local outp = pandoc.pipe('aspell', {'list', '-l', lang}, inp)
+  local suffix = ''
+  if lang ~= deflang then
+    suffix = "\t[" .. lang .. "]"
+  end
+  -- Take each non-empty output line whole. The former pattern "(%a*)\n"
+  -- only accepted ASCII letters, so a reported word containing accented
+  -- (multi-byte) characters or an apostrophe was cut down to its trailing
+  -- ASCII letters, and blank lines produced empty entries.
+  for w in string.gmatch(outp, "[^\r\n]+") do
+    io.write(w, suffix, "\n")
+  end
+end
+
+--- Pandoc handler: gather remaining words, spell-check them, then exit.
+-- Walks the document's blocks, filing every Str under the default language,
+-- runs the spell check once per collected language, and ends the process
+-- with status 0.
+-- @param el the Pandoc document
+local function results(el)
+  pandoc.walk_block(pandoc.Div(el.blocks), {Str = function(e) add_to_dict(deflang, e.text) end})
+  -- pairs() order is unspecified, so collect and sort the language codes to
+  -- make the order of the report reproducible.
+  local langs = {}
+  for lang in pairs(words) do
+    langs[#langs + 1] = lang
+  end
+  table.sort(langs)
+  for _, lang in ipairs(langs) do
+    run_spellcheck(lang)
+  end
+  -- Ends the process here; presumably so that pandoc does not go on to
+  -- write the converted document after the word list.
+  os.exit(0)
+end
+
+--- Str handler: file a word of ordinary text under the default language.
+-- @param el element whose `text` field holds the word
+local function checkstr(el)
+  local word = el.text
+  add_to_dict(deflang, word)
+end
+
+--- Span handler: collect the words of a span that carries a `lang` attribute.
+-- Spans without the attribute are left alone (nil is returned). Otherwise
+-- every Str inside the span is filed under the span's language and the span
+-- is replaced by an empty list.
+-- @param el the Span element
+-- @return nil to keep the span, or {} to drop it
+local function checkspan(el)
+  local span_lang = el.attributes.lang
+  if span_lang then
+    local collector = {
+      Str = function(s) add_to_dict(span_lang, s.text) end
+    }
+    pandoc.walk_inline(el, collector)
+    return {} -- drop the span so its words are not checked a second time
+  end
+  return nil
+end
+
+--- Div handler: collect the words of a div that carries a `lang` attribute.
+-- Divs without the attribute are left alone (nil is returned). Otherwise
+-- every Str inside the div is filed under the div's language and the div is
+-- replaced by an empty list.
+-- @param el the Div element
+-- @return nil to keep the div, or {} to drop it
+local function checkdiv(el)
+  local div_lang = el.attributes.lang
+  if div_lang then
+    local collector = {
+      Str = function(s) add_to_dict(div_lang, s.text) end
+    }
+    pandoc.walk_block(el, collector)
+    return {} -- drop the div so its words are not checked a second time
+  end
+  return nil
+end
+
+-- The filter is a list of three passes:
+--   1. read the default language from the metadata;
+--   2. collect (and remove) divs and spans that declare their own `lang`;
+--   3. file all remaining text under the default language and report.
+local read_default_lang = {Meta = get_deflang}
+local tagged_regions = {Div = checkdiv, Span = checkspan}
+local remaining_text = {Str = checkstr, Pandoc = results}
+return {read_default_lang, tagged_regions, remaining_text}