summaryrefslogtreecommitdiff
path: root/paper/lua-filters/scholarly-metadata/scholarly-metadata.lua
blob: 3ec529cff4b1d08d122953e11aa768949b47eb6b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
--[[
ScholarlyMeta – normalize author/affiliation meta variables

Copyright (c) 2017-2019 Albert Krewinkel, Robert Winkler

Permission to use, copy, modify, and/or distribute this software for any purpose
with or without fee is hereby granted, provided that the above copyright notice
and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
]]
local List = require 'pandoc.List'

--- Split a string at commas.
-- Every field is trimmed of leading and trailing whitespace. Fields which
-- are empty after trimming (as produced by a trailing or doubled comma) are
-- dropped, so callers never receive blank entries.
-- @param str  the string to split
-- @return a pandoc List containing the trimmed, non-empty fields
local function comma_separated_values(str)
  local acc = List:new{}
  -- `[^,]+` cannot match the empty string; this keeps the result independent
  -- of how the running Lua version's `gmatch` treats empty matches.
  for field in str:gmatch('[^,]+') do
    local trimmed = field:match('^%s*(.-)%s*$')
    if trimmed ~= '' then
      acc[#acc + 1] = trimmed
    end
  end
  return acc
end

--- Wrap or convert a metadata value so that the result is always a List.
-- Non-table values become a singleton list (an empty list for `nil`); a
-- `MetaList` is converted directly; `MetaInlines` whose stringified text
-- splits into two or more comma-separated fields yield the list of those
-- fields; everything else is returned as a singleton list.
-- @param val  metadata value of any kind
-- @return a pandoc List
local function ensure_list (val)
  if type(val) ~= 'table' then
    -- scalar or nil: `{nil}` is simply the empty list
    return List:new{val}
  end

  local tag = val.t
  if tag == 'MetaList' then
    return List:new(val)
  end

  if tag == 'MetaInlines' then
    -- the inlines might really be a comma-separated enumeration
    local fields = comma_separated_values(pandoc.utils.stringify(val))
    if #fields >= 2 then
      return fields
    end
  end

  -- plain MetaInlines, MetaBlocks, MetaMap, ...: treat as a single item
  return List:new{val}
end

--- Build a predicate testing whether an object's `id` field equals `id`.
-- @param id  the identifier to compare against
-- @return function taking an object and returning a boolean
local function has_id (id)
  local function matches (candidate)
    return candidate.id == id
  end
  return matches
end

--- Copy all key-value pairs of the first table into the second iff there is no
-- such key yet in the second table. A key counts as present whenever its
-- value is not `nil`, so an existing `false` is preserved.
-- @param a  table providing the entries
-- @param b  table receiving the missing entries (modified in place)
-- @returns the second argument
local function add_missing_entries(a, b)
  for k, v in pairs(a) do
    -- compare against nil explicitly: `b[k] or v` would clobber `false`
    if b[k] == nil then
      b[k] = v
    end
  end
  return b
end

--- Create an object with a name. The name is either taken directly from the
-- `name` field, or from the *only* field name (i.e., key) if the object is a
-- dictionary with just one entry. Non-table values and `MetaInlines` are used
-- as the name themselves. The result always carries an `id` field as well.
-- @param obj  metadata value describing the object
-- @return table with at least the fields `name` and `id`
-- Raises an error if `obj` is a table which follows none of these conventions.
local function to_named_object (obj)
  local named = {}
  if type(obj) ~= 'table' then
    -- if the object isn't a table, just use its value as a name.
    named.name = pandoc.MetaInlines{pandoc.Str(tostring(obj))}
    named.id = tostring(obj)
  elseif obj.t == 'MetaInlines' then
    -- Treat inlines as the name
    named.name = obj
    named.id = pandoc.utils.stringify(obj)
  elseif obj.name ~= nil then
    -- object has name attribute → just create a copy of the object
    add_missing_entries(obj, named)
    named.id = pandoc.utils.stringify(named.id or named.name)
  elseif next(obj) and next(obj, next(obj)) == nil then
    -- Exactly one entry: the entry's key is taken as the name, the value
    -- contains the attributes. Both must be locals so that they do not leak
    -- into the global environment.
    local key, attribs = next(obj)
    if type(attribs) == "string" or attribs.t == 'MetaInlines' then
      named.name = attribs
    else
      add_missing_entries(attribs, named)
      named.name = named.name or pandoc.MetaInlines{pandoc.Str(tostring(key))}
    end
    -- an explicit `id` attribute wins; otherwise fall back to the key
    named.id = named.id and pandoc.utils.stringify(named.id) or key
  else
    -- this is not a named object adhering to the usual conventions.
    error('not a named object: ' .. tostring(obj))
  end
  return named
end

--- Turn institute references into full named objects.
-- A reference is looked up first as a numeric index into `known_institutes`,
-- then by comparing its stringified form with the `id` of the known
-- institutes; if neither succeeds, a new named object is created from it.
-- @param institute  nil, a single string/number, or a list of references
-- @param known_institutes  List of already known institute objects
-- @return List with one named object per reference
local function resolve_institutes (institute, known_institutes)
  -- normalize the argument to something that can be iterated
  local pending = institute
  if pending == nil then
    pending = {}
  elseif type(pending) == "string" or type(pending) == "number" then
    pending = {pending}
  end

  local resolved = List:new{}
  for pos, ref in ipairs(pending) do
    local found = known_institutes[tonumber(ref)]
    if not found then
      -- extra parentheses: only the element is wanted, not its index
      found = (known_institutes:find_if(has_id(pandoc.utils.stringify(ref))))
    end
    if not found then
      found = to_named_object(ref)
    end
    resolved[pos] = found
  end
  return resolved
end

--- Insert a named object into a list; if an object of the same id exists
-- already, add all properties only present in the new object to the existing
-- item. Otherwise the new object is appended to the list.
-- @param list      List of named objects (modified in place)
-- @param namedObj  the object to insert or merge
-- @return the object now stored in the list
-- @return the length of the list after the operation (note: this is the
--         length, not necessarily the position of the returned object)
local function merge_on_id (list, namedObj)
  local elem, idx = list:find_if(has_id(namedObj.id))
  -- add_missing_entries returns `elem`, which is a table and hence truthy,
  -- so the and/or idiom is safe here.
  local res = elem and add_missing_entries(namedObj, elem) or namedObj
  local obj_idx = idx or (#list + 1)
  list[obj_idx] = res
  return res, #list
end

--- Concatenate a list of lists into a single, one-level List.
-- @param lists  sequence of lists
-- @return a new List holding the items of all sublists, in order
local function flatten (lists)
  local flat = List:new{}
  for _, sublist in ipairs(lists) do
    List.extend(flat, sublist)
  end
  return flat
end

--- Bring authors and institutes into canonical form.
-- Both inputs are converted to lists of named objects. Each author's
-- `institute` field is resolved against the institute list, institutes found
-- only in author entries are merged into that list, and finally every
-- author's institutes are replaced by their (stringified) list indices.
-- @param raw_author     value of the `author` metadata field
-- @param raw_institute  value of the `institute` metadata field
-- @return List of author objects
-- @return List of institute objects
local function canonicalize(raw_author, raw_institute)
  local institutes = ensure_list(raw_institute):map(to_named_object)
  local authors = ensure_list(raw_author):map(to_named_object)

  -- First pass: resolve all references before the institute list is
  -- extended below.
  for _, person in ipairs(authors) do
    local refs = ensure_list(person.institute)
    person.institute = resolve_institutes(refs, institutes)
  end

  -- Institutes defined within author objects are merged with those given in
  -- the top-level list.
  local function institutes_of (person)
    return person.institute
  end
  local from_authors = flatten(authors:map(institutes_of))
  for _, inst in ipairs(from_authors) do
    merge_on_id(institutes, inst)
  end

  -- Second pass: authors refer to institutes via their index.
  local function index_of (inst)
    local _, position = institutes:find_if(has_id(inst.id))
    return tostring(position)
  end
  for _, person in ipairs(authors) do
    local indices = person.institute:map(index_of)
    person.institute = pandoc.MetaList(indices)
  end

  return authors, institutes
end


--- Meta handler: replace the `author` and `institute` metadata fields with
-- their canonicalized versions and hand back the modified metadata.
local function normalize_meta (meta)
  local authors, institutes = canonicalize(meta.author, meta.institute)
  meta.author = authors
  meta.institute = institutes
  return meta
end

-- The filter consists of a single pass which only touches the metadata.
return {
  { Meta = normalize_meta }
}