summaryrefslogtreecommitdiff
path: root/paper/scholar-filters/json-ld.lua
blob: b9cd101544a1041d728c67f18f7833b4748436ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
-- json-ld.lua: add a JSON-LD metadata field describing the document.
--
-- Copyright (c) 2017-2018 Albert Krewinkel
--
-- This program is free software; you can redistribute it and/or modify it
-- under the terms of the GNU public license version 2 or later.
-- See the LICENSE file for details.
-- Directory holding this script: strip the last '/'-separated component
-- from PANDOC_SCRIPT_FILE (only the first result of gsub is kept).
local SCRIPT_DIR = PANDOC_SCRIPT_FILE:gsub('/[^/]*$', '')

-- Search the script's own directory first when resolving `require` calls.
package.path =  SCRIPT_DIR .. '/?.lua;' .. package.path

-- JSON library; used below through json.encode and json.decode.
local json = require "dkjson"
-- pandoc's List module; used below through List.map.
local List = require 'pandoc.List'

--- Convert a metadata value to plain text.
-- Strings and nil are handed back untouched; every other value is
-- delegated to `pandoc.utils.stringify`.
-- @param x value to convert (may be nil)
-- @return the textual form of `x`, or nil when `x` is nil
local function stringify(x)
  if x == nil or type(x) == 'string' then
    return x
  end
  return pandoc.utils.stringify(x)
end

--- Build schema.org `Organization` records from a list of organizations.
-- @param orgs sequence of tables; the `name` and `url` fields are read
-- @return a new sequence with one record per entry, in the same order
local function Organizations(orgs)
  -- Stringify a field only when it is truthy; nil/false pass through as-is.
  local function as_text(field)
    return field and stringify(field)
  end

  local records = {}
  for position, organization in ipairs(orgs) do
    local record = {}
    record["@type"] = "Organization"
    record["name"] = as_text(organization.name)
    record['url'] = as_text(organization.url)
    records[position] = record
  end
  return records
end

--- Build schema.org `Person` records from a list of authors.
-- For each author the fields `orcid`, `name`, `institute`, `email` and
-- `url` are read; absent fields are simply left out of the record.
-- @param authors sequence of author tables
-- @return a `pandoc.MetaList` holding one record per author, in order
local function Authors(authors)
  -- Stringify a field only when it is truthy; nil/false pass through as-is.
  local function as_text(field)
    return field and stringify(field)
  end

  local people = pandoc.MetaList{}
  for position, person in ipairs(authors) do
    local orcid = person.orcid
    local institutes = person.institute

    local record = { ['@type'] = "Person" }
    -- The ORCID iD doubles as the person's node identifier.
    record['@id'] = orcid and ("https://orcid.org/" .. stringify(orcid))
    record["name"] = as_text(person.name)
    record["affiliation"] = institutes and Organizations(institutes)
    record['email'] = as_text(person.email)
    record['url'] = as_text(person.url)

    people[position] = record
  end
  return people
end

--- Group the DOIs of cited works by CiTO property.
-- Only citations whose bibliography record carries a `DOI` are included.
-- @param bibjson sequence of bibliography records (tables with `id` and,
--   optionally, `DOI`); nil is treated as an empty bibliography
-- @param cites_by_cito_property table mapping a CiTO property name to a
--   sequence of citation ids; nil is treated as "no citations"
-- @return table mapping "cito:<property>" to a sequence of `{["@id"] = ...}`
local function Cito (bibjson, cites_by_cito_property)
  -- Index the records by id once instead of scanning the whole list for
  -- every citation. The first record wins when ids repeat, which matches
  -- a front-to-back scan.
  local record_by_id = {}
  for i = 1, #(bibjson or {}) do
    local id = bibjson[i].id
    if id ~= nil and record_by_id[id] == nil then
      record_by_id[id] = bibjson[i]
    end
  end

  local result = {}
  for citation_type, typed_citation_ids in pairs(cites_by_cito_property or {}) do
    for i = 1, #typed_citation_ids do
      local bibentry = record_by_id[typed_citation_ids[i]]
      if bibentry and bibentry.DOI then
        local cito_type_str = "cito:" .. citation_type
        local bucket = result[cito_type_str]
        if not bucket then
          bucket = {}
          result[cito_type_str] = bucket
        end
        bucket[#bucket + 1] = { ["@id"] = "http://dx.doi.org/" .. bibentry.DOI }
      end
    end
  end
  return result
end

--- Describe every cited work as a schema.org record.
-- @param bibjson sequence of CSL-JSON-like bibliography records; nil is
--   treated as an empty bibliography
-- @param citation_ids table whose KEYS are the ids of the cited works
-- @return sequence of JSON-LD tables, one per id found in `bibjson`
--   (the order follows `pairs` and is therefore unspecified)
local function Citations (bibjson, citation_ids)
  bibjson = bibjson or {}

  -- Index the records by id once instead of scanning the list per
  -- citation. The first record wins when ids repeat, which matches a
  -- front-to-back scan.
  local record_by_id = {}
  for i = 1, #bibjson do
    local id = bibjson[i].id
    if id ~= nil and record_by_id[id] == nil then
      record_by_id[id] = bibjson[i]
    end
  end

  -- Record types with a dedicated schema.org type; all others are "Article".
  local schema_type_by_record_type = {
    ["report"]          = "Report",
    ["article-journal"] = "ScholarlyArticle",
  }

  -- Format one name as "family, given". Missing parts are skipped so that
  -- table.concat never sees a hole; a name with neither part falls back to
  -- its `literal` form (or the empty string).
  local function author_name (person)
    local parts = {}
    if person.family then parts[#parts + 1] = person.family end
    if person.given then parts[#parts + 1] = person.given end
    if #parts == 0 then
      return person.literal or ""
    end
    return table.concat(parts, ", ")
  end

  -- Convert a single bibliography record into its JSON-LD description.
  local function citation_schema (record)
    local authors = {}
    if record.author then
      for i = 1, #record.author do
        authors[i] = { name = author_name(record.author[i]) }
      end
    end

    -- Only the first entry of `date-parts` is used.
    local date_parts = record.issued and record.issued["date-parts"]
    local first_date = date_parts and date_parts[1]

    return {
      ["@context"] = {
        ["@vocab"]    = "http://schema.org/",
        ["title"]     = "headline",
        ["page"]      = "pagination",
        ["date"]      = "datePublished",
        ["publisher"] = "publisher",
        ["author"]    = "author",
      },
      ["@type"]     = schema_type_by_record_type[record.type] or "Article",
      ["@id"]       = record.DOI and ("http://dx.doi.org/" .. record.DOI),
      ["title"]     = record.title,
      ["author"]    = Authors(authors),
      ["date"]      = first_date and table.concat(first_date, "-"),
      ["publisher"] = record.publisher and
        { ["@type"] = "Organization", ["name"] = record.publisher },
      ["page"]      = record.page,
    }
  end

  local res = {}
  for cit_id, _ in pairs(citation_ids or {}) do
    local citation_record = record_by_id[cit_id]
    if citation_record then
      res[#res + 1] = citation_schema(citation_record)
    end
  end
  return res
end

--- Assemble the JSON-LD description of the document.
-- Reads `title`, `subtitle`, `date`, `author`, `accessible_for_free`,
-- `cito_cites` and `bibliography_records` from `meta`. Missing `author`,
-- `cito_cites` or `bibliography_records` are treated as empty rather than
-- raising an error.
-- @param meta table of document metadata
-- @return table ready to be serialized as JSON-LD
function json_ld (meta)
  -- An explicit `false` must survive, so only nil falls back to true.
  local accessible_for_free = meta.accessible_for_free
  if accessible_for_free == nil then
    accessible_for_free = true
  end

  local context = {
    ["@vocab"]    = "http://schema.org/",
    ["cito"]      = "http://purl.org/spar/cito/",
    ["author"]    = "author",
    ["name"]      = "name",
    ["title"]     = "headline",
    ["subtitle"]  = "alternativeTitle",
    ["publisher"] = "publisher",
    ["date"]      = "datePublished",
    -- NOTE(review): the term mapping is dropped when the value is false,
    -- while the result below still carries "isFree" -- confirm this is
    -- intended.
    ["isFree"]    = accessible_for_free and "isAccessibleForFree" or nil,
    ["image"]     = "image",
    ["citation"]  = "citation",
  }

  local cito_cites = meta.cito_cites or {}
  local bibliography_records = meta.bibliography_records or {}

  -- Collect the set of all cited ids, regardless of CiTO property.
  local citation_ids = {}
  for _, ids in pairs(cito_cites) do
    for _, id in ipairs(ids) do citation_ids[id] = true end
  end

  local result = {
    ["@context"]  = context,
    ["@type"]     = "ScholarlyArticle",
    ["author"]    = Authors(meta.author or {}),
    ["name"]      = stringify(meta.title),
    ["title"]     = stringify(meta.title),
    ["subtitle"]  = meta.subtitle and stringify(meta.subtitle),
    -- Fall back to today's date when the document does not set one.
    ["date"]      = meta.date and stringify(meta.date) or os.date("%Y-%m-%d"),
    -- Image output is currently disabled; the intended fallback image was
    -- https://upload.wikimedia.org/wikipedia/commons/f/fa/Globe.svg
    -- ["image"]     = meta.image or default_image,
    ["isFree"]    = accessible_for_free,
    ["citation"]  = Citations(bibliography_records, citation_ids),
  }
  -- Merge the per-property "cito:*" citation lists into the top level.
  for k, v in pairs(Cito(bibliography_records, cito_cites)) do
    result[k] = v
  end
  return result
end

--- Load the bibliography as a list of records.
-- Runs `pandoc-citeproc --bib2json <file>` and decodes its output.
-- @param bibfilename bibliography file name (string or a metadata value
--   that can be stringified); nil/false means "no bibliography"
-- @return table of decoded records; an empty table when there is no
--   bibliography or when it could not be read or decoded
local function bibliography(bibfilename)
  if not bibfilename then
    return {}
  end

  -- The value may arrive as a metadata element rather than a plain string.
  local filename = bibfilename
  if type(filename) ~= 'string' then
    filename = stringify(filename)
  end

  -- Quote the file name so that spaces or shell metacharacters in it are
  -- not interpreted by the shell that io.popen starts.
  local quoted
  if package.config:sub(1, 1) == '\\' then
    -- NOTE(review): best-effort quoting for Windows shells -- confirm.
    quoted = '"' .. (filename:gsub('"', '\\"')) .. '"'
  else
    quoted = "'" .. (filename:gsub("'", "'\\''")) .. "'"
  end

  local bibfile = io.popen("pandoc-citeproc --bib2json " .. quoted, "r")
  if not bibfile then
    io.stderr:write("json-ld.lua: could not run pandoc-citeproc\n")
    return {}
  end
  local jsonstr = bibfile:read("*a")
  bibfile:close()

  -- Callers take the length of the result, so never hand back a non-table.
  local records = json.decode(jsonstr or "")
  if type(records) ~= 'table' then
    io.stderr:write("json-ld.lua: could not read bibliography " ..
                    filename .. "\n")
    return {}
  end
  return records
end

--- Create a lookup function for a list of institutes.
-- @param institutes table of institutes, addressed by numeric position
-- @return function that stringifies its argument, converts the text to a
--   number and returns the institute stored at that position
local function institute_resolver (institutes)
  local function resolve (inst_idx)
    local position = tonumber(stringify(inst_idx))
    return institutes[position]
  end
  return resolve
end

--- Filter function: add a `jsonld` field to the document metadata.
-- Works on a shallow copy of `meta` in which each author's `institute`
-- indices are replaced by the matching entries of `meta.institute`, and
-- which additionally carries the loaded bibliography records. The JSON-LD
-- object built from that copy is stored, JSON-encoded, in `meta.jsonld`.
-- @param meta document metadata
-- @return the same `meta`, with `jsonld` set
function Meta (meta)
  -- Shallow copy, so the original tables are never modified.
  local function clone (obj)
    local result = {}
    for k, v in pairs(obj) do result[k] = v end
    return result
  end
  local metadata = clone(meta)

  -- Documents without institutes or authors are valid input.
  local institutes = meta.institute or {}
  local function resolve_institute (idx)
    -- Indices may be plain strings/numbers or metadata elements; the
    -- latter have to be stringified before they can be converted.
    local position = tonumber(idx) or tonumber(stringify(idx))
    return position and institutes[position]
  end

  local tmp_authors = {}
  for i, author_orig in ipairs(meta.author or {}) do
    local author = clone(author_orig)
    if author.institute then
      author.institute = List.map(author.institute, resolve_institute)
    end
    tmp_authors[i] = author
  end
  metadata.author = tmp_authors

  metadata.bibliography_records = bibliography(meta.bibliography)
  local jsonld_object = json_ld(metadata)
  meta.jsonld = json.encode(jsonld_object)

  return meta
end