Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add alternative SIL parsing using LPEG's RE module #1973

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions inputters/sil-epnf.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
local bits = SILE.parserBits
local epnf = require("epnf")
local bits = require("core.parserbits")

local passthroughCommands = {
ftl = true,
Expand All @@ -25,7 +26,7 @@ end

-- luacheck: push ignore
---@diagnostic disable: undefined-global, unused-local, lowercase-global
local function grammar (_ENV)
local function builder (_ENV)
local _ = WS^0
local eol = S"\r\n"
local specials = S"\\{}%"
Expand Down Expand Up @@ -92,4 +93,10 @@ local function grammar (_ENV)
)
end

return grammar
local grammar = epnf.define(builder)

--- Parse a SIL document with the epnf-compiled grammar.
-- @param input the document source to parse
-- @return whatever epnf.parsestring returns for the module-level `grammar`
local parser = function (input)
  return epnf.parsestring(grammar, input)
end

return parser
68 changes: 68 additions & 0 deletions inputters/sil-lpeg-re.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
local lpeg = require("lpeg")
local re = require("re")
local bits = require("core.parserbits")

local P, C, S = lpeg.P, lpeg.C, lpeg.S
local myID = C(bits.silidentifier) / 1

--- Normalise a captured value to a table.
-- @param value any captured value
-- @return `value` itself when it is a table, otherwise a fresh empty table
local function wrapper (value)
  if type(value) == "table" then
    return value
  end
  return {}
end
local specials = S"{}%\\"

-- Grammar for SIL documents, written in the textual syntax of LPeg's `re`
-- module and compiled below by re.compile().
--
-- * `%parameters`, `%cmdID`, `%eol`, `%specials` and `%escaped_specials`, plus
--   the `unescapeSpecials` function used by `texlike_text`, are looked up in
--   the definitions table passed to re.compile().
-- * `{:command: ... :}` is a named group capture and `=command` is a back
--   reference to it, so an `\end{...}` has to repeat the command name that was
--   captured after `\begin`.
-- * `passthrough_cmd` lists the commands whose content is captured as raw text
--   instead of being parsed as further SIL markup.
--
-- NOTE(review): `passthrough_stuff` has no repetition operator, unlike
-- `texlike_stuff` and `passthrough_env_stuff`; confirm that matching exactly
-- one text run or one braced group is intended.
local expression = [=[

document <- texlike_stuff !.

texlike_stuff <- {: environment / comment / texlike_text / texlike_braced_stuff / texlike_command :}*

environment <- '\begin' {:options: %parameters :}
('{' {:command: passthrough_cmd :} '}' passthrough_env_stuff pass_end /
'{' {:command: %cmdID :} '}' texlike_stuff notpass_end)

comment <- ('%' (!%eol .)* %eol ) -> ''

texlike_text <- { (!%specials . / %escaped_specials)+ } -> unescapeSpecials

texlike_braced_stuff <- '{' texlike_stuff '}'

texlike_command <- '\' ({:command: passthrough_cmd :} {:options: %parameters :}
passthrough_braced_stuff / {:command: %cmdID :} {:options: %parameters :}
texlike_braced_stuff)

passthrough_cmd <- 'ftl' / 'lua' / 'math' / 'raw' / 'script' / 'sil' / 'use' / 'xml'

passthrough_stuff <- { {: passthrough_text / passthrough_debraced_stuff :} }

passthrough_env_stuff <- {: passthrough_env_text :}*

passthrough_text <- { [^{}]+ }

passthrough_env_text <- { (!('\end{' =command '}') .)+ }

passthrough_braced_stuff <- '{' passthrough_stuff '}'

passthrough_debraced_stuff <- { passthrough_braced_stuff }

notpass_end <- '\end{' =command '}' _

pass_end <- '\end{' =command '}' _

_ <- %s*

]=]

-- Compile the SIL grammar. The table supplies the patterns and the function
-- that `expression` refers to by name (`%name` and `-> name`).
local grammar = re.compile(expression, {
  -- Drop the escaping backslash in front of `{`, `}`, `%` and `\` in text.
  unescapeSpecials = function (str)
    -- string.gsub returns (result, count). Every value returned from a
    -- function capture becomes a capture value, so the extra parentheses
    -- truncate the result to the string and keep the substitution count from
    -- leaking into the parse output.
    return (str:gsub('\\([{}%%\\])', '%1'))
  end,
  -- A command identifier, rejected where the input continues with "begin" or
  -- "end" (those are handled by the environment rules).
  cmdID = myID - P"begin" - P"end",
  -- Optional bracketed parameter list; `wrapper` turns a non-table capture
  -- into an empty table.
  parameters = (P"[" * bits.parameters * P"]")^-1 / wrapper,
  eol = S"\r\n",
  specials = specials,
  escaped_specials = P"\\" * specials
})

-- Pass match results through unchanged, raising when the match failed.
-- re.match signals failure by returning nil rather than raising an error.
local function assert_matched (first, ...)
  if first == nil then
    error("input does not match the SIL grammar (lpeg-re parser)", 0)
  end
  return first, ...
end

--- Parse a SIL document with the compiled `re` grammar.
-- @param doc the document source to parse
-- @return the values produced by re.match for a successful match
-- Raises an error when `doc` does not match the grammar, so that callers which
-- wrap the parser in pcall() can detect a parse failure.
local function parser (doc)
  return assert_matched(re.match(doc, grammar))
end

return parser
16 changes: 5 additions & 11 deletions inputters/sil.lua
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
local base = require("inputters.base")

local epnf = require("epnf")
local usere = SU.boolean(_G["SIL_USE_RE"], false)
local parser = require("inputters.sil-" .. (usere and "lpeg-re" or "epnf"))

local inputter = pl.class(base)
inputter._name = "sil"

inputter.order = 50

inputter._grammar = require("inputters.sil-epnf")

inputter.appropriate = function (round, filename, doc)
if round == 1 then
return filename:match(".sil$")
Expand All @@ -17,16 +16,15 @@ inputter.appropriate = function (round, filename, doc)
local promising = sniff:match("\\begin") or sniff:match("\\document") or sniff:match("\\sile")
return promising and inputter.appropriate(3, filename, doc) or false
elseif round == 3 then
local _parser = epnf.define(inputter._grammar)
local status, _ = pcall(epnf.parsestring, _parser, doc)
local status, _ = pcall(parser, doc)
return status
end
end

-- Initialise the inputter: store the parser on the instance as self._parser,
-- then run the base class initialiser.
function inputter:_init ()
-- Save time when parsing strings by only setting up the grammar once per
-- instantiation then re-using it on every use.
-- NOTE(review): self._parser is assigned twice below and the second
-- assignment (the module-level `parser`) wins; the first looks like the
-- removed side of the diff -- confirm against the actual file.
self._parser = self:rebuildParser()
self._parser = parser
base._init(self)
end

Expand Down Expand Up @@ -113,12 +111,8 @@ local function massage_ast (tree, doc)
end
end

-- Build a parser by passing this inputter's grammar definition
-- (self._grammar) to epnf.define, and return the result.
function inputter:rebuildParser ()
return epnf.define(self._grammar)
end

function inputter:parse (doc)
local status, result = pcall(epnf.parsestring, self._parser, doc)
local status, result = pcall(self._parser, doc)
if not status then
return SU.error(([[Unable to parse input document to an AST tree. Parser error:

Expand Down
Loading