Skip to content

Commit

Permalink
feat: added new bib parser in lua
Browse files Browse the repository at this point in the history
This is similar to the Vimscript parser ("vim"), but since it is in Lua
it is much faster and comparable to the current fastest parser
("bibtex").

refer: #2786
  • Loading branch information
lervag committed Oct 24, 2023
1 parent e626412 commit d89d308
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 7 deletions.
13 changes: 13 additions & 0 deletions .luarc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"$schema": "https://raw.githubusercontent.com/LuaLS/vscode-lua/master/setting/schema.json",
"runtime": {
"version": "LuaJIT"
},
"workspace": {
"library": [
"$VIMRUNTIME",
"${3rd}/luv/library"
],
"checkThirdParty": false
}
}
5 changes: 5 additions & 0 deletions .stylua.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
column_width = 80
indent_type = "Spaces"
indent_width = 2
quote_style = "AutoPreferDouble"
call_parentheses = "None"
4 changes: 3 additions & 1 deletion autoload/vimtex/options.vim
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,9 @@ function! vimtex#options#init() abort " {{{1
call s:init_option('vimtex_lint_chktex_ignore_warnings',
\ '-n1 -n3 -n8 -n25 -n36')

call s:init_option('vimtex_parser_bib_backend', 'bibtex')
call s:init_option('vimtex_parser_bib_backend',
\ has('nvim') ? 'lua' : 'bibtex'
\)
call s:init_option('vimtex_parser_cmd_separator_check',
\ 'vimtex#cmd#parser_separator_check')

Expand Down
12 changes: 12 additions & 0 deletions autoload/vimtex/parser/bib.vim
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,18 @@ endfunction

" }}}1

function! s:parse_with_lua(file) abort " {{{1
  " Hand the file over to the Lua parser module. The module is only
  " reachable through luaeval(), so this backend is limited to Neovim.
  if has('nvim')
    return luaeval('require("vimtex.bibparser").parse(_A)', a:file)
  endif

  call vimtex#log#error(
        \ 'bib parser backend "lua" only works with neovim!')
  return []
endfunction

" }}}1

function! s:parse_with_vim(file) abort " {{{1
" Adheres to the format description found here:
" http://www.bibtex.org/Format/
Expand Down
18 changes: 12 additions & 6 deletions doc/vimtex.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1310,8 +1310,14 @@ OPTIONS *vimtex-options*
This option sets the desired default backend for parsing bibliographies.
This is used e.g. for gathering completion candidates. Possible values:

`bibtex`: The fastest, but most hacky solution. Should work well in most
cases.
`bibtex`: The fastest, but most "hacky" solution. Still, time has proved
that this works well!

`vim`: The slowest but perhaps most robust solution, as it does not
require any external utilities.

`lua`: A Lua implementation of the Vim backend. About as fast as the
`bibtex` parser, but this only works on Neovim.

`bibparse`: Also fast, but might be more robust.

Expand All @@ -1335,17 +1341,17 @@ OPTIONS *vimtex-options*
(see |if_pyth| and |py3|) and that the `bibtexparser`
Python module is installed and available.

`vim`: The slowest but perhaps most robust solution, as it does not
require any external utilities.

Some people may want to conditionally change this option if a backend is
available. For example: >vim

if executable('bibparse')
let g:vimtex_parser_bib_backend = 'bibparse'
endif
<
Default value: `bibtex`
Default value:

Vim: `bibtex`
Neovim: `lua`

*g:vimtex_parser_cmd_separator_check*
This option specifies the policy for deciding whether successive groups of
Expand Down
237 changes: 237 additions & 0 deletions lua/vimtex/bibparser.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
--- Render a single field as one line of text: the upper-cased field name
--- wrapped in `**`, a colon, the value, and a trailing newline.
---@param title string field name
---@param object string|number field value, appended as-is
---@return string
local function format_line(title, object)
  local pieces = { "**", string.upper(title), "**", ": ", object, "\n" }
  return table.concat(pieces)
end

--- Build the multi-line info text for one entry.
---
--- The "title" and "author" fields are emitted first (when present), in that
--- order. Every other key/value pair of `entry` follows, one formatted line
--- each, with those lines sorted as strings.
---@param entry table entry whose fields should be rendered
---@return string
local function format_entry(entry)
  local prioritized_keys = { "title", "author" }
  local leading = {}
  local trailing = {}

  for _, name in ipairs(prioritized_keys) do
    local value = entry[name]
    if value ~= nil then
      leading[#leading + 1] = format_line(name, value)
    end
  end

  for name, value in pairs(entry) do
    if not vim.tbl_contains(prioritized_keys, name) then
      trailing[#trailing + 1] = format_line(name, value)
    end
  end
  -- pairs() has no defined order; sorting the rendered lines makes the
  -- result deterministic.
  table.sort(trailing)

  return table.concat(leading) .. table.concat(trailing)
end

--- Read a `name =` field key from `body`, starting at `head`.
---@param body string text to scan
---@param head integer 0-index
---@return string key lower-cased field name, or "" when no key starts at `head`
---@return integer head offset just past the `=` and following blanks, or -1
local function get_key(body, head)
  local found =
    vim.fn.matchlist(body, [[^\v([-_:0-9a-zA-Z]+)\s*\=\s*]], head)
  if next(found) == nil then
    return "", -1
  end

  -- found[1] is the whole match (name, "=" and surrounding blanks),
  -- found[2] is the captured name.
  local whole, name = found[1], found[2]
  return name:lower(), head + #whole
end

--- Consume one line belonging to an `@string{...}` definition.
---
--- Lines are appended to `current.body` until the brace level drops to zero.
--- At that point the accumulated text is matched as `name = "value"` or
--- `name = {value}` and the pair is stored in `strings`.
---@param line string text to consume (without the `@string{` prefix)
---@param current table parser state; `level` and `body` are read and updated
---@param strings table macro name -> replacement text, updated in place
---@return boolean done true once the definition has been closed
local function parse_string(line, current, strings)
  current.level = current.level
    + vim.fn.count(line, "{")
    - vim.fn.count(line, "}")
  if current.level > 0 then
    current.body = current.body .. line
    return false
  end

  -- Drop the closing brace of the @string{...} block itself.
  local body = current.body .. vim.fn.matchstr(line, [[.*\ze}]])

  -- Quoted form first, braced form second (same order as before).
  local patterns = {
    [[\v^\s*(\S+)\s*\=\s*"(.*)"\s*$]],
    [[\v^\s*(\S+)\s*\=\s*\{(.*)\}\s*$]],
  }
  for _, pattern in ipairs(patterns) do
    local matches = vim.fn.matchlist(body, pattern)
    -- The previous test `not vim.fn.empty(x) == 1` parsed as
    -- `(not vim.fn.empty(x)) == 1`, which is always false, so no macro was
    -- ever recorded. Compare the captured name directly instead.
    if not vim.tbl_isempty(matches) and matches[2] ~= "" then
      strings[matches[2]] = matches[3]
    end
  end

  return true
end

--- Consume one line belonging to a regular entry.
---
--- The brace level in `current.level` is adjusted by the braces on `line`.
--- While the level stays positive the whole line is appended to
--- `current.body`. Once it reaches zero, the text before the last `}` is
--- appended and `current` is added to `entries`.
---@param line string text to consume
---@param current table parser state; `level` and `body` are read and updated
---@param entries table list collecting completed entries
---@return boolean done true once the entry has been closed
local function parse_entry(line, current, entries)
  local opened = vim.fn.count(line, "{")
  local closed = vim.fn.count(line, "}")
  current.level = current.level + opened - closed

  local done = current.level <= 0
  if done then
    current.body = current.body .. vim.fn.matchstr(line, [[.*\ze}]])
    table.insert(entries, current)
  else
    current.body = current.body .. line
  end

  return done
end

--- Try to start a new item from a line of the form `@type{...`.
---
--- On a match, `current` is initialised in place (level, body, file, line
--- number, type). `@string` lines are handed to `parse_string`; for other
--- types the citation key is stored and any text after the key on the same
--- line is handed to `parse_entry`.
---@param file string path stored as `current.vimtex_file`
---@param lnum integer line number stored as `current.vimtex_lnum`
---@param line string the line to inspect
---@param current table empty state table, filled in place
---@param strings table macro name -> replacement text
---@param entries table list collecting completed entries
---@return boolean done true when nothing is left open after this line
local function parse_type(file, lnum, line, current, strings, entries)
  local matches = vim.fn.matchlist(line, [[\v^\@(\w+)\s*\{\s*(.*)]])
  if vim.tbl_isempty(matches) then
    return true
  end

  -- Named `entry_type` so the builtin `type` function is not shadowed.
  local entry_type = string.lower(matches[2])
  local ignored = { preamble = true, comment = true }
  if ignored[entry_type] then
    return true
  end

  current.level = 1
  current.body = ""
  current.vimtex_file = file
  current.vimtex_lnum = lnum
  current.type = entry_type

  if entry_type == "string" then
    return parse_string(matches[3], current, strings)
  end

  -- If no comma follows the key on this line the list is empty, and both
  -- lookups below yield nil.
  matches = vim.fn.matchlist(matches[3], [[\v^([^, ]*)\s*,\s*(.*)]])
  current.key = matches[2]

  -- The previous test `not vim.fn.empty(x) == 1` parsed as
  -- `(not vim.fn.empty(x)) == 1`, which is always false. Fields on the same
  -- line as the key were dropped and single-line entries were never closed.
  local rest = matches[3]
  if rest ~= nil and rest ~= "" then
    return parse_entry(rest, current, entries)
  end

  return false
end

--- Read one atomic value starting at `head`: a `{...}` group, a `"..."`
--- string, or a bare `@string` macro name. Neither `#` concatenation nor the
--- trailing comma is handled here.
---@param body string text to scan
---@param head integer 0-index
---@param strings table macro name -> replacement text
---@return string|nil value nil when no value starts at `head`
---@return integer head offset past the value and following blanks; -1 when a quoted string is not terminated
local function get_value_atom(body, head, strings)
  local first = body:sub(head + 1, head + 1)

  if first == "{" then
    local depth = 1
    local i1 = head + 1
    local i0 = i1

    -- Walk brace by brace until the group opened at `head` is closed, or
    -- the text runs out.
    while depth > 0 do
      local res = vim.fn.matchstrpos(body, [=[[{}]]=], i1)
      local brace = res[1]
      i1 = res[3]
      if i1 < 0 then
        break
      end

      i0 = i1
      depth = depth + (brace == "{" and 1 or -1)
    end

    -- i0 is the 0-based offset just past the closing brace.
    return body:sub(head + 2, i0 - 1), vim.fn.matchend(body, [[^\s*]], i0)
  end

  if first == [["]] then
    -- Next double quote that is not preceded by a backslash.
    local close = vim.fn.match(body, [[\\\@<!"]], head + 1)
    if close < 0 then
      -- Return a numeric position so callers can keep comparing with 0.
      return "bibparser.lua: get_value_string failed", -1
    end

    return body:sub(head + 2, close),
      vim.fn.matchend(body, [[^\s*]], close + 1)
  end

  if first:find("^[0-9A-Za-z_]") then
    -- Macro names may contain letters, digits, "_" and "-". The previous
    -- pattern `\x\+` only accepted hexadecimal digits.
    local name = vim.fn.matchstr(body, [[^\w[0-9a-zA-Z_-]*]], head)
    local after = vim.fn.matchend(body, [[^\s*]], head + #name)
    -- Unknown macros are kept visible as "@(name)".
    return strings[name] or ("@(" .. name .. ")"), after
  end

  return nil, head
end

--- Read a string value starting at `head`, joining parts separated by `#`,
--- and step over the comma that ends the field.
---@param body string text to scan
---@param head integer 0-index
---@param strings table macro name -> replacement text
---@return string|nil value nil when no value starts at `head`
---@return integer head offset of the next field, or -1 when no comma follows
local function get_value_string(body, head, strings)
  local value
  local pos = head

  while true do
    local part
    part, pos = get_value_atom(body, pos, strings)
    if part ~= nil then
      value = (value or "") .. part
    end

    if pos < 0 then
      return value, -1
    end

    -- Look for "#" after the part just read, not at the original `head`.
    if body:sub(pos + 1, pos + 1) ~= "#" then
      break
    end
    pos = vim.fn.matchend(body, [[^\s*]], pos + 1)
  end

  -- The comma is consumed exactly once, for every kind of value.
  return value, vim.fn.matchend(body, [[^,\s*]], pos)
end

--- Read a field value starting at `head`.
---
--- A value that begins with a digit is taken as a run of digits; anything
--- else is delegated to `get_value_string`.
---@param body string text to scan
---@param head integer 0-index
---@param strings table passed through to `get_value_string`
---@return any value
---@return any head position reported by the matching step
local function get_value(body, head, strings)
  local first = body:sub(head + 1, head + 1)
  if not vim.regex([[\d]]):match_str(first) then
    return get_value_string(body, head, strings)
  end

  local digits = vim.fn.matchstr(body, [[^\d\+]], head)
  local after = head + vim.fn.len(digits)
  return digits, vim.fn.matchend(body, [[^\s*,\s*]], after)
end

--- Turn the raw `body` text of an entry into individual fields.
---
--- Alternates between reading a key and reading its value until one of the
--- readers reports a negative position. Each pair is stored directly on
--- `entry`; the temporary `level` and `body` fields are cleared.
---@param entry table entry with a `body` string, modified in place
---@param strings table macro name -> replacement text
---@return table entry the same table that was passed in
local function parse_entry_body(entry, strings)
  entry.level = nil

  local pending_key = ""
  local offset = vim.fn.matchend(entry.body, [[^\s*]])
  while offset >= 0 do
    if vim.fn.empty(pending_key) ~= 1 then
      -- A key is waiting: read its value and store the pair.
      local value
      value, offset = get_value(entry.body, offset, strings)
      entry[pending_key] = value
      pending_key = ""
    else
      pending_key, offset = get_key(entry.body, offset)
    end
  end

  entry.body = nil
  return entry
end

local M = {}

--- Parse a BibTeX file into a list of entry tables.
---
--- The file is read line by line. `@string` definitions are collected first
--- so they can be substituted into field values, then every collected entry
--- has its body split into fields and gets a `formatted_info` text.
---@param file string|nil path of the file to parse
---@return table[] entries empty when `file` is nil or not readable
M.parse = function(file)
  -- vim.fn.filereadable() returns the numbers 0 or 1, and 0 is truthy in
  -- Lua, so the result has to be compared explicitly. `not filereadable()`
  -- was always false and let unreadable paths reach readfile().
  if file == nil or vim.fn.filereadable(file) == 0 then
    return {}
  end

  local entries = {}
  local strings = {}
  local current = {}

  for lnum, line in ipairs(vim.fn.readfile(file)) do
    local done
    if vim.tbl_isempty(current) then
      -- Not inside any item: look for the start of one.
      done = parse_type(file, lnum, line, current, strings, entries)
    elseif current.type == "string" then
      done = parse_string(line, current, strings)
    else
      done = parse_entry(line, current, entries)
    end

    if done then
      current = {}
    end
  end

  local result = {}
  -- ipairs keeps the entries in file order.
  for _, raw in ipairs(entries) do
    local entry = parse_entry_body(raw, strings)
    entry.formatted_info = format_entry(entry)
    result[#result + 1] = entry
  end

  return result
end

return M
1 change: 1 addition & 0 deletions test/test-completion-bibtex-speed/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ MYVIM ?= nvim --clean --headless
test:
@INMAKE=1 $(MYVIM) -u bibspeed.vim
@INMAKE=1 BACKEND=vim $(MYVIM) -u bibspeed.vim
@INMAKE=1 BACKEND=lua $(MYVIM) -u bibspeed.vim
@#INMAKE=1 BACKEND=bibparse $(MYVIM) -u bibspeed.vim
@#INMAKE=1 BACKEND=bibtexparser $(MYVIM) -u bibspeed.vim
@rm -f nvim_servernames.log

0 comments on commit d89d308

Please sign in to comment.