From d89d30813da9912d25d939dd429aeb1aaf87136d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Yngve=20Lerv=C3=A5g?= Date: Sun, 22 Oct 2023 13:37:43 +0200 Subject: [PATCH] feat: added new bib parser in lua This is similar to the Vimscript parser ("vim"), but since it is in Lua it is much faster and comparable to the current fastest parser ("bibtex"). refer: #2786 --- .luarc.json | 13 ++ .stylua.toml | 5 + autoload/vimtex/options.vim | 4 +- autoload/vimtex/parser/bib.vim | 12 ++ doc/vimtex.txt | 18 +- lua/vimtex/bibparser.lua | 237 +++++++++++++++++++++ test/test-completion-bibtex-speed/Makefile | 1 + 7 files changed, 283 insertions(+), 7 deletions(-) create mode 100644 .luarc.json create mode 100644 .stylua.toml create mode 100644 lua/vimtex/bibparser.lua diff --git a/.luarc.json b/.luarc.json new file mode 100644 index 0000000000..d06245585f --- /dev/null +++ b/.luarc.json @@ -0,0 +1,13 @@ +{ + "$schema": "https://raw.githubusercontent.com/LuaLS/vscode-lua/master/setting/schema.json", + "runtime": { + "version": "LuaJIT" + }, + "workspace": { + "library": [ + "$VIMRUNTIME", + "${3rd}/luv/library" + ], + "checkThirdParty": false + } +} diff --git a/.stylua.toml b/.stylua.toml new file mode 100644 index 0000000000..c4b052ab72 --- /dev/null +++ b/.stylua.toml @@ -0,0 +1,5 @@ +column_width = 80 +indent_type = "Spaces" +indent_width = 2 +quote_style = "AutoPreferDouble" +call_parentheses = "None" diff --git a/autoload/vimtex/options.vim b/autoload/vimtex/options.vim index 35f11ed700..69abf3d3b0 100644 --- a/autoload/vimtex/options.vim +++ b/autoload/vimtex/options.vim @@ -311,7 +311,9 @@ function! vimtex#options#init() abort " {{{1 call s:init_option('vimtex_lint_chktex_ignore_warnings', \ '-n1 -n3 -n8 -n25 -n36') - call s:init_option('vimtex_parser_bib_backend', 'bibtex') + call s:init_option('vimtex_parser_bib_backend', + \ has('nvim') ? 
'lua' : 'bibtex' + \) call s:init_option('vimtex_parser_cmd_separator_check', \ 'vimtex#cmd#parser_separator_check') diff --git a/autoload/vimtex/parser/bib.vim b/autoload/vimtex/parser/bib.vim index ce83e1cd9c..63ea16a0fb 100644 --- a/autoload/vimtex/parser/bib.vim +++ b/autoload/vimtex/parser/bib.vim @@ -250,6 +250,18 @@ endfunction " }}}1 +function! s:parse_with_lua(file) abort " {{{1 + if !has('nvim') + call vimtex#log#error( + \ 'bib parser backend "lua" only works with neovim!') + return [] + endif + + return luaeval('require("vimtex.bibparser").parse(_A)', a:file) +endfunction + +" }}}1 + function! s:parse_with_vim(file) abort " {{{1 " Adheres to the format description found here: " http://www.bibtex.org/Format/ diff --git a/doc/vimtex.txt b/doc/vimtex.txt index 715b941235..5b407a2a2e 100644 --- a/doc/vimtex.txt +++ b/doc/vimtex.txt @@ -1310,8 +1310,14 @@ OPTIONS *vimtex-options* This option sets the desired default backend for parsing bibliographies. This is used e.g. for gathering completion candidates. Possible values: - `bibtex`: The fastest, but most hacky solution. Should work well in most - cases. + `bibtex`: The fastest, but most "hacky" solution. Still, time has proved + that this works well! + + `vim`: The slowest but perhaps most robust solution, as it does not + require any external utilities. + + `lua`: A Lua implementation of the Vim backend. About as fast as the + `bibtex` parser, but this only works on Neovim. `bibparse`: Also fast, but might be more robust. @@ -1335,9 +1341,6 @@ OPTIONS *vimtex-options* (see |if_pyth| and |py3|) and that the `bibtexparser` Python module is installed and available. - `vim`: The slowest but perhaps most robust solution, as it does not - require any external utilities. - Some people may want to conditionally change this option if a backend is available. 
For example: >vim @@ -1345,7 +1348,10 @@ OPTIONS *vimtex-options* let g:vimtex_parser_bib_backend = 'bibparse' endif < - Default value: `bibtex` + Default value: + + Vim: `bibtex` + Neovim: `lua` *g:vimtex_parser_cmd_separator_check* This option specifies the policy for deciding whether successive groups of diff --git a/lua/vimtex/bibparser.lua b/lua/vimtex/bibparser.lua new file mode 100644 index 0000000000..7e4c060ad8 --- /dev/null +++ b/lua/vimtex/bibparser.lua @@ -0,0 +1,237 @@ +local function format_line(title, object) + return "**" .. string.upper(title) .. "**" .. ": " .. object .. "\n" +end + +local function format_entry(entry) + local info = "" + local _entry = {} + local prioritized_keys = { "title", "author" } + + for _, key in pairs(prioritized_keys) do + if entry[key] ~= nil then + info = info .. format_line(key, entry[key]) + end + end + + local i = 0 + for k, v in pairs(entry) do + if not vim.tbl_contains(prioritized_keys, k) then + i = i + 1 + _entry[i] = format_line(k, v) + end + end + table.sort(_entry) + + for _, a in ipairs(_entry) do + info = info .. a + end + + return info +end + +---@param head 0-index +local function get_key(body, head) + local matches = vim.fn.matchlist(body, [[^\v([-_:0-9a-zA-Z]+)\s*\=\s*]], head) + if vim.tbl_isempty(matches) then + return "", -1 + end + + return string.lower(matches[2]), head + vim.fn.strlen(matches[1]) +end + +local function parse_string(line, current, strings) + current.level = current.level + + vim.fn.count(line, "{") + - vim.fn.count(line, "}") + if current.level > 0 then + current.body = current.body .. line + return false + end + + local body = current.body .. 
vim.fn.matchstr(line, [[.*\ze}]]) + + local matches = vim.fn.matchlist(body, [[\v^\s*(\S+)\s*\=\s*"(.*)"\s*$]]) + if not vim.tbl_isempty(matches) and matches[2] ~= "" then + strings[matches[2]] = matches[3] + end + + matches = vim.fn.matchlist(body, [[\v^\s*(\S+)\s*\=\s*\{(.*)\}\s*$]]) + if not vim.tbl_isempty(matches) and matches[2] ~= "" then + strings[matches[2]] = matches[3] + end + + return true +end + +local function parse_entry(line, current, entries) + current.level = current.level + + vim.fn.count(line, "{") + - vim.fn.count(line, "}") + if current.level > 0 then + current.body = current.body .. line + return false + end + + current.body = current.body .. vim.fn.matchstr(line, [[.*\ze}]]) + table.insert(entries, current) + return true +end + +local function parse_type(file, lnum, line, current, strings, entries) + local matches = vim.fn.matchlist(line, [[\v^\@(\w+)\s*\{\s*(.*)]]) + if vim.tbl_isempty(matches) then + return true + end + + local type = string.lower(matches[2]) + local types = { preamble = 1, comment = 1 } + if types[type] ~= nil then + return true + end + + current.level = 1 + current.body = "" + current.vimtex_file = file + current.vimtex_lnum = lnum + current.type = type + + if current.type == "string" then + return parse_string(matches[3], current, strings) + end + + matches = vim.fn.matchlist(matches[3], [[\v^([^, ]*)\s*,\s*(.*)]]) + current.key = matches[2] + if matches[3] ~= nil and matches[3] ~= "" then + return parse_entry(matches[3], current, entries) + end + + return false +end + +---@param head 0-index +local function get_value_string(body, head, strings) + local value + local head_1 + if body:sub(head + 1, head + 1) == "{" then + local sum = 1 + local i1 = head + 1 + local i0 = i1 + + while sum > 0 do + local match + local res = vim.fn.matchstrpos(body, [=[[{}]]=], i1) + match, _, i1 = res[1], res[2], res[3] + res = nil + + if i1 < 0 then + break + end + + i0 = i1 + sum = sum + (match == "{" and 1 or 
-1) + end + + value = body:sub(head + 1 + 1, i0 - 2 + 1) + head_1 = vim.fn.matchend(body, [[^\s*]], i0) + elseif body:sub(head + 1, head + 1) == [["]] then + local index = vim.fn.match(body, [[\\\@= 0 do + if vim.fn.empty(key) == 1 then + key, pos = get_key(entry.body, pos) + else + local value + value, pos = get_value(entry.body, pos, strings) + entry[key] = value + key = "" + end + end + + entry.body = nil + return entry +end + +local M = {} +M.parse = function(file) + if file == nil or vim.fn.filereadable(file) == 0 then + return {} + end + + local entries = {} + local strings = {} + local current = {} + + local lines = vim.fn.readfile(file) + for lnum = 1, #lines do + local line = lines[lnum] + + if vim.tbl_isempty(current) then + if parse_type(file, lnum, line, current, strings, entries) then + current = {} + end + goto continue + end + + if current.type == "string" then + if parse_string(line, current, strings) then + current = {} + end + else + if parse_entry(line, current, entries) then + current = {} + end + end + + ::continue:: + end + + local result = {} + for _, v in ipairs(entries) do + local entry = parse_entry_body(v, strings) + entry.formatted_info = format_entry(entry) + table.insert(result, entry) + end + + return result +end + +return M diff --git a/test/test-completion-bibtex-speed/Makefile b/test/test-completion-bibtex-speed/Makefile index 926123fdce..c1ca76c20a 100644 --- a/test/test-completion-bibtex-speed/Makefile +++ b/test/test-completion-bibtex-speed/Makefile @@ -5,6 +5,7 @@ MYVIM ?= nvim --clean --headless test: @INMAKE=1 $(MYVIM) -u bibspeed.vim @INMAKE=1 BACKEND=vim $(MYVIM) -u bibspeed.vim + @INMAKE=1 BACKEND=lua $(MYVIM) -u bibspeed.vim @#INMAKE=1 BACKEND=bibparse $(MYVIM) -u bibspeed.vim @#INMAKE=1 BACKEND=bibtexparser $(MYVIM) -u bibspeed.vim @rm -f nvim_servernames.log