From de91b7bffb20fb0287484437ef1b05b4124d0994 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Karl=20Yngve=20Lerv=C3=A5g?= <karl.yngve+git@gmail.com>
Date: Sun, 22 Oct 2023 13:37:43 +0200
Subject: [PATCH 1/2] feat: added new bib parser in lua

This is similar to the Vimscript parser ("vim"), but since it is in Lua
it is much faster and comparable to the current fastest parser
("bibtex").

refer: #2786
---
 .luarc.json                                   |  13 +
 .stylua.toml                                  |   5 +
 autoload/vimtex/context/cite.vim              |  11 +-
 autoload/vimtex/options.vim                   |   4 +-
 autoload/vimtex/parser/bib.vim                |  18 +-
 doc/vimtex.txt                                |  18 +-
 lua/vimtex/bibparser.lua                      | 235 ++++++++++++++++++
 test/test-completion-bibtex-speed/Makefile    |   3 +-
 .../test-completion-bibtex-speed/bibspeed.vim |   5 +-
 test/test-completion-bibtex/Makefile          |   3 +-
 test/test-completion-bibtex/test_backend.vim  |   5 +-
 test/test-completion-bibtex/test_cache.vim    |   1 +
 test/test-completion-bibtex/test_matchstr.vim |   1 +
 test/test-parser-bib/test.bib                 |  24 +-
 test/test-parser-bib/test.vim                 |  65 +++--
 15 files changed, 358 insertions(+), 53 deletions(-)
 create mode 100644 .luarc.json
 create mode 100644 .stylua.toml
 create mode 100644 lua/vimtex/bibparser.lua

diff --git a/.luarc.json b/.luarc.json
new file mode 100644
index 0000000000..d06245585f
--- /dev/null
+++ b/.luarc.json
@@ -0,0 +1,13 @@
+{
+  "$schema": "https://raw.githubusercontent.com/LuaLS/vscode-lua/master/setting/schema.json",
+  "runtime": {
+    "version": "LuaJIT"
+  },
+  "workspace": {
+    "library": [
+      "$VIMRUNTIME",
+      "${3rd}/luv/library"
+    ],
+    "checkThirdParty": false
+  }
+}
diff --git a/.stylua.toml b/.stylua.toml
new file mode 100644
index 0000000000..c4b052ab72
--- /dev/null
+++ b/.stylua.toml
@@ -0,0 +1,5 @@
+column_width = 80
+indent_type = "Spaces"
+indent_width = 2
+quote_style = "AutoPreferDouble"
+call_parentheses = "None"
diff --git a/autoload/vimtex/context/cite.vim b/autoload/vimtex/context/cite.vim
index 7dc8f0feed..5e409296fc 100644
--- a/autoload/vimtex/context/cite.vim
+++ b/autoload/vimtex/context/cite.vim
@@ -48,7 +48,10 @@ function! s:handler.get_actions() abort dict " {{{1
   call vimtex#paths#pushd(b:vimtex.root)
   let l:entries = []
   for l:file in vimtex#bib#files()
-    let l:entries += vimtex#parser#bib(l:file, {'backend': 'vim'})
+    let l:entries += vimtex#parser#bib(
+          \ l:file,
+          \ {'backend': has('nvim') ? 'lua' : 'vim'}
+          \)
   endfor
   call vimtex#paths#popd()
 
@@ -119,7 +122,7 @@ function! s:actions.show() abort dict " {{{1
         \ ['Normal', ','],
         \])
 
-  for l:x in ['key', 'type', 'vimtex_lnum', 'vimtex_file']
+  for l:x in ['key', 'type', 'source_lnum', 'source_file']
     if has_key(l:entry, l:x)
       call remove(l:entry, l:x)
     endif
@@ -144,10 +147,10 @@ endfunction
 
 " }}}1
 function! s:actions.edit() abort dict " {{{1
-  execute 'edit' self.entry.vimtex_file
+  execute 'edit' self.entry.source_file
   filetype detect
 
-  call vimtex#pos#set_cursor(self.entry.vimtex_lnum, 0)
+  call vimtex#pos#set_cursor(self.entry.source_lnum, 0)
   normal! zv
 endfunction
 
diff --git a/autoload/vimtex/options.vim b/autoload/vimtex/options.vim
index 35f11ed700..69abf3d3b0 100644
--- a/autoload/vimtex/options.vim
+++ b/autoload/vimtex/options.vim
@@ -311,7 +311,9 @@ function! vimtex#options#init() abort " {{{1
   call s:init_option('vimtex_lint_chktex_ignore_warnings',
         \ '-n1 -n3 -n8 -n25 -n36')
 
-  call s:init_option('vimtex_parser_bib_backend', 'bibtex')
+  call s:init_option('vimtex_parser_bib_backend',
+        \ has('nvim') ? 'lua' : 'bibtex'
+        \)
   call s:init_option('vimtex_parser_cmd_separator_check',
         \ 'vimtex#cmd#parser_separator_check')
 
diff --git a/autoload/vimtex/parser/bib.vim b/autoload/vimtex/parser/bib.vim
index ce83e1cd9c..86a54f13cf 100644
--- a/autoload/vimtex/parser/bib.vim
+++ b/autoload/vimtex/parser/bib.vim
@@ -250,6 +250,18 @@ endfunction
 
 " }}}1
 
+function! s:parse_with_lua(file) abort " {{{1
+  " Parse a:file with the Lua parser; this requires Neovim.
+  if has('nvim')
+    return luaeval('require("vimtex.bibparser").parse(_A)', a:file)
+  endif
+  call vimtex#log#error(
+        \ 'bib parser backend "lua" only works with neovim!')
+  return []
+endfunction
+
+" }}}1
+
 function! s:parse_with_vim(file) abort " {{{1
   " Adheres to the format description found here:
   " http://www.bibtex.org/Format/
@@ -297,8 +309,8 @@ function! s:parse_type(file, lnum, line, current, strings, entries) abort " {{{1
 
   let a:current.level = 1
   let a:current.body = ''
-  let a:current.vimtex_file = a:file
-  let a:current.vimtex_lnum = a:lnum
+  let a:current.source_file = a:file
+  let a:current.source_lnum = a:lnum
 
   if l:type ==# 'string'
     return s:parse_string(l:matches[2], a:current, a:strings)
@@ -420,7 +432,7 @@ function! s:get_value_string(body, head, strings) abort " {{{1
   elseif a:body[a:head] ==# '"'
     let l:index = match(a:body, '\\\@<!"', a:head+1)
     if l:index < 0
-      return ['s:get_value_string failed', '']
+      return ['s:get_value_string failed', -1]
     endif
 
     let l:value = a:body[a:head+1:l:index-1]
diff --git a/doc/vimtex.txt b/doc/vimtex.txt
index 715b941235..5b407a2a2e 100644
--- a/doc/vimtex.txt
+++ b/doc/vimtex.txt
@@ -1310,8 +1310,14 @@ OPTIONS                                                        *vimtex-options*
   This option sets the desired default backend for parsing bibliographies.
   This is used e.g. for gathering completion candidates. Possible values:
 
-    `bibtex`:   The fastest, but most hacky solution. Should work well in most
-              cases.
+    `bibtex`:   The fastest, but most "hacky" solution. Still, time has proved
+              that this works well!
+
+    `vim`:      The slowest but perhaps most robust solution, as it does not
+              require any external utilities.
+
+    `lua`:      A Lua implementation of the Vim backend. About as fast as the
+              `bibtex` parser, but this only works on Neovim.
 
     `bibparse`: Also fast, but might be more robust.
 
@@ -1335,9 +1341,6 @@ OPTIONS                                                        *vimtex-options*
                     (see |if_pyth| and |py3|) and that the `bibtexparser`
                     Python module is installed and available.
 
-    `vim`:      The slowest but perhaps most robust solution, as it does not
-              require any external utilities.
-
   Some people may want to conditionally change this option if a backend is
   available. For example: >vim
 
@@ -1345,7 +1348,10 @@ OPTIONS                                                        *vimtex-options*
       let g:vimtex_parser_bib_backend = 'bibparse'
     endif
 <
-  Default value: `bibtex`
+  Default value:
+
+    Vim:    `bibtex`
+    Neovim: `lua`
 
 *g:vimtex_parser_cmd_separator_check*
   This option specifies the policy for deciding whether successive groups of
diff --git a/lua/vimtex/bibparser.lua b/lua/vimtex/bibparser.lua
new file mode 100644
index 0000000000..fb90c9a1a8
--- /dev/null
+++ b/lua/vimtex/bibparser.lua
@@ -0,0 +1,235 @@
+-- VimTeX - LaTeX plugin for Vim
+--
+-- Maintainer: Karl Yngve Lervåg
+-- Email:      karl.yngve@gmail.com
+--
+
+---Append a continuation line to an entry and track its brace depth
+---@param item table The entry under construction (modified in place)
+---@param line string The next raw line of the entry
+---@return table item The same entry; `parsed` is set once it is closed
+local function parse_tail(item, line)
+  -- gsub returns the number of substitutions as its second value.
+  local n_open = select(2, line:gsub("{", ""))
+  local n_close = select(2, line:gsub("}", ""))
+  item.level = item.level + n_open - n_close
+  if item.level <= 0 then
+    -- Closing line: keep only the text in front of the last "}".
+    item.parsed = true
+    line = vim.fn.matchstr(line, [[.*\ze}]])
+  end
+  item.body = item.body .. line
+  return item
+end
+
+---Start a new entry from a line of the form `@type{...`
+---@param file string The path to the bibtex file
+---@param lnum integer The line number where the entry starts
+---@param line string The line content
+---@return table item The new entry, or an empty table if none starts here
+local function parse_head(file, lnum, line)
+  local parts = vim.fn.matchlist(line, [[\v^\@(\w+)\s*\{\s*(.*)]])
+  if vim.tbl_isempty(parts) then
+    return {}
+  end
+
+  -- parts[2] is the entry type, parts[3] the remainder of the line.
+  -- @preamble and @comment blocks are ignored.
+  local kind = parts[2]:lower()
+  if kind == "preamble" or kind == "comment" then
+    return {}
+  end
+
+  -- The opening brace is consumed by the pattern, so the depth starts at 1.
+  local item = { type = kind, source_file = file, source_lnum = lnum }
+  item.level = 1
+  item.body = ""
+  return parse_tail(item, parts[3])
+end
+
+---Parse a tag value that may be a concatenation of parts joined by "#".
+---A part is a {braced} group, a "quoted" string, or the name of an
+---abbreviation defined by an @string entry; an unknown name is kept as
+---`@(name)`. Offsets are zero-based byte positions in `body`.
+---@param body string Raw text of the entry body
+---@param head integer Offset at which the next part starts
+---@param strings table<string, string> Abbreviations from @string entries
+---@param pre_value string Value accumulated from the preceding parts
+---@return string value The parsed value (an error text on a missing quote)
+---@return integer head Offset of the next tag, or -1 if no comma follows
+local function get_tag_value_concat(body, head, strings, pre_value)
+  local value = ""
+  local new_head = head
+
+  if body:sub(head + 1, head + 1) == "{" then
+    local sum = 1
+    local i = head + 1
+    local n = #body
+    -- Scan forward until the brace opened at head is balanced again.
+    while sum > 0 and i <= n do
+      local char = body:sub(i + 1, i + 1)
+      if char == "{" then
+        sum = sum + 1
+      elseif char == "}" then
+        sum = sum - 1
+      end
+
+      i = i + 1
+    end
+    -- i is now just past the closing brace (or past the end if unbalanced).
+    value = body:sub(head + 2, i - 1)
+    new_head = vim.fn.matchend(body, [[^\s*]], i)
+  elseif body:sub(head + 1, head + 1) == [["]] then
+    local index = vim.fn.match(body, [[\\\@<!"]], head + 1)
+    if index < 0 then
+      return "bibparser.lua: get_tag_value_concat failed", -1
+    end
+
+    value = body:sub(head + 1 + 1, index - 1 + 1)
+    new_head = vim.fn.matchend(body, [[^\s*]], index + 1)
+  elseif vim.fn.match(body, [[^\w]], head) >= 0 then
+    value = vim.fn.matchstr(body, [[^\w[0-9a-zA-Z_-]*]], head)
+    new_head = vim.fn.matchend(body, [[^\s*]], head + vim.fn.strlen(value))
+    value = strings[value] or ("@(" .. value .. ")")
+  end
+  -- A "#" means another part follows: recurse with the value so far.
+  if body:sub(new_head + 1, new_head + 1) == "#" then
+    new_head = vim.fn.matchend(body, [[^\s*]], new_head + 1)
+    return get_tag_value_concat(body, new_head, strings, pre_value .. value)
+  end
+
+  return pre_value .. value, vim.fn.matchend(body, [[^,\s*]], new_head)
+end
+
+---Parse a tag value, with a shortcut for plain numeric values
+---@param body string Raw text of the entry body
+---@param head integer Zero-based offset at which the value starts
+---@param strings table<string, string> Abbreviations from @string entries
+---@return string value The parsed value
+---@return integer head Offset of the next tag, or -1 if no comma follows
+local function get_tag_value(body, head, strings)
+  local first = body:sub(head + 1, head + 1)
+  if not first:match("%d") then
+    return get_tag_value_concat(body, head, strings, "")
+  end
+
+  -- Bare number: read the digits, then step over the separating comma.
+  local digits = vim.fn.matchstr(body, [[^\d\+]], head)
+  local next_head = vim.fn.matchend(body, [[^\s*,\s*]], head + #digits)
+  return digits, next_head
+end
+
+---Read a tag name (e.g. author, title) and the "=" that follows it
+---@param body string Raw text in which to find the tag
+---@param head integer Zero-based offset at which to look for the tag
+---@return string tag_name The lower-cased tag name, or "" if none is found
+---@return integer head Offset just after the match, or -1 if none is found
+local function get_tag_name(body, head)
+  local pattern = [[^\v([-_:0-9a-zA-Z]+)\s*\=\s*]]
+  local found = vim.fn.matchlist(body, pattern, head)
+  if vim.tbl_isempty(found) then
+    return "", -1
+  end
+  return found[2]:lower(), head + #found[1]
+end
+
+---Split a collected entry body into its key and tag/value fields
+---@param item table Entry collected by parse_head and parse_tail
+---@param strings table<string, string> Abbreviations from @string entries
+---@return table|nil item The entry with tags as fields; nil without a key
+local function parse_item(item, strings)
+  local parts = vim.fn.matchlist(item.body, [[\v^([^, ]*)\s*,\s*(.*)]])
+  local key, body = parts[2], parts[3]
+
+  -- No match leaves key nil; a match may still capture an empty key.
+  item.key = key
+  if not key or key == "" then
+    return nil
+  end
+
+  -- Drop the bookkeeping fields that were used while collecting the entry.
+  item.level, item.parsed, item.body = nil, nil, nil
+
+  -- Alternate between reading a tag name and its value. Either helper
+  -- returns a head of -1 when it cannot parse further, which ends the loop.
+  local head = 0
+  while head >= 0 do
+    local tag
+    tag, head = get_tag_name(body, head)
+    if head >= 0 then
+      local value
+      value, head = get_tag_value(body, head, strings)
+      item[tag] = value
+    end
+  end
+
+  return item
+end
+
+---Parse the body of an @string entry: `name = "..."` or `name = {...}`
+---@param raw_string string
+---@return string key The abbreviation name, or "" if nothing was recognized
+---@return string value The replacement text, or "" if nothing was recognized
+local function parse_string(raw_string)
+  local patterns = {
+    [[\v^\s*(\S+)\s*\=\s*"(.*)"\s*$]],
+    [[\v^\s*(\S+)\s*\=\s*\{(.*)\}\s*$]],
+  }
+  for _, pattern in ipairs(patterns) do
+    local found = vim.fn.matchlist(raw_string, pattern)
+    -- found[2] is the name, found[3] the value; empty values are skipped.
+    if found[3] ~= nil and found[3] ~= "" then
+      return found[2], found[3]
+    end
+  end
+  return "", ""
+end
+
+local M = {}
+
+---Parse the specified bibtex file
+---The parser adheres to the format description found here:
+---http://www.bibtex.org/Format/
+---@param file string Path to the bibtex file
+---@return table[] entries Parsed entries; empty if the file is unreadable
+M.parse = function(file)
+  -- filereadable() returns 0 or 1 and 0 is truthy in Lua: compare with 0.
+  if file == nil or vim.fn.filereadable(file) == 0 then
+    return {}
+  end
+
+  local items = {}
+  local strings = {}
+
+  -- First pass: collect raw entries and record the @string definitions.
+  local item = {}
+  for lnum, line in ipairs(vim.fn.readfile(file)) do
+    if vim.tbl_isempty(item) then
+      item = parse_head(file, lnum, line)
+    else
+      item = parse_tail(item, line)
+    end
+
+    if item.parsed then
+      if item.type == "string" then
+        local key, value = parse_string(item.body)
+        if key ~= "" then
+          strings[key] = value
+        end
+      else
+        table.insert(items, item)
+      end
+      item = {}
+    end
+  end
+
+  -- Second pass: parse the tags now that all abbreviations are known.
+  -- Entries without a key yield nil and are left out of the result.
+  local result = {}
+  for _, x in ipairs(items) do
+    result[#result + 1] = parse_item(x, strings)
+  end
+  return result
+end
+
+return M
diff --git a/test/test-completion-bibtex-speed/Makefile b/test/test-completion-bibtex-speed/Makefile
index 926123fdce..c50dcc9dbe 100644
--- a/test/test-completion-bibtex-speed/Makefile
+++ b/test/test-completion-bibtex-speed/Makefile
@@ -3,8 +3,9 @@
 MYVIM ?= nvim --clean --headless
 
 test:
-	@INMAKE=1                      $(MYVIM) -u bibspeed.vim
+	@INMAKE=1 BACKEND=bibtex       $(MYVIM) -u bibspeed.vim
 	@INMAKE=1 BACKEND=vim          $(MYVIM) -u bibspeed.vim
+	@INMAKE=1 BACKEND=lua          $(MYVIM) -u bibspeed.vim
 	@#INMAKE=1 BACKEND=bibparse     $(MYVIM) -u bibspeed.vim
 	@#INMAKE=1 BACKEND=bibtexparser $(MYVIM) -u bibspeed.vim
 	@rm -f nvim_servernames.log
diff --git a/test/test-completion-bibtex-speed/bibspeed.vim b/test/test-completion-bibtex-speed/bibspeed.vim
index 7b6fbc26ba..d89a93de8e 100644
--- a/test/test-completion-bibtex-speed/bibspeed.vim
+++ b/test/test-completion-bibtex-speed/bibspeed.vim
@@ -6,7 +6,6 @@ nnoremap q :qall!<cr>
 
 let g:vimtex_cache_root = '.'
 let g:vimtex_cache_persistent = 0
-
 if !empty($BACKEND)
   let g:vimtex_parser_bib_backend = $BACKEND
 endif
@@ -15,6 +14,10 @@ silent edit bibspeed.tex
 
 if empty($INMAKE) | finish | endif
 
+if g:vimtex_parser_bib_backend ==# 'lua' && !has('nvim')
+  call vimtex#test#finished()
+endif
+
 normal! 10G
 
 " execute 'profile start' 'bibspeed-' . g:vimtex_parser_bib_backend . '.log'
diff --git a/test/test-completion-bibtex/Makefile b/test/test-completion-bibtex/Makefile
index 5a5a760861..07a8f87d70 100644
--- a/test/test-completion-bibtex/Makefile
+++ b/test/test-completion-bibtex/Makefile
@@ -5,8 +5,7 @@ export INMAKE
 
 TESTS := $(wildcard test*.vim)
 TESTS := $(filter-out test_backend,$(TESTS:.vim=))
-BACKENDS := bibtex vim
-# BACKENDS := $(BACKENDS) bibparse bibtexparser
+BACKENDS := bibtex vim lua
 
 .PHONY: test $(TESTS) $(BACKENDS) test-cleanup
 
diff --git a/test/test-completion-bibtex/test_backend.vim b/test/test-completion-bibtex/test_backend.vim
index 24bd0edfab..5b58c08abb 100644
--- a/test/test-completion-bibtex/test_backend.vim
+++ b/test/test-completion-bibtex/test_backend.vim
@@ -6,7 +6,6 @@ nnoremap q :qall!<cr>
 
 let g:vimtex_cache_root = '.'
 let g:vimtex_cache_persistent = 0
-
 if !empty($BACKEND)
   let g:vimtex_parser_bib_backend = $BACKEND
 endif
@@ -15,6 +14,10 @@ silent edit test_backend.tex
 
 if empty($INMAKE) | finish | endif
 
+if g:vimtex_parser_bib_backend ==# 'lua' && !has('nvim')
+  call vimtex#test#finished()
+endif
+
 " Simplify test on basic systems
 if empty(vimtex#kpsewhich#find('biblatex-examples.bib'))
   let s:candidates = vimtex#test#completion('\cite{', '')
diff --git a/test/test-completion-bibtex/test_cache.vim b/test/test-completion-bibtex/test_cache.vim
index 57f4a0b9dd..814dc2e84b 100644
--- a/test/test-completion-bibtex/test_cache.vim
+++ b/test/test-completion-bibtex/test_cache.vim
@@ -5,6 +5,7 @@ filetype plugin on
 nnoremap q :qall!<cr>
 
 let g:vimtex_cache_root = '.'
+let g:vimtex_parser_bib_backend = 'bibtex'
 
 silent edit test_cache.tex
 
diff --git a/test/test-completion-bibtex/test_matchstr.vim b/test/test-completion-bibtex/test_matchstr.vim
index 03e3d67a50..f3454a10c1 100644
--- a/test/test-completion-bibtex/test_matchstr.vim
+++ b/test/test-completion-bibtex/test_matchstr.vim
@@ -6,6 +6,7 @@ nnoremap q :qall!<cr>
 
 let g:vimtex_cache_root = '.'
 let g:vimtex_cache_persistent = 0
+let g:vimtex_parser_bib_backend = 'bibtex'
 let g:vimtex_complete_bib = {'match_str_fmt':  '@key @author_all @year "@title"'}
 
 silent edit test_matchstr.tex
diff --git a/test/test-parser-bib/test.bib b/test/test-parser-bib/test.bib
index 4790518186..3128b850f7 100644
--- a/test/test-parser-bib/test.bib
+++ b/test/test-parser-bib/test.bib
@@ -1,5 +1,6 @@
 @string{ test= "something" }
 @string{ name1 = "Mr. Foo" }
+@string{anch-ie = {Angew.~Chem. Int.~Ed.}}
 
 @comment{
   blahrg
@@ -8,19 +9,13 @@ @comment{
 @preamble{silly things
 }
 
-@SomeType{key,
+@SomeType{key1,
   title  = "Some title, with a comma in it",
   year   = {2017},
   author = "Author1 and Author2",
   other  = {Something else}
 }
 
-@article{knuth,
-  title  = "Other title",
-  year   = {1938},
-  author = "Donald Knuth",
-}
-
 @misc{key2,
   title = {A new title},
   author = name1 # " and Mr. Bar",
@@ -39,9 +34,20 @@ @misc{key4
   
 }
 
-@article{knuth-single-line, title  = "Other title", year   = {1938}, author = "Donald Knuth", }
+@misc{key5,
+  author = {text here } # test,
+  title = "title: " # anch-ie
+}
 
-@errorintags{key4,
+@errorintags{key6,
   title = {some title}
   author = "should not work",
 }
+
+@article{knuth,
+  title  = "Other title",
+  year   = {1938},
+  author = "Donald Knuth",
+}
+
+@article{knuth-single-line, title  = "Other title", year   = {1938}, author = "Donald Knuth", }
diff --git a/test/test-parser-bib/test.vim b/test/test-parser-bib/test.vim
index a42c8727c8..5f578faf31 100644
--- a/test/test-parser-bib/test.vim
+++ b/test/test-parser-bib/test.vim
@@ -6,36 +6,51 @@ function! TestBackend(bibfile, backend) abort
   return vimtex#parser#bib(a:bibfile)
 endfunction
 
+let s:backends = ['bibtex', 'vim']
+if has('nvim')
+  call add(s:backends, 'lua')
+endif
 
-let s:parsed = TestBackend('test.bib', 'bibtex')
-call assert_equal(6, len(s:parsed))
+for s:backend in s:backends
+  let s:parsed = TestBackend('test.bib', s:backend)
+  call assert_equal(8, len(s:parsed),
+        \ "Failed for backend: " . s:backend)
+
+  for s:entry in s:parsed
+    if s:entry.key == 'key5'
+      call assert_match(
+            \ 'text.here something',
+            \ get(s:entry, 'author', ''),
+            \ "Failed for backend: " . s:backend)
+      call assert_match(
+            \ '^title: Angew',
+            \ get(s:entry, 'title', ''),
+            \ "Failed for backend: " . s:backend)
+    endif
+  endfor
+endfor
+
+" Check that Vim and Lua parser give the same result
+if has('nvim')
+  let s:parsed_lua = TestBackend('test.bib', 'lua')
+  let s:parsed_vim = TestBackend('test.bib', 'vim')
+  call assert_equal(len(s:parsed_lua), len(s:parsed_vim))
+  for s:i in range(len(s:parsed_lua))
+    call assert_equal(s:parsed_lua[s:i], s:parsed_vim[s:i])
+  endfor
+endif
 
-let s:parsed = TestBackend('test.bib', 'vim')
-call assert_equal(7, len(s:parsed))
+let s:bib = vimtex#kpsewhich#find('biblatex-examples.bib')
+if !empty(s:bib) && filereadable(s:bib)
+  for s:backend in s:backends
+    let s:parsed = TestBackend(s:bib, s:backend)
+    call assert_equal(92, len(s:parsed),
+          \ "Failed for backend: " . s:backend)
+  endfor
+endif
 
 call vimtex#log#set_silent()
 let s:parsed = TestBackend('test.bib', 'badparser')
 call assert_equal(0, len(s:parsed))
 
-" let s:parsed = TestBackend('test.bib', 'bibparse')
-" call assert_equal(7, len(s:parsed))
-
-" let s:parsed = TestBackend('test.bib', 'bibtexparser')
-" call assert_equal(5, len(s:parsed))
-
-let s:bib = vimtex#kpsewhich#find('biblatex-examples.bib')
-if !empty(s:bib) && filereadable(s:bib)
-  let s:parsed = TestBackend(s:bib, 'bibtex')
-  call assert_equal(92, len(s:parsed))
-
-  let s:parsed = TestBackend(s:bib, 'vim')
-  call assert_equal(92, len(s:parsed))
-
-  " let s:parsed = TestBackend(s:bib, 'bibparse')
-  " call assert_equal(92, len(s:parsed))
-
-  " let s:parsed = TestBackend(s:bib, 'bibtexparser')
-  " call assert_equal(92, len(s:parsed))
-endif
-
 call vimtex#test#finished()

From 0aa412d8b9589fadf363e63f9dcad6980453f77b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Karl=20Yngve=20Lerv=C3=A5g?= <karl.yngve+git@gmail.com>
Date: Wed, 25 Oct 2023 19:11:48 +0200
Subject: [PATCH 2/2] feat: improve vimscript implementation of bib parser

---
 autoload/vimtex/parser/bib.vim | 156 ++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 82 deletions(-)

diff --git a/autoload/vimtex/parser/bib.vim b/autoload/vimtex/parser/bib.vim
index 86a54f13cf..17905f145e 100644
--- a/autoload/vimtex/parser/bib.vim
+++ b/autoload/vimtex/parser/bib.vim
@@ -270,117 +270,111 @@ function! s:parse_with_vim(file) abort " {{{1
     return []
   endif
 
-  let l:current = {}
+  let l:items = []
   let l:strings = {}
-  let l:entries = []
+
+  let l:item = {}
   let l:lnum = 0
   for l:line in readfile(a:file)
     let l:lnum += 1
 
-    if empty(l:current)
-      if s:parse_type(a:file, l:lnum, l:line, l:current, l:strings, l:entries)
-        let l:current = {}
-      endif
-      continue
-    endif
+    let l:item = empty(l:item)
+          \ ? s:parse_head(a:file, l:lnum, l:line)
+          \ : s:parse_tail(l:item, l:line)
 
-    if l:current.type ==# 'string'
-      if s:parse_string(l:line, l:current, l:strings)
-        let l:current = {}
-      endif
-    else
-      if s:parse_entry(l:line, l:current, l:entries)
-        let l:current = {}
+    if has_key(l:item, 'parsed')
+      if l:item.type == "string"
+        let [l:key, l:value] = s:parse_string(l:item.body)
+        if !empty(l:key)
+          let l:strings[l:key] = l:value
+        endif
+      else
+        call add(l:items, l:item)
       endif
+      let l:item = {}
     endif
   endfor
 
-  return map(l:entries, 's:parse_entry_body(v:val, l:strings)')
+  return map(l:items, 's:parse_item(v:val, l:strings)')
 endfunction
 
 " }}}1
 
-function! s:parse_type(file, lnum, line, current, strings, entries) abort " {{{1
+function! s:parse_head(file, lnum, line) abort " {{{1
   let l:matches = matchlist(a:line, '\v^\@(\w+)\s*\{\s*(.*)')
-  if empty(l:matches) | return 0 | endif
+  if empty(l:matches) | return {} | endif
 
   let l:type = tolower(l:matches[1])
-  if index(['preamble', 'comment'], l:type) >= 0 | return 0 | endif
-
-  let a:current.level = 1
-  let a:current.body = ''
-  let a:current.source_file = a:file
-  let a:current.source_lnum = a:lnum
+  if l:type == 'preamble' || l:type == 'comment' | return {} | endif
+
+  return s:parse_tail({
+        \ 'level': 1,
+        \ 'body': '',
+        \ 'source_file': a:file,
+        \ 'source_lnum': a:lnum,
+        \ 'type': l:type,
+        \}, l:matches[2])
+endfunction
 
-  if l:type ==# 'string'
-    return s:parse_string(l:matches[2], a:current, a:strings)
+" }}}1
+function! s:parse_tail(item, line) abort " {{{1
+  let a:item.level += s:count(a:line, '{') - s:count(a:line, '}')
+  if a:item.level > 0
+    let a:item.body .= a:line
   else
-    let l:matches = matchlist(l:matches[2], '\v^([^, ]*)\s*,\s*(.*)')
-    let a:current.type = l:type
-    let a:current.key = l:matches[1]
-
-    return empty(l:matches[2])
-          \ ? 0
-          \ : s:parse_entry(l:matches[2], a:current, a:entries)
+    let a:item.body .= matchstr(a:line, '.*\ze}')
+    let a:item.parsed = v:true
   endif
+
+  return a:item
 endfunction
 
 " }}}1
-function! s:parse_string(line, string, strings) abort " {{{1
-  let a:string.level += s:count(a:line, '{') - s:count(a:line, '}')
-  if a:string.level > 0
-    let a:string.body .= a:line
-    return 0
+function! s:parse_string(raw_string) abort " {{{1
+  let l:matches = matchlist(a:raw_string, '\v^\s*(\S+)\s*\=\s*"(.*)"\s*$')
+  if !empty(l:matches) && !empty(l:matches[1])
+    return [l:matches[1], l:matches[2]]
   endif
 
-  let a:string.body .= matchstr(a:line, '.*\ze}')
-
-  let l:matches = matchlist(a:string.body, '\v^\s*(\w+)\s*\=\s*"(.*)"\s*$')
+  let l:matches = matchlist(a:raw_string, '\v^\s*(\S+)\s*\=\s*\{(.*)\}\s*$')
   if !empty(l:matches) && !empty(l:matches[1])
-    let a:strings[l:matches[1]] = l:matches[2]
+    return [l:matches[1], l:matches[2]]
   endif
 
-  return 1
+  return ['', '']
 endfunction
 
 " }}}1
-function! s:parse_entry(line, entry, entries) abort " {{{1
-  let a:entry.level += s:count(a:line, '{') - s:count(a:line, '}')
-  if a:entry.level > 0
-    let a:entry.body .= a:line
-    return 0
-  endif
 
-  let a:entry.body .= matchstr(a:line, '.*\ze}')
+function! s:parse_item(item, strings) abort " {{{1
+  let l:parts = matchlist(a:item.body, '\v^([^, ]*)\s*,\s*(.*)')
 
-  call add(a:entries, a:entry)
-  return 1
-endfunction
-
-" }}}1
+  let a:item.key = get(l:parts, 1, '')
+  if empty(a:item.key) | return {} | endif
 
-function! s:parse_entry_body(entry, strings) abort " {{{1
-  unlet a:entry.level
+  unlet a:item.level
+  unlet a:item.body
+  unlet a:item.parsed
 
-  let l:key = ''
-  let l:pos = matchend(a:entry.body, '^\s*')
-  while l:pos >= 0
-    if empty(l:key)
-      let [l:key, l:pos] = s:get_key(a:entry.body, l:pos)
+  let l:body = l:parts[2]
+  let l:tag = ''
+  let l:head = 0
+  while l:head >= 0
+    if empty(l:tag)
+      let [l:tag, l:head] = s:get_tag_name(l:body, l:head)
     else
-      let [l:value, l:pos] = s:get_value(a:entry.body, l:pos, a:strings)
-      let a:entry[l:key] = l:value
-      let l:key = ''
+      let [l:value, l:head] = s:get_tag_value(l:body, l:head, a:strings)
+      let a:item[l:tag] = l:value
+      let l:tag = ''
     endif
   endwhile
 
-  unlet a:entry.body
-  return a:entry
+  return a:item
 endfunction
 
 " }}}1
-function! s:get_key(body, head) abort " {{{1
-  " Parse the key part of a bib entry tag.
+function! s:get_tag_name(body, head) abort " {{{1
+  " Parse the name part of a bib entry tag.
   " Assumption: a:body is left trimmed and either empty or starts with a key.
   " Returns: The key and the remaining part of the entry body.
 
@@ -391,7 +385,7 @@ function! s:get_key(body, head) abort " {{{1
 endfunction
 
 " }}}1
-function! s:get_value(body, head, strings) abort " {{{1
+function! s:get_tag_value(body, head, strings) abort " {{{1
   " Parse the value part of a bib entry tag, until separating comma or end.
   " Assumption: a:body is left trimmed and either empty or starts with a value.
   " Returns: The value and the remaining part of the entry body.
@@ -405,15 +399,16 @@ function! s:get_value(body, head, strings) abort " {{{1
     let l:value = matchstr(a:body, '^\d\+', a:head)
     let l:head = matchend(a:body, '^\s*,\s*', a:head + len(l:value))
     return [l:value, l:head]
-  else
-    return s:get_value_string(a:body, a:head, a:strings)
   endif
 
-  return ['s:get_value failed', -1]
+  return s:get_tag_value_concat(a:body, a:head, a:strings, "")
 endfunction
 
 " }}}1
-function! s:get_value_string(body, head, strings) abort " {{{1
+function! s:get_tag_value_concat(body, head, strings, pre_value) abort " {{{1
+  let l:value = ""
+  let l:head = a:head
+
   if a:body[a:head] ==# '{'
     let l:sum = 1
     let l:i1 = a:head + 1
@@ -432,27 +427,24 @@ function! s:get_value_string(body, head, strings) abort " {{{1
   elseif a:body[a:head] ==# '"'
     let l:index = match(a:body, '\\\@<!"', a:head+1)
     if l:index < 0
-      return ['s:get_value_string failed', -1]
+      return ['s:get_tag_value_concat failed', -1]
     endif
 
     let l:value = a:body[a:head+1:l:index-1]
     let l:head = matchend(a:body, '^\s*', l:index+1)
-    return [l:value, l:head]
   elseif a:body[a:head:] =~# '^\w'
-    let l:value = matchstr(a:body, '^\w\+', a:head)
+    let l:value = matchstr(a:body, '^\w[0-9a-zA-Z_-]*', a:head)
     let l:head = matchend(a:body, '^\s*', a:head + strlen(l:value))
     let l:value = get(a:strings, l:value, '@(' . l:value . ')')
-  else
-    let l:head = a:head
   endif
 
   if a:body[l:head] ==# '#'
     let l:head = matchend(a:body, '^\s*', l:head + 1)
-    let [l:vadd, l:head] = s:get_value_string(a:body, l:head, a:strings)
-    let l:value .= l:vadd
+    return s:get_tag_value_concat(
+          \ a:body, l:head, a:strings, a:pre_value . l:value)
   endif
 
-  return [l:value, matchend(a:body, '^,\s*', l:head)]
+  return [a:pre_value . l:value, matchend(a:body, '^,\s*', l:head)]
 endfunction
 
 " }}}1