[lldb] Add a compiler/interpreter of LLDB data formatter bytecode to lldb/examples #113398

adrian-prantl · 2024-10-22T23:40:22Z

This PR adds a proof-of-concept for a bytecode designed to ship and run LLDB data formatters. More motivation and context can be found in the formatter-bytecode.md file and on discourse.

https://discourse.llvm.org/t/a-bytecode-for-lldb-data-formatters/82696

llvmbot · 2024-10-22T23:40:59Z

@llvm/pr-subscribers-lldb

Author: Adrian Prantl (adrian-prantl)

Changes

This PR adds a proof-of-concept for a bytecode designed to ship and run LLDB data formatters. More motivation and context can be found in the formatter-bytecode.md file and on discourse.

Patch is 28.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113398.diff

5 Files Affected:

(added) lldb/examples/formatter-bytecode/Makefile (+8)
(added) lldb/examples/formatter-bytecode/compiler.py (+437)
(added) lldb/examples/formatter-bytecode/formatter-bytecode.md (+165)
(added) lldb/examples/formatter-bytecode/test/MyOptional.cpp (+23)
(added) lldb/examples/formatter-bytecode/test/formatter.py (+120)

diff --git a/lldb/examples/formatter-bytecode/Makefile b/lldb/examples/formatter-bytecode/Makefile
new file mode 100644
index 00000000000000..f544fea9d3f28d
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/Makefile
@@ -0,0 +1,8 @@
+# Run the compiler self-tests, then build the example and try its formatter in LLDB.
+all: test
+
+# Neither target produces a file of that name, so both are phony.
+.PHONY: all test
+test:
+	python3 compiler.py
+	mkdir -p _test
+	clang++ -std=c++17 test/MyOptional.cpp -g -o _test/MyOptional
+	lldb _test/MyOptional -o "command script import test/formatter.py" -o "b -p here" -o "r" -o "v x" -o "v y" -o q
diff --git a/lldb/examples/formatter-bytecode/compiler.py b/lldb/examples/formatter-bytecode/compiler.py
new file mode 100644
index 00000000000000..7bbaae78ab4be1
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/compiler.py
@@ -0,0 +1,437 @@
+"""
+Specification, compiler, disassembler, and interpreter
+for LLDB dataformatter bytecode.
+
+See formatter-bytecode.md for more details.
+"""
+from __future__ import annotations
+
+# Type tags, numbered consecutively starting at 1.
+(
+    type_String,
+    type_Int,
+    type_UInt,
+    type_Object,
+    type_Type,
+) = range(1, 6)
+
+# Opcodes
+opcode = {}
+
+
+def define_opcode(n, mnemonic, name):
+    """Register opcode number `n`.
+
+    Creates the module-level constant `op_<name>` and fills the `opcode`
+    table: `opcode[n]` maps to `mnemonic` (None for opcodes that have no
+    assembler spelling) and, when a mnemonic exists, `opcode[mnemonic]` maps
+    back to `n`.
+    """
+    globals()["op_" + name] = n
+    if mnemonic:
+        opcode[mnemonic] = n
+    opcode[n] = mnemonic
+
+
+# Each entry is (number, assembler mnemonic or None, constant name suffix).
+for _args in (
+    # Data stack manipulation.
+    (0x01, "dup", "dup"),
+    (0x02, "drop", "drop"),
+    (0x03, "pick", "pick"),
+    (0x04, "over", "over"),
+    (0x05, "swap", "swap"),
+    (0x06, "rot", "rot"),
+    # Control flow.
+    (0x10, "{", "begin"),
+    (0x11, "if", "if"),
+    (0x12, "ifelse", "ifelse"),
+    # Literals; these have no mnemonic of their own.
+    (0x20, None, "lit_uint"),
+    (0x21, None, "lit_int"),
+    (0x22, None, "lit_string"),
+    (0x23, None, "lit_selector"),
+    # Arithmetic.
+    (0x30, "+", "plus"),
+    (0x31, "-", "minus"),
+    (0x32, "*", "mul"),
+    (0x33, "/", "div"),
+    (0x34, "%", "mod"),
+    (0x35, "<<", "shl"),
+    (0x36, ">>", "shr"),
+    (0x37, "shra", "shra"),
+    # Logic.
+    (0x40, "&", "and"),
+    (0x41, "|", "or"),
+    (0x42, "^", "xor"),
+    (0x43, "~", "not"),
+    # Comparison.
+    (0x50, "=", "eq"),
+    (0x51, "!=", "neq"),
+    (0x52, "<", "lt"),
+    (0x53, ">", "gt"),
+    (0x54, "=<", "le"),
+    (0x55, ">=", "ge"),
+    # Function calls.
+    (0x60, "call", "call"),
+):
+    define_opcode(*_args)
+del _args
+
+# Function signatures, numbered consecutively starting at 0.
+(
+    sig_summary,
+    sig_init,
+    sig_get_num_children,
+    sig_get_child_index,
+    sig_get_child_at_index,
+) = range(5)
+
+# Selectors
+selector = {}
+
+
+def define_selector(n, name):
+    """Register selector number `n`.
+
+    Creates the module-level constant `sel_<name>` and makes the `selector`
+    table map the assembler spelling `@<name>` to `n` and `n` back to
+    `@<name>`.
+    """
+    spelling = "@" + name
+    globals()["sel_" + name] = n
+    selector[spelling] = n
+    selector[n] = spelling
+
+
+# Each entry is (number, selector name without the leading '@').
+for _args in (
+    (0x00, "summary"),
+    (0x01, "type_summary"),
+    (0x10, "get_num_children"),
+    (0x11, "get_child_at_index"),
+    (0x12, "get_child_with_name"),
+    (0x13, "get_child_index"),
+    (0x15, "get_type"),
+    (0x16, "get_template_argument_type"),
+    (0x20, "get_value"),
+    (0x21, "get_value_as_unsigned"),
+    (0x22, "get_value_as_signed"),
+    (0x23, "get_value_as_address"),
+    (0x24, "cast"),
+    (0x40, "read_memory_byte"),
+    (0x41, "read_memory_uint32"),
+    (0x42, "read_memory_int32"),
+    (0x43, "read_memory_unsigned"),
+    (0x44, "read_memory_signed"),
+    (0x45, "read_memory_address"),
+    (0x46, "read_memory"),
+    (0x50, "fmt"),
+    (0x51, "sprintf"),
+    (0x52, "strlen"),
+):
+    define_selector(*_args)
+del _args
+
+
+################################################################################
+# Compiler.
+################################################################################
+
+def compile(assembler: str) -> bytearray:
+    """Compile assembler source into bytecode.
+
+    Tokens are separated by single spaces. Recognized tokens are the block
+    delimiters `{` and `}`, integer literals (`123` signed, `123u` unsigned),
+    `@selector` names, double-quoted string literals (which may contain spaces
+    and backslash-escaped characters), and opcode mnemonics.
+
+    Raises:
+        ValueError: on an unbalanced `{` or `}`, an unterminated string
+            literal, or characters directly following a closing quote.
+        KeyError: on an unknown mnemonic or selector.
+    """
+    # This is a stack of all in-flight/unterminated blocks; the bottom entry
+    # is the top-level program.
+    bytecode = [bytearray()]
+
+    def emit(byte):
+        bytecode[-1].append(byte)
+
+    # Reversed so that pop() hands out the tokens in source order.
+    tokens = assembler.split(" ")
+    tokens.reverse()
+    while tokens:
+        tok = tokens.pop()
+        if tok == "":
+            continue
+        if tok == "{":
+            bytecode.append(bytearray())
+        elif tok == "}":
+            if len(bytecode) < 2:
+                raise ValueError("'}' without a matching '{'")
+            block = bytecode.pop()
+            emit(op_begin)
+            emit(len(block))  # FIXME: uleb
+            bytecode[-1].extend(block)
+        elif tok[0].isdigit():
+            if tok[-1] == "u":
+                emit(op_lit_uint)
+                emit(int(tok[:-1]))  # FIXME
+            else:
+                emit(op_lit_int)
+                emit(int(tok))  # FIXME
+        elif tok[0] == "@":
+            emit(op_lit_selector)
+            emit(selector[tok])
+        elif tok[0] == '"':
+            # A string literal may span several tokens because the source
+            # was split on spaces; keep consuming tokens until the closing
+            # quote is found.
+            s = bytearray()
+            done = False
+            chrs = tok[1:]
+            while not done:
+                quoted = False
+                for i, c in enumerate(chrs):
+                    if quoted:
+                        s.append(ord(c))  # FIXME
+                        quoted = False
+                    elif c == "\\":
+                        quoted = True
+                    elif c == '"':
+                        if chrs[i + 1 :]:
+                            raise ValueError(
+                                "unexpected characters after closing quote: "
+                                + chrs[i + 1 :]
+                            )
+                        done = True
+                        break
+                    else:
+                        s.append(ord(c))
+                if not done:
+                    if not tokens:
+                        raise ValueError("unterminated string literal")
+                    # Restore the space that the tokenizer split on.
+                    s.append(ord(" "))
+                    chrs = tokens.pop()
+
+            emit(op_lit_string)
+            emit(len(s))
+            bytecode[-1].extend(s)
+        else:
+            emit(opcode[tok])
+    if len(bytecode) != 1:
+        raise ValueError("'{' without a matching '}'")
+    return bytecode[0]
+
+
+################################################################################
+# Disassembler.
+################################################################################
+
+def disassemble(bytecode: bytearray) -> tuple[str, list[int]]:
+    """Disassemble bytecode into (assembler, token starts).
+
+    Returns:
+        A pair of the assembler text and a list of offsets into that text.
+        The list starts with 0 and gains one entry per consumed byte: the
+        length of the text that had been generated when that byte was
+        fetched. If a block is still open at the end of the bytecode, the
+        text ends in "ERROR".
+    """
+    asm = ""
+    # Reversed so that pop() hands out the bytes in program order.
+    all_bytes = list(bytecode)
+    all_bytes.reverse()
+    # Remaining byte count of every block that is currently open.
+    blocks = []
+    tokens = [0]
+
+    def next_byte():
+        """Fetch the next byte in the bytecode and keep track of all
+        in-flight blocks"""
+        for i in range(len(blocks)):
+            blocks[i] -= 1
+        tokens.append(len(asm))
+        return all_bytes.pop()
+
+    while all_bytes:
+        b = next_byte()
+        if b == op_begin:
+            asm += "{"
+            length = next_byte()
+            blocks.append(length)
+        elif b == op_lit_uint:
+            b = next_byte()
+            asm += str(b)  # FIXME uleb
+            asm += "u"
+        elif b == op_lit_int:
+            b = next_byte()
+            asm += str(b)
+        elif b == op_lit_selector:
+            b = next_byte()
+            asm += selector[b]
+        elif b == op_lit_string:
+            length = next_byte()
+            s = ""
+            for _ in range(length):
+                c = chr(next_byte())
+                if c in ('"', "\\"):
+                    # Backslash-escape what the assembler would otherwise
+                    # read as the end of the literal or as an escape.
+                    s += "\\" + c
+                elif c.isprintable():
+                    s += c
+                else:
+                    # Show control characters as Python escapes, e.g. \n.
+                    s += repr(c)[1:-1]
+            asm += '"' + s + '"'
+        else:
+            asm += opcode[b]
+
+        # Close every block whose last byte has just been consumed.
+        while blocks and blocks[-1] == 0:
+            asm += " }"
+            blocks.pop()
+
+        if all_bytes:
+            asm += " "
+
+    if blocks:
+        asm += "ERROR"
+    return asm, tokens
+
+
+################################################################################
+# Interpreter.
+################################################################################
+
+def count_fmt_params(fmt: str) -> int:
+    """Return the number of positional arguments that `fmt.format()` needs.
+
+    Handles automatically numbered fields (`{}`), explicitly numbered fields
+    (`{0}`, `{1.attr}`, `{2[key]}`), and fields nested inside a format spec
+    (`{:{}}`). Fields that refer to keyword arguments are ignored.
+    """
+    from string import Formatter
+
+    auto_fields = 0  # number of "{}" fields seen so far
+    max_index = -1  # highest explicit positional index seen so far
+    pending = [fmt]
+    while pending:
+        for _, name, spec, _ in Formatter().parse(pending.pop()):
+            if name is None:
+                # Literal text that is not followed by a replacement field.
+                continue
+            # Strip attribute/index accessors: "0.attr" and "0[key]" -> "0".
+            head = name.partition(".")[0].partition("[")[0]
+            if not head:
+                auto_fields += 1
+            elif head.isdecimal():
+                max_index = max(max_index, int(head))
+            if spec:
+                # A format spec may itself contain replacement fields.
+                pending.append(spec)
+    # str.format() rejects mixing the two numbering styles, so for a valid
+    # format string at most one of the counters is non-zero.
+    return max(auto_fields, max_index + 1)
+
+
+def interpret(bytecode: bytearray, control: list, data: list, tracing: bool = False):
+    """Execute `bytecode` and return the value left on top of the data stack.
+
+    Args:
+        bytecode: The program to run.
+        control: The control stack. `{` pushes `(start, end)` block ranges
+            onto it and `if` / `ifelse` pop them. Modified in place.
+        data: The data stack, pre-populated with the program's inputs.
+            Modified in place.
+        tracing: If true, print the machine state before executing each
+            instruction.
+
+    Returns:
+        The top of the data stack once execution finishes, or None if the
+        data stack is empty.
+
+    Raises:
+        NotImplementedError: if `call` is used with a selector that this
+            interpreter does not implement.
+    """
+    # Stack of (pc, end) ranges; the bottom entry covers the whole program
+    # and every executing block adds one entry on top.
+    frame = []
+    frame.append((0, len(bytecode)))
+
+    def trace():
+        """print a trace of the execution for debugging purposes"""
+
+        def fmt(d):
+            if isinstance(d, int):
+                return str(d)
+            if isinstance(d, str):
+                return d
+            return repr(type(d))
+
+        pc, end = frame[-1]
+        asm, tokens = disassemble(bytecode)
+        print(
+            "=== frame = {1}, data = {2}, opcode = {0}".format(
+                opcode[b], frame, [fmt(d) for d in data]
+            )
+        )
+        print(asm)
+        print(" " * (tokens[pc]) + "^")
+
+    def next_byte():
+        """Fetch the next byte and update the PC"""
+        pc, end = frame[-1]
+        assert pc < len(bytecode)
+        b = bytecode[pc]
+        frame[-1] = pc + 1, end
+        # At the end of a block? Then return to the enclosing frame(s) and
+        # fetch from there instead.
+        while pc >= end:
+            frame.pop()
+            if not frame:
+                return None
+            pc, end = frame[-1]
+            if pc >= end:
+                return None
+            b = bytecode[pc]
+            frame[-1] = pc + 1, end
+        return b
+
+    while frame[-1][0] < len(bytecode):
+        b = next_byte()
+        if b is None:
+            break
+        if tracing:
+            trace()
+        # Data stack manipulation.
+        if b == op_dup:
+            data.append(data[-1])
+        elif b == op_drop:
+            data.pop()
+        elif b == op_pick:
+            data.append(data[data.pop()])
+        elif b == op_over:
+            data.append(data[-2])
+        elif b == op_swap:
+            x = data.pop()
+            y = data.pop()
+            data.append(x)
+            data.append(y)
+        elif b == op_rot:
+            z = data.pop()
+            y = data.pop()
+            x = data.pop()
+            data.append(z)
+            data.append(x)
+            data.append(y)
+
+        # Control stack manipulation.
+        elif b == op_begin:
+            # Remember where the block is, then skip over its body.
+            length = next_byte()
+            pc, end = frame[-1]
+            control.append((pc, pc + length))
+            frame[-1] = pc + length, end
+        elif b == op_if:
+            # The block is consumed even when it is not executed; otherwise
+            # it would linger on the control stack.
+            block = control.pop()
+            if data.pop():
+                frame.append(block)
+        elif b == op_ifelse:
+            # The else-block was pushed last, so it is on top.
+            if data.pop():
+                control.pop()
+                frame.append(control.pop())
+            else:
+                frame.append(control.pop())
+                control.pop()
+
+        # Literals.
+        elif b == op_lit_uint:
+            b = next_byte()  # FIXME uleb
+            data.append(int(b))
+        elif b == op_lit_int:
+            b = next_byte()  # FIXME uleb
+            data.append(int(b))
+        elif b == op_lit_selector:
+            b = next_byte()
+            data.append(b)
+        elif b == op_lit_string:
+            length = next_byte()
+            s = ""
+            while length:
+                s += chr(next_byte())
+                length -= 1
+            data.append(s)
+
+        # Arithmetic, logic, etc. The right-hand operand is on top of the
+        # stack, so it is the one that is popped first.
+        elif b == op_plus:
+            data.append(data.pop() + data.pop())
+        elif b == op_minus:
+            data.append(-data.pop() + data.pop())
+        elif b == op_mul:
+            data.append(data.pop() * data.pop())
+        elif b == op_div:
+            # The VM only has integer types, so use integer division.
+            # NOTE(review): this rounds towards negative infinity; confirm
+            # the rounding intended for negative operands.
+            y = data.pop()
+            data.append(data.pop() // y)
+        elif b == op_mod:
+            y = data.pop()
+            data.append(data.pop() % y)
+        elif b == op_shl:
+            y = data.pop()
+            data.append(data.pop() << y)
+        elif b == op_shr:
+            y = data.pop()
+            data.append(data.pop() >> y)
+        elif b == op_shra:
+            y = data.pop()
+            data.append(data.pop() >> y)  # FIXME
+        elif b == op_and:
+            data.append(data.pop() & data.pop())
+        elif b == op_or:
+            data.append(data.pop() | data.pop())
+        elif b == op_xor:
+            data.append(data.pop() ^ data.pop())
+        elif b == op_not:
+            data.append(not data.pop())
+        elif b == op_eq:
+            data.append(data.pop() == data.pop())
+        elif b == op_neq:
+            data.append(data.pop() != data.pop())
+        # The comparisons below look mirrored because the first pop() yields
+        # the right-hand operand.
+        elif b == op_lt:
+            data.append(data.pop() > data.pop())
+        elif b == op_gt:
+            data.append(data.pop() < data.pop())
+        elif b == op_le:
+            data.append(data.pop() >= data.pop())
+        elif b == op_ge:
+            data.append(data.pop() <= data.pop())
+
+        # Function calls.
+        elif b == op_call:
+            sel = data.pop()
+            if sel == sel_summary:
+                data.append(data.pop().GetSummary())
+            elif sel == sel_get_num_children:
+                data.append(data.pop().GetNumChildren())
+            elif sel == sel_get_child_at_index:
+                index = data.pop()
+                valobj = data.pop()
+                data.append(valobj.GetChildAtIndex(index))
+            elif sel == sel_get_child_with_name:
+                name = data.pop()
+                valobj = data.pop()
+                data.append(valobj.GetChildMemberWithName(name))
+            elif sel == sel_get_child_index:
+                name = data.pop()
+                valobj = data.pop()
+                data.append(valobj.GetIndexOfChildWithName(name))
+            elif sel == sel_get_type:
+                data.append(data.pop().GetType())
+            elif sel == sel_get_template_argument_type:
+                n = data.pop()
+                valobj = data.pop()
+                data.append(valobj.GetTemplateArgumentType(n))
+            elif sel == sel_get_value:
+                data.append(data.pop().GetValue())
+            elif sel == sel_get_value_as_unsigned:
+                data.append(data.pop().GetValueAsUnsigned())
+            elif sel == sel_get_value_as_signed:
+                data.append(data.pop().GetValueAsSigned())
+            elif sel == sel_get_value_as_address:
+                data.append(data.pop().GetValueAsAddress())
+            elif sel == sel_cast:
+                sbtype = data.pop()
+                valobj = data.pop()
+                data.append(valobj.Cast(sbtype))
+            elif sel == sel_strlen:
+                data.append(len(data.pop()))
+            elif sel == sel_fmt:
+                # The format string is on top; its arguments are popped
+                # afterwards, so the topmost argument becomes argument 0.
+                fmt = data.pop()
+                n = count_fmt_params(fmt)
+                args = []
+                for i in range(n):
+                    args.append(data.pop())
+                data.append(fmt.format(*args))
+            else:
+                raise NotImplementedError(
+                    "not implemented: " + str(selector.get(sel, sel))
+                )
+    return data[-1] if data else None
+
+
+################################################################################
+# Tests.
+################################################################################
+
+import unittest
+
+class TestCompiler(unittest.TestCase):
+    """Checks for the compiler, the disassembler, and the interpreter."""
+
+    def test(self):
+        # Compiler: assembler source and the bytecode (as hex) it yields.
+        for asm, expected_hex in (
+            ("1u dup", "200101"),
+            ('"1u dup"', "2206317520647570"),
+            ("16 < { dup } if", "21105210010111"),
+            ('{ { " } " } }', "100710052203207d20"),
+        ):
+            self.assertEqual(compile(asm).hex(), expected_hex)
+
+        # Disassembling compiled code must reproduce the original source.
+        for asm in (
+            "1u dup",
+            '1u dup "1u dup"',
+            "16 < { dup } if",
+            '{ { " } " } }',
+        ):
+            self.assertEqual(disassemble(compile(asm))[0], asm)
+
+        # Interpreter: program and the value it leaves on the data stack.
+        for asm, expected in (
+            ("1 1 +", 2),
+            ("2 1 1 + *", 4),
+            ('2 1 > { "yes" } { "no" } ifelse', "yes"),
+        ):
+            self.assertEqual(interpret(compile(asm), [], []), expected)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/lldb/examples/formatter-bytecode/formatter-bytecode.md b/lldb/examples/formatter-bytecode/formatter-bytecode.md
new file mode 100644
index 00000000000000..e7cce8e740e757
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/formatter-bytecode.md
@@ -0,0 +1,165 @@
+# A bytecode for (LLDB) data formatters
+
+## Background
+
+LLDB provides very rich customization options to display data types (see https://lldb.llvm.org/use/variable.html ). To use custom data formatters, developers typically need to edit the global `~/.lldbinit` file to make sure they are found and loaded. An example for this workflow is the `llvm/utils/lldbDataFormatters.py` script. Because of the manual configuration that is involved, this workflow doesn't scale very well. What would be nice is if developers or library authors could ship data formatters with their code and LLDB could automatically find them.
+
+In Swift we added the `DebugDescription` macro (see https://www.swift.org/blog/announcing-swift-6/#debugging ) that translates Swift string interpolation into LLDB summary strings, and puts them into a `.lldbsummaries` section, where LLDB can find them. This works well for simple summaries, but doesn't scale to synthetic child providers or summaries that need to perform some kind of conditional logic or computation. The logical next step would be to store full Python formatters instead of summary strings, but Python code is larger and, more importantly, it is potentially dangerous to just load and execute untrusted Python code in LLDB.
+
+This document describes a minimal bytecode tailored to running LLDB formatters. It defines a human-readable assembler representation for the language, an efficient binary encoding, a virtual machine for evaluating it, and a format for embedding formatters into binary containers.
+
+### Goals
+
+Provide an efficient and secure encoding for data formatters that can be used as a compilation target from user-friendly representations (such as DIL, Swift DebugDescription, or NatVis).
+
+### Non-goals
+
+While humans could write the assembler syntax, making it user-friendly is not a goal.
+
+## Design of the virtual machine
+
+The LLDB formatter virtual machine uses a stack-based bytecode, comparable with DWARF expressions, but with higher-level data types and functions.
+
+The virtual machine has two stacks, a data and a control stack. The control stack is kept separate to make it easier to reason about the security aspects of the VM.
+
+### Data types
+These data types are "host" data types, in LLDB parlance.
+- _String_ (UTF-8)
+- _Int_ (64 bit)
+- _UInt_ (64 bit)
+- _Object_ (Basically an `SBValue`)
+- _Type_ (Basically an `SBType`)
+- _Selector_ (One of the predefined functions)
+
+_Object_ and _Type_ are opaque; they can only be used as parameters of `call`.
+
+## Instruction set
+
+### Stack operations
+
+These manipulate the data stack directly.
+
+- `dup  (x -> x x)`
+- `drop (x y -> x)`
+- `pick (x ... UInt -> x ... x)`
+- `over (x y -> x y x)`
+- `swap (x y -> y x)`
+- `rot (x y z -> z x y)`
+
+### Control flow
+
+- `{` pushes a code block address onto the control stack
+- `}` (technically not an opcode) denotes the end of a code block
+- `if` pops a block from the control stack, if the top of the data stack is nonzero, executes it
+- `ifelse` pops two blocks from the control stack, if the top of the data stack is nonzero, executes the first, otherwise the second.
+
+### Literals for basic types
+
+- `123u ( -> UInt)` an unsigned 64-bit host integer.
+- `123 ( -> Int)` a signed 64-bit host integer.
+- `"abc" ( -> String)` a UTF-8 host string.
+- `@strlen ( -> Selector)` one of the predefined functions supported by the VM.
+
+### Arithmetic, logic, and comparison operations
+- `+ (x y -> [x+y])`
+- `-` etc ...
+- `*`
+- `/`
+- `%`
+- `<<`
+- `>>`
+- `shra` (arithmetic shift right)
+- `~`
+- `&`
+- `|`
+- `^`
+- `=`
+- `!=`
+- `<`
+- `>`
+- `=<`
+- `>=`
+
+### Function calls
+
+For security reasons the list of functions callable with `call` is predefined. The supported functions are either existing methods on `SBValue`, or string formatting operations.
+
+- `call (Object arg0 ... Selector -> retval)`
+
+Method is one of a predefined set of _Selectors_
+- `(Object @summary -> String)`
+- `(Object @type_summary -> String)`
+
+- `(Object @get_num_children -> UInt)`
+- `(Object UInt @get_child_at_index -> Object)`
+- `(Object String @get_child_index -> UInt)`
+- `(Object @get_type -> Type)`
+- `(Object UInt @get_template_argument_type -> Type)`
+- `(Object @get_value -> Object)`
+- `(Object @get_value_as_unsigned -> UInt)`
+- `(Object @get_value_as_signed -> Int)`
+- `(Object @get_value_as_address -> UInt)`
+- `(Object Type @cast -> Object)`
+
+- `(UInt @read_memory_byte -> UInt)`
+- `(UInt @read_memory_uint32 -> UInt)`
+- `(UInt @read_memory_int32 -> Int)`
+- `(UInt @read_memory_unsigned -> UInt)`
+- `(UInt @read_memory_signed -> Int)`
+- `(UInt @read_memory_address -> UInt)`
+- `(UInt Type @read_memory -> Object)`
+ 
+- `(String arg0 ... @fmt -> String)`
+- `(String arg0 ... @sprintf -> String)`
+- `(String @strlen -> UInt)`
+
+## Byte Code
+
+Most instructions are just a single byte opcode. The only exceptions are the literals:
+
+- String...
[truncated]

github-actions · 2024-10-22T23:44:00Z

⚠️ Python code formatter, darker found issues in your code. ⚠️

You can test this locally with the following command:

darker --check --diff -r 2e0506f83bfde6db93454bdf28e4a71c160d4f5b...fc2b9044be824c1d9dd70b129ebb73b8c7f70bbd lldb/examples/formatter-bytecode/compiler.py lldb/examples/formatter-bytecode/test/formatter.py

View the diff from darker here.

--- compiler.py	2024-10-30 15:44:29.000000 +0000
+++ compiler.py	2024-10-30 15:48:00.568998 +0000
@@ -38,13 +38,13 @@
 define_opcode(0x20, None, "lit_uint")
 define_opcode(0x21, None, "lit_int")
 define_opcode(0x22, None, "lit_string")
 define_opcode(0x23, None, "lit_selector")
 
-define_opcode(0x2a, "as_int", "as_int")
-define_opcode(0x2b, "as_uint", "as_uint")
-define_opcode(0x2c, "is_null", "is_null")
+define_opcode(0x2A, "as_int", "as_int")
+define_opcode(0x2B, "as_uint", "as_uint")
+define_opcode(0x2C, "is_null", "is_null")
 
 define_opcode(0x30, "+", "plus")
 define_opcode(0x31, "-", "minus")
 define_opcode(0x32, "*", "mul")
 define_opcode(0x33, "/", "div")
@@ -358,12 +358,14 @@
             while length:
                 s += chr(next_byte())
                 length -= 1
             data.append(s)
 
-        elif b == op_as_uint: pass
-        elif b == op_as_int: pass
+        elif b == op_as_uint:
+            pass
+        elif b == op_as_int:
+            pass
         elif b == op_is_null:
             data.append(1 if data.pop() == None else 0)
 
         # Arithmetic, logic, etc.
         elif b == op_plus:
--- test/formatter.py	2024-10-30 15:44:29.000000 +0000
+++ test/formatter.py	2024-10-30 15:48:00.624739 +0000
@@ -17,24 +17,45 @@
         "type summary add -w llvm "
         f"-e -F {__name__}.MyOptionalSummaryProvider "
         '-x "^MyOptional<.+>$"'
     )
 
+
 def stringify(bytecode: bytearray) -> str:
     s = ""
     in_hex = False
     for b in bytecode:
-        if ((b < 32 or b > 127 or chr(b) in ['"','`',"'"]) or
-            (in_hex and chr(b).lower() in
-             ['a','b','c','d','e','f','0','1','2','3','4','5','6','7','8','9'])):
-            s+= r'\x' + hex(b)[2:]
+        if (b < 32 or b > 127 or chr(b) in ['"', "`", "'"]) or (
+            in_hex
+            and chr(b).lower()
+            in [
+                "a",
+                "b",
+                "c",
+                "d",
+                "e",
+                "f",
+                "0",
+                "1",
+                "2",
+                "3",
+                "4",
+                "5",
+                "6",
+                "7",
+                "8",
+                "9",
+            ]
+        ):
+            s += r"\x" + hex(b)[2:]
             in_hex = True
         else:
-            s+=chr(b)
+            s += chr(b)
             in_hex = False
     return s
-    
+
+
 def evaluate(assembler: str, data: list):
     bytecode = compile(assembler)
     trace = True
     if trace:
         print(
@@ -73,25 +94,27 @@
     #    if val.summary:
     #        return val.summary
     #    return val.GetValue()
     summary = ""
     summary += ' dup "Storage" @get_child_with_name call'  # valobj storage
-    summary += ' dup is_null ~ { swap } if drop'  # storage
+    summary += " dup is_null ~ { swap } if drop"  # storage
     summary += ' dup "hasVal" @get_child_with_name call'  # storage obj(hasVal)
     summary += ' dup is_null { drop "<could not read MyOptional>" } {'
-    summary += '   @get_value_as_unsigned call'  # storage int(hasVal)
+    summary += "   @get_value_as_unsigned call"  # storage int(hasVal)
     summary += '   0u = { "None" } {'
-    summary += '     dup @get_type call'
-    summary += '     0u @get_template_argument_type call'  # storage type
-    summary += '     swap'  # type storage
+    summary += "     dup @get_type call"
+    summary += "     0u @get_template_argument_type call"  # storage type
+    summary += "     swap"  # type storage
     summary += '     "value" @get_child_with_name call'  # type value
-    summary += '     swap @cast call'  # type(value)
+    summary += "     swap @cast call"  # type(value)
     summary += '     dup is_null { "None" } {'
-    summary += '       dup @summary call dup @strlen call { @get_value call } { drop } ifelse'
-    summary += '     } ifelse'
-    summary += '   } ifelse'
-    summary += ' } ifelse'
+    summary += (
+        "       dup @summary call dup @strlen call { @get_value call } { drop } ifelse"
+    )
+    summary += "     } ifelse"
+    summary += "   } ifelse"
+    summary += " } ifelse"
     return evaluate(summary, [valobj])
 
 
 class MyOptionalSynthProvider:
     """Provides deref support to llvm::Optional<T>"""

github-actions · 2024-10-22T23:44:00Z

✅ With the latest revision this PR passed the C/C++ code formatter.

walter-erquinigo · 2024-10-23T04:04:35Z

Man, this is fantastic. I'll try to implement a little compiler for this for my language (Mojo).

lldb/examples/formatter-bytecode/compiler.py

lldb/examples/formatter-bytecode/formatter-bytecode.md

DavidSpickett · 2024-10-24T16:57:20Z

lldb/examples/formatter-bytecode/formatter-bytecode.md

+
+If not specified, the init function defaults to an empty function that just passes the Object along. Its results may be cached and allow common prep work to be done for an Object that can be reused by subsequent calls to the other methods. This way subsequent calls to `@get_child_at_index` can avoid recomputing shared information, for example.
+
+While it is more efficient to store multiple programs per type key, this is not a requirement. LLDB will merge all entries. If there are conflicts the result is undefined.


What is the use case for having multiple programs?

typename -> [formatter 1, formatter 2]

Perhaps?

Are they executed serially to produce one formatted output, or does each one produce one output?

So you could say ok this container can be viewed in this way or this way. Perhaps one for debugging details for the library developer and one for a user who just wants to see their own data.

Yes, the synthetic child providers have to define a set of functions, and it would be less efficient to repeat the typename for all of them.

Are they executed serially to produce one formatted output, or does each one produce one output?

No, they serve different purposes based on their signature. (Summary, init, etc...)

Right now I see the list of those in the doc.

So if you did want to have 2 different views you'd write a single summary provider that returned both, or arrange for different providers to be built into release and debug builds.

(or more likely just have the debug ones be python scripts given that developers of the library are already knee deep in complexity)

Along the same lines, is it possible to take the providers loaded from the section and assign them to new types? For example, I might subclass a container from a library I use. Would the library have to write their regex carefully to allow that or can I get a handle to the existing provider and attach that to my own type?

lldb/examples/formatter-bytecode/formatter-bytecode.md

DavidSpickett · 2024-10-24T17:02:04Z

Compiling languages into this is intriguing.

MLIR noob thinking out loud: if MLIR could lower into this could you write your formatter in Fortran? 🤣

DavidSpickett · 2024-10-24T17:04:25Z

This is very cool overall. At first the idea of a whole new VM seems like way too much, but I think I'm getting a better sense of the tradeoffs having read through this.

lldb/examples/formatter-bytecode/formatter-bytecode.md

adrian-prantl · 2024-10-25T21:11:56Z

Compiling languages into this is intriguing.

MLIR noob thinking out loud: if MLIR could lower into this could you write your formatter in Fortran? 🤣

It is well known that Fortran is superbly suited to process text: https://en.wikipedia.org/wiki/Colossal_Cave_Adventure ;-)

kastiglione · 2024-10-25T21:18:49Z

Compiling languages into this is intriguing.

@DavidSpickett A minimal example of Python compiling into the assembly for this bytecode: #113734

adrian-prantl requested a review from JDevlieghere as a code owner October 22, 2024 23:40

llvmbot added the lldb label Oct 22, 2024

adrian-prantl force-pushed the lldb-formatter-bytecode branch 2 times, most recently from 3267f7a to c4360a6 Compare October 23, 2024 00:02

DavidSpickett reviewed Oct 24, 2024

View reviewed changes

kastiglione reviewed Oct 25, 2024

View reviewed changes

lldb/examples/formatter-bytecode/formatter-bytecode.md Outdated Show resolved Hide resolved

adrian-prantl force-pushed the lldb-formatter-bytecode branch from c4360a6 to dcab1a5 Compare October 25, 2024 23:07

Add a compiler/interpreter of LLDB data formatter bytecode to examples

0b88de8

adrian-prantl force-pushed the lldb-formatter-bytecode branch from dcab1a5 to 0b88de8 Compare October 25, 2024 23:09

adrian-prantl mentioned this pull request Oct 25, 2024

[lldb] Load embedded type summary section (#7859) (#8040) #113743

Open

adrian-prantl force-pushed the lldb-formatter-bytecode branch 4 times, most recently from cf1b608 to ec08ac1 Compare October 29, 2024 23:54

minor corrections from C++ implementations

fc2b904

adrian-prantl force-pushed the lldb-formatter-bytecode branch from ec08ac1 to fc2b904 Compare October 30, 2024 15:44

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[lldb] Add a compiler/interpreter of LLDB data formatter bytecode to lldb/examples #113398

[lldb] Add a compiler/interpreter of LLDB data formatter bytecode to lldb/examples #113398

adrian-prantl commented Oct 22, 2024 •

edited

Loading

llvmbot commented Oct 22, 2024

github-actions bot commented Oct 22, 2024 •

edited

Loading

github-actions bot commented Oct 22, 2024 •

edited

Loading

walter-erquinigo commented Oct 23, 2024

DavidSpickett Oct 24, 2024

adrian-prantl Oct 25, 2024

adrian-prantl Oct 25, 2024

DavidSpickett Oct 28, 2024

DavidSpickett commented Oct 24, 2024

DavidSpickett commented Oct 24, 2024

adrian-prantl commented Oct 25, 2024

kastiglione commented Oct 25, 2024 •

edited

Loading


		If not specified, the init function defaults to an empty function that just passes the Object along. Its results may be cached and allow common prep work to be done for an Object that can be reused by subsequent calls to the other methods. This way subsequent calls to `@get_child_at_index` can avoid recomputing shared information, for example.

		While it is more efficient to store multiple programs per type key, this is not a requirement. LLDB will merge all entries. If there are conflicts the result is undefined.

[lldb] Add a compiler/interpreter of LLDB data formatter bytecode to lldb/examples #113398

Are you sure you want to change the base?

[lldb] Add a compiler/interpreter of LLDB data formatter bytecode to lldb/examples #113398

Conversation

adrian-prantl commented Oct 22, 2024 • edited Loading

llvmbot commented Oct 22, 2024

github-actions bot commented Oct 22, 2024 • edited Loading

github-actions bot commented Oct 22, 2024 • edited Loading

walter-erquinigo commented Oct 23, 2024

DavidSpickett Oct 24, 2024

Choose a reason for hiding this comment

adrian-prantl Oct 25, 2024

Choose a reason for hiding this comment

adrian-prantl Oct 25, 2024

Choose a reason for hiding this comment

DavidSpickett Oct 28, 2024

Choose a reason for hiding this comment

DavidSpickett commented Oct 24, 2024

DavidSpickett commented Oct 24, 2024

adrian-prantl commented Oct 25, 2024

kastiglione commented Oct 25, 2024 • edited Loading

adrian-prantl commented Oct 22, 2024 •

edited

Loading

github-actions bot commented Oct 22, 2024 •

edited

Loading

github-actions bot commented Oct 22, 2024 •

edited

Loading

kastiglione commented Oct 25, 2024 •

edited

Loading