Skip to content

Commit

Permalink
libdrgn, python: add SymbolIndex
Browse files Browse the repository at this point in the history
The Symbol Finder API gives us the ability to register a dynamic
callback for symbol lookup. However, many common use cases are satisfied
by a simple static list of symbols. Correct and efficient lookup in this
simple case is rather tricky. Implement a new type, SymbolIndex, which
can take a list of symbols and index them for efficient lookup by name
or address.

Signed-off-by: Stephen Brennan <[email protected]>
  • Loading branch information
brenns10 committed Oct 10, 2024
1 parent f64287a commit fda9b25
Show file tree
Hide file tree
Showing 11 changed files with 697 additions and 1 deletion.
63 changes: 63 additions & 0 deletions _drgn.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1898,6 +1898,69 @@ class Symbol:
kind: Final[SymbolKind]
"""Kind of entity represented by this symbol."""

class SymbolIndex:
    """
    A ``SymbolIndex`` contains a static set of symbols and allows efficient
    lookup by name and address.

    With :meth:`Program.register_symbol_finder()`, you can add a callback to
    provide custom symbol finding logic. However, in many cases, all that is
    necessary is to provide drgn with a list of symbols that you know to be
    part of the program. This object allows you to do that. It efficiently
    implements the Symbol Finder API given a static set of symbols. For
    example::

        >>> prog = drgn.Program()
        >>> symbol = drgn.Symbol("foo", 0x123, 1, drgn.SymbolBinding.GLOBAL, drgn.SymbolKind.OBJECT)
        >>> finder = drgn.SymbolIndex([symbol])
        >>> prog.register_symbol_finder("SymbolIndex", finder, enable_index=0)
        >>> prog.symbols()
        [Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>)]
        >>> prog.symbol("bar")
        Traceback (most recent call last):
          File "<console>", line 1, in <module>
        LookupError: not found
        >>> prog.symbol("foo")
        Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>)
        >>> prog.symbol(0x100)
        Traceback (most recent call last):
          File "<console>", line 1, in <module>
        LookupError: not found
        >>> prog.symbol(0x123)
        Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>)
    """

    def __init__(self, symbols: Iterable[Symbol]) -> None:
        """
        Create a ``SymbolIndex`` from an iterable of symbols.

        The resulting symbol index satisfies the Symbol Finder API: it is a
        callable object suitable to provide to
        :meth:`Program.register_symbol_finder()`. It supports overlapping
        symbol address ranges and duplicate symbol names. However, in the case
        of these sorts of conflicts, it doesn't provide any guarantee on the
        order of the results, or which result is returned when a single symbol
        is requested.

        :param symbols: An iterable of symbols
        :raises TypeError: if an element of *symbols* is not a
            :class:`Symbol`
        """

    def __call__(
        self,
        prog: Program,
        name: Optional[str],
        address: Optional[int],
        one: bool,
    ) -> List[Symbol]:
        """
        Look up symbols by name, address, or both.

        If both *name* and *address* are ``None``, no filter is applied.

        :param prog: (unused) the program looking up this symbol
        :param name: if not ``None``, only return symbols with this name
        :param address: if not ``None``, only return symbols spanning this
            address
        :param one: if true, limit the result to at most a single symbol
        :returns: a list of matching symbols (empty if none are found)
        """

class SymbolBinding(enum.Enum):
"""
A ``SymbolBinding`` describes the linkage behavior and visibility of a
Expand Down
1 change: 1 addition & 0 deletions docs/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ Symbols
.. drgndoc:: Symbol
.. drgndoc:: SymbolBinding
.. drgndoc:: SymbolKind
.. drgndoc:: SymbolIndex

Stack Traces
------------
Expand Down
2 changes: 2 additions & 0 deletions drgn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
StackTrace,
Symbol,
SymbolBinding,
SymbolIndex,
SymbolKind,
Thread,
Type,
Expand Down Expand Up @@ -127,6 +128,7 @@
"StackTrace",
"Symbol",
"SymbolBinding",
"SymbolIndex",
"SymbolKind",
"Thread",
"Type",
Expand Down
1 change: 1 addition & 0 deletions libdrgn/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ _drgn_la_SOURCES = python/constants.c \
python/program.c \
python/stack_trace.c \
python/symbol.c \
python/symbol_index.c \
python/test.c \
python/thread.c \
python/type.c \
Expand Down
7 changes: 7 additions & 0 deletions libdrgn/python/drgnpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "../hash_table.h"
#include "../pp.h"
#include "../program.h"
#include "../symbol.h"

/* These were added in Python 3.7. */
#ifndef Py_UNREACHABLE
Expand Down Expand Up @@ -108,6 +109,11 @@ typedef struct {
PyObject *attr_cache;
} DrgnType;

/* Python object backing the SymbolIndex type: wraps a libdrgn symbol index. */
typedef struct {
	PyObject_HEAD
	/*
	 * Stored by value (not as a pointer), so the index lives inside the
	 * Python object itself.
	 */
	struct drgn_symbol_index index;
} SymbolIndex;

typedef struct {
PyObject_HEAD
/*
Expand Down Expand Up @@ -242,6 +248,7 @@ extern PyTypeObject Register_type;
extern PyTypeObject StackFrame_type;
extern PyTypeObject StackTrace_type;
extern PyTypeObject Symbol_type;
extern PyTypeObject SymbolIndex_type;
extern PyTypeObject Thread_type;
extern PyTypeObject ThreadIterator_type;
extern PyTypeObject TypeEnumerator_type;
Expand Down
1 change: 1 addition & 0 deletions libdrgn/python/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void)
add_type(m, &StackFrame_type) ||
add_type(m, &StackTrace_type) ||
add_type(m, &Symbol_type) ||
add_type(m, &SymbolIndex_type) ||
add_type(m, &DrgnType_type) ||
add_type(m, &Thread_type) ||
add_type(m, &ThreadIterator_type) ||
Expand Down
10 changes: 10 additions & 0 deletions libdrgn/python/program.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,16 @@ py_symbol_find_fn(const char *name, uint64_t addr,
enum drgn_find_symbol_flags flags, void *arg,
struct drgn_symbol_result_builder *builder)
{
// Fast path for SymbolIndex: don't bother converting to and from Python
// types, as this is a C finder. Use Py_TYPE and pointer comparison
// directly here to avoid needing to take the GIL for
// PyObject_TypeCheck(). SymbolIndex cannot be subclassed, so the logic
// for subclass checking is unnecessary anyway.
if (Py_TYPE(PyTuple_GET_ITEM(arg, 1)) == &SymbolIndex_type) {
SymbolIndex *ix = (SymbolIndex *)PyTuple_GET_ITEM(arg, 1);
return drgn_symbol_index_find(name, addr, flags, &ix->index, builder);
}

PyGILState_guard();

_cleanup_pydecref_ PyObject *name_obj = NULL;
Expand Down
122 changes: 122 additions & 0 deletions libdrgn/python/symbol_index.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Copyright (c) 2024 Oracle and/or its affiliates
// SPDX-License-Identifier: LGPL-2.1-or-later

#include "drgnpy.h"
#include "../symbol.h"

/*
 * tp_dealloc slot for SymbolIndex: tear down the embedded symbol index, then
 * release the object's storage through the type's tp_free.
 */
static void SymbolIndex_dealloc(SymbolIndex *self)
{
	PyObject *obj = (PyObject *)self;

	// The index is embedded in the object, so it has to be deinitialized
	// before the memory holding it is freed.
	drgn_symbol_index_deinit(&self->index);
	Py_TYPE(obj)->tp_free(obj);
}

/*
 * tp_call slot for SymbolIndex, i.e. SymbolIndex.__call__(prog, name,
 * address, one).
 *
 * Translates the Python arguments into drgn_find_symbol_flags, runs the lookup
 * against the wrapped index, and returns the matches as a Python list of
 * Symbol objects. A list is returned even when "one" is true (with zero or one
 * element). Every returned Symbol is wrapped with a reference to this
 * SymbolIndex object.
 *
 * Returns a new reference to the list, or NULL with a Python exception set.
 */
static PyObject *SymbolIndex_call(SymbolIndex *self, PyObject *args, PyObject *kwargs)
{
	static char *kwnames[] = {"prog", "name", "address", "one", NULL};
	PyObject *prog_obj; // accepted to match the finder signature; not used
	const char *name; // NULL when None is passed ('z' format)
	struct index_arg address = { .allow_none = true };
	int single; // 'p' format specifier expects an int, not bool

	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OzO&p:__call__", kwnames,
					 &prog_obj, &name, index_converter, &address,
					 &single))
		return NULL;

	// Build the lookup flags from which filters were actually supplied.
	unsigned int flags = single ? DRGN_FIND_SYMBOL_ONE : 0;
	if (name)
		flags |= DRGN_FIND_SYMBOL_NAME;
	if (!address.is_none)
		flags |= DRGN_FIND_SYMBOL_ADDR;

	struct drgn_symbol_result_builder builder;
	drgn_symbol_result_builder_init(&builder, flags & DRGN_FIND_SYMBOL_ONE);

	struct drgn_error *err =
		drgn_symbol_index_find(name, address.uvalue, flags, &self->index, &builder);
	if (err) {
		drgn_symbol_result_builder_abort(&builder);
		return set_drgn_error(err);
	}

	// Multiple results: hand the whole array over to be wrapped as a list.
	if (!single) {
		struct drgn_symbol **syms;
		size_t count;
		drgn_symbol_result_builder_array(&builder, &syms, &count);
		return Symbol_list_wrap(syms, count, (PyObject *)self);
	}

	// Single result: still returned as a list, holding zero or one Symbol.
	struct drgn_symbol *symbol = drgn_symbol_result_builder_single(&builder);
	_cleanup_pydecref_ PyObject *list = PyList_New(symbol ? 1 : 0);
	if (!list) {
		drgn_symbol_result_builder_abort(&builder);
		return NULL;
	}
	if (symbol) {
		PyObject *pysym = Symbol_wrap(symbol, (PyObject *)self);
		if (!pysym) {
			drgn_symbol_result_builder_abort(&builder);
			return NULL;
		}
		// PyList_SET_ITEM steals the reference to pysym.
		PyList_SET_ITEM(list, 0, pysym);
	}
	return_ptr(list);
}

/*
 * tp_new slot for SymbolIndex, i.e. SymbolIndex(symbols).
 *
 * Iterates over "symbols", adds each Symbol to an index builder, and then
 * converts the builder into the index embedded in a newly allocated
 * SymbolIndex object.
 *
 * Returns a new reference to the SymbolIndex, or NULL with a Python exception
 * set: TypeError if an item is not a Symbol, MemoryError if adding a symbol to
 * the builder fails, any exception raised while iterating, or the translated
 * drgn error if building the index fails.
 */
static PyObject *SymbolIndex_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds)
{
	static char *kwnames[] = {"symbols", NULL};
	PyObject *list_obj;

	// Name the callable in the format string so that argument errors are
	// reported against "SymbolIndex()" rather than a generic "function",
	// matching the ":__call__" suffix used by SymbolIndex_call().
	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:SymbolIndex", kwnames,
					 &list_obj))
		return NULL;

	// Any iterable is accepted, not just a list.
	_cleanup_pydecref_ PyObject *iter =
		PyObject_GetIter(list_obj);
	if (!iter)
		return NULL;

	_cleanup_(drgn_symbol_index_builder_deinit)
		struct drgn_symbol_index_builder builder;
	drgn_symbol_index_builder_init(&builder);

	for (;;) {
		// PyIter_Next() returns NULL both on exhaustion and on error;
		// the two cases are told apart after the loop.
		_cleanup_pydecref_ PyObject *item = PyIter_Next(iter);
		if (!item)
			break;
		if (!PyObject_TypeCheck(item, &Symbol_type))
			return PyErr_Format(PyExc_TypeError, "expected sequence of Symbols");
		Symbol *sym = (Symbol *)item;
		if (!drgn_symbol_index_builder_add(&builder, sym->sym))
			return PyErr_NoMemory();
	}

	// The iterator stopped because of an exception, not exhaustion.
	if (PyErr_Occurred())
		return NULL;

	_cleanup_pydecref_ SymbolIndex *index_obj = call_tp_alloc(SymbolIndex);
	if (!index_obj)
		return NULL;

	struct drgn_error *err =
		drgn_symbol_index_init_from_builder(&index_obj->index,
						    &builder);
	// On error, the builder and index are already deinitialized
	if (err)
		return set_drgn_error(err);

	// Success: disarm the cleanup so the caller receives the reference.
	return (PyObject *)no_cleanup_ptr(index_obj);
}

PyTypeObject SymbolIndex_type = {
PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "_drgn.SymbolIndex",
.tp_basicsize = sizeof(SymbolIndex),
.tp_dealloc = (destructor)SymbolIndex_dealloc,
.tp_flags = Py_TPFLAGS_DEFAULT,
.tp_doc = drgn_SymbolIndex_DOC,
.tp_call = (ternaryfunc)SymbolIndex_call,
.tp_new = SymbolIndex_new,
};
Loading

0 comments on commit fda9b25

Please sign in to comment.