Skip to content

Commit

Permalink
helpers: linux: add module kallsyms helpers
Browse files Browse the repository at this point in the history
Add Python helpers which load module kallsyms and return a symbol index
for them. Unlike the /proc/kallsyms and built-in kallsyms, these are
quite easy to handle using regular Python & drgn code, so implement them
as Python helpers.

There are (at least) two use cases for these helpers:
1. After loading CTF and built-in vmlinux kallsyms, support for module
   kallsyms is still necessary.
2. Sometimes, people only extract vmlinux DWARF debuginfo. Adding module
   symbols can allow stack traces and other symbolization to work even
   without module debuginfo.

Signed-off-by: Stephen Brennan <[email protected]>
  • Loading branch information
brenns10 committed Mar 18, 2024
1 parent e8c5245 commit 500e52b
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 4 deletions.
115 changes: 113 additions & 2 deletions drgn/helpers/linux/kallsyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,29 @@
"""
import os
import re
from typing import Dict
from typing import Dict, List

from _drgn import (
_linux_helper_load_builtin_kallsyms,
_linux_helper_load_proc_kallsyms as load_proc_kallsyms,
)
from drgn import Program, ProgramFlags, SymbolIndex
from drgn import (
Object,
Program,
ProgramFlags,
Symbol,
SymbolBinding,
SymbolIndex,
SymbolKind,
)
from drgn.helpers.linux.module import for_each_module

__all__ = (
"load_builtin_kallsyms",
"load_proc_kallsyms",
"load_vmlinux_kallsyms",
"load_module_kallsyms",
"module_kallsyms",
)


Expand Down Expand Up @@ -89,3 +100,103 @@ def load_vmlinux_kallsyms(prog: Program) -> SymbolIndex:
return load_proc_kallsyms()
else:
return load_builtin_kallsyms(prog)


def _elf_sym_to_symbol(name: str, obj: Object) -> Symbol:
    """
    Build a drgn :class:`Symbol` from an ELF symbol table entry

    Similar to drgn_symbol_from_elf() in libdrgn/symbol.c.

    :param name: symbol name, already resolved from the string table
    :param obj: object providing ``st_info``, ``st_value`` and ``st_size``
    :returns: the equivalent :class:`Symbol`
    """
    # ELF constants used to bound the recognized binding/type values.
    STB_WEAK = 2
    STB_GNU_UNIQUE = 10
    STT_TLS = 6
    STT_GNU_IFUNC = 10

    # st_info packs the binding in the high nibble and the type in the low one.
    st_info = obj.st_info.value_()
    elf_binding = st_info >> 4
    elf_type = st_info & 0xF

    # Recognized ELF bindings map to SymbolBinding(value + 1); anything else is
    # reported as UNKNOWN.
    if elf_binding == STB_GNU_UNIQUE or elf_binding <= STB_WEAK:
        sym_binding = SymbolBinding(elf_binding + 1)
    else:
        sym_binding = SymbolBinding.UNKNOWN

    # Recognized ELF types map to SymbolKind with the same numeric value.
    if elf_type == STT_GNU_IFUNC or elf_type <= STT_TLS:
        sym_kind = SymbolKind(elf_type)
    else:
        sym_kind = SymbolKind.UNKNOWN

    address = obj.st_value.value_()
    size = obj.st_size.value_()
    return Symbol(name, address, size, sym_binding, sym_kind)  # type: ignore


def module_kallsyms(module: Object) -> List[Symbol]:
    """
    Return a list of symbols for a kernel module

    When compiled with ``CONFIG_KALLSYMS``, the kernel maintains ELF symbol
    information about each module within ``struct module``. This function
    accesses this symbol information, and returns a list of drgn :class:`Symbol`
    objects for the module. Keep in mind that unless ``CONFIG_KALLSYMS_ALL`` is
    enabled, these symbols are typically only function symbols.

    Symbol table entries whose name offset is zero (unnamed entries) are
    skipped. A module with an empty symbol table yields an empty list.

    :param module: :class:`Object` of type ``struct module *``
    :returns: a list of symbols
    """
    try:
        ks = module.kallsyms
    except AttributeError:
        # Prior to 8244062ef1e54 ("modules: fix longstanding /proc/kallsyms vs
        # module insertion race."), the kallsyms variables were stored directly
        # on the module object. This commit was introduced in 4.5, but was
        # backported to some stable kernels too. Fall back to the module object
        # in cases where kallsyms field isn't available.
        ks = module

    prog = module.prog_
    # Read the count once; every .value_() on a reference object is a read from
    # the target, so reuse this local everywhere below.
    num_symtab = ks.num_symtab.value_()
    if not num_symtab:
        # Nothing to index; also avoids subscripting an empty array below.
        return []

    # The symtab field is a pointer, but it points at an array of Elf_Sym
    # objects. Indexing it requires drgn to do pointer arithmetic and issue a
    # lot of very small /proc/kcore reads, which can be a real performance
    # issue. So convert it into an object representing a correctly-sized array,
    # and then read that object all at once. This does one /proc/kcore read,
    # which is a major improvement!
    symtab = Object(
        prog,
        type=prog.array_type(ks.symtab.type_.type, num_symtab),
        address=ks.symtab.value_(),
    ).read_()

    # First pass: collect the named entries and find the largest name offset.
    # The largest offset (rather than the offset of the final symtab entry) is
    # what determines how much of the string table must be read, so this stays
    # correct even if the strings are not laid out in symbol order.
    named_entries = []
    max_name_offset = 0
    for i in range(num_symtab):
        elfsym = symtab[i]
        name_offset = elfsym.st_name.value_()
        if not name_offset:
            continue
        named_entries.append((name_offset, elfsym))
        if name_offset > max_name_offset:
            max_name_offset = name_offset
    if not named_entries:
        return []

    # The strtab is similarly a pointer into a contiguous array of strings
    # packed next to each other. Reading individual strings from /proc/kcore
    # can be quite slow. So read the entire array of bytes into a Python bytes
    # value, and we'll extract the individual symbol strings from there. The
    # read extends through the NUL terminator of the string at the largest
    # offset, so every name looked up below is NUL-terminated in the buffer.
    final_string_len = len(ks.strtab[max_name_offset].address_of_().string_()) + 1
    strtab = prog.read(ks.strtab.value_(), max_name_offset + final_string_len)

    syms = []
    for name_offset, elfsym in named_entries:
        nul_byte = strtab.find(b"\x00", name_offset)
        name = strtab[name_offset:nul_byte].decode("ascii")
        syms.append(_elf_sym_to_symbol(name, elfsym))
    return syms


def load_module_kallsyms(prog: Program) -> SymbolIndex:
    """
    Return a symbol index containing all module symbols from kallsyms

    For kernels built with ``CONFIG_KALLSYMS``, loaded kernel modules contain
    an ELF symbol table in kernel memory. This function can parse those data
    structures and create a symbol index usable by drgn. However, it requires
    that you already have debuginfo for the vmlinux image.

    :param prog: Program being debugged
    :returns: a symbol index containing all symbols from module kallsyms
    """
    # Flatten the per-module symbol lists, in module iteration order, into one
    # list and hand it to SymbolIndex.
    return SymbolIndex(
        [
            symbol
            for mod in for_each_module(prog)
            for symbol in module_kallsyms(mod)
        ]
    )
25 changes: 25 additions & 0 deletions drgn/helpers/linux/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024 Oracle and/or its affiliates
# SPDX-License-Identifier: LGPL-2.1-or-later
"""
Modules
-------
The ``drgn.helpers.linux.module`` module contains helpers for working with
loaded kernel modules.
"""
from typing import Iterable

from drgn import Object, Program
from drgn.helpers.linux.list import list_for_each_entry

__all__ = ("for_each_module",)


def for_each_module(prog: Program) -> Iterable[Object]:
    """
    Iterate over all loaded kernel modules

    :param prog: Program being debugged
    :returns: Iterable of ``struct module *`` objects
    """
    # Modules are linked through their ``list`` member onto the ``modules``
    # list head looked up in the program.
    modules_head = prog["modules"].address_of_()
    return list_for_each_entry("struct module", modules_head, "list")
20 changes: 18 additions & 2 deletions tests/linux_kernel/helpers/test_kallsyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
from unittest import TestCase

from drgn import Symbol, SymbolBinding, SymbolKind
from drgn.helpers.linux.kallsyms import load_builtin_kallsyms, load_proc_kallsyms
from tests.linux_kernel import LinuxKernelTestCase
from drgn.helpers.linux.kallsyms import (
load_builtin_kallsyms,
load_module_kallsyms,
load_proc_kallsyms,
)
from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod


def compare_local_symbols(self, finder, modules=False):
Expand Down Expand Up @@ -87,3 +91,15 @@ def test_builtin_kallsyms(self):
self.skipTest("VMCOREINFO is missing necessary symbols")
finder = load_builtin_kallsyms(self.prog)
compare_local_symbols(self, finder)

@skip_unless_have_test_kmod
def test_module_kallsyms(self):
    # Look up a symbol from the test kernel module through the module kallsyms
    # index and compare it against what the program itself reports.
    symbol_name = "drgn_test_empty_list"
    index = load_module_kallsyms(self.prog)
    matches = index("drgn_test_empty_list", None, True)
    found = matches[0]
    self.assertEqual(symbol_name, found.name)
    self.assertEqual(SymbolKind.OBJECT, found.kind)
    self.assertEqual(SymbolBinding.GLOBAL, found.binding)
    # The symbol's size is checked against sizeof(struct list_head).
    expected_size = self.prog.type("struct list_head").size
    self.assertEqual(expected_size, found.size)
    expected_address = self.prog.object("drgn_test_empty_list").address_
    self.assertEqual(expected_address, found.address)

0 comments on commit 500e52b

Please sign in to comment.