Skip to content

Commit

Permalink
helpers: linux: add module kallsyms helpers
Browse files Browse the repository at this point in the history
Add Python helpers which load module kallsyms and return a symbol index
for them. Unlike the /proc/kallsyms and built-in kallsyms, these are
quite easy to handle using regular Python & drgn code, so implement them
as Python helpers.

There are (at least) two use cases for these helpers:
1. After loading CTF and built-in vmlinux kallsyms, support for module
   kallsyms is still necessary.
2. Sometimes, people only extract vmlinux DWARF debuginfo. Adding module
   symbols can allow stack traces and other symbolization to work even
   without module debuginfo.

Signed-off-by: Stephen Brennan <[email protected]>
  • Loading branch information
brenns10 committed Sep 30, 2024
1 parent 1b79138 commit 61c95ab
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 4 deletions.
167 changes: 165 additions & 2 deletions drgn/helpers/linux/kallsyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,27 @@
"""
import os
import re
from typing import Dict
from typing import Dict, List, Tuple

from _drgn import (
_linux_helper_load_builtin_kallsyms,
_linux_helper_load_proc_kallsyms as _load_proc_kallsyms,
)
from drgn import Program, ProgramFlags, SymbolIndex
from drgn import (
Object,
Program,
ProgramFlags,
Symbol,
SymbolBinding,
SymbolIndex,
SymbolKind,
)
from drgn.helpers.linux.module import for_each_module

# Public helpers exported by this module; names with a leading underscore
# below are internal and intentionally left out.
__all__ = (
"load_vmlinux_kallsyms",
"load_module_kallsyms",
"module_kallsyms",
)


Expand Down Expand Up @@ -73,3 +84,155 @@ def load_vmlinux_kallsyms(prog: Program) -> SymbolIndex:
return _load_proc_kallsyms()
else:
return _load_builtin_kallsyms(prog)


def _nm_type_to_binding_kind(code: str) -> Tuple[SymbolBinding, SymbolKind]:
    """
    Infer a symbol's binding and kind from an nm(1)-style type character.

    The mapping is a best-effort guess:

    - ``"v"`` is a weak object and ``"w"`` is a weak symbol of unknown kind.
    - ``"t"``/``"T"`` are functions.
    - ``s``, ``r``, ``b``, ``g``, ``n``, ``c`` and ``d`` (either case) are
      objects.
    - Any other code has an unknown kind.

    Apart from the two weak codes, an uppercase code is reported as a global
    binding and anything else as an unknown binding.

    :param code: the type character (callers in this file pass exactly one
        character)
    :returns: a ``(binding, kind)`` tuple
    """
    # The two weak codes fully determine the result, so handle them up front.
    if code == "v":
        return SymbolBinding.WEAK, SymbolKind.OBJECT
    if code == "w":
        return SymbolBinding.WEAK, SymbolKind.UNKNOWN

    if code in "tT":
        kind = SymbolKind.FUNC
    elif code.lower() in "srbgncd":
        kind = SymbolKind.OBJECT
    else:
        kind = SymbolKind.UNKNOWN

    # nm(1) uses uppercase for global symbols; lowercase tells us nothing
    # reliable here, so leave the binding unknown.
    binding = SymbolBinding.GLOBAL if code.isupper() else SymbolBinding.UNKNOWN
    return binding, kind


def _st_info_to_binding_kind(info: int) -> Tuple[SymbolBinding, SymbolKind]:
    """
    Decode an ELF ``st_info`` value into a drgn binding and kind.

    The low four bits hold the ELF symbol type and the bits above them hold
    the ELF symbol binding. Values outside the ranges recognized below are
    reported as unknown.

    :param info: raw ``st_info`` value
    :returns: a ``(binding, kind)`` tuple
    """
    # ELF constants bounding the values we know how to translate.
    STB_WEAK = 2
    STB_GNU_UNIQUE = 10
    STT_TLS = 6
    STT_GNU_IFUNC = 10

    elf_binding = info >> 4
    elf_type = info & 0xF

    # Recognized ELF bindings map to SymbolBinding(value + 1); presumably the
    # enum reserves the first slot for UNKNOWN -- confirm against drgn's
    # SymbolBinding definition.
    binding = (
        SymbolBinding(elf_binding + 1)
        if elf_binding <= STB_WEAK or elf_binding == STB_GNU_UNIQUE
        else SymbolBinding.UNKNOWN
    )
    # Recognized ELF types are passed to SymbolKind unchanged.
    kind = (
        SymbolKind(elf_type)
        if elf_type <= STT_TLS or elf_type == STT_GNU_IFUNC
        else SymbolKind.UNKNOWN
    )
    return binding, kind


def _elf_sym_to_symbol(name: str, obj: Object, has_typetab: bool) -> Symbol:
    """
    Build a drgn :class:`Symbol` from one entry of a module's ELF symbol table.

    :param name: symbol name, already extracted from the string table
    :param obj: the symbol table entry; its ``st_info``, ``st_value`` and
        ``st_size`` members are read
    :param has_typetab: whether ``struct mod_kallsyms`` has a ``typetab``
        member, which selects how ``st_info`` is interpreted
    :returns: the equivalent :class:`Symbol`
    """
    # The kernel wants the nm(1) character code for each module symbol (it
    # calls this the symbol's "type", which is neither the ELF symbol type nor
    # a drgn type). Where that code lives has changed over time:
    #
    # 1. Before v5.0: the code overwrites "st_info", so the real ELF info is
    #    gone and binding/kind must be guessed from the nm(1) character.
    # 2. v5.0 and v5.1, after 5439c985c5a8 ("module: Overwrite st_size instead
    #    of st_info"): "st_info" is genuine, but "st_size" holds the code and
    #    is therefore bogus.
    # 3. v5.2 and later, after 1c7651f43777 ("kallsyms: store type information
    #    in its own array"): both fields are genuine and the code lives in
    #    "typetab".
    #
    # Case 3 is detectable from the presence of "typetab". Cases 1 and 2 cannot
    # be told apart, so without "typetab" we assume case 1: neither 5.0 nor 5.1
    # was an LTS kernel or is in active use by a major distro, and nothing
    # useful can be done for them anyway, whereas the nm(1) code lets us make
    # informed guesses for pre-5.0 kernels.
    info = obj.st_info.value_()
    if has_typetab:
        binding, kind = _st_info_to_binding_kind(info)
    else:
        binding, kind = _nm_type_to_binding_kind(chr(info))

    address = obj.st_value.value_()
    size = obj.st_size.value_()
    return Symbol(name, address, size, binding, kind)  # type: ignore


def module_kallsyms(module: Object) -> List[Symbol]:
    """
    Return a list of symbols for a kernel module

    When compiled with ``CONFIG_KALLSYMS``, the kernel maintains ELF symbol
    information about each module within ``struct module``. This function
    accesses this symbol information, and returns a list of drgn :class:`Symbol`
    objects for the module. Keep in mind that unless ``CONFIG_KALLSYMS_ALL`` is
    enabled, these symbols are typically only function symbols.

    Symbol table entries without a name (``st_name == 0``) are skipped. A
    module whose symbol table is empty, or contains no named entries, yields
    an empty list.

    :param module: :class:`Object` of type ``struct module *``
    :returns: a list of symbols
    """
    try:
        ks = module.kallsyms
    except AttributeError:
        # Prior to 8244062ef1e54 ("modules: fix longstanding /proc/kallsyms vs
        # module insertion race."), the kallsyms variables were stored directly
        # on the module object. This commit was introduced in 4.5, but was
        # backported to some stable kernels too. Fall back to the module object
        # in cases where kallsyms field isn't available.
        ks = module

    prog = module.prog_
    # Read the count exactly once: every later step must agree with the size
    # of the array we read below, even if the target is a live kernel.
    num_symtab = ks.num_symtab.value_()
    if num_symtab == 0:
        # Nothing to report, and the code below needs at least one entry.
        return []

    try:
        ks.member_("typetab")
        has_typetab = True
    except LookupError:
        has_typetab = False

    # The symtab field is a pointer, but it points at an array of Elf_Sym
    # objects. Indexing it requires drgn to do pointer arithmetic and issue a
    # lot of very small /proc/kcore reads, which can be a real performance
    # issue. So convert it into an object representing a correctly-sized array,
    # and then read that object all at once. This does one /proc/kcore read,
    # which is a major improvement!
    symtab = Object(
        prog,
        type=prog.array_type(ks.symtab.type_.type, num_symtab),
        address=ks.symtab.value_(),
    ).read_()

    # Collect the named entries along with their string table offsets. Entries
    # with a zero st_name have no name and are skipped.
    named = []
    for i in range(num_symtab):
        elfsym = symtab[i]
        name_offset = elfsym.st_name.value_()
        if name_offset:
            named.append((name_offset, elfsym))
    if not named:
        return []

    # The strtab is similarly a pointer into a contiguous array of strings
    # packed next to each other. Reading individual strings from /proc/kcore
    # can be quite slow. So read the entire array of bytes into a Python bytes
    # value, and we'll extract the individual symbol strings from there.
    #
    # The read must extend to the end of the string with the highest starting
    # offset. Do not assume that this is the last symbol's name: if it is not,
    # names beyond the buffer would silently come out empty.
    last_string_start = max(offset for offset, _ in named)
    last_string_len = len(ks.strtab[last_string_start].address_of_().string_()) + 1
    strtab = prog.read(ks.strtab.value_(), last_string_start + last_string_len)

    syms = []
    for name_offset, elfsym in named:
        # The buffer ends with the NUL terminator of the furthest string, so a
        # terminator is always found for every collected offset.
        nul_byte = strtab.find(b"\x00", name_offset)
        name = strtab[name_offset:nul_byte].decode("ascii")
        syms.append(_elf_sym_to_symbol(name, elfsym, has_typetab))
    return syms


def load_module_kallsyms(prog: Program) -> SymbolIndex:
    """
    Return a symbol index containing all module symbols from kallsyms

    For kernels built with ``CONFIG_KALLSYMS``, loaded kernel modules contain
    an ELF symbol table in kernel memory. This function can parse those data
    structures and create a symbol index usable by drgn. However, it requires
    that you already have debuginfo for the vmlinux image.

    :param prog: program whose loaded modules are walked
    :returns: a symbol index containing all symbols from module kallsyms
    """
    # Flatten the per-module symbol lists into a single list, in module
    # iteration order, before building the index.
    symbols = [
        symbol
        for module in for_each_module(prog)
        for symbol in module_kallsyms(module)
    ]
    return SymbolIndex(symbols)
20 changes: 18 additions & 2 deletions tests/linux_kernel/helpers/test_kallsyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
from unittest import TestCase

from drgn import Symbol, SymbolBinding, SymbolKind
from drgn.helpers.linux.kallsyms import _load_builtin_kallsyms, _load_proc_kallsyms
from tests.linux_kernel import LinuxKernelTestCase
from drgn.helpers.linux.kallsyms import (
_load_builtin_kallsyms,
_load_proc_kallsyms,
load_module_kallsyms,
)
from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod


def compare_local_symbols(self, finder, modules=False):
Expand Down Expand Up @@ -93,3 +97,15 @@ def test_builtin_kallsyms(self):
self.skipTest("VMCOREINFO is missing necessary symbols")
finder = _load_builtin_kallsyms(self.prog)
compare_local_symbols(self, finder)

@skip_unless_have_test_kmod
def test_module_kallsyms(self):
    """
    Check that a symbol from the test module is indexed with the same name,
    kind, size and address that the program's own lookups report for it.
    """
    prog = self.prog
    symbol_name = "drgn_test_empty_list"

    index = load_module_kallsyms(prog)
    # Look the symbol up by name only and take the first match.
    matches = index(None, symbol_name, None, True)
    sym = matches[0]

    self.assertEqual(symbol_name, sym.name)
    self.assertEqual(SymbolKind.OBJECT, sym.kind)
    # The binding cannot always be recovered, so UNKNOWN is accepted too.
    self.assertIn(sym.binding, (SymbolBinding.GLOBAL, SymbolBinding.UNKNOWN))

    expected_size = prog.type("struct list_head").size
    self.assertEqual(expected_size, sym.size)

    expected_address = prog.object(symbol_name).address_
    self.assertEqual(expected_address, sym.address)

0 comments on commit 61c95ab

Please sign in to comment.