diff --git a/drgn/helpers/linux/kallsyms.py b/drgn/helpers/linux/kallsyms.py
index bd2c0887e..448cbe239 100644
--- a/drgn/helpers/linux/kallsyms.py
+++ b/drgn/helpers/linux/kallsyms.py
@@ -13,16 +13,26 @@
 """
 import os
 import re
-from typing import Dict
+from typing import Dict, List, Tuple
 
 from _drgn import (
     _linux_helper_load_builtin_kallsyms,
     _linux_helper_load_proc_kallsyms as _load_proc_kallsyms,
 )
-from drgn import Program, ProgramFlags, SymbolIndex
+from drgn import (
+    Object,
+    Program,
+    ProgramFlags,
+    Symbol,
+    SymbolBinding,
+    SymbolIndex,
+    SymbolKind,
+)
+from drgn.helpers.linux.module import for_each_module
 
 __all__ = (
     "load_vmlinux_kallsyms",
+    "load_module_kallsyms",
 )
 
 
@@ -73,3 +83,186 @@ def load_vmlinux_kallsyms(prog: Program) -> SymbolIndex:
         return _load_proc_kallsyms()
     else:
         return _load_builtin_kallsyms(prog)
+
+
+def _nm_type_to_binding_kind(code: str) -> Tuple[SymbolBinding, SymbolKind]:
+    """
+    Translate an nm(1) symbol type character into a drgn binding and kind.
+
+    :param code: single-character nm(1) symbol type code
+    :returns: ``(binding, kind)``; either element is ``UNKNOWN`` when the
+        code does not determine it
+    """
+    binding = SymbolBinding.UNKNOWN
+    kind = SymbolKind.UNKNOWN
+    if code == "v":
+        binding = SymbolBinding.WEAK
+        kind = SymbolKind.OBJECT
+    elif code == "w":
+        binding = SymbolBinding.WEAK
+    elif code in "tT":
+        kind = SymbolKind.FUNC
+    elif code.lower() in "srbgncd":
+        kind = SymbolKind.OBJECT
+    if binding == SymbolBinding.UNKNOWN and code.isupper():
+        binding = SymbolBinding.GLOBAL
+    return binding, kind
+
+
+def _st_info_to_binding_kind(info: int) -> Tuple[SymbolBinding, SymbolKind]:
+    """
+    Decode an ELF ``st_info`` value into a drgn binding and kind.
+
+    The binding comes from the bits above the low four, and the type from the
+    low four bits. Values not handled below map to ``UNKNOWN``.
+
+    :param info: value of the ``st_info`` field of an ELF symbol
+    :returns: ``(binding, kind)``
+    """
+    binding_int = info >> 4
+    STB_WEAK = 2
+    STB_GNU_UNIQUE = 10
+    if binding_int <= STB_WEAK or binding_int == STB_GNU_UNIQUE:
+        # The ELF binding value plus one is used as the SymbolBinding value.
+        binding = SymbolBinding(binding_int + 1)
+    else:
+        binding = SymbolBinding.UNKNOWN
+    type_ = info & 0xF
+    STT_TLS = 6
+    STT_GNU_IFUNC = 10
+    if type_ <= STT_TLS or type_ == STT_GNU_IFUNC:
+        # The ELF type value is used directly as the SymbolKind value.
+        kind = SymbolKind(type_)
+    else:
+        kind = SymbolKind.UNKNOWN
+    return binding, kind
+
+
+def _elf_sym_to_symbol(name: str, obj: Object, has_typetab: bool) -> Symbol:
+    """
+    Build a drgn :class:`Symbol` from a module's ELF symbol table entry.
+
+    :param name: symbol name
+    :param obj: ELF symbol object providing ``st_info``, ``st_value``, and
+        ``st_size``
+    :param has_typetab: whether ``struct mod_kallsyms`` has a ``typetab``
+        member; if not, ``st_info`` is decoded as an nm(1) character code
+    :returns: the corresponding symbol
+    """
+    # Linux likes to have the nm(1) character code for its symbols, which it
+    # refers to as the symbol's "type" (this is of course distinct from the ELF
+    # notion of a symbol type, let alone what drgn considers a "type"...).
+    #
+    # Prior to 5439c985c5a8 ("module: Overwrite st_size instead of st_info"),
+    # merged in v5.0, the kernel simply overwrote the "st_info" field with a
+    # single-character code that represents the nm(1) character code for that
+    # symbol. However, starting with that commit, it was switched to overwrite
+    # the "st_size" field instead! This was thankfully fixed in v5.2 with
+    # 1c7651f43777 ("kallsyms: store type information in its own array").
+    #
+    # Unfortunately, this leaves us with three possibilities:
+    # 1. Pre-v5.0: interpret the "st_info" as a character from nm(1) and try to
+    #    infer the kind and bindings.
+    # 2. 5.0-5.2: interpret the "st_info" as normal, but ignore the "st_size"
+    #    field since it is bogus.
+    # 3. 5.2+: both fields are valid, and the nm(1) code is stored in "typetab".
+    #
+    # Case 3 can be determined easily by the presence of "typetab" in "struct
+    # mod_kallsyms". However, cases 1 & 2 are indistinguishable. For our
+    # purposes, it makes more sense to fall back to case 1. After all, neither
+    # 5.0 nor 5.1 were LTS kernels, nor are they actively used by any major
+    # distro. We have no way to deal with 5.0 or 5.1, whereas we can make some
+    # informed guesses for pre-5.0 based on the nm(1) code.
+    if has_typetab:
+        binding, kind = _st_info_to_binding_kind(obj.st_info.value_())
+    else:
+        binding, kind = _nm_type_to_binding_kind(chr(obj.st_info.value_()))
+    return Symbol(  # type: ignore
+        name,
+        obj.st_value.value_(),
+        obj.st_size.value_(),
+        binding,
+        kind,
+    )
+
+
+def _module_kallsyms(module: Object) -> List[Symbol]:
+    """
+    Return a list of symbols for a kernel module
+
+    When compiled with ``CONFIG_KALLSYMS``, the kernel maintains ELF symbol
+    information about each module within ``struct module``. This function
+    accesses this symbol information, and returns a list of drgn :class:`Symbol`
+    objects for the module. Keep in mind that unless ``CONFIG_KALLSYMS_ALL`` is
+    enabled, these symbols are typically only function symbols.
+
+    :param module: :class:`Object` of type ``struct module *``
+    :returns: a list of symbols
+    """
+    try:
+        ks = module.kallsyms
+    except AttributeError:
+        # Prior to 8244062ef1e54 ("modules: fix longstanding /proc/kallsyms vs
+        # module insertion race."), the kallsyms variables were stored directly
+        # on the module object. This commit was introduced in 4.5, but was
+        # backported to some stable kernels too. Fall back to the module object
+        # in cases where kallsyms field isn't available.
+        ks = module
+
+    prog = module.prog_
+    num_symtab = ks.num_symtab.value_()
+    if num_symtab == 0:
+        # Nothing to read, and no last entry to size the string table from.
+        return []
+    try:
+        ks.member_("typetab")
+        has_typetab = True
+    except LookupError:
+        has_typetab = False
+
+    # The symtab field is a pointer, but it points at an array of Elf_Sym
+    # objects. Indexing it requires drgn to do pointer arithmetic and issue a
+    # lot of very small /proc/kcore reads, which can be a real performance
+    # issue. So convert it into an object representing a correctly-sized array,
+    # and then read that object all at once. This does one /proc/kcore read,
+    # which is a major improvement!
+    symtab = Object(
+        prog,
+        type=prog.array_type(ks.symtab.type_.type, num_symtab),
+        address=ks.symtab.value_(),
+    ).read_()
+
+    # The strtab is similarly a pointer into a contiguous array of strings packed
+    # next to each other. Reading individual strings from /proc/kcore can be
+    # quite slow. So read the entire array of bytes into a Python bytes value,
+    # and we'll extract the individual symbol strings from there.
+    last_string_start = symtab[num_symtab - 1].st_name.value_()
+    last_string_len = len(ks.strtab[last_string_start].address_of_().string_()) + 1
+    strtab = prog.read(ks.strtab.value_(), last_string_start + last_string_len)
+    syms = []
+    for i in range(num_symtab):
+        elfsym = symtab[i]
+        if not elfsym.st_name:
+            continue
+        str_index = elfsym.st_name.value_()
+        nul_byte = strtab.find(b"\x00", str_index)
+        name = strtab[str_index:nul_byte].decode("ascii")
+        syms.append(_elf_sym_to_symbol(name, elfsym, has_typetab))
+    return syms
+
+
+def load_module_kallsyms(prog: Program) -> SymbolIndex:
+    """
+    Return a symbol index containing all module symbols from kallsyms
+
+    For kernels built with ``CONFIG_KALLSYMS``, loaded kernel modules contain
+    an ELF symbol table in kernel memory. This function can parse those data
+    structures and create a symbol index usable by drgn. However, it requires
+    that you already have debuginfo for the vmlinux image.
+
+    :returns: a symbol index containing all symbols from module kallsyms
+    """
+    all_symbols = []
+    for module in for_each_module(prog):
+        all_symbols.extend(_module_kallsyms(module))
+    return SymbolIndex(all_symbols)
diff --git a/tests/linux_kernel/helpers/test_kallsyms.py b/tests/linux_kernel/helpers/test_kallsyms.py
index 4533de2c4..104f6fae5 100644
--- a/tests/linux_kernel/helpers/test_kallsyms.py
+++ b/tests/linux_kernel/helpers/test_kallsyms.py
@@ -5,8 +5,12 @@
 from unittest import TestCase
 
 from drgn import Symbol, SymbolBinding, SymbolKind
-from drgn.helpers.linux.kallsyms import _load_builtin_kallsyms, _load_proc_kallsyms
-from tests.linux_kernel import LinuxKernelTestCase
+from drgn.helpers.linux.kallsyms import (
+    _load_builtin_kallsyms,
+    _load_proc_kallsyms,
+    load_module_kallsyms,
+)
+from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod
 
 
 def compare_local_symbols(self, finder, modules=False):
@@ -93,3 +97,15 @@ def test_builtin_kallsyms(self):
             self.skipTest("VMCOREINFO is missing necessary symbols")
         finder = _load_builtin_kallsyms(self.prog)
         compare_local_symbols(self, finder)
+
+    @skip_unless_have_test_kmod
+    def test_module_kallsyms(self):
+        finder = load_module_kallsyms(self.prog)
+        test_data = finder(None, "drgn_test_empty_list", None, True)[0]
+        self.assertEqual("drgn_test_empty_list", test_data.name)
+        self.assertEqual(SymbolKind.OBJECT, test_data.kind)
+        self.assertIn(test_data.binding, (SymbolBinding.GLOBAL, SymbolBinding.UNKNOWN))
+        size = self.prog.type("struct list_head").size
+        self.assertEqual(size, test_data.size)
+        address = self.prog.object("drgn_test_empty_list").address_
+        self.assertEqual(address, test_data.address)