diff --git a/go.mod b/go.mod index 762f05e3d..8e440d167 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/onsi/gomega v1.27.8 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.0-rc4 - github.com/opencontainers/runc v1.1.7 + github.com/opencontainers/runc v1.1.8 github.com/opencontainers/runtime-spec v1.1.0-rc.3 github.com/opencontainers/runtime-tools v0.9.1-0.20230317050512-e931285f4b69 github.com/opencontainers/selinux v1.11.0 @@ -57,6 +57,7 @@ require ( github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/chzyer/readline v1.5.1 // indirect + github.com/cilium/ebpf v0.9.1 // indirect github.com/containerd/cgroups v1.1.0 // indirect github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01 // indirect @@ -141,4 +142,4 @@ retract ( v1.0.0 // We reverted to v0.… version numbers; the v1.0.0 tag was actually deleted. ) -replace github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.1-0.20220617142545-8b9452f75cbc +replace github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.8 diff --git a/go.sum b/go.sum index 7b3b236e2..29b5f8e59 100644 --- a/go.sum +++ b/go.sum @@ -35,6 +35,8 @@ github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObk github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= +github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4= +github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/container-orchestrated-devices/container-device-interface v0.6.0 h1:aWwcz/Ep0Fd7ZuBjQGjU/jdPloM7ydhMW13h85jZNvk= @@ -87,6 +89,7 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7 github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a h1:yDWHCSQ40h88yih2JAcL6Ls/kVkSE8GFACTGVnMPruw= github.com/facebookgo/limitgroup v0.0.0-20150612190941-6abd8d71ec01 h1:IeaD1VDVBPlx3viJT9Md8if8IxxJnO+x0JCGb054heg= github.com/facebookgo/muster v0.0.0-20150708232844-fd3d7953fd52 h1:a4DFiKFJiDRGFD1qIcqGLX/WlUMD9dyLSLDt+9QZgt8= +github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= @@ -305,8 +308,8 @@ github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3I github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/image-spec v1.1.0-rc4 h1:oOxKUJWnFC4YGHCCMNql1x4YaDfYBTS5Y4x/Cgeo1E0= github.com/opencontainers/image-spec v1.1.0-rc4/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= -github.com/opencontainers/runc v1.1.1-0.20220617142545-8b9452f75cbc h1:qjkUzmFsOFbQyjObybk40mRida83j5IHRaKzLGdBbEU= -github.com/opencontainers/runc v1.1.1-0.20220617142545-8b9452f75cbc/go.mod h1:wUOQGsiKae6VzA/UvlCK3cO+pHk8F2VQHlIoITEfMM8= +github.com/opencontainers/runc v1.1.8 h1:zICRlc+C1XzivLc3nzE+cbJV4LIi8tib6YG0MqC6OqA= +github.com/opencontainers/runc v1.1.8/go.mod h1:CbUumNnWCuTGFukNXahoo/RFBZvDAgRh/smNYNOhA50= github.com/opencontainers/runtime-spec v1.1.0-rc.3 h1:l04uafi6kxByhbxev7OWiuUv0LZxEsYUfDWZ6bztAuU= github.com/opencontainers/runtime-spec v1.1.0-rc.3/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-tools v0.9.1-0.20230317050512-e931285f4b69 h1:NL4xDvl68WWqQ+8WPMM3l5PsZTxaT7Z4K3VSKDRuAGs= diff --git a/vendor/github.com/cilium/ebpf/.clang-format b/vendor/github.com/cilium/ebpf/.clang-format new file mode 100644 index 000000000..4eb94b1ba --- /dev/null +++ b/vendor/github.com/cilium/ebpf/.clang-format @@ -0,0 +1,17 @@ +--- +Language: Cpp +BasedOnStyle: LLVM +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: true +AlignEscapedNewlines: DontAlign +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: false +BreakBeforeBraces: Attach +IndentWidth: 4 +KeepEmptyLinesAtTheStartOfBlocks: false +TabWidth: 4 +UseTab: ForContinuationAndIndentation +ColumnLimit: 1000 +... diff --git a/vendor/github.com/cilium/ebpf/.gitignore b/vendor/github.com/cilium/ebpf/.gitignore new file mode 100644 index 000000000..b46162b8e --- /dev/null +++ b/vendor/github.com/cilium/ebpf/.gitignore @@ -0,0 +1,14 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib +*.o +!*_bpf*.o + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out diff --git a/vendor/github.com/cilium/ebpf/.golangci.yaml b/vendor/github.com/cilium/ebpf/.golangci.yaml new file mode 100644 index 000000000..dc62dd6d0 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/.golangci.yaml @@ -0,0 +1,28 @@ +--- +issues: + exclude-rules: + # syscall param structs will have unused fields in Go code. + - path: syscall.*.go + linters: + - structcheck + +linters: + disable-all: true + enable: + - deadcode + - errcheck + - goimports + - gosimple + - govet + - ineffassign + - misspell + - staticcheck + - structcheck + - typecheck + - unused + - varcheck + + # Could be enabled later: + # - gocyclo + # - maligned + # - gosec diff --git a/vendor/github.com/cilium/ebpf/ARCHITECTURE.md b/vendor/github.com/cilium/ebpf/ARCHITECTURE.md new file mode 100644 index 000000000..8cd7e2486 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/ARCHITECTURE.md @@ -0,0 +1,86 @@ +Architecture of the library +=== + + ELF -> Specifications -> Objects -> Links + +ELF +--- + +BPF is usually produced by using Clang to compile a subset of C. Clang outputs +an ELF file which contains program byte code (aka BPF), but also metadata for +maps used by the program. The metadata follows the conventions set by libbpf +shipped with the kernel. Certain ELF sections have special meaning +and contain structures defined by libbpf. Newer versions of clang emit +additional metadata in BPF Type Format (aka BTF). + +The library aims to be compatible with libbpf so that moving from a C toolchain +to a Go one creates little friction. To that end, the [ELF reader](elf_reader.go) +is tested against the Linux selftests and avoids introducing custom behaviour +if possible. + +The output of the ELF reader is a `CollectionSpec` which encodes +all of the information contained in the ELF in a form that is easy to work with +in Go. + +### BTF + +The BPF Type Format describes more than just the types used by a BPF program. It +includes debug aids like which source line corresponds to which instructions and +what global variables are used. + +[BTF parsing](internal/btf/) lives in a separate internal package since exposing +it would mean an additional maintenance burden, and because the API still +has sharp corners. The most important concept is the `btf.Type` interface, which +also describes things that aren't really types like `.rodata` or `.bss` sections. +`btf.Type`s can form cyclical graphs, which can easily lead to infinite loops if +one is not careful. Hopefully a safe pattern to work with `btf.Type` emerges as +we write more code that deals with it. + +Specifications +--- + +`CollectionSpec`, `ProgramSpec` and `MapSpec` are blueprints for in-kernel +objects and contain everything necessary to execute the relevant `bpf(2)` +syscalls. Since the ELF reader outputs a `CollectionSpec` it's possible to +modify clang-compiled BPF code, for example to rewrite constants. At the same +time the [asm](asm/) package provides an assembler that can be used to generate +`ProgramSpec` on the fly. + +Creating a spec should never require any privileges or be restricted in any way, +for example by only allowing programs in native endianness. This ensures that +the library stays flexible. + +Objects +--- + +`Program` and `Map` are the result of loading specs into the kernel. Sometimes +loading a spec will fail because the kernel is too old, or a feature is not +enabled. There are multiple ways the library deals with that: + +* Fallback: older kernels don't allow naming programs and maps. The library + automatically detects support for names, and omits them during load if + necessary. This works since name is primarily a debug aid. + +* Sentinel error: sometimes it's possible to detect that a feature isn't available. + In that case the library will return an error wrapping `ErrNotSupported`. + This is also useful to skip tests that can't run on the current kernel. + +Once program and map objects are loaded they expose the kernel's low-level API, +e.g. `NextKey`. Often this API is awkward to use in Go, so there are safer +wrappers on top of the low-level API, like `MapIterator`. The low-level API is +useful when our higher-level API doesn't support a particular use case. + +Links +--- + +BPF can be attached to many different points in the kernel and newer BPF hooks +tend to use bpf_link to do so. Older hooks unfortunately use a combination of +syscalls, netlink messages, etc. Adding support for a new link type should not +pull in large dependencies like netlink, so XDP programs or tracepoints are +out of scope. + +Each bpf_link_type has one corresponding Go type, e.g. `link.tracing` corresponds +to BPF_LINK_TRACING. In general, these types should be unexported as long as they +don't export methods outside of the Link interface. Each Go type may have multiple +exported constructors. For example `AttachTracing` and `AttachLSM` create a +tracing link, but are distinct functions since they may require different arguments. diff --git a/vendor/github.com/cilium/ebpf/CODE_OF_CONDUCT.md b/vendor/github.com/cilium/ebpf/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..8e42838c5 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/CODE_OF_CONDUCT.md @@ -0,0 +1,46 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at nathanjsweet at gmail dot com or i at lmb dot io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/vendor/github.com/cilium/ebpf/CONTRIBUTING.md b/vendor/github.com/cilium/ebpf/CONTRIBUTING.md new file mode 100644 index 000000000..0d29eae81 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/CONTRIBUTING.md @@ -0,0 +1,40 @@ +# How to contribute + +Development is on [GitHub](https://github.com/cilium/ebpf) and contributions in +the form of pull requests and issues reporting bugs or suggesting new features +are welcome. Please take a look at [the architecture](ARCHITECTURE.md) to get +a better understanding for the high-level goals. + +New features must be accompanied by tests. Before starting work on any large +feature, please [join](https://ebpf.io/slack) the +[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack to +discuss the design first. + +When submitting pull requests, consider writing details about what problem you +are solving and why the proposed approach solves that problem in commit messages +and/or pull request description to help future library users and maintainers to +reason about the proposed changes. + +## Running the tests + +Many of the tests require privileges to set resource limits and load eBPF code. +The easiest way to obtain these is to run the tests with `sudo`. + +To test the current package with your local kernel you can simply run: +``` +go test -exec sudo ./... +``` + +To test the current package with a different kernel version you can use the [run-tests.sh](run-tests.sh) script. +It requires [virtme](https://github.com/amluto/virtme) and qemu to be installed. + +Examples: + +```bash +# Run all tests on a 5.4 kernel +./run-tests.sh 5.4 + +# Run a subset of tests: +./run-tests.sh 5.4 go test ./link +``` + diff --git a/vendor/github.com/cilium/ebpf/LICENSE b/vendor/github.com/cilium/ebpf/LICENSE new file mode 100644 index 000000000..c637ae99c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/LICENSE @@ -0,0 +1,23 @@ +MIT License + +Copyright (c) 2017 Nathan Sweet +Copyright (c) 2018, 2019 Cloudflare +Copyright (c) 2019 Authors of Cilium + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/cilium/ebpf/MAINTAINERS.md b/vendor/github.com/cilium/ebpf/MAINTAINERS.md new file mode 100644 index 000000000..9c18e7e76 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/MAINTAINERS.md @@ -0,0 +1,8 @@ +# Maintainers + + * [Lorenz Bauer] + * [Timo Beckers] (Isovalent) + + +[Lorenz Bauer]: https://github.com/lmb +[Timo Beckers]: https://github.com/ti-mo diff --git a/vendor/github.com/cilium/ebpf/Makefile b/vendor/github.com/cilium/ebpf/Makefile new file mode 100644 index 000000000..2d5f04c37 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/Makefile @@ -0,0 +1,110 @@ +# The development version of clang is distributed as the 'clang' binary, +# while stable/released versions have a version number attached. +# Pin the default clang to a stable version. +CLANG ?= clang-14 +STRIP ?= llvm-strip-14 +OBJCOPY ?= llvm-objcopy-14 +CFLAGS := -O2 -g -Wall -Werror $(CFLAGS) + +CI_KERNEL_URL ?= https://github.com/cilium/ci-kernels/raw/master/ + +# Obtain an absolute path to the directory of the Makefile. +# Assume the Makefile is in the root of the repository. +REPODIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +UIDGID := $(shell stat -c '%u:%g' ${REPODIR}) + +# Prefer podman if installed, otherwise use docker. +# Note: Setting the var at runtime will always override. +CONTAINER_ENGINE ?= $(if $(shell command -v podman), podman, docker) +CONTAINER_RUN_ARGS ?= $(if $(filter ${CONTAINER_ENGINE}, podman), --log-driver=none, --user "${UIDGID}") + +IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE) +VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION) + + +# clang <8 doesn't tag relocs properly (STT_NOTYPE) +# clang 9 is the first version emitting BTF +TARGETS := \ + testdata/loader-clang-7 \ + testdata/loader-clang-9 \ + testdata/loader-$(CLANG) \ + testdata/btf_map_init \ + testdata/invalid_map \ + testdata/raw_tracepoint \ + testdata/invalid_map_static \ + testdata/invalid_btf_map_init \ + testdata/strings \ + testdata/freplace \ + testdata/iproute2_map_compat \ + testdata/map_spin_lock \ + testdata/subprog_reloc \ + testdata/fwd_decl \ + btf/testdata/relocs \ + btf/testdata/relocs_read \ + btf/testdata/relocs_read_tgt + +.PHONY: all clean container-all container-shell generate + +.DEFAULT_TARGET = container-all + +# Build all ELF binaries using a containerized LLVM toolchain. +container-all: + ${CONTAINER_ENGINE} run --rm ${CONTAINER_RUN_ARGS} \ + -v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \ + --env CFLAGS="-fdebug-prefix-map=/ebpf=." \ + --env HOME="/tmp" \ + "${IMAGE}:${VERSION}" \ + $(MAKE) all + +# (debug) Drop the user into a shell inside the container as root. +container-shell: + ${CONTAINER_ENGINE} run --rm -ti \ + -v "${REPODIR}":/ebpf -w /ebpf \ + "${IMAGE}:${VERSION}" + +clean: + -$(RM) testdata/*.elf + -$(RM) btf/testdata/*.elf + +format: + find . -type f -name "*.c" | xargs clang-format -i + +all: format $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) generate + ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf + ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf + +# $BPF_CLANG is used in go:generate invocations. +generate: export BPF_CLANG := $(CLANG) +generate: export BPF_CFLAGS := $(CFLAGS) +generate: + go generate ./cmd/bpf2go/test + go generate ./internal/sys + cd examples/ && go generate ./... + +testdata/loader-%-el.elf: testdata/loader.c + $* $(CFLAGS) -target bpfel -c $< -o $@ + $(STRIP) -g $@ + +testdata/loader-%-eb.elf: testdata/loader.c + $* $(CFLAGS) -target bpfeb -c $< -o $@ + $(STRIP) -g $@ + +%-el.elf: %.c + $(CLANG) $(CFLAGS) -target bpfel -c $< -o $@ + $(STRIP) -g $@ + +%-eb.elf : %.c + $(CLANG) $(CFLAGS) -target bpfeb -c $< -o $@ + $(STRIP) -g $@ + +.PHONY: generate-btf +generate-btf: KERNEL_VERSION?=5.18 +generate-btf: + $(eval TMP := $(shell mktemp -d)) + curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION).bz" -o "$(TMP)/bzImage" + ./testdata/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux" + $(OBJCOPY) --dump-section .BTF=/dev/stdout "$(TMP)/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz" + curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-selftests-bpf.tgz" -o "$(TMP)/selftests.tgz" + tar -xf "$(TMP)/selftests.tgz" --to-stdout tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko | \ + $(OBJCOPY) --dump-section .BTF="btf/testdata/btf_testmod.btf" - /dev/null + $(RM) -r "$(TMP)" diff --git a/vendor/github.com/cilium/ebpf/README.md b/vendor/github.com/cilium/ebpf/README.md new file mode 100644 index 000000000..3e490de71 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/README.md @@ -0,0 +1,77 @@ +# eBPF + +[![PkgGoDev](https://pkg.go.dev/badge/github.com/cilium/ebpf)](https://pkg.go.dev/github.com/cilium/ebpf) + +![HoneyGopher](.github/images/cilium-ebpf.png) + +eBPF is a pure Go library that provides utilities for loading, compiling, and +debugging eBPF programs. It has minimal external dependencies and is intended to +be used in long running processes. + +The library is maintained by [Cloudflare](https://www.cloudflare.com) and +[Cilium](https://www.cilium.io). + +See [ebpf.io](https://ebpf.io) for other projects from the eBPF ecosystem. + +## Getting Started + +A small collection of Go and eBPF programs that serve as examples for building +your own tools can be found under [examples/](examples/). + +Contributions are highly encouraged, as they highlight certain use cases of +eBPF and the library, and help shape the future of the project. + +## Getting Help + +Please +[join](https://ebpf.io/slack) the +[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack if you +have questions regarding the library. + +## Packages + +This library includes the following packages: + +* [asm](https://pkg.go.dev/github.com/cilium/ebpf/asm) contains a basic + assembler, allowing you to write eBPF assembly instructions directly + within your Go code. (You don't need to use this if you prefer to write your eBPF program in C.) +* [cmd/bpf2go](https://pkg.go.dev/github.com/cilium/ebpf/cmd/bpf2go) allows + compiling and embedding eBPF programs written in C within Go code. As well as + compiling the C code, it auto-generates Go code for loading and manipulating + the eBPF program and map objects. +* [link](https://pkg.go.dev/github.com/cilium/ebpf/link) allows attaching eBPF + to various hooks +* [perf](https://pkg.go.dev/github.com/cilium/ebpf/perf) allows reading from a + `PERF_EVENT_ARRAY` +* [ringbuf](https://pkg.go.dev/github.com/cilium/ebpf/ringbuf) allows reading from a + `BPF_MAP_TYPE_RINGBUF` map +* [features](https://pkg.go.dev/github.com/cilium/ebpf/features) implements the equivalent + of `bpftool feature probe` for discovering BPF-related kernel features using native Go. +* [rlimit](https://pkg.go.dev/github.com/cilium/ebpf/rlimit) provides a convenient API to lift + the `RLIMIT_MEMLOCK` constraint on kernels before 5.11. + +## Requirements + +* A version of Go that is [supported by + upstream](https://golang.org/doc/devel/release.html#policy) +* Linux >= 4.9. CI is run against kernel.org LTS releases. 4.4 should work but is + not tested against. + +## Regenerating Testdata + +Run `make` in the root of this repository to rebuild testdata in all +subpackages. This requires Docker, as it relies on a standardized build +environment to keep the build output stable. + +It is possible to regenerate data using Podman by overriding the `CONTAINER_*` +variables: `CONTAINER_ENGINE=podman CONTAINER_RUN_ARGS= make`. + +The toolchain image build files are kept in [testdata/docker/](testdata/docker/). + +## License + +MIT + +### eBPF Gopher + +The eBPF honeygopher is based on the Go gopher designed by Renee French. diff --git a/vendor/github.com/cilium/ebpf/asm/alu.go b/vendor/github.com/cilium/ebpf/asm/alu.go new file mode 100644 index 000000000..70ccc4d15 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/alu.go @@ -0,0 +1,149 @@ +package asm + +//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp + +// Source of ALU / ALU64 / Branch operations +// +// msb lsb +// +----+-+---+ +// |op |S|cls| +// +----+-+---+ +type Source uint8 + +const sourceMask OpCode = 0x08 + +// Source bitmask +const ( + // InvalidSource is returned by getters when invoked + // on non ALU / branch OpCodes. + InvalidSource Source = 0xff + // ImmSource src is from constant + ImmSource Source = 0x00 + // RegSource src is from register + RegSource Source = 0x08 +) + +// The Endianness of a byte swap instruction. +type Endianness uint8 + +const endianMask = sourceMask + +// Endian flags +const ( + InvalidEndian Endianness = 0xff + // Convert to little endian + LE Endianness = 0x00 + // Convert to big endian + BE Endianness = 0x08 +) + +// ALUOp are ALU / ALU64 operations +// +// msb lsb +// +----+-+---+ +// |OP |s|cls| +// +----+-+---+ +type ALUOp uint8 + +const aluMask OpCode = 0xf0 + +const ( + // InvalidALUOp is returned by getters when invoked + // on non ALU OpCodes + InvalidALUOp ALUOp = 0xff + // Add - addition + Add ALUOp = 0x00 + // Sub - subtraction + Sub ALUOp = 0x10 + // Mul - multiplication + Mul ALUOp = 0x20 + // Div - division + Div ALUOp = 0x30 + // Or - bitwise or + Or ALUOp = 0x40 + // And - bitwise and + And ALUOp = 0x50 + // LSh - bitwise shift left + LSh ALUOp = 0x60 + // RSh - bitwise shift right + RSh ALUOp = 0x70 + // Neg - sign/unsign signing bit + Neg ALUOp = 0x80 + // Mod - modulo + Mod ALUOp = 0x90 + // Xor - bitwise xor + Xor ALUOp = 0xa0 + // Mov - move value from one place to another + Mov ALUOp = 0xb0 + // ArSh - arithmatic shift + ArSh ALUOp = 0xc0 + // Swap - endian conversions + Swap ALUOp = 0xd0 +) + +// HostTo converts from host to another endianness. +func HostTo(endian Endianness, dst Register, size Size) Instruction { + var imm int64 + switch size { + case Half: + imm = 16 + case Word: + imm = 32 + case DWord: + imm = 64 + default: + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)), + Dst: dst, + Constant: imm, + } +} + +// Op returns the OpCode for an ALU operation with a given source. +func (op ALUOp) Op(source Source) OpCode { + return OpCode(ALU64Class).SetALUOp(op).SetSource(source) +} + +// Reg emits `dst (op) src`. +func (op ALUOp) Reg(dst, src Register) Instruction { + return Instruction{ + OpCode: op.Op(RegSource), + Dst: dst, + Src: src, + } +} + +// Imm emits `dst (op) value`. +func (op ALUOp) Imm(dst Register, value int32) Instruction { + return Instruction{ + OpCode: op.Op(ImmSource), + Dst: dst, + Constant: int64(value), + } +} + +// Op32 returns the OpCode for a 32-bit ALU operation with a given source. +func (op ALUOp) Op32(source Source) OpCode { + return OpCode(ALUClass).SetALUOp(op).SetSource(source) +} + +// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst. +func (op ALUOp) Reg32(dst, src Register) Instruction { + return Instruction{ + OpCode: op.Op32(RegSource), + Dst: dst, + Src: src, + } +} + +// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst. +func (op ALUOp) Imm32(dst Register, value int32) Instruction { + return Instruction{ + OpCode: op.Op32(ImmSource), + Dst: dst, + Constant: int64(value), + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/alu_string.go b/vendor/github.com/cilium/ebpf/asm/alu_string.go new file mode 100644 index 000000000..72d3fe629 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/alu_string.go @@ -0,0 +1,107 @@ +// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidSource-255] + _ = x[ImmSource-0] + _ = x[RegSource-8] +} + +const ( + _Source_name_0 = "ImmSource" + _Source_name_1 = "RegSource" + _Source_name_2 = "InvalidSource" +) + +func (i Source) String() string { + switch { + case i == 0: + return _Source_name_0 + case i == 8: + return _Source_name_1 + case i == 255: + return _Source_name_2 + default: + return "Source(" + strconv.FormatInt(int64(i), 10) + ")" + } +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidEndian-255] + _ = x[LE-0] + _ = x[BE-8] +} + +const ( + _Endianness_name_0 = "LE" + _Endianness_name_1 = "BE" + _Endianness_name_2 = "InvalidEndian" +) + +func (i Endianness) String() string { + switch { + case i == 0: + return _Endianness_name_0 + case i == 8: + return _Endianness_name_1 + case i == 255: + return _Endianness_name_2 + default: + return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")" + } +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidALUOp-255] + _ = x[Add-0] + _ = x[Sub-16] + _ = x[Mul-32] + _ = x[Div-48] + _ = x[Or-64] + _ = x[And-80] + _ = x[LSh-96] + _ = x[RSh-112] + _ = x[Neg-128] + _ = x[Mod-144] + _ = x[Xor-160] + _ = x[Mov-176] + _ = x[ArSh-192] + _ = x[Swap-208] +} + +const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp" + +var _ALUOp_map = map[ALUOp]string{ + 0: _ALUOp_name[0:3], + 16: _ALUOp_name[3:6], + 32: _ALUOp_name[6:9], + 48: _ALUOp_name[9:12], + 64: _ALUOp_name[12:14], + 80: _ALUOp_name[14:17], + 96: _ALUOp_name[17:20], + 112: _ALUOp_name[20:23], + 128: _ALUOp_name[23:26], + 144: _ALUOp_name[26:29], + 160: _ALUOp_name[29:32], + 176: _ALUOp_name[32:35], + 192: _ALUOp_name[35:39], + 208: _ALUOp_name[39:43], + 255: _ALUOp_name[43:55], +} + +func (i ALUOp) String() string { + if str, ok := _ALUOp_map[i]; ok { + return str + } + return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")" +} diff --git a/vendor/github.com/cilium/ebpf/asm/doc.go b/vendor/github.com/cilium/ebpf/asm/doc.go new file mode 100644 index 000000000..7031bdc27 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/doc.go @@ -0,0 +1,2 @@ +// Package asm is an assembler for eBPF bytecode. +package asm diff --git a/vendor/github.com/cilium/ebpf/asm/func.go b/vendor/github.com/cilium/ebpf/asm/func.go new file mode 100644 index 000000000..a14e9e2c3 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/func.go @@ -0,0 +1,242 @@ +package asm + +//go:generate stringer -output func_string.go -type=BuiltinFunc + +// BuiltinFunc is a built-in eBPF function. +type BuiltinFunc int32 + +func (_ BuiltinFunc) Max() BuiltinFunc { + return maxBuiltinFunc - 1 +} + +// eBPF built-in functions +// +// You can regenerate this list using the following gawk script: +// +// /FN\(.+\),/ { +// match($1, /\((.+)\)/, r) +// split(r[1], p, "_") +// printf "Fn" +// for (i in p) { +// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2) +// } +// print "" +// } +// +// The script expects include/uapi/linux/bpf.h as it's input. +const ( + FnUnspec BuiltinFunc = iota + FnMapLookupElem + FnMapUpdateElem + FnMapDeleteElem + FnProbeRead + FnKtimeGetNs + FnTracePrintk + FnGetPrandomU32 + FnGetSmpProcessorId + FnSkbStoreBytes + FnL3CsumReplace + FnL4CsumReplace + FnTailCall + FnCloneRedirect + FnGetCurrentPidTgid + FnGetCurrentUidGid + FnGetCurrentComm + FnGetCgroupClassid + FnSkbVlanPush + FnSkbVlanPop + FnSkbGetTunnelKey + FnSkbSetTunnelKey + FnPerfEventRead + FnRedirect + FnGetRouteRealm + FnPerfEventOutput + FnSkbLoadBytes + FnGetStackid + FnCsumDiff + FnSkbGetTunnelOpt + FnSkbSetTunnelOpt + FnSkbChangeProto + FnSkbChangeType + FnSkbUnderCgroup + FnGetHashRecalc + FnGetCurrentTask + FnProbeWriteUser + FnCurrentTaskUnderCgroup + FnSkbChangeTail + FnSkbPullData + FnCsumUpdate + FnSetHashInvalid + FnGetNumaNodeId + FnSkbChangeHead + FnXdpAdjustHead + FnProbeReadStr + FnGetSocketCookie + FnGetSocketUid + FnSetHash + FnSetsockopt + FnSkbAdjustRoom + FnRedirectMap + FnSkRedirectMap + FnSockMapUpdate + FnXdpAdjustMeta + FnPerfEventReadValue + FnPerfProgReadValue + FnGetsockopt + FnOverrideReturn + FnSockOpsCbFlagsSet + FnMsgRedirectMap + FnMsgApplyBytes + FnMsgCorkBytes + FnMsgPullData + FnBind + FnXdpAdjustTail + FnSkbGetXfrmState + FnGetStack + FnSkbLoadBytesRelative + FnFibLookup + FnSockHashUpdate + FnMsgRedirectHash + FnSkRedirectHash + FnLwtPushEncap + FnLwtSeg6StoreBytes + FnLwtSeg6AdjustSrh + FnLwtSeg6Action + FnRcRepeat + FnRcKeydown + FnSkbCgroupId + FnGetCurrentCgroupId + FnGetLocalStorage + FnSkSelectReuseport + FnSkbAncestorCgroupId + FnSkLookupTcp + FnSkLookupUdp + FnSkRelease + FnMapPushElem + FnMapPopElem + FnMapPeekElem + FnMsgPushData + FnMsgPopData + FnRcPointerRel + FnSpinLock + FnSpinUnlock + FnSkFullsock + FnTcpSock + FnSkbEcnSetCe + FnGetListenerSock + FnSkcLookupTcp + FnTcpCheckSyncookie + FnSysctlGetName + FnSysctlGetCurrentValue + FnSysctlGetNewValue + FnSysctlSetNewValue + FnStrtol + FnStrtoul + FnSkStorageGet + FnSkStorageDelete + FnSendSignal + FnTcpGenSyncookie + FnSkbOutput + FnProbeReadUser + FnProbeReadKernel + FnProbeReadUserStr + FnProbeReadKernelStr + FnTcpSendAck + FnSendSignalThread + FnJiffies64 + FnReadBranchRecords + FnGetNsCurrentPidTgid + FnXdpOutput + FnGetNetnsCookie + FnGetCurrentAncestorCgroupId + FnSkAssign + FnKtimeGetBootNs + FnSeqPrintf + FnSeqWrite + FnSkCgroupId + FnSkAncestorCgroupId + FnRingbufOutput + FnRingbufReserve + FnRingbufSubmit + FnRingbufDiscard + FnRingbufQuery + FnCsumLevel + FnSkcToTcp6Sock + FnSkcToTcpSock + FnSkcToTcpTimewaitSock + FnSkcToTcpRequestSock + FnSkcToUdp6Sock + FnGetTaskStack + FnLoadHdrOpt + FnStoreHdrOpt + FnReserveHdrOpt + FnInodeStorageGet + FnInodeStorageDelete + FnDPath + FnCopyFromUser + FnSnprintfBtf + FnSeqPrintfBtf + FnSkbCgroupClassid + FnRedirectNeigh + FnPerCpuPtr + FnThisCpuPtr + FnRedirectPeer + FnTaskStorageGet + FnTaskStorageDelete + FnGetCurrentTaskBtf + FnBprmOptsSet + FnKtimeGetCoarseNs + FnImaInodeHash + FnSockFromFile + FnCheckMtu + FnForEachMapElem + FnSnprintf + FnSysBpf + FnBtfFindByNameKind + FnSysClose + FnTimerInit + FnTimerSetCallback + FnTimerStart + FnTimerCancel + FnGetFuncIp + FnGetAttachCookie + FnTaskPtRegs + FnGetBranchSnapshot + FnTraceVprintk + FnSkcToUnixSock + FnKallsymsLookupName + FnFindVma + FnLoop + FnStrncmp + FnGetFuncArg + FnGetFuncRet + FnGetFuncArgCnt + FnGetRetval + FnSetRetval + FnXdpGetBuffLen + FnXdpLoadBytes + FnXdpStoreBytes + FnCopyFromUserTask + FnSkbSetTstamp + FnImaFileHash + FnKptrXchg + FnMapLookupPercpuElem + FnSkcToMptcpSock + FnDynptrFromMem + FnRingbufReserveDynptr + FnRingbufSubmitDynptr + FnRingbufDiscardDynptr + FnDynptrRead + FnDynptrWrite + FnDynptrData + + maxBuiltinFunc +) + +// Call emits a function call. +func (fn BuiltinFunc) Call() Instruction { + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(Call), + Constant: int64(fn), + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/func_string.go b/vendor/github.com/cilium/ebpf/asm/func_string.go new file mode 100644 index 000000000..b7431b7f6 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/func_string.go @@ -0,0 +1,227 @@ +// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[FnUnspec-0] + _ = x[FnMapLookupElem-1] + _ = x[FnMapUpdateElem-2] + _ = x[FnMapDeleteElem-3] + _ = x[FnProbeRead-4] + _ = x[FnKtimeGetNs-5] + _ = x[FnTracePrintk-6] + _ = x[FnGetPrandomU32-7] + _ = x[FnGetSmpProcessorId-8] + _ = x[FnSkbStoreBytes-9] + _ = x[FnL3CsumReplace-10] + _ = x[FnL4CsumReplace-11] + _ = x[FnTailCall-12] + _ = x[FnCloneRedirect-13] + _ = x[FnGetCurrentPidTgid-14] + _ = x[FnGetCurrentUidGid-15] + _ = x[FnGetCurrentComm-16] + _ = x[FnGetCgroupClassid-17] + _ = x[FnSkbVlanPush-18] + _ = x[FnSkbVlanPop-19] + _ = x[FnSkbGetTunnelKey-20] + _ = x[FnSkbSetTunnelKey-21] + _ = x[FnPerfEventRead-22] + _ = x[FnRedirect-23] + _ = x[FnGetRouteRealm-24] + _ = x[FnPerfEventOutput-25] + _ = x[FnSkbLoadBytes-26] + _ = x[FnGetStackid-27] + _ = x[FnCsumDiff-28] + _ = x[FnSkbGetTunnelOpt-29] + _ = x[FnSkbSetTunnelOpt-30] + _ = x[FnSkbChangeProto-31] + _ = x[FnSkbChangeType-32] + _ = x[FnSkbUnderCgroup-33] + _ = x[FnGetHashRecalc-34] + _ = x[FnGetCurrentTask-35] + _ = x[FnProbeWriteUser-36] + _ = x[FnCurrentTaskUnderCgroup-37] + _ = x[FnSkbChangeTail-38] + _ = x[FnSkbPullData-39] + _ = x[FnCsumUpdate-40] + _ = x[FnSetHashInvalid-41] + _ = x[FnGetNumaNodeId-42] + _ = x[FnSkbChangeHead-43] + _ = x[FnXdpAdjustHead-44] + _ = x[FnProbeReadStr-45] + _ = x[FnGetSocketCookie-46] + _ = x[FnGetSocketUid-47] + _ = x[FnSetHash-48] + _ = x[FnSetsockopt-49] + _ = x[FnSkbAdjustRoom-50] + _ = x[FnRedirectMap-51] + _ = x[FnSkRedirectMap-52] + _ = x[FnSockMapUpdate-53] + _ = x[FnXdpAdjustMeta-54] + _ = x[FnPerfEventReadValue-55] + _ = x[FnPerfProgReadValue-56] + _ = x[FnGetsockopt-57] + _ = x[FnOverrideReturn-58] + _ = x[FnSockOpsCbFlagsSet-59] + _ = x[FnMsgRedirectMap-60] + _ = x[FnMsgApplyBytes-61] + _ = x[FnMsgCorkBytes-62] + _ = x[FnMsgPullData-63] + _ = x[FnBind-64] + _ = x[FnXdpAdjustTail-65] + _ = x[FnSkbGetXfrmState-66] + _ = x[FnGetStack-67] + _ = x[FnSkbLoadBytesRelative-68] + _ = x[FnFibLookup-69] + _ = x[FnSockHashUpdate-70] + _ = x[FnMsgRedirectHash-71] + _ = x[FnSkRedirectHash-72] + _ = x[FnLwtPushEncap-73] + _ = x[FnLwtSeg6StoreBytes-74] + _ = x[FnLwtSeg6AdjustSrh-75] + _ = x[FnLwtSeg6Action-76] + _ = x[FnRcRepeat-77] + _ = x[FnRcKeydown-78] + _ = x[FnSkbCgroupId-79] + _ = x[FnGetCurrentCgroupId-80] + _ = x[FnGetLocalStorage-81] + _ = x[FnSkSelectReuseport-82] + _ = x[FnSkbAncestorCgroupId-83] + _ = x[FnSkLookupTcp-84] + _ = x[FnSkLookupUdp-85] + _ = x[FnSkRelease-86] + _ = x[FnMapPushElem-87] + _ = x[FnMapPopElem-88] + _ = x[FnMapPeekElem-89] + _ = x[FnMsgPushData-90] + _ = x[FnMsgPopData-91] + _ = x[FnRcPointerRel-92] + _ = x[FnSpinLock-93] + _ = x[FnSpinUnlock-94] + _ = x[FnSkFullsock-95] + _ = x[FnTcpSock-96] + _ = x[FnSkbEcnSetCe-97] + _ = x[FnGetListenerSock-98] + _ = x[FnSkcLookupTcp-99] + _ = x[FnTcpCheckSyncookie-100] + _ = x[FnSysctlGetName-101] + _ = x[FnSysctlGetCurrentValue-102] + _ = x[FnSysctlGetNewValue-103] + _ = x[FnSysctlSetNewValue-104] + _ = x[FnStrtol-105] + _ = x[FnStrtoul-106] + _ = x[FnSkStorageGet-107] + _ = x[FnSkStorageDelete-108] + _ = x[FnSendSignal-109] + _ = x[FnTcpGenSyncookie-110] + _ = x[FnSkbOutput-111] + _ = x[FnProbeReadUser-112] + _ = x[FnProbeReadKernel-113] + _ = x[FnProbeReadUserStr-114] + _ = x[FnProbeReadKernelStr-115] + _ = x[FnTcpSendAck-116] + _ = x[FnSendSignalThread-117] + _ = x[FnJiffies64-118] + _ = x[FnReadBranchRecords-119] + _ = x[FnGetNsCurrentPidTgid-120] + _ = x[FnXdpOutput-121] + _ = x[FnGetNetnsCookie-122] + _ = x[FnGetCurrentAncestorCgroupId-123] + _ = x[FnSkAssign-124] + _ = x[FnKtimeGetBootNs-125] + _ = x[FnSeqPrintf-126] + _ = x[FnSeqWrite-127] + _ = x[FnSkCgroupId-128] + _ = x[FnSkAncestorCgroupId-129] + _ = x[FnRingbufOutput-130] + _ = x[FnRingbufReserve-131] + _ = x[FnRingbufSubmit-132] + _ = x[FnRingbufDiscard-133] + _ = x[FnRingbufQuery-134] + _ = x[FnCsumLevel-135] + _ = x[FnSkcToTcp6Sock-136] + _ = x[FnSkcToTcpSock-137] + _ = x[FnSkcToTcpTimewaitSock-138] + _ = x[FnSkcToTcpRequestSock-139] + _ = x[FnSkcToUdp6Sock-140] + _ = x[FnGetTaskStack-141] + _ = x[FnLoadHdrOpt-142] + _ = x[FnStoreHdrOpt-143] + _ = x[FnReserveHdrOpt-144] + _ = x[FnInodeStorageGet-145] + _ = x[FnInodeStorageDelete-146] + _ = x[FnDPath-147] + _ = x[FnCopyFromUser-148] + _ = x[FnSnprintfBtf-149] + _ = x[FnSeqPrintfBtf-150] + _ = x[FnSkbCgroupClassid-151] + _ = x[FnRedirectNeigh-152] + _ = x[FnPerCpuPtr-153] + _ = x[FnThisCpuPtr-154] + _ = x[FnRedirectPeer-155] + _ = x[FnTaskStorageGet-156] + _ = x[FnTaskStorageDelete-157] + _ = x[FnGetCurrentTaskBtf-158] + _ = x[FnBprmOptsSet-159] + _ = x[FnKtimeGetCoarseNs-160] + _ = x[FnImaInodeHash-161] + _ = x[FnSockFromFile-162] + _ = x[FnCheckMtu-163] + _ = x[FnForEachMapElem-164] + _ = x[FnSnprintf-165] + _ = x[FnSysBpf-166] + _ = x[FnBtfFindByNameKind-167] + _ = x[FnSysClose-168] + _ = x[FnTimerInit-169] + _ = x[FnTimerSetCallback-170] + _ = x[FnTimerStart-171] + _ = x[FnTimerCancel-172] + _ = x[FnGetFuncIp-173] + _ = x[FnGetAttachCookie-174] + _ = x[FnTaskPtRegs-175] + _ = x[FnGetBranchSnapshot-176] + _ = x[FnTraceVprintk-177] + _ = x[FnSkcToUnixSock-178] + _ = x[FnKallsymsLookupName-179] + _ = x[FnFindVma-180] + _ = x[FnLoop-181] + _ = x[FnStrncmp-182] + _ = x[FnGetFuncArg-183] + _ = x[FnGetFuncRet-184] + _ = x[FnGetFuncArgCnt-185] + _ = x[FnGetRetval-186] + _ = x[FnSetRetval-187] + _ = x[FnXdpGetBuffLen-188] + _ = x[FnXdpLoadBytes-189] + _ = x[FnXdpStoreBytes-190] + _ = x[FnCopyFromUserTask-191] + _ = x[FnSkbSetTstamp-192] + _ = x[FnImaFileHash-193] + _ = x[FnKptrXchg-194] + _ = x[FnMapLookupPercpuElem-195] + _ = x[FnSkcToMptcpSock-196] + _ = x[FnDynptrFromMem-197] + _ = x[FnRingbufReserveDynptr-198] + _ = x[FnRingbufSubmitDynptr-199] + _ = x[FnRingbufDiscardDynptr-200] + _ = x[FnDynptrRead-201] + _ = x[FnDynptrWrite-202] + _ = x[FnDynptrData-203] + _ = x[maxBuiltinFunc-204] +} + +const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStartFnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDatamaxBuiltinFunc" + +var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3011} + +func (i BuiltinFunc) String() string { + if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) { + return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]] +} diff --git a/vendor/github.com/cilium/ebpf/asm/instruction.go b/vendor/github.com/cilium/ebpf/asm/instruction.go new file mode 100644 index 000000000..f17d88b51 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/instruction.go @@ -0,0 +1,859 @@ +package asm + +import ( + "crypto/sha1" + "encoding/binary" + "encoding/hex" + "errors" + "fmt" + "io" + "math" + "sort" + "strings" + + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// InstructionSize is the size of a BPF instruction in bytes +const InstructionSize = 8 + +// RawInstructionOffset is an offset in units of raw BPF instructions. +type RawInstructionOffset uint64 + +var ErrUnreferencedSymbol = errors.New("unreferenced symbol") +var ErrUnsatisfiedMapReference = errors.New("unsatisfied map reference") +var ErrUnsatisfiedProgramReference = errors.New("unsatisfied program reference") + +// Bytes returns the offset of an instruction in bytes. +func (rio RawInstructionOffset) Bytes() uint64 { + return uint64(rio) * InstructionSize +} + +// Instruction is a single eBPF instruction. +type Instruction struct { + OpCode OpCode + Dst Register + Src Register + Offset int16 + Constant int64 + + // Metadata contains optional metadata about this instruction. + Metadata Metadata +} + +// Unmarshal decodes a BPF instruction. +func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) { + data := make([]byte, InstructionSize) + if _, err := io.ReadFull(r, data); err != nil { + return 0, err + } + + ins.OpCode = OpCode(data[0]) + + regs := data[1] + switch bo { + case binary.LittleEndian: + ins.Dst, ins.Src = Register(regs&0xF), Register(regs>>4) + case binary.BigEndian: + ins.Dst, ins.Src = Register(regs>>4), Register(regs&0xf) + } + + ins.Offset = int16(bo.Uint16(data[2:4])) + // Convert to int32 before widening to int64 + // to ensure the signed bit is carried over. + ins.Constant = int64(int32(bo.Uint32(data[4:8]))) + + if !ins.OpCode.IsDWordLoad() { + return InstructionSize, nil + } + + // Pull another instruction from the stream to retrieve the second + // half of the 64-bit immediate value. + if _, err := io.ReadFull(r, data); err != nil { + // No Wrap, to avoid io.EOF clash + return 0, errors.New("64bit immediate is missing second half") + } + + // Require that all fields other than the value are zero. + if bo.Uint32(data[0:4]) != 0 { + return 0, errors.New("64bit immediate has non-zero fields") + } + + cons1 := uint32(ins.Constant) + cons2 := int32(bo.Uint32(data[4:8])) + ins.Constant = int64(cons2)<<32 | int64(cons1) + + return 2 * InstructionSize, nil +} + +// Marshal encodes a BPF instruction. +func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) { + if ins.OpCode == InvalidOpCode { + return 0, errors.New("invalid opcode") + } + + isDWordLoad := ins.OpCode.IsDWordLoad() + + cons := int32(ins.Constant) + if isDWordLoad { + // Encode least significant 32bit first for 64bit operations. + cons = int32(uint32(ins.Constant)) + } + + regs, err := newBPFRegisters(ins.Dst, ins.Src, bo) + if err != nil { + return 0, fmt.Errorf("can't marshal registers: %s", err) + } + + data := make([]byte, InstructionSize) + data[0] = byte(ins.OpCode) + data[1] = byte(regs) + bo.PutUint16(data[2:4], uint16(ins.Offset)) + bo.PutUint32(data[4:8], uint32(cons)) + if _, err := w.Write(data); err != nil { + return 0, err + } + + if !isDWordLoad { + return InstructionSize, nil + } + + // The first half of the second part of a double-wide instruction + // must be zero. The second half carries the value. + bo.PutUint32(data[0:4], 0) + bo.PutUint32(data[4:8], uint32(ins.Constant>>32)) + if _, err := w.Write(data); err != nil { + return 0, err + } + + return 2 * InstructionSize, nil +} + +// AssociateMap associates a Map with this Instruction. +// +// Implicitly clears the Instruction's Reference field. +// +// Returns an error if the Instruction is not a map load. +func (ins *Instruction) AssociateMap(m FDer) error { + if !ins.IsLoadFromMap() { + return errors.New("not a load from a map") + } + + ins.Metadata.Set(referenceMeta{}, nil) + ins.Metadata.Set(mapMeta{}, m) + + return nil +} + +// RewriteMapPtr changes an instruction to use a new map fd. +// +// Returns an error if the instruction doesn't load a map. +// +// Deprecated: use AssociateMap instead. If you cannot provide a Map, +// wrap an fd in a type implementing FDer. +func (ins *Instruction) RewriteMapPtr(fd int) error { + if !ins.IsLoadFromMap() { + return errors.New("not a load from a map") + } + + ins.encodeMapFD(fd) + + return nil +} + +func (ins *Instruction) encodeMapFD(fd int) { + // Preserve the offset value for direct map loads. + offset := uint64(ins.Constant) & (math.MaxUint32 << 32) + rawFd := uint64(uint32(fd)) + ins.Constant = int64(offset | rawFd) +} + +// MapPtr returns the map fd for this instruction. +// +// The result is undefined if the instruction is not a load from a map, +// see IsLoadFromMap. +// +// Deprecated: use Map() instead. +func (ins *Instruction) MapPtr() int { + // If there is a map associated with the instruction, return its FD. + if fd := ins.Metadata.Get(mapMeta{}); fd != nil { + return fd.(FDer).FD() + } + + // Fall back to the fd stored in the Constant field + return ins.mapFd() +} + +// mapFd returns the map file descriptor stored in the 32 least significant +// bits of ins' Constant field. +func (ins *Instruction) mapFd() int { + return int(int32(ins.Constant)) +} + +// RewriteMapOffset changes the offset of a direct load from a map. +// +// Returns an error if the instruction is not a direct load. +func (ins *Instruction) RewriteMapOffset(offset uint32) error { + if !ins.OpCode.IsDWordLoad() { + return fmt.Errorf("%s is not a 64 bit load", ins.OpCode) + } + + if ins.Src != PseudoMapValue { + return errors.New("not a direct load from a map") + } + + fd := uint64(ins.Constant) & math.MaxUint32 + ins.Constant = int64(uint64(offset)<<32 | fd) + return nil +} + +func (ins *Instruction) mapOffset() uint32 { + return uint32(uint64(ins.Constant) >> 32) +} + +// IsLoadFromMap returns true if the instruction loads from a map. +// +// This covers both loading the map pointer and direct map value loads. +func (ins *Instruction) IsLoadFromMap() bool { + return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue) +} + +// IsFunctionCall returns true if the instruction calls another BPF function. +// +// This is not the same thing as a BPF helper call. +func (ins *Instruction) IsFunctionCall() bool { + return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall +} + +// IsLoadOfFunctionPointer returns true if the instruction loads a function pointer. +func (ins *Instruction) IsLoadOfFunctionPointer() bool { + return ins.OpCode.IsDWordLoad() && ins.Src == PseudoFunc +} + +// IsFunctionReference returns true if the instruction references another BPF +// function, either by invoking a Call jump operation or by loading a function +// pointer. +func (ins *Instruction) IsFunctionReference() bool { + return ins.IsFunctionCall() || ins.IsLoadOfFunctionPointer() +} + +// IsBuiltinCall returns true if the instruction is a built-in call, i.e. BPF helper call. +func (ins *Instruction) IsBuiltinCall() bool { + return ins.OpCode.JumpOp() == Call && ins.Src == R0 && ins.Dst == R0 +} + +// IsConstantLoad returns true if the instruction loads a constant of the +// given size. +func (ins *Instruction) IsConstantLoad(size Size) bool { + return ins.OpCode == LoadImmOp(size) && ins.Src == R0 && ins.Offset == 0 +} + +// Format implements fmt.Formatter. +func (ins Instruction) Format(f fmt.State, c rune) { + if c != 'v' { + fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c) + return + } + + op := ins.OpCode + + if op == InvalidOpCode { + fmt.Fprint(f, "INVALID") + return + } + + // Omit trailing space for Exit + if op.JumpOp() == Exit { + fmt.Fprint(f, op) + return + } + + if ins.IsLoadFromMap() { + fd := ins.mapFd() + m := ins.Map() + switch ins.Src { + case PseudoMapFD: + if m != nil { + fmt.Fprintf(f, "LoadMapPtr dst: %s map: %s", ins.Dst, m) + } else { + fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd) + } + + case PseudoMapValue: + if m != nil { + fmt.Fprintf(f, "LoadMapValue dst: %s, map: %s off: %d", ins.Dst, m, ins.mapOffset()) + } else { + fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset()) + } + } + + goto ref + } + + fmt.Fprintf(f, "%v ", op) + switch cls := op.Class(); { + case cls.isLoadOrStore(): + switch op.Mode() { + case ImmMode: + fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant) + case AbsMode: + fmt.Fprintf(f, "imm: %d", ins.Constant) + case IndMode: + fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant) + case MemMode: + fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant) + case XAddMode: + fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src) + } + + case cls.IsALU(): + fmt.Fprintf(f, "dst: %s ", ins.Dst) + if op.ALUOp() == Swap || op.Source() == ImmSource { + fmt.Fprintf(f, "imm: %d", ins.Constant) + } else { + fmt.Fprintf(f, "src: %s", ins.Src) + } + + case cls.IsJump(): + switch jop := op.JumpOp(); jop { + case Call: + if ins.Src == PseudoCall { + // bpf-to-bpf call + fmt.Fprint(f, ins.Constant) + } else { + fmt.Fprint(f, BuiltinFunc(ins.Constant)) + } + + default: + fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset) + if op.Source() == ImmSource { + fmt.Fprintf(f, "imm: %d", ins.Constant) + } else { + fmt.Fprintf(f, "src: %s", ins.Src) + } + } + } + +ref: + if ins.Reference() != "" { + fmt.Fprintf(f, " <%s>", ins.Reference()) + } +} + +func (ins Instruction) equal(other Instruction) bool { + return ins.OpCode == other.OpCode && + ins.Dst == other.Dst && + ins.Src == other.Src && + ins.Offset == other.Offset && + ins.Constant == other.Constant +} + +// Size returns the amount of bytes ins would occupy in binary form. +func (ins Instruction) Size() uint64 { + return uint64(InstructionSize * ins.OpCode.rawInstructions()) +} + +type symbolMeta struct{} + +// WithSymbol marks the Instruction as a Symbol, which other Instructions +// can point to using corresponding calls to WithReference. +func (ins Instruction) WithSymbol(name string) Instruction { + ins.Metadata.Set(symbolMeta{}, name) + return ins +} + +// Sym creates a symbol. +// +// Deprecated: use WithSymbol instead. +func (ins Instruction) Sym(name string) Instruction { + return ins.WithSymbol(name) +} + +// Symbol returns the value ins has been marked with using WithSymbol, +// otherwise returns an empty string. A symbol is often an Instruction +// at the start of a function body. +func (ins Instruction) Symbol() string { + sym, _ := ins.Metadata.Get(symbolMeta{}).(string) + return sym +} + +type referenceMeta struct{} + +// WithReference makes ins reference another Symbol or map by name. +func (ins Instruction) WithReference(ref string) Instruction { + ins.Metadata.Set(referenceMeta{}, ref) + return ins +} + +// Reference returns the Symbol or map name referenced by ins, if any. +func (ins Instruction) Reference() string { + ref, _ := ins.Metadata.Get(referenceMeta{}).(string) + return ref +} + +type mapMeta struct{} + +// Map returns the Map referenced by ins, if any. +// An Instruction will contain a Map if e.g. it references an existing, +// pinned map that was opened during ELF loading. +func (ins Instruction) Map() FDer { + fd, _ := ins.Metadata.Get(mapMeta{}).(FDer) + return fd +} + +type sourceMeta struct{} + +// WithSource adds source information about the Instruction. +func (ins Instruction) WithSource(src fmt.Stringer) Instruction { + ins.Metadata.Set(sourceMeta{}, src) + return ins +} + +// Source returns source information about the Instruction. The field is +// present when the compiler emits BTF line info about the Instruction and +// usually contains the line of source code responsible for it. +func (ins Instruction) Source() fmt.Stringer { + str, _ := ins.Metadata.Get(sourceMeta{}).(fmt.Stringer) + return str +} + +// A Comment can be passed to Instruction.WithSource to add a comment +// to an instruction. +type Comment string + +func (s Comment) String() string { + return string(s) +} + +// FDer represents a resource tied to an underlying file descriptor. +// Used as a stand-in for e.g. ebpf.Map since that type cannot be +// imported here and FD() is the only method we rely on. +type FDer interface { + FD() int +} + +// Instructions is an eBPF program. +type Instructions []Instruction + +// Unmarshal unmarshals an Instructions from a binary instruction stream. +// All instructions in insns are replaced by instructions decoded from r. +func (insns *Instructions) Unmarshal(r io.Reader, bo binary.ByteOrder) error { + if len(*insns) > 0 { + *insns = nil + } + + var offset uint64 + for { + var ins Instruction + n, err := ins.Unmarshal(r, bo) + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("offset %d: %w", offset, err) + } + + *insns = append(*insns, ins) + offset += n + } + + return nil +} + +// Name returns the name of the function insns belongs to, if any. +func (insns Instructions) Name() string { + if len(insns) == 0 { + return "" + } + return insns[0].Symbol() +} + +func (insns Instructions) String() string { + return fmt.Sprint(insns) +} + +// Size returns the amount of bytes insns would occupy in binary form. +func (insns Instructions) Size() uint64 { + var sum uint64 + for _, ins := range insns { + sum += ins.Size() + } + return sum +} + +// AssociateMap updates all Instructions that Reference the given symbol +// to point to an existing Map m instead. +// +// Returns ErrUnreferencedSymbol error if no references to symbol are found +// in insns. If symbol is anything else than the symbol name of map (e.g. +// a bpf2bpf subprogram), an error is returned. +func (insns Instructions) AssociateMap(symbol string, m FDer) error { + if symbol == "" { + return errors.New("empty symbol") + } + + var found bool + for i := range insns { + ins := &insns[i] + if ins.Reference() != symbol { + continue + } + + if err := ins.AssociateMap(m); err != nil { + return err + } + + found = true + } + + if !found { + return fmt.Errorf("symbol %s: %w", symbol, ErrUnreferencedSymbol) + } + + return nil +} + +// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd. +// +// Returns ErrUnreferencedSymbol if the symbol isn't used. +// +// Deprecated: use AssociateMap instead. +func (insns Instructions) RewriteMapPtr(symbol string, fd int) error { + if symbol == "" { + return errors.New("empty symbol") + } + + var found bool + for i := range insns { + ins := &insns[i] + if ins.Reference() != symbol { + continue + } + + if !ins.IsLoadFromMap() { + return errors.New("not a load from a map") + } + + ins.encodeMapFD(fd) + + found = true + } + + if !found { + return fmt.Errorf("symbol %s: %w", symbol, ErrUnreferencedSymbol) + } + + return nil +} + +// SymbolOffsets returns the set of symbols and their offset in +// the instructions. +func (insns Instructions) SymbolOffsets() (map[string]int, error) { + offsets := make(map[string]int) + + for i, ins := range insns { + if ins.Symbol() == "" { + continue + } + + if _, ok := offsets[ins.Symbol()]; ok { + return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol()) + } + + offsets[ins.Symbol()] = i + } + + return offsets, nil +} + +// FunctionReferences returns a set of symbol names these Instructions make +// bpf-to-bpf calls to. +func (insns Instructions) FunctionReferences() []string { + calls := make(map[string]struct{}) + for _, ins := range insns { + if ins.Constant != -1 { + // BPF-to-BPF calls have -1 constants. + continue + } + + if ins.Reference() == "" { + continue + } + + if !ins.IsFunctionReference() { + continue + } + + calls[ins.Reference()] = struct{}{} + } + + result := make([]string, 0, len(calls)) + for call := range calls { + result = append(result, call) + } + + sort.Strings(result) + return result +} + +// ReferenceOffsets returns the set of references and their offset in +// the instructions. +func (insns Instructions) ReferenceOffsets() map[string][]int { + offsets := make(map[string][]int) + + for i, ins := range insns { + if ins.Reference() == "" { + continue + } + + offsets[ins.Reference()] = append(offsets[ins.Reference()], i) + } + + return offsets +} + +// Format implements fmt.Formatter. +// +// You can control indentation of symbols by +// specifying a width. Setting a precision controls the indentation of +// instructions. +// The default character is a tab, which can be overridden by specifying +// the ' ' space flag. +func (insns Instructions) Format(f fmt.State, c rune) { + if c != 's' && c != 'v' { + fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c) + return + } + + // Precision is better in this case, because it allows + // specifying 0 padding easily. + padding, ok := f.Precision() + if !ok { + padding = 1 + } + + indent := strings.Repeat("\t", padding) + if f.Flag(' ') { + indent = strings.Repeat(" ", padding) + } + + symPadding, ok := f.Width() + if !ok { + symPadding = padding - 1 + } + if symPadding < 0 { + symPadding = 0 + } + + symIndent := strings.Repeat("\t", symPadding) + if f.Flag(' ') { + symIndent = strings.Repeat(" ", symPadding) + } + + // Guess how many digits we need at most, by assuming that all instructions + // are double wide. + highestOffset := len(insns) * 2 + offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset)))) + + iter := insns.Iterate() + for iter.Next() { + if iter.Ins.Symbol() != "" { + fmt.Fprintf(f, "%s%s:\n", symIndent, iter.Ins.Symbol()) + } + if src := iter.Ins.Source(); src != nil { + line := strings.TrimSpace(src.String()) + if line != "" { + fmt.Fprintf(f, "%s%*s; %s\n", indent, offsetWidth, " ", line) + } + } + fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, iter.Offset, iter.Ins) + } +} + +// Marshal encodes a BPF program into the kernel format. +// +// insns may be modified if there are unresolved jumps or bpf2bpf calls. +// +// Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction +// without a matching Symbol Instruction within insns. +func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error { + if err := insns.encodeFunctionReferences(); err != nil { + return err + } + + if err := insns.encodeMapPointers(); err != nil { + return err + } + + for i, ins := range insns { + if _, err := ins.Marshal(w, bo); err != nil { + return fmt.Errorf("instruction %d: %w", i, err) + } + } + return nil +} + +// Tag calculates the kernel tag for a series of instructions. +// +// It mirrors bpf_prog_calc_tag in the kernel and so can be compared +// to ProgramInfo.Tag to figure out whether a loaded program matches +// certain instructions. +func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) { + h := sha1.New() + for i, ins := range insns { + if ins.IsLoadFromMap() { + ins.Constant = 0 + } + _, err := ins.Marshal(h, bo) + if err != nil { + return "", fmt.Errorf("instruction %d: %w", i, err) + } + } + return hex.EncodeToString(h.Sum(nil)[:unix.BPF_TAG_SIZE]), nil +} + +// encodeFunctionReferences populates the Offset (or Constant, depending on +// the instruction type) field of instructions with a Reference field to point +// to the offset of the corresponding instruction with a matching Symbol field. +// +// Only Reference Instructions that are either jumps or BPF function references +// (calls or function pointer loads) are populated. +// +// Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction +// without at least one corresponding Symbol Instruction within insns. +func (insns Instructions) encodeFunctionReferences() error { + // Index the offsets of instructions tagged as a symbol. + symbolOffsets := make(map[string]RawInstructionOffset) + iter := insns.Iterate() + for iter.Next() { + ins := iter.Ins + + if ins.Symbol() == "" { + continue + } + + if _, ok := symbolOffsets[ins.Symbol()]; ok { + return fmt.Errorf("duplicate symbol %s", ins.Symbol()) + } + + symbolOffsets[ins.Symbol()] = iter.Offset + } + + // Find all instructions tagged as references to other symbols. + // Depending on the instruction type, populate their constant or offset + // fields to point to the symbol they refer to within the insn stream. + iter = insns.Iterate() + for iter.Next() { + i := iter.Index + offset := iter.Offset + ins := iter.Ins + + if ins.Reference() == "" { + continue + } + + switch { + case ins.IsFunctionReference() && ins.Constant == -1: + symOffset, ok := symbolOffsets[ins.Reference()] + if !ok { + return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference) + } + + ins.Constant = int64(symOffset - offset - 1) + + case ins.OpCode.Class().IsJump() && ins.Offset == -1: + symOffset, ok := symbolOffsets[ins.Reference()] + if !ok { + return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference) + } + + ins.Offset = int16(symOffset - offset - 1) + } + } + + return nil +} + +// encodeMapPointers finds all Map Instructions and encodes their FDs +// into their Constant fields. +func (insns Instructions) encodeMapPointers() error { + iter := insns.Iterate() + for iter.Next() { + ins := iter.Ins + + if !ins.IsLoadFromMap() { + continue + } + + m := ins.Map() + if m == nil { + continue + } + + fd := m.FD() + if fd < 0 { + return fmt.Errorf("map %s: %w", m, sys.ErrClosedFd) + } + + ins.encodeMapFD(m.FD()) + } + + return nil +} + +// Iterate allows iterating a BPF program while keeping track of +// various offsets. +// +// Modifying the instruction slice will lead to undefined behaviour. +func (insns Instructions) Iterate() *InstructionIterator { + return &InstructionIterator{insns: insns} +} + +// InstructionIterator iterates over a BPF program. +type InstructionIterator struct { + insns Instructions + // The instruction in question. + Ins *Instruction + // The index of the instruction in the original instruction slice. + Index int + // The offset of the instruction in raw BPF instructions. This accounts + // for double-wide instructions. + Offset RawInstructionOffset +} + +// Next returns true as long as there are any instructions remaining. +func (iter *InstructionIterator) Next() bool { + if len(iter.insns) == 0 { + return false + } + + if iter.Ins != nil { + iter.Index++ + iter.Offset += RawInstructionOffset(iter.Ins.OpCode.rawInstructions()) + } + iter.Ins = &iter.insns[0] + iter.insns = iter.insns[1:] + return true +} + +type bpfRegisters uint8 + +func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, error) { + switch bo { + case binary.LittleEndian: + return bpfRegisters((src << 4) | (dst & 0xF)), nil + case binary.BigEndian: + return bpfRegisters((dst << 4) | (src & 0xF)), nil + default: + return 0, fmt.Errorf("unrecognized ByteOrder %T", bo) + } +} + +// IsUnreferencedSymbol returns true if err was caused by +// an unreferenced symbol. +// +// Deprecated: use errors.Is(err, asm.ErrUnreferencedSymbol). +func IsUnreferencedSymbol(err error) bool { + return errors.Is(err, ErrUnreferencedSymbol) +} diff --git a/vendor/github.com/cilium/ebpf/asm/jump.go b/vendor/github.com/cilium/ebpf/asm/jump.go new file mode 100644 index 000000000..e31e42cac --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/jump.go @@ -0,0 +1,127 @@ +package asm + +//go:generate stringer -output jump_string.go -type=JumpOp + +// JumpOp affect control flow. +// +// msb lsb +// +----+-+---+ +// |OP |s|cls| +// +----+-+---+ +type JumpOp uint8 + +const jumpMask OpCode = aluMask + +const ( + // InvalidJumpOp is returned by getters when invoked + // on non branch OpCodes + InvalidJumpOp JumpOp = 0xff + // Ja jumps by offset unconditionally + Ja JumpOp = 0x00 + // JEq jumps by offset if r == imm + JEq JumpOp = 0x10 + // JGT jumps by offset if r > imm + JGT JumpOp = 0x20 + // JGE jumps by offset if r >= imm + JGE JumpOp = 0x30 + // JSet jumps by offset if r & imm + JSet JumpOp = 0x40 + // JNE jumps by offset if r != imm + JNE JumpOp = 0x50 + // JSGT jumps by offset if signed r > signed imm + JSGT JumpOp = 0x60 + // JSGE jumps by offset if signed r >= signed imm + JSGE JumpOp = 0x70 + // Call builtin or user defined function from imm + Call JumpOp = 0x80 + // Exit ends execution, with value in r0 + Exit JumpOp = 0x90 + // JLT jumps by offset if r < imm + JLT JumpOp = 0xa0 + // JLE jumps by offset if r <= imm + JLE JumpOp = 0xb0 + // JSLT jumps by offset if signed r < signed imm + JSLT JumpOp = 0xc0 + // JSLE jumps by offset if signed r <= signed imm + JSLE JumpOp = 0xd0 +) + +// Return emits an exit instruction. +// +// Requires a return value in R0. +func Return() Instruction { + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(Exit), + } +} + +// Op returns the OpCode for a given jump source. +func (op JumpOp) Op(source Source) OpCode { + return OpCode(JumpClass).SetJumpOp(op).SetSource(source) +} + +// Imm compares 64 bit dst to 64 bit value (sign extended), and adjusts PC by offset if the condition is fulfilled. +func (op JumpOp) Imm(dst Register, value int32, label string) Instruction { + return Instruction{ + OpCode: op.opCode(JumpClass, ImmSource), + Dst: dst, + Offset: -1, + Constant: int64(value), + }.WithReference(label) +} + +// Imm32 compares 32 bit dst to 32 bit value, and adjusts PC by offset if the condition is fulfilled. +// Requires kernel 5.1. +func (op JumpOp) Imm32(dst Register, value int32, label string) Instruction { + return Instruction{ + OpCode: op.opCode(Jump32Class, ImmSource), + Dst: dst, + Offset: -1, + Constant: int64(value), + }.WithReference(label) +} + +// Reg compares 64 bit dst to 64 bit src, and adjusts PC by offset if the condition is fulfilled. +func (op JumpOp) Reg(dst, src Register, label string) Instruction { + return Instruction{ + OpCode: op.opCode(JumpClass, RegSource), + Dst: dst, + Src: src, + Offset: -1, + }.WithReference(label) +} + +// Reg32 compares 32 bit dst to 32 bit src, and adjusts PC by offset if the condition is fulfilled. +// Requires kernel 5.1. +func (op JumpOp) Reg32(dst, src Register, label string) Instruction { + return Instruction{ + OpCode: op.opCode(Jump32Class, RegSource), + Dst: dst, + Src: src, + Offset: -1, + }.WithReference(label) +} + +func (op JumpOp) opCode(class Class, source Source) OpCode { + if op == Exit || op == Call || op == Ja { + return InvalidOpCode + } + + return OpCode(class).SetJumpOp(op).SetSource(source) +} + +// Label adjusts PC to the address of the label. +func (op JumpOp) Label(label string) Instruction { + if op == Call { + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(Call), + Src: PseudoCall, + Constant: -1, + }.WithReference(label) + } + + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(op), + Offset: -1, + }.WithReference(label) +} diff --git a/vendor/github.com/cilium/ebpf/asm/jump_string.go b/vendor/github.com/cilium/ebpf/asm/jump_string.go new file mode 100644 index 000000000..85a4aaffa --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/jump_string.go @@ -0,0 +1,53 @@ +// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidJumpOp-255] + _ = x[Ja-0] + _ = x[JEq-16] + _ = x[JGT-32] + _ = x[JGE-48] + _ = x[JSet-64] + _ = x[JNE-80] + _ = x[JSGT-96] + _ = x[JSGE-112] + _ = x[Call-128] + _ = x[Exit-144] + _ = x[JLT-160] + _ = x[JLE-176] + _ = x[JSLT-192] + _ = x[JSLE-208] +} + +const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp" + +var _JumpOp_map = map[JumpOp]string{ + 0: _JumpOp_name[0:2], + 16: _JumpOp_name[2:5], + 32: _JumpOp_name[5:8], + 48: _JumpOp_name[8:11], + 64: _JumpOp_name[11:15], + 80: _JumpOp_name[15:18], + 96: _JumpOp_name[18:22], + 112: _JumpOp_name[22:26], + 128: _JumpOp_name[26:30], + 144: _JumpOp_name[30:34], + 160: _JumpOp_name[34:37], + 176: _JumpOp_name[37:40], + 192: _JumpOp_name[40:44], + 208: _JumpOp_name[44:48], + 255: _JumpOp_name[48:61], +} + +func (i JumpOp) String() string { + if str, ok := _JumpOp_map[i]; ok { + return str + } + return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")" +} diff --git a/vendor/github.com/cilium/ebpf/asm/load_store.go b/vendor/github.com/cilium/ebpf/asm/load_store.go new file mode 100644 index 000000000..85ed286b0 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/load_store.go @@ -0,0 +1,204 @@ +package asm + +//go:generate stringer -output load_store_string.go -type=Mode,Size + +// Mode for load and store operations +// +// msb lsb +// +---+--+---+ +// |MDE|sz|cls| +// +---+--+---+ +type Mode uint8 + +const modeMask OpCode = 0xe0 + +const ( + // InvalidMode is returned by getters when invoked + // on non load / store OpCodes + InvalidMode Mode = 0xff + // ImmMode - immediate value + ImmMode Mode = 0x00 + // AbsMode - immediate value + offset + AbsMode Mode = 0x20 + // IndMode - indirect (imm+src) + IndMode Mode = 0x40 + // MemMode - load from memory + MemMode Mode = 0x60 + // XAddMode - add atomically across processors. + XAddMode Mode = 0xc0 +) + +// Size of load and store operations +// +// msb lsb +// +---+--+---+ +// |mde|SZ|cls| +// +---+--+---+ +type Size uint8 + +const sizeMask OpCode = 0x18 + +const ( + // InvalidSize is returned by getters when invoked + // on non load / store OpCodes + InvalidSize Size = 0xff + // DWord - double word; 64 bits + DWord Size = 0x18 + // Word - word; 32 bits + Word Size = 0x00 + // Half - half-word; 16 bits + Half Size = 0x08 + // Byte - byte; 8 bits + Byte Size = 0x10 +) + +// Sizeof returns the size in bytes. +func (s Size) Sizeof() int { + switch s { + case DWord: + return 8 + case Word: + return 4 + case Half: + return 2 + case Byte: + return 1 + default: + return -1 + } +} + +// LoadMemOp returns the OpCode to load a value of given size from memory. +func LoadMemOp(size Size) OpCode { + return OpCode(LdXClass).SetMode(MemMode).SetSize(size) +} + +// LoadMem emits `dst = *(size *)(src + offset)`. +func LoadMem(dst, src Register, offset int16, size Size) Instruction { + return Instruction{ + OpCode: LoadMemOp(size), + Dst: dst, + Src: src, + Offset: offset, + } +} + +// LoadImmOp returns the OpCode to load an immediate of given size. +// +// As of kernel 4.20, only DWord size is accepted. +func LoadImmOp(size Size) OpCode { + return OpCode(LdClass).SetMode(ImmMode).SetSize(size) +} + +// LoadImm emits `dst = (size)value`. +// +// As of kernel 4.20, only DWord size is accepted. +func LoadImm(dst Register, value int64, size Size) Instruction { + return Instruction{ + OpCode: LoadImmOp(size), + Dst: dst, + Constant: value, + } +} + +// LoadMapPtr stores a pointer to a map in dst. +func LoadMapPtr(dst Register, fd int) Instruction { + if fd < 0 { + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: LoadImmOp(DWord), + Dst: dst, + Src: PseudoMapFD, + Constant: int64(uint32(fd)), + } +} + +// LoadMapValue stores a pointer to the value at a certain offset of a map. +func LoadMapValue(dst Register, fd int, offset uint32) Instruction { + if fd < 0 { + return Instruction{OpCode: InvalidOpCode} + } + + fdAndOffset := (uint64(offset) << 32) | uint64(uint32(fd)) + return Instruction{ + OpCode: LoadImmOp(DWord), + Dst: dst, + Src: PseudoMapValue, + Constant: int64(fdAndOffset), + } +} + +// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff. +func LoadIndOp(size Size) OpCode { + return OpCode(LdClass).SetMode(IndMode).SetSize(size) +} + +// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`. +func LoadInd(dst, src Register, offset int32, size Size) Instruction { + return Instruction{ + OpCode: LoadIndOp(size), + Dst: dst, + Src: src, + Constant: int64(offset), + } +} + +// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff. +func LoadAbsOp(size Size) OpCode { + return OpCode(LdClass).SetMode(AbsMode).SetSize(size) +} + +// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`. +func LoadAbs(offset int32, size Size) Instruction { + return Instruction{ + OpCode: LoadAbsOp(size), + Dst: R0, + Constant: int64(offset), + } +} + +// StoreMemOp returns the OpCode for storing a register of given size in memory. +func StoreMemOp(size Size) OpCode { + return OpCode(StXClass).SetMode(MemMode).SetSize(size) +} + +// StoreMem emits `*(size *)(dst + offset) = src` +func StoreMem(dst Register, offset int16, src Register, size Size) Instruction { + return Instruction{ + OpCode: StoreMemOp(size), + Dst: dst, + Src: src, + Offset: offset, + } +} + +// StoreImmOp returns the OpCode for storing an immediate of given size in memory. +func StoreImmOp(size Size) OpCode { + return OpCode(StClass).SetMode(MemMode).SetSize(size) +} + +// StoreImm emits `*(size *)(dst + offset) = value`. +func StoreImm(dst Register, offset int16, value int64, size Size) Instruction { + return Instruction{ + OpCode: StoreImmOp(size), + Dst: dst, + Offset: offset, + Constant: value, + } +} + +// StoreXAddOp returns the OpCode to atomically add a register to a value in memory. +func StoreXAddOp(size Size) OpCode { + return OpCode(StXClass).SetMode(XAddMode).SetSize(size) +} + +// StoreXAdd atomically adds src to *dst. +func StoreXAdd(dst, src Register, size Size) Instruction { + return Instruction{ + OpCode: StoreXAddOp(size), + Dst: dst, + Src: src, + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/load_store_string.go b/vendor/github.com/cilium/ebpf/asm/load_store_string.go new file mode 100644 index 000000000..76d29a075 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/load_store_string.go @@ -0,0 +1,80 @@ +// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidMode-255] + _ = x[ImmMode-0] + _ = x[AbsMode-32] + _ = x[IndMode-64] + _ = x[MemMode-96] + _ = x[XAddMode-192] +} + +const ( + _Mode_name_0 = "ImmMode" + _Mode_name_1 = "AbsMode" + _Mode_name_2 = "IndMode" + _Mode_name_3 = "MemMode" + _Mode_name_4 = "XAddMode" + _Mode_name_5 = "InvalidMode" +) + +func (i Mode) String() string { + switch { + case i == 0: + return _Mode_name_0 + case i == 32: + return _Mode_name_1 + case i == 64: + return _Mode_name_2 + case i == 96: + return _Mode_name_3 + case i == 192: + return _Mode_name_4 + case i == 255: + return _Mode_name_5 + default: + return "Mode(" + strconv.FormatInt(int64(i), 10) + ")" + } +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidSize-255] + _ = x[DWord-24] + _ = x[Word-0] + _ = x[Half-8] + _ = x[Byte-16] +} + +const ( + _Size_name_0 = "Word" + _Size_name_1 = "Half" + _Size_name_2 = "Byte" + _Size_name_3 = "DWord" + _Size_name_4 = "InvalidSize" +) + +func (i Size) String() string { + switch { + case i == 0: + return _Size_name_0 + case i == 8: + return _Size_name_1 + case i == 16: + return _Size_name_2 + case i == 24: + return _Size_name_3 + case i == 255: + return _Size_name_4 + default: + return "Size(" + strconv.FormatInt(int64(i), 10) + ")" + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/metadata.go b/vendor/github.com/cilium/ebpf/asm/metadata.go new file mode 100644 index 000000000..dd368a936 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/metadata.go @@ -0,0 +1,80 @@ +package asm + +// Metadata contains metadata about an instruction. +type Metadata struct { + head *metaElement +} + +type metaElement struct { + next *metaElement + key, value interface{} +} + +// Find the element containing key. +// +// Returns nil if there is no such element. +func (m *Metadata) find(key interface{}) *metaElement { + for e := m.head; e != nil; e = e.next { + if e.key == key { + return e + } + } + return nil +} + +// Remove an element from the linked list. +// +// Copies as many elements of the list as necessary to remove r, but doesn't +// perform a full copy. +func (m *Metadata) remove(r *metaElement) { + current := &m.head + for e := m.head; e != nil; e = e.next { + if e == r { + // We've found the element we want to remove. + *current = e.next + + // No need to copy the tail. + return + } + + // There is another element in front of the one we want to remove. + // We have to copy it to be able to change metaElement.next. + cpy := &metaElement{key: e.key, value: e.value} + *current = cpy + current = &cpy.next + } +} + +// Set a key to a value. +// +// If value is nil, the key is removed. Avoids modifying old metadata by +// copying if necessary. +func (m *Metadata) Set(key, value interface{}) { + if e := m.find(key); e != nil { + if e.value == value { + // Key is present and the value is the same. Nothing to do. + return + } + + // Key is present with a different value. Create a copy of the list + // which doesn't have the element in it. + m.remove(e) + } + + // m.head is now a linked list that doesn't contain key. + if value == nil { + return + } + + m.head = &metaElement{key: key, value: value, next: m.head} +} + +// Get the value of a key. +// +// Returns nil if no value with the given key is present. +func (m *Metadata) Get(key interface{}) interface{} { + if e := m.find(key); e != nil { + return e.value + } + return nil +} diff --git a/vendor/github.com/cilium/ebpf/asm/opcode.go b/vendor/github.com/cilium/ebpf/asm/opcode.go new file mode 100644 index 000000000..b11917e18 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/opcode.go @@ -0,0 +1,271 @@ +package asm + +import ( + "fmt" + "strings" +) + +//go:generate stringer -output opcode_string.go -type=Class + +// Class of operations +// +// msb lsb +// +---+--+---+ +// | ?? |CLS| +// +---+--+---+ +type Class uint8 + +const classMask OpCode = 0x07 + +const ( + // LdClass loads immediate values into registers. + // Also used for non-standard load operations from cBPF. + LdClass Class = 0x00 + // LdXClass loads memory into registers. + LdXClass Class = 0x01 + // StClass stores immediate values to memory. + StClass Class = 0x02 + // StXClass stores registers to memory. + StXClass Class = 0x03 + // ALUClass describes arithmetic operators. + ALUClass Class = 0x04 + // JumpClass describes jump operators. + JumpClass Class = 0x05 + // Jump32Class describes jump operators with 32-bit comparisons. + // Requires kernel 5.1. + Jump32Class Class = 0x06 + // ALU64Class describes arithmetic operators in 64-bit mode. + ALU64Class Class = 0x07 +) + +// IsLoad checks if this is either LdClass or LdXClass. +func (cls Class) IsLoad() bool { + return cls == LdClass || cls == LdXClass +} + +// IsStore checks if this is either StClass or StXClass. +func (cls Class) IsStore() bool { + return cls == StClass || cls == StXClass +} + +func (cls Class) isLoadOrStore() bool { + return cls.IsLoad() || cls.IsStore() +} + +// IsALU checks if this is either ALUClass or ALU64Class. +func (cls Class) IsALU() bool { + return cls == ALUClass || cls == ALU64Class +} + +// IsJump checks if this is either JumpClass or Jump32Class. +func (cls Class) IsJump() bool { + return cls == JumpClass || cls == Jump32Class +} + +func (cls Class) isJumpOrALU() bool { + return cls.IsJump() || cls.IsALU() +} + +// OpCode is a packed eBPF opcode. +// +// Its encoding is defined by a Class value: +// +// msb lsb +// +----+-+---+ +// | ???? |CLS| +// +----+-+---+ +type OpCode uint8 + +// InvalidOpCode is returned by setters on OpCode +const InvalidOpCode OpCode = 0xff + +// rawInstructions returns the number of BPF instructions required +// to encode this opcode. +func (op OpCode) rawInstructions() int { + if op.IsDWordLoad() { + return 2 + } + return 1 +} + +func (op OpCode) IsDWordLoad() bool { + return op == LoadImmOp(DWord) +} + +// Class returns the class of operation. +func (op OpCode) Class() Class { + return Class(op & classMask) +} + +// Mode returns the mode for load and store operations. +func (op OpCode) Mode() Mode { + if !op.Class().isLoadOrStore() { + return InvalidMode + } + return Mode(op & modeMask) +} + +// Size returns the size for load and store operations. +func (op OpCode) Size() Size { + if !op.Class().isLoadOrStore() { + return InvalidSize + } + return Size(op & sizeMask) +} + +// Source returns the source for branch and ALU operations. +func (op OpCode) Source() Source { + if !op.Class().isJumpOrALU() || op.ALUOp() == Swap { + return InvalidSource + } + return Source(op & sourceMask) +} + +// ALUOp returns the ALUOp. +func (op OpCode) ALUOp() ALUOp { + if !op.Class().IsALU() { + return InvalidALUOp + } + return ALUOp(op & aluMask) +} + +// Endianness returns the Endianness for a byte swap instruction. +func (op OpCode) Endianness() Endianness { + if op.ALUOp() != Swap { + return InvalidEndian + } + return Endianness(op & endianMask) +} + +// JumpOp returns the JumpOp. +// Returns InvalidJumpOp if it doesn't encode a jump. +func (op OpCode) JumpOp() JumpOp { + if !op.Class().IsJump() { + return InvalidJumpOp + } + + jumpOp := JumpOp(op & jumpMask) + + // Some JumpOps are only supported by JumpClass, not Jump32Class. + if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call || jumpOp == Ja) { + return InvalidJumpOp + } + + return jumpOp +} + +// SetMode sets the mode on load and store operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetMode(mode Mode) OpCode { + if !op.Class().isLoadOrStore() || !valid(OpCode(mode), modeMask) { + return InvalidOpCode + } + return (op & ^modeMask) | OpCode(mode) +} + +// SetSize sets the size on load and store operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetSize(size Size) OpCode { + if !op.Class().isLoadOrStore() || !valid(OpCode(size), sizeMask) { + return InvalidOpCode + } + return (op & ^sizeMask) | OpCode(size) +} + +// SetSource sets the source on jump and ALU operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetSource(source Source) OpCode { + if !op.Class().isJumpOrALU() || !valid(OpCode(source), sourceMask) { + return InvalidOpCode + } + return (op & ^sourceMask) | OpCode(source) +} + +// SetALUOp sets the ALUOp on ALU operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetALUOp(alu ALUOp) OpCode { + if !op.Class().IsALU() || !valid(OpCode(alu), aluMask) { + return InvalidOpCode + } + return (op & ^aluMask) | OpCode(alu) +} + +// SetJumpOp sets the JumpOp on jump operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetJumpOp(jump JumpOp) OpCode { + if !op.Class().IsJump() || !valid(OpCode(jump), jumpMask) { + return InvalidOpCode + } + + newOp := (op & ^jumpMask) | OpCode(jump) + + // Check newOp is legal. + if newOp.JumpOp() == InvalidJumpOp { + return InvalidOpCode + } + + return newOp +} + +func (op OpCode) String() string { + var f strings.Builder + + switch class := op.Class(); { + case class.isLoadOrStore(): + f.WriteString(strings.TrimSuffix(class.String(), "Class")) + + mode := op.Mode() + f.WriteString(strings.TrimSuffix(mode.String(), "Mode")) + + switch op.Size() { + case DWord: + f.WriteString("DW") + case Word: + f.WriteString("W") + case Half: + f.WriteString("H") + case Byte: + f.WriteString("B") + } + + case class.IsALU(): + f.WriteString(op.ALUOp().String()) + + if op.ALUOp() == Swap { + // Width for Endian is controlled by Constant + f.WriteString(op.Endianness().String()) + } else { + if class == ALUClass { + f.WriteString("32") + } + + f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) + } + + case class.IsJump(): + f.WriteString(op.JumpOp().String()) + + if class == Jump32Class { + f.WriteString("32") + } + + if jop := op.JumpOp(); jop != Exit && jop != Call { + f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) + } + + default: + fmt.Fprintf(&f, "OpCode(%#x)", uint8(op)) + } + + return f.String() +} + +// valid returns true if all bits in value are covered by mask. +func valid(value, mask OpCode) bool { + return value & ^mask == 0 +} diff --git a/vendor/github.com/cilium/ebpf/asm/opcode_string.go b/vendor/github.com/cilium/ebpf/asm/opcode_string.go new file mode 100644 index 000000000..58bc3e7e7 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/opcode_string.go @@ -0,0 +1,30 @@ +// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[LdClass-0] + _ = x[LdXClass-1] + _ = x[StClass-2] + _ = x[StXClass-3] + _ = x[ALUClass-4] + _ = x[JumpClass-5] + _ = x[Jump32Class-6] + _ = x[ALU64Class-7] +} + +const _Class_name = "LdClassLdXClassStClassStXClassALUClassJumpClassJump32ClassALU64Class" + +var _Class_index = [...]uint8{0, 7, 15, 22, 30, 38, 47, 58, 68} + +func (i Class) String() string { + if i >= Class(len(_Class_index)-1) { + return "Class(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _Class_name[_Class_index[i]:_Class_index[i+1]] +} diff --git a/vendor/github.com/cilium/ebpf/asm/register.go b/vendor/github.com/cilium/ebpf/asm/register.go new file mode 100644 index 000000000..dd5d44f1c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/register.go @@ -0,0 +1,50 @@ +package asm + +import ( + "fmt" +) + +// Register is the source or destination of most operations. +type Register uint8 + +// R0 contains return values. +const R0 Register = 0 + +// Registers for function arguments. +const ( + R1 Register = R0 + 1 + iota + R2 + R3 + R4 + R5 +) + +// Callee saved registers preserved by function calls. +const ( + R6 Register = R5 + 1 + iota + R7 + R8 + R9 +) + +// Read-only frame pointer to access stack. +const ( + R10 Register = R9 + 1 + RFP = R10 +) + +// Pseudo registers used by 64bit loads and jumps +const ( + PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD + PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE + PseudoCall = R1 // BPF_PSEUDO_CALL + PseudoFunc = R4 // BPF_PSEUDO_FUNC +) + +func (r Register) String() string { + v := uint8(r) + if v == 10 { + return "rfp" + } + return fmt.Sprintf("r%d", v) +} diff --git a/vendor/github.com/cilium/ebpf/attachtype_string.go b/vendor/github.com/cilium/ebpf/attachtype_string.go new file mode 100644 index 000000000..de355ed90 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/attachtype_string.go @@ -0,0 +1,65 @@ +// Code generated by "stringer -type AttachType -trimprefix Attach"; DO NOT EDIT. + +package ebpf + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[AttachNone-0] + _ = x[AttachCGroupInetIngress-0] + _ = x[AttachCGroupInetEgress-1] + _ = x[AttachCGroupInetSockCreate-2] + _ = x[AttachCGroupSockOps-3] + _ = x[AttachSkSKBStreamParser-4] + _ = x[AttachSkSKBStreamVerdict-5] + _ = x[AttachCGroupDevice-6] + _ = x[AttachSkMsgVerdict-7] + _ = x[AttachCGroupInet4Bind-8] + _ = x[AttachCGroupInet6Bind-9] + _ = x[AttachCGroupInet4Connect-10] + _ = x[AttachCGroupInet6Connect-11] + _ = x[AttachCGroupInet4PostBind-12] + _ = x[AttachCGroupInet6PostBind-13] + _ = x[AttachCGroupUDP4Sendmsg-14] + _ = x[AttachCGroupUDP6Sendmsg-15] + _ = x[AttachLircMode2-16] + _ = x[AttachFlowDissector-17] + _ = x[AttachCGroupSysctl-18] + _ = x[AttachCGroupUDP4Recvmsg-19] + _ = x[AttachCGroupUDP6Recvmsg-20] + _ = x[AttachCGroupGetsockopt-21] + _ = x[AttachCGroupSetsockopt-22] + _ = x[AttachTraceRawTp-23] + _ = x[AttachTraceFEntry-24] + _ = x[AttachTraceFExit-25] + _ = x[AttachModifyReturn-26] + _ = x[AttachLSMMac-27] + _ = x[AttachTraceIter-28] + _ = x[AttachCgroupInet4GetPeername-29] + _ = x[AttachCgroupInet6GetPeername-30] + _ = x[AttachCgroupInet4GetSockname-31] + _ = x[AttachCgroupInet6GetSockname-32] + _ = x[AttachXDPDevMap-33] + _ = x[AttachCgroupInetSockRelease-34] + _ = x[AttachXDPCPUMap-35] + _ = x[AttachSkLookup-36] + _ = x[AttachXDP-37] + _ = x[AttachSkSKBVerdict-38] + _ = x[AttachSkReuseportSelect-39] + _ = x[AttachSkReuseportSelectOrMigrate-40] + _ = x[AttachPerfEvent-41] +} + +const _AttachType_name = "NoneCGroupInetEgressCGroupInetSockCreateCGroupSockOpsSkSKBStreamParserSkSKBStreamVerdictCGroupDeviceSkMsgVerdictCGroupInet4BindCGroupInet6BindCGroupInet4ConnectCGroupInet6ConnectCGroupInet4PostBindCGroupInet6PostBindCGroupUDP4SendmsgCGroupUDP6SendmsgLircMode2FlowDissectorCGroupSysctlCGroupUDP4RecvmsgCGroupUDP6RecvmsgCGroupGetsockoptCGroupSetsockoptTraceRawTpTraceFEntryTraceFExitModifyReturnLSMMacTraceIterCgroupInet4GetPeernameCgroupInet6GetPeernameCgroupInet4GetSocknameCgroupInet6GetSocknameXDPDevMapCgroupInetSockReleaseXDPCPUMapSkLookupXDPSkSKBVerdictSkReuseportSelectSkReuseportSelectOrMigratePerfEvent" + +var _AttachType_index = [...]uint16{0, 4, 20, 40, 53, 70, 88, 100, 112, 127, 142, 160, 178, 197, 216, 233, 250, 259, 272, 284, 301, 318, 334, 350, 360, 371, 381, 393, 399, 408, 430, 452, 474, 496, 505, 526, 535, 543, 546, 558, 575, 601, 610} + +func (i AttachType) String() string { + if i >= AttachType(len(_AttachType_index)-1) { + return "AttachType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _AttachType_name[_AttachType_index[i]:_AttachType_index[i+1]] +} diff --git a/vendor/github.com/cilium/ebpf/btf/btf.go b/vendor/github.com/cilium/ebpf/btf/btf.go new file mode 100644 index 000000000..a5969332a --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/btf.go @@ -0,0 +1,897 @@ +package btf + +import ( + "bufio" + "bytes" + "debug/elf" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "os" + "reflect" + + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +const btfMagic = 0xeB9F + +// Errors returned by BTF functions. +var ( + ErrNotSupported = internal.ErrNotSupported + ErrNotFound = errors.New("not found") + ErrNoExtendedInfo = errors.New("no extended info") +) + +// ID represents the unique ID of a BTF object. +type ID = sys.BTFID + +// Spec represents decoded BTF. +type Spec struct { + // Data from .BTF. + rawTypes []rawType + strings *stringTable + + // All types contained by the spec. For the base type, the position of + // a type in the slice is its ID. + types types + + // Type IDs indexed by type. + typeIDs map[Type]TypeID + + // Types indexed by essential name. + // Includes all struct flavors and types with the same name. + namedTypes map[essentialName][]Type + + byteOrder binary.ByteOrder +} + +type btfHeader struct { + Magic uint16 + Version uint8 + Flags uint8 + HdrLen uint32 + + TypeOff uint32 + TypeLen uint32 + StringOff uint32 + StringLen uint32 +} + +// typeStart returns the offset from the beginning of the .BTF section +// to the start of its type entries. +func (h *btfHeader) typeStart() int64 { + return int64(h.HdrLen + h.TypeOff) +} + +// stringStart returns the offset from the beginning of the .BTF section +// to the start of its string table. +func (h *btfHeader) stringStart() int64 { + return int64(h.HdrLen + h.StringOff) +} + +// LoadSpec opens file and calls LoadSpecFromReader on it. +func LoadSpec(file string) (*Spec, error) { + fh, err := os.Open(file) + if err != nil { + return nil, err + } + defer fh.Close() + + return LoadSpecFromReader(fh) +} + +// LoadSpecFromReader reads from an ELF or a raw BTF blob. +// +// Returns ErrNotFound if reading from an ELF which contains no BTF. ExtInfos +// may be nil. +func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) { + file, err := internal.NewSafeELFFile(rd) + if err != nil { + if bo := guessRawBTFByteOrder(rd); bo != nil { + // Try to parse a naked BTF blob. This will return an error if + // we encounter a Datasec, since we can't fix it up. + spec, err := loadRawSpec(io.NewSectionReader(rd, 0, math.MaxInt64), bo, nil, nil) + return spec, err + } + + return nil, err + } + + return loadSpecFromELF(file) +} + +// LoadSpecAndExtInfosFromReader reads from an ELF. +// +// ExtInfos may be nil if the ELF doesn't contain section metadta. +// Returns ErrNotFound if the ELF contains no BTF. +func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) { + file, err := internal.NewSafeELFFile(rd) + if err != nil { + return nil, nil, err + } + + spec, err := loadSpecFromELF(file) + if err != nil { + return nil, nil, err + } + + extInfos, err := loadExtInfosFromELF(file, spec.types, spec.strings) + if err != nil && !errors.Is(err, ErrNotFound) { + return nil, nil, err + } + + return spec, extInfos, nil +} + +// variableOffsets extracts all symbols offsets from an ELF and indexes them by +// section and variable name. +// +// References to variables in BTF data sections carry unsigned 32-bit offsets. +// Some ELF symbols (e.g. in vmlinux) may point to virtual memory that is well +// beyond this range. Since these symbols cannot be described by BTF info, +// ignore them here. +func variableOffsets(file *internal.SafeELFFile) (map[variable]uint32, error) { + symbols, err := file.Symbols() + if err != nil { + return nil, fmt.Errorf("can't read symbols: %v", err) + } + + variableOffsets := make(map[variable]uint32) + for _, symbol := range symbols { + if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE { + // Ignore things like SHN_ABS + continue + } + + if symbol.Value > math.MaxUint32 { + // VarSecinfo offset is u32, cannot reference symbols in higher regions. + continue + } + + if int(symbol.Section) >= len(file.Sections) { + return nil, fmt.Errorf("symbol %s: invalid section %d", symbol.Name, symbol.Section) + } + + secName := file.Sections[symbol.Section].Name + variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value) + } + + return variableOffsets, nil +} + +func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) { + var ( + btfSection *elf.Section + sectionSizes = make(map[string]uint32) + ) + + for _, sec := range file.Sections { + switch sec.Name { + case ".BTF": + btfSection = sec + default: + if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS { + break + } + + if sec.Size > math.MaxUint32 { + return nil, fmt.Errorf("section %s exceeds maximum size", sec.Name) + } + + sectionSizes[sec.Name] = uint32(sec.Size) + } + } + + if btfSection == nil { + return nil, fmt.Errorf("btf: %w", ErrNotFound) + } + + vars, err := variableOffsets(file) + if err != nil { + return nil, err + } + + if btfSection.ReaderAt == nil { + return nil, fmt.Errorf("compressed BTF is not supported") + } + + rawTypes, rawStrings, err := parseBTF(btfSection.ReaderAt, file.ByteOrder, nil) + if err != nil { + return nil, err + } + + err = fixupDatasec(rawTypes, rawStrings, sectionSizes, vars) + if err != nil { + return nil, err + } + + return inflateSpec(rawTypes, rawStrings, file.ByteOrder, nil) +} + +func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder, + baseTypes types, baseStrings *stringTable) (*Spec, error) { + + rawTypes, rawStrings, err := parseBTF(btf, bo, baseStrings) + if err != nil { + return nil, err + } + + return inflateSpec(rawTypes, rawStrings, bo, baseTypes) +} + +func inflateSpec(rawTypes []rawType, rawStrings *stringTable, bo binary.ByteOrder, + baseTypes types) (*Spec, error) { + + types, err := inflateRawTypes(rawTypes, baseTypes, rawStrings) + if err != nil { + return nil, err + } + + typeIDs, typesByName := indexTypes(types, TypeID(len(baseTypes))) + + return &Spec{ + rawTypes: rawTypes, + namedTypes: typesByName, + typeIDs: typeIDs, + types: types, + strings: rawStrings, + byteOrder: bo, + }, nil +} + +func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essentialName][]Type) { + namedTypes := 0 + for _, typ := range types { + if typ.TypeName() != "" { + // Do a pre-pass to figure out how big types by name has to be. + // Most types have unique names, so it's OK to ignore essentialName + // here. + namedTypes++ + } + } + + typeIDs := make(map[Type]TypeID, len(types)) + typesByName := make(map[essentialName][]Type, namedTypes) + + for i, typ := range types { + if name := newEssentialName(typ.TypeName()); name != "" { + typesByName[name] = append(typesByName[name], typ) + } + typeIDs[typ] = TypeID(i) + typeIDOffset + } + + return typeIDs, typesByName +} + +// LoadKernelSpec returns the current kernel's BTF information. +// +// Defaults to /sys/kernel/btf/vmlinux and falls back to scanning the file system +// for vmlinux ELFs. Returns an error wrapping ErrNotSupported if BTF is not enabled. +func LoadKernelSpec() (*Spec, error) { + fh, err := os.Open("/sys/kernel/btf/vmlinux") + if err == nil { + defer fh.Close() + + return loadRawSpec(fh, internal.NativeEndian, nil, nil) + } + + file, err := findVMLinux() + if err != nil { + return nil, err + } + defer file.Close() + + return loadSpecFromELF(file) +} + +// findVMLinux scans multiple well-known paths for vmlinux kernel images. +func findVMLinux() (*internal.SafeELFFile, error) { + release, err := internal.KernelRelease() + if err != nil { + return nil, err + } + + // use same list of locations as libbpf + // https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122 + locations := []string{ + "/boot/vmlinux-%s", + "/lib/modules/%s/vmlinux-%[1]s", + "/lib/modules/%s/build/vmlinux", + "/usr/lib/modules/%s/kernel/vmlinux", + "/usr/lib/debug/boot/vmlinux-%s", + "/usr/lib/debug/boot/vmlinux-%s.debug", + "/usr/lib/debug/lib/modules/%s/vmlinux", + } + + for _, loc := range locations { + file, err := internal.OpenSafeELFFile(fmt.Sprintf(loc, release)) + if errors.Is(err, os.ErrNotExist) { + continue + } + return file, err + } + + return nil, fmt.Errorf("no BTF found for kernel version %s: %w", release, internal.ErrNotSupported) +} + +// parseBTFHeader parses the header of the .BTF section. +func parseBTFHeader(r io.Reader, bo binary.ByteOrder) (*btfHeader, error) { + var header btfHeader + if err := binary.Read(r, bo, &header); err != nil { + return nil, fmt.Errorf("can't read header: %v", err) + } + + if header.Magic != btfMagic { + return nil, fmt.Errorf("incorrect magic value %v", header.Magic) + } + + if header.Version != 1 { + return nil, fmt.Errorf("unexpected version %v", header.Version) + } + + if header.Flags != 0 { + return nil, fmt.Errorf("unsupported flags %v", header.Flags) + } + + remainder := int64(header.HdrLen) - int64(binary.Size(&header)) + if remainder < 0 { + return nil, errors.New("header length shorter than btfHeader size") + } + + if _, err := io.CopyN(internal.DiscardZeroes{}, r, remainder); err != nil { + return nil, fmt.Errorf("header padding: %v", err) + } + + return &header, nil +} + +func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder { + buf := new(bufio.Reader) + for _, bo := range []binary.ByteOrder{ + binary.LittleEndian, + binary.BigEndian, + } { + buf.Reset(io.NewSectionReader(r, 0, math.MaxInt64)) + if _, err := parseBTFHeader(buf, bo); err == nil { + return bo + } + } + + return nil +} + +// parseBTF reads a .BTF section into memory and parses it into a list of +// raw types and a string table. +func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([]rawType, *stringTable, error) { + buf := internal.NewBufferedSectionReader(btf, 0, math.MaxInt64) + header, err := parseBTFHeader(buf, bo) + if err != nil { + return nil, nil, fmt.Errorf("parsing .BTF header: %v", err) + } + + rawStrings, err := readStringTable(io.NewSectionReader(btf, header.stringStart(), int64(header.StringLen)), + baseStrings) + if err != nil { + return nil, nil, fmt.Errorf("can't read type names: %w", err) + } + + buf.Reset(io.NewSectionReader(btf, header.typeStart(), int64(header.TypeLen))) + rawTypes, err := readTypes(buf, bo, header.TypeLen) + if err != nil { + return nil, nil, fmt.Errorf("can't read types: %w", err) + } + + return rawTypes, rawStrings, nil +} + +type variable struct { + section string + name string +} + +func fixupDatasec(rawTypes []rawType, rawStrings *stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error { + for i, rawType := range rawTypes { + if rawType.Kind() != kindDatasec { + continue + } + + name, err := rawStrings.Lookup(rawType.NameOff) + if err != nil { + return err + } + + if name == ".kconfig" || name == ".ksyms" { + return fmt.Errorf("reference to %s: %w", name, ErrNotSupported) + } + + if rawTypes[i].SizeType != 0 { + continue + } + + size, ok := sectionSizes[name] + if !ok { + return fmt.Errorf("data section %s: missing size", name) + } + + rawTypes[i].SizeType = size + + secinfos := rawType.data.([]btfVarSecinfo) + for j, secInfo := range secinfos { + id := int(secInfo.Type - 1) + if id >= len(rawTypes) { + return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j) + } + + varName, err := rawStrings.Lookup(rawTypes[id].NameOff) + if err != nil { + return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err) + } + + offset, ok := variableOffsets[variable{name, varName}] + if !ok { + return fmt.Errorf("data section %s: missing offset for variable %s", name, varName) + } + + secinfos[j].Offset = offset + } + } + + return nil +} + +// Copy creates a copy of Spec. +func (s *Spec) Copy() *Spec { + types := copyTypes(s.types, nil) + + typeIDOffset := TypeID(0) + if len(s.types) != 0 { + typeIDOffset = s.typeIDs[s.types[0]] + } + typeIDs, typesByName := indexTypes(types, typeIDOffset) + + // NB: Other parts of spec are not copied since they are immutable. + return &Spec{ + s.rawTypes, + s.strings, + types, + typeIDs, + typesByName, + s.byteOrder, + } +} + +type marshalOpts struct { + ByteOrder binary.ByteOrder + StripFuncLinkage bool +} + +func (s *Spec) marshal(opts marshalOpts) ([]byte, error) { + var ( + buf bytes.Buffer + header = new(btfHeader) + headerLen = binary.Size(header) + ) + + // Reserve space for the header. We have to write it last since + // we don't know the size of the type section yet. + _, _ = buf.Write(make([]byte, headerLen)) + + // Write type section, just after the header. + for _, raw := range s.rawTypes { + switch { + case opts.StripFuncLinkage && raw.Kind() == kindFunc: + raw.SetLinkage(StaticFunc) + } + + if err := raw.Marshal(&buf, opts.ByteOrder); err != nil { + return nil, fmt.Errorf("can't marshal BTF: %w", err) + } + } + + typeLen := uint32(buf.Len() - headerLen) + + // Write string section after type section. + stringsLen := s.strings.Length() + buf.Grow(stringsLen) + if err := s.strings.Marshal(&buf); err != nil { + return nil, err + } + + // Fill out the header, and write it out. + header = &btfHeader{ + Magic: btfMagic, + Version: 1, + Flags: 0, + HdrLen: uint32(headerLen), + TypeOff: 0, + TypeLen: typeLen, + StringOff: typeLen, + StringLen: uint32(stringsLen), + } + + raw := buf.Bytes() + err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header) + if err != nil { + return nil, fmt.Errorf("can't write header: %v", err) + } + + return raw, nil +} + +type sliceWriter []byte + +func (sw sliceWriter) Write(p []byte) (int, error) { + if len(p) != len(sw) { + return 0, errors.New("size doesn't match") + } + + return copy(sw, p), nil +} + +// TypeByID returns the BTF Type with the given type ID. +// +// Returns an error wrapping ErrNotFound if a Type with the given ID +// does not exist in the Spec. +func (s *Spec) TypeByID(id TypeID) (Type, error) { + return s.types.ByID(id) +} + +// TypeID returns the ID for a given Type. +// +// Returns an error wrapping ErrNoFound if the type isn't part of the Spec. +func (s *Spec) TypeID(typ Type) (TypeID, error) { + if _, ok := typ.(*Void); ok { + // Equality is weird for void, since it is a zero sized type. + return 0, nil + } + + id, ok := s.typeIDs[typ] + if !ok { + return 0, fmt.Errorf("no ID for type %s: %w", typ, ErrNotFound) + } + + return id, nil +} + +// AnyTypesByName returns a list of BTF Types with the given name. +// +// If the BTF blob describes multiple compilation units like vmlinux, multiple +// Types with the same name and kind can exist, but might not describe the same +// data structure. +// +// Returns an error wrapping ErrNotFound if no matching Type exists in the Spec. +func (s *Spec) AnyTypesByName(name string) ([]Type, error) { + types := s.namedTypes[newEssentialName(name)] + if len(types) == 0 { + return nil, fmt.Errorf("type name %s: %w", name, ErrNotFound) + } + + // Return a copy to prevent changes to namedTypes. + result := make([]Type, 0, len(types)) + for _, t := range types { + // Match against the full name, not just the essential one + // in case the type being looked up is a struct flavor. + if t.TypeName() == name { + result = append(result, t) + } + } + return result, nil +} + +// AnyTypeByName returns a Type with the given name. +// +// Returns an error if multiple types of that name exist. +func (s *Spec) AnyTypeByName(name string) (Type, error) { + types, err := s.AnyTypesByName(name) + if err != nil { + return nil, err + } + + if len(types) > 1 { + return nil, fmt.Errorf("found multiple types: %v", types) + } + + return types[0], nil +} + +// TypeByName searches for a Type with a specific name. Since multiple +// Types with the same name can exist, the parameter typ is taken to +// narrow down the search in case of a clash. +// +// typ must be a non-nil pointer to an implementation of a Type. +// On success, the address of the found Type will be copied to typ. +// +// Returns an error wrapping ErrNotFound if no matching +// Type exists in the Spec. If multiple candidates are found, +// an error is returned. +func (s *Spec) TypeByName(name string, typ interface{}) error { + typValue := reflect.ValueOf(typ) + if typValue.Kind() != reflect.Ptr { + return fmt.Errorf("%T is not a pointer", typ) + } + + typPtr := typValue.Elem() + if !typPtr.CanSet() { + return fmt.Errorf("%T cannot be set", typ) + } + + wanted := typPtr.Type() + if !wanted.AssignableTo(reflect.TypeOf((*Type)(nil)).Elem()) { + return fmt.Errorf("%T does not satisfy Type interface", typ) + } + + types, err := s.AnyTypesByName(name) + if err != nil { + return err + } + + var candidate Type + for _, typ := range types { + if reflect.TypeOf(typ) != wanted { + continue + } + + if candidate != nil { + return fmt.Errorf("type %s: multiple candidates for %T", name, typ) + } + + candidate = typ + } + + if candidate == nil { + return fmt.Errorf("type %s: %w", name, ErrNotFound) + } + + typPtr.Set(reflect.ValueOf(candidate)) + + return nil +} + +// LoadSplitSpecFromReader loads split BTF from a reader. +// +// Types from base are used to resolve references in the split BTF. +// The returned Spec only contains types from the split BTF, not from the base. +func LoadSplitSpecFromReader(r io.ReaderAt, base *Spec) (*Spec, error) { + return loadRawSpec(r, internal.NativeEndian, base.types, base.strings) +} + +// TypesIterator iterates over types of a given spec. +type TypesIterator struct { + spec *Spec + index int + // The last visited type in the spec. + Type Type +} + +// Iterate returns the types iterator. +func (s *Spec) Iterate() *TypesIterator { + return &TypesIterator{spec: s, index: 0} +} + +// Next returns true as long as there are any remaining types. +func (iter *TypesIterator) Next() bool { + if len(iter.spec.types) <= iter.index { + return false + } + + iter.Type = iter.spec.types[iter.index] + iter.index++ + return true +} + +// Handle is a reference to BTF loaded into the kernel. +type Handle struct { + fd *sys.FD + + // Size of the raw BTF in bytes. + size uint32 +} + +// NewHandle loads BTF into the kernel. +// +// Returns ErrNotSupported if BTF is not supported. +func NewHandle(spec *Spec) (*Handle, error) { + if err := haveBTF(); err != nil { + return nil, err + } + + if spec.byteOrder != internal.NativeEndian { + return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian) + } + + btf, err := spec.marshal(marshalOpts{ + ByteOrder: internal.NativeEndian, + StripFuncLinkage: haveFuncLinkage() != nil, + }) + if err != nil { + return nil, fmt.Errorf("can't marshal BTF: %w", err) + } + + if uint64(len(btf)) > math.MaxUint32 { + return nil, errors.New("BTF exceeds the maximum size") + } + + attr := &sys.BtfLoadAttr{ + Btf: sys.NewSlicePointer(btf), + BtfSize: uint32(len(btf)), + } + + fd, err := sys.BtfLoad(attr) + if err != nil { + logBuf := make([]byte, 64*1024) + attr.BtfLogBuf = sys.NewSlicePointer(logBuf) + attr.BtfLogSize = uint32(len(logBuf)) + attr.BtfLogLevel = 1 + // NB: The syscall will never return ENOSPC as of 5.18-rc4. + _, _ = sys.BtfLoad(attr) + return nil, internal.ErrorWithLog(err, logBuf) + } + + return &Handle{fd, attr.BtfSize}, nil +} + +// NewHandleFromID returns the BTF handle for a given id. +// +// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible. +// +// Returns ErrNotExist, if there is no BTF with the given id. +// +// Requires CAP_SYS_ADMIN. +func NewHandleFromID(id ID) (*Handle, error) { + fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{ + Id: uint32(id), + }) + if err != nil { + return nil, fmt.Errorf("get FD for ID %d: %w", id, err) + } + + info, err := newHandleInfoFromFD(fd) + if err != nil { + _ = fd.Close() + return nil, err + } + + return &Handle{fd, info.size}, nil +} + +// Spec parses the kernel BTF into Go types. +// +// base is used to decode split BTF and may be nil. +func (h *Handle) Spec(base *Spec) (*Spec, error) { + var btfInfo sys.BtfInfo + btfBuffer := make([]byte, h.size) + btfInfo.Btf, btfInfo.BtfSize = sys.NewSlicePointerLen(btfBuffer) + + if err := sys.ObjInfo(h.fd, &btfInfo); err != nil { + return nil, err + } + + var baseTypes types + var baseStrings *stringTable + if base != nil { + baseTypes = base.types + baseStrings = base.strings + } + + return loadRawSpec(bytes.NewReader(btfBuffer), internal.NativeEndian, baseTypes, baseStrings) +} + +// Close destroys the handle. +// +// Subsequent calls to FD will return an invalid value. +func (h *Handle) Close() error { + if h == nil { + return nil + } + + return h.fd.Close() +} + +// FD returns the file descriptor for the handle. +func (h *Handle) FD() int { + return h.fd.Int() +} + +// Info returns metadata about the handle. +func (h *Handle) Info() (*HandleInfo, error) { + return newHandleInfoFromFD(h.fd) +} + +func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte { + const minHeaderLength = 24 + + typesLen := uint32(binary.Size(types)) + header := btfHeader{ + Magic: btfMagic, + Version: 1, + HdrLen: minHeaderLength, + TypeOff: 0, + TypeLen: typesLen, + StringOff: typesLen, + StringLen: uint32(len(strings)), + } + + buf := new(bytes.Buffer) + _ = binary.Write(buf, bo, &header) + _ = binary.Write(buf, bo, types) + buf.Write(strings) + + return buf.Bytes() +} + +var haveBTF = internal.FeatureTest("BTF", "5.1", func() error { + var ( + types struct { + Integer btfType + Var btfType + btfVar struct{ Linkage uint32 } + } + strings = []byte{0, 'a', 0} + ) + + // We use a BTF_KIND_VAR here, to make sure that + // the kernel understands BTF at least as well as we + // do. BTF_KIND_VAR was introduced ~5.1. + types.Integer.SetKind(kindPointer) + types.Var.NameOff = 1 + types.Var.SetKind(kindVar) + types.Var.SizeType = 1 + + btf := marshalBTF(&types, strings, internal.NativeEndian) + + fd, err := sys.BtfLoad(&sys.BtfLoadAttr{ + Btf: sys.NewSlicePointer(btf), + BtfSize: uint32(len(btf)), + }) + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { + // Treat both EINVAL and EPERM as not supported: loading the program + // might still succeed without BTF. + return internal.ErrNotSupported + } + if err != nil { + return err + } + + fd.Close() + return nil +}) + +var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() error { + if err := haveBTF(); err != nil { + return err + } + + var ( + types struct { + FuncProto btfType + Func btfType + } + strings = []byte{0, 'a', 0} + ) + + types.FuncProto.SetKind(kindFuncProto) + types.Func.SetKind(kindFunc) + types.Func.SizeType = 1 // aka FuncProto + types.Func.NameOff = 1 + types.Func.SetLinkage(GlobalFunc) + + btf := marshalBTF(&types, strings, internal.NativeEndian) + + fd, err := sys.BtfLoad(&sys.BtfLoadAttr{ + Btf: sys.NewSlicePointer(btf), + BtfSize: uint32(len(btf)), + }) + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + if err != nil { + return err + } + + fd.Close() + return nil +}) diff --git a/vendor/github.com/cilium/ebpf/btf/btf_types.go b/vendor/github.com/cilium/ebpf/btf/btf_types.go new file mode 100644 index 000000000..481018049 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/btf_types.go @@ -0,0 +1,343 @@ +package btf + +import ( + "encoding/binary" + "fmt" + "io" +) + +//go:generate stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage + +// btfKind describes a Type. +type btfKind uint8 + +// Equivalents of the BTF_KIND_* constants. +const ( + kindUnknown btfKind = iota + kindInt + kindPointer + kindArray + kindStruct + kindUnion + kindEnum + kindForward + kindTypedef + kindVolatile + kindConst + kindRestrict + // Added ~4.20 + kindFunc + kindFuncProto + // Added ~5.1 + kindVar + kindDatasec + // Added ~5.13 + kindFloat +) + +// FuncLinkage describes BTF function linkage metadata. +type FuncLinkage int + +// Equivalent of enum btf_func_linkage. +const ( + StaticFunc FuncLinkage = iota // static + GlobalFunc // global + ExternFunc // extern +) + +// VarLinkage describes BTF variable linkage metadata. +type VarLinkage int + +const ( + StaticVar VarLinkage = iota // static + GlobalVar // global + ExternVar // extern +) + +const ( + btfTypeKindShift = 24 + btfTypeKindLen = 5 + btfTypeVlenShift = 0 + btfTypeVlenMask = 16 + btfTypeKindFlagShift = 31 + btfTypeKindFlagMask = 1 +) + +// btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst. +type btfType struct { + NameOff uint32 + /* "info" bits arrangement + * bits 0-15: vlen (e.g. # of struct's members), linkage + * bits 16-23: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd + */ + Info uint32 + /* "size" is used by INT, ENUM, STRUCT and UNION. + * "size" tells the size of the type it is describing. + * + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. + * "type" is a type_id referring to another type. + */ + SizeType uint32 +} + +func (k btfKind) String() string { + switch k { + case kindUnknown: + return "Unknown" + case kindInt: + return "Integer" + case kindPointer: + return "Pointer" + case kindArray: + return "Array" + case kindStruct: + return "Struct" + case kindUnion: + return "Union" + case kindEnum: + return "Enumeration" + case kindForward: + return "Forward" + case kindTypedef: + return "Typedef" + case kindVolatile: + return "Volatile" + case kindConst: + return "Const" + case kindRestrict: + return "Restrict" + case kindFunc: + return "Function" + case kindFuncProto: + return "Function Proto" + case kindVar: + return "Variable" + case kindDatasec: + return "Section" + case kindFloat: + return "Float" + default: + return fmt.Sprintf("Unknown (%d)", k) + } +} + +func mask(len uint32) uint32 { + return (1 << len) - 1 +} + +func readBits(value, len, shift uint32) uint32 { + return (value >> shift) & mask(len) +} + +func writeBits(value, len, shift, new uint32) uint32 { + value &^= mask(len) << shift + value |= (new & mask(len)) << shift + return value +} + +func (bt *btfType) info(len, shift uint32) uint32 { + return readBits(bt.Info, len, shift) +} + +func (bt *btfType) setInfo(value, len, shift uint32) { + bt.Info = writeBits(bt.Info, len, shift, value) +} + +func (bt *btfType) Kind() btfKind { + return btfKind(bt.info(btfTypeKindLen, btfTypeKindShift)) +} + +func (bt *btfType) SetKind(kind btfKind) { + bt.setInfo(uint32(kind), btfTypeKindLen, btfTypeKindShift) +} + +func (bt *btfType) Vlen() int { + return int(bt.info(btfTypeVlenMask, btfTypeVlenShift)) +} + +func (bt *btfType) SetVlen(vlen int) { + bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift) +} + +func (bt *btfType) KindFlag() bool { + return bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift) == 1 +} + +func (bt *btfType) Linkage() FuncLinkage { + return FuncLinkage(bt.info(btfTypeVlenMask, btfTypeVlenShift)) +} + +func (bt *btfType) SetLinkage(linkage FuncLinkage) { + bt.setInfo(uint32(linkage), btfTypeVlenMask, btfTypeVlenShift) +} + +func (bt *btfType) Type() TypeID { + // TODO: Panic here if wrong kind? + return TypeID(bt.SizeType) +} + +func (bt *btfType) Size() uint32 { + // TODO: Panic here if wrong kind? + return bt.SizeType +} + +func (bt *btfType) SetSize(size uint32) { + bt.SizeType = size +} + +type rawType struct { + btfType + data interface{} +} + +func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error { + if err := binary.Write(w, bo, &rt.btfType); err != nil { + return err + } + + if rt.data == nil { + return nil + } + + return binary.Write(w, bo, rt.data) +} + +// btfInt encodes additional data for integers. +// +// ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b +// ? = undefined +// e = encoding +// o = offset (bitfields?) +// b = bits (bitfields) +type btfInt struct { + Raw uint32 +} + +const ( + btfIntEncodingLen = 4 + btfIntEncodingShift = 24 + btfIntOffsetLen = 8 + btfIntOffsetShift = 16 + btfIntBitsLen = 8 + btfIntBitsShift = 0 +) + +func (bi btfInt) Encoding() IntEncoding { + return IntEncoding(readBits(bi.Raw, btfIntEncodingLen, btfIntEncodingShift)) +} + +func (bi *btfInt) SetEncoding(e IntEncoding) { + bi.Raw = writeBits(uint32(bi.Raw), btfIntEncodingLen, btfIntEncodingShift, uint32(e)) +} + +func (bi btfInt) Offset() Bits { + return Bits(readBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift)) +} + +func (bi *btfInt) SetOffset(offset uint32) { + bi.Raw = writeBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift, offset) +} + +func (bi btfInt) Bits() Bits { + return Bits(readBits(bi.Raw, btfIntBitsLen, btfIntBitsShift)) +} + +func (bi *btfInt) SetBits(bits byte) { + bi.Raw = writeBits(bi.Raw, btfIntBitsLen, btfIntBitsShift, uint32(bits)) +} + +type btfArray struct { + Type TypeID + IndexType TypeID + Nelems uint32 +} + +type btfMember struct { + NameOff uint32 + Type TypeID + Offset uint32 +} + +type btfVarSecinfo struct { + Type TypeID + Offset uint32 + Size uint32 +} + +type btfVariable struct { + Linkage uint32 +} + +type btfEnum struct { + NameOff uint32 + Val int32 +} + +type btfParam struct { + NameOff uint32 + Type TypeID +} + +func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, error) { + var header btfType + // because of the interleaving between types and struct members it is difficult to + // precompute the numbers of raw types this will parse + // this "guess" is a good first estimation + sizeOfbtfType := uintptr(binary.Size(btfType{})) + tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2 + types := make([]rawType, 0, tyMaxCount) + + for id := TypeID(1); ; id++ { + if err := binary.Read(r, bo, &header); err == io.EOF { + return types, nil + } else if err != nil { + return nil, fmt.Errorf("can't read type info for id %v: %v", id, err) + } + + var data interface{} + switch header.Kind() { + case kindInt: + data = new(btfInt) + case kindPointer: + case kindArray: + data = new(btfArray) + case kindStruct: + fallthrough + case kindUnion: + data = make([]btfMember, header.Vlen()) + case kindEnum: + data = make([]btfEnum, header.Vlen()) + case kindForward: + case kindTypedef: + case kindVolatile: + case kindConst: + case kindRestrict: + case kindFunc: + case kindFuncProto: + data = make([]btfParam, header.Vlen()) + case kindVar: + data = new(btfVariable) + case kindDatasec: + data = make([]btfVarSecinfo, header.Vlen()) + case kindFloat: + default: + return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind()) + } + + if data == nil { + types = append(types, rawType{header, nil}) + continue + } + + if err := binary.Read(r, bo, data); err != nil { + return nil, fmt.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err) + } + + types = append(types, rawType{header, data}) + } +} diff --git a/vendor/github.com/cilium/ebpf/btf/btf_types_string.go b/vendor/github.com/cilium/ebpf/btf/btf_types_string.go new file mode 100644 index 000000000..0e0c17d68 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/btf_types_string.go @@ -0,0 +1,44 @@ +// Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage"; DO NOT EDIT. + +package btf + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[StaticFunc-0] + _ = x[GlobalFunc-1] + _ = x[ExternFunc-2] +} + +const _FuncLinkage_name = "staticglobalextern" + +var _FuncLinkage_index = [...]uint8{0, 6, 12, 18} + +func (i FuncLinkage) String() string { + if i < 0 || i >= FuncLinkage(len(_FuncLinkage_index)-1) { + return "FuncLinkage(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _FuncLinkage_name[_FuncLinkage_index[i]:_FuncLinkage_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[StaticVar-0] + _ = x[GlobalVar-1] + _ = x[ExternVar-2] +} + +const _VarLinkage_name = "staticglobalextern" + +var _VarLinkage_index = [...]uint8{0, 6, 12, 18} + +func (i VarLinkage) String() string { + if i < 0 || i >= VarLinkage(len(_VarLinkage_index)-1) { + return "VarLinkage(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _VarLinkage_name[_VarLinkage_index[i]:_VarLinkage_index[i+1]] +} diff --git a/vendor/github.com/cilium/ebpf/btf/core.go b/vendor/github.com/cilium/ebpf/btf/core.go new file mode 100644 index 000000000..c48754809 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/core.go @@ -0,0 +1,972 @@ +package btf + +import ( + "encoding/binary" + "errors" + "fmt" + "math" + "reflect" + "strconv" + "strings" + + "github.com/cilium/ebpf/asm" +) + +// Code in this file is derived from libbpf, which is available under a BSD +// 2-Clause license. + +// COREFixup is the result of computing a CO-RE relocation for a target. +type COREFixup struct { + kind coreKind + local uint32 + target uint32 + // True if there is no valid fixup. The instruction is replaced with an + // invalid dummy. + poison bool + // True if the validation of the local value should be skipped. Used by + // some kinds of bitfield relocations. + skipLocalValidation bool +} + +func (f *COREFixup) equal(other COREFixup) bool { + return f.local == other.local && f.target == other.target +} + +func (f *COREFixup) String() string { + if f.poison { + return fmt.Sprintf("%s=poison", f.kind) + } + return fmt.Sprintf("%s=%d->%d", f.kind, f.local, f.target) +} + +func (f *COREFixup) Apply(ins *asm.Instruction) error { + if f.poison { + const badRelo = 0xbad2310 + + *ins = asm.BuiltinFunc(badRelo).Call() + return nil + } + + switch class := ins.OpCode.Class(); class { + case asm.LdXClass, asm.StClass, asm.StXClass: + if want := int16(f.local); !f.skipLocalValidation && want != ins.Offset { + return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, f.local) + } + + if f.target > math.MaxInt16 { + return fmt.Errorf("offset %d exceeds MaxInt16", f.target) + } + + ins.Offset = int16(f.target) + + case asm.LdClass: + if !ins.IsConstantLoad(asm.DWord) { + return fmt.Errorf("not a dword-sized immediate load") + } + + if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant { + return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v)", ins.Constant, want, f) + } + + ins.Constant = int64(f.target) + + case asm.ALUClass: + if ins.OpCode.ALUOp() == asm.Swap { + return fmt.Errorf("relocation against swap") + } + + fallthrough + + case asm.ALU64Class: + if src := ins.OpCode.Source(); src != asm.ImmSource { + return fmt.Errorf("invalid source %s", src) + } + + if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant { + return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v, kind: %v, ins: %v)", ins.Constant, want, f, f.kind, ins) + } + + if f.target > math.MaxInt32 { + return fmt.Errorf("immediate %d exceeds MaxInt32", f.target) + } + + ins.Constant = int64(f.target) + + default: + return fmt.Errorf("invalid class %s", class) + } + + return nil +} + +func (f COREFixup) isNonExistant() bool { + return f.kind.checksForExistence() && f.target == 0 +} + +// coreKind is the type of CO-RE relocation as specified in BPF source code. +type coreKind uint32 + +const ( + reloFieldByteOffset coreKind = iota /* field byte offset */ + reloFieldByteSize /* field size in bytes */ + reloFieldExists /* field existence in target kernel */ + reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */ + reloFieldLShiftU64 /* bitfield-specific left bitshift */ + reloFieldRShiftU64 /* bitfield-specific right bitshift */ + reloTypeIDLocal /* type ID in local BPF object */ + reloTypeIDTarget /* type ID in target kernel */ + reloTypeExists /* type existence in target kernel */ + reloTypeSize /* type size in bytes */ + reloEnumvalExists /* enum value existence in target kernel */ + reloEnumvalValue /* enum value integer value */ +) + +func (k coreKind) checksForExistence() bool { + return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists +} + +func (k coreKind) String() string { + switch k { + case reloFieldByteOffset: + return "byte_off" + case reloFieldByteSize: + return "byte_sz" + case reloFieldExists: + return "field_exists" + case reloFieldSigned: + return "signed" + case reloFieldLShiftU64: + return "lshift_u64" + case reloFieldRShiftU64: + return "rshift_u64" + case reloTypeIDLocal: + return "local_type_id" + case reloTypeIDTarget: + return "target_type_id" + case reloTypeExists: + return "type_exists" + case reloTypeSize: + return "type_size" + case reloEnumvalExists: + return "enumval_exists" + case reloEnumvalValue: + return "enumval_value" + default: + return "unknown" + } +} + +// CORERelocate calculates the difference in types between local and target. +// +// Returns a list of fixups which can be applied to instructions to make them +// match the target type(s). +// +// Fixups are returned in the order of relos, e.g. fixup[i] is the solution +// for relos[i]. +func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, error) { + if local.byteOrder != target.byteOrder { + return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder) + } + + type reloGroup struct { + relos []*CORERelocation + // Position of each relocation in relos. + indices []int + } + + // Split relocations into per Type lists. + relosByType := make(map[Type]*reloGroup) + result := make([]COREFixup, len(relos)) + for i, relo := range relos { + if relo.kind == reloTypeIDLocal { + // Filtering out reloTypeIDLocal here makes our lives a lot easier + // down the line, since it doesn't have a target at all. + if len(relo.accessor) > 1 || relo.accessor[0] != 0 { + return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) + } + + id, err := local.TypeID(relo.typ) + if err != nil { + return nil, fmt.Errorf("%s: %w", relo.kind, err) + } + + result[i] = COREFixup{ + kind: relo.kind, + local: uint32(id), + target: uint32(id), + } + continue + } + + group, ok := relosByType[relo.typ] + if !ok { + group = &reloGroup{} + relosByType[relo.typ] = group + } + group.relos = append(group.relos, relo) + group.indices = append(group.indices, i) + } + + for localType, group := range relosByType { + localTypeName := localType.TypeName() + if localTypeName == "" { + return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported) + } + + targets := target.namedTypes[newEssentialName(localTypeName)] + fixups, err := coreCalculateFixups(local, target, localType, targets, group.relos) + if err != nil { + return nil, fmt.Errorf("relocate %s: %w", localType, err) + } + + for j, index := range group.indices { + result[index] = fixups[j] + } + } + + return result, nil +} + +var errAmbiguousRelocation = errors.New("ambiguous relocation") +var errImpossibleRelocation = errors.New("impossible relocation") + +// coreCalculateFixups calculates the fixups for the given relocations using +// the "best" target. +// +// The best target is determined by scoring: the less poisoning we have to do +// the better the target is. +func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type, relos []*CORERelocation) ([]COREFixup, error) { + localID, err := localSpec.TypeID(local) + if err != nil { + return nil, fmt.Errorf("local type ID: %w", err) + } + local = Copy(local, UnderlyingType) + + bestScore := len(relos) + var bestFixups []COREFixup + for i := range targets { + targetID, err := targetSpec.TypeID(targets[i]) + if err != nil { + return nil, fmt.Errorf("target type ID: %w", err) + } + target := Copy(targets[i], UnderlyingType) + + score := 0 // lower is better + fixups := make([]COREFixup, 0, len(relos)) + for _, relo := range relos { + fixup, err := coreCalculateFixup(localSpec.byteOrder, local, localID, target, targetID, relo) + if err != nil { + return nil, fmt.Errorf("target %s: %w", target, err) + } + if fixup.poison || fixup.isNonExistant() { + score++ + } + fixups = append(fixups, fixup) + } + + if score > bestScore { + // We have a better target already, ignore this one. + continue + } + + if score < bestScore { + // This is the best target yet, use it. + bestScore = score + bestFixups = fixups + continue + } + + // Some other target has the same score as the current one. Make sure + // the fixups agree with each other. + for i, fixup := range bestFixups { + if !fixup.equal(fixups[i]) { + return nil, fmt.Errorf("%s: multiple types match: %w", fixup.kind, errAmbiguousRelocation) + } + } + } + + if bestFixups == nil { + // Nothing at all matched, probably because there are no suitable + // targets at all. + // + // Poison everything except checksForExistence. + bestFixups = make([]COREFixup, len(relos)) + for i, relo := range relos { + if relo.kind.checksForExistence() { + bestFixups[i] = COREFixup{kind: relo.kind, local: 1, target: 0} + } else { + bestFixups[i] = COREFixup{kind: relo.kind, poison: true} + } + } + } + + return bestFixups, nil +} + +// coreCalculateFixup calculates the fixup for a single local type, target type +// and relocation. +func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, target Type, targetID TypeID, relo *CORERelocation) (COREFixup, error) { + fixup := func(local, target uint32) (COREFixup, error) { + return COREFixup{kind: relo.kind, local: local, target: target}, nil + } + fixupWithoutValidation := func(local, target uint32) (COREFixup, error) { + return COREFixup{kind: relo.kind, local: local, target: target, skipLocalValidation: true}, nil + } + poison := func() (COREFixup, error) { + if relo.kind.checksForExistence() { + return fixup(1, 0) + } + return COREFixup{kind: relo.kind, poison: true}, nil + } + zero := COREFixup{} + + switch relo.kind { + case reloTypeIDTarget, reloTypeSize, reloTypeExists: + if len(relo.accessor) > 1 || relo.accessor[0] != 0 { + return zero, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) + } + + err := coreAreTypesCompatible(local, target) + if errors.Is(err, errImpossibleRelocation) { + return poison() + } + if err != nil { + return zero, fmt.Errorf("relocation %s: %w", relo.kind, err) + } + + switch relo.kind { + case reloTypeExists: + return fixup(1, 1) + + case reloTypeIDTarget: + return fixup(uint32(localID), uint32(targetID)) + + case reloTypeSize: + localSize, err := Sizeof(local) + if err != nil { + return zero, err + } + + targetSize, err := Sizeof(target) + if err != nil { + return zero, err + } + + return fixup(uint32(localSize), uint32(targetSize)) + } + + case reloEnumvalValue, reloEnumvalExists: + localValue, targetValue, err := coreFindEnumValue(local, relo.accessor, target) + if errors.Is(err, errImpossibleRelocation) { + return poison() + } + if err != nil { + return zero, fmt.Errorf("relocation %s: %w", relo.kind, err) + } + + switch relo.kind { + case reloEnumvalExists: + return fixup(1, 1) + + case reloEnumvalValue: + return fixup(uint32(localValue.Value), uint32(targetValue.Value)) + } + + case reloFieldSigned: + switch local.(type) { + case *Enum: + return fixup(1, 1) + case *Int: + return fixup( + uint32(local.(*Int).Encoding&Signed), + uint32(target.(*Int).Encoding&Signed), + ) + default: + return fixupWithoutValidation(0, 0) + } + + case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64: + if _, ok := target.(*Fwd); ok { + // We can't relocate fields using a forward declaration, so + // skip it. If a non-forward declaration is present in the BTF + // we'll find it in one of the other iterations. + return poison() + } + + localField, targetField, err := coreFindField(local, relo.accessor, target) + if errors.Is(err, errImpossibleRelocation) { + return poison() + } + if err != nil { + return zero, fmt.Errorf("target %s: %w", target, err) + } + + maybeSkipValidation := func(f COREFixup, err error) (COREFixup, error) { + f.skipLocalValidation = localField.bitfieldSize > 0 + return f, err + } + + switch relo.kind { + case reloFieldExists: + return fixup(1, 1) + + case reloFieldByteOffset: + return maybeSkipValidation(fixup(localField.offset, targetField.offset)) + + case reloFieldByteSize: + localSize, err := Sizeof(localField.Type) + if err != nil { + return zero, err + } + + targetSize, err := Sizeof(targetField.Type) + if err != nil { + return zero, err + } + return maybeSkipValidation(fixup(uint32(localSize), uint32(targetSize))) + + case reloFieldLShiftU64: + var target uint32 + if byteOrder == binary.LittleEndian { + targetSize, err := targetField.sizeBits() + if err != nil { + return zero, err + } + + target = uint32(64 - targetField.bitfieldOffset - targetSize) + } else { + loadWidth, err := Sizeof(targetField.Type) + if err != nil { + return zero, err + } + + target = uint32(64 - Bits(loadWidth*8) + targetField.bitfieldOffset) + } + return fixupWithoutValidation(0, target) + + case reloFieldRShiftU64: + targetSize, err := targetField.sizeBits() + if err != nil { + return zero, err + } + + return fixupWithoutValidation(0, uint32(64-targetSize)) + } + } + + return zero, fmt.Errorf("relocation %s: %w", relo.kind, ErrNotSupported) +} + +/* coreAccessor contains a path through a struct. It contains at least one index. + * + * The interpretation depends on the kind of the relocation. The following is + * taken from struct bpf_core_relo in libbpf_internal.h: + * + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * + * Example to provide a better feel. + * + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample s = ...; + * int x = &s->a; // encoded as "0:0" (a is field #0) + * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + */ +type coreAccessor []int + +func parseCOREAccessor(accessor string) (coreAccessor, error) { + if accessor == "" { + return nil, fmt.Errorf("empty accessor") + } + + parts := strings.Split(accessor, ":") + result := make(coreAccessor, 0, len(parts)) + for _, part := range parts { + // 31 bits to avoid overflowing int on 32 bit platforms. + index, err := strconv.ParseUint(part, 10, 31) + if err != nil { + return nil, fmt.Errorf("accessor index %q: %s", part, err) + } + + result = append(result, int(index)) + } + + return result, nil +} + +func (ca coreAccessor) String() string { + strs := make([]string, 0, len(ca)) + for _, i := range ca { + strs = append(strs, strconv.Itoa(i)) + } + return strings.Join(strs, ":") +} + +func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) { + e, ok := t.(*Enum) + if !ok { + return nil, fmt.Errorf("not an enum: %s", t) + } + + if len(ca) > 1 { + return nil, fmt.Errorf("invalid accessor %s for enum", ca) + } + + i := ca[0] + if i >= len(e.Values) { + return nil, fmt.Errorf("invalid index %d for %s", i, e) + } + + return &e.Values[i], nil +} + +// coreField represents the position of a "child" of a composite type from the +// start of that type. +// +// /- start of composite +// | offset * 8 | bitfieldOffset | bitfieldSize | ... | +// \- start of field end of field -/ +type coreField struct { + Type Type + + // The position of the field from the start of the composite type in bytes. + offset uint32 + + // The offset of the bitfield in bits from the start of the field. + bitfieldOffset Bits + + // The size of the bitfield in bits. + // + // Zero if the field is not a bitfield. + bitfieldSize Bits +} + +func (cf *coreField) adjustOffsetToNthElement(n int) error { + size, err := Sizeof(cf.Type) + if err != nil { + return err + } + + cf.offset += uint32(n) * uint32(size) + return nil +} + +func (cf *coreField) adjustOffsetBits(offset Bits) error { + align, err := alignof(cf.Type) + if err != nil { + return err + } + + // We can compute the load offset by: + // 1) converting the bit offset to bytes with a flooring division. + // 2) dividing and multiplying that offset by the alignment, yielding the + // load size aligned offset. + offsetBytes := uint32(offset/8) / uint32(align) * uint32(align) + + // The number of bits remaining is the bit offset less the number of bits + // we can "skip" with the aligned offset. + cf.bitfieldOffset = offset - Bits(offsetBytes*8) + + // We know that cf.offset is aligned at to at least align since we get it + // from the compiler via BTF. Adding an aligned offsetBytes preserves the + // alignment. + cf.offset += offsetBytes + return nil +} + +func (cf *coreField) sizeBits() (Bits, error) { + if cf.bitfieldSize > 0 { + return cf.bitfieldSize, nil + } + + // Someone is trying to access a non-bitfield via a bit shift relocation. + // This happens when a field changes from a bitfield to a regular field + // between kernel versions. Synthesise the size to make the shifts work. + size, err := Sizeof(cf.Type) + if err != nil { + return 0, nil + } + return Bits(size * 8), nil +} + +// coreFindField descends into the local type using the accessor and tries to +// find an equivalent field in target at each step. +// +// Returns the field and the offset of the field from the start of +// target in bits. +func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, coreField, error) { + local := coreField{Type: localT} + target := coreField{Type: targetT} + + // The first index is used to offset a pointer of the base type like + // when accessing an array. + if err := local.adjustOffsetToNthElement(localAcc[0]); err != nil { + return coreField{}, coreField{}, err + } + + if err := target.adjustOffsetToNthElement(localAcc[0]); err != nil { + return coreField{}, coreField{}, err + } + + if err := coreAreMembersCompatible(local.Type, target.Type); err != nil { + return coreField{}, coreField{}, fmt.Errorf("fields: %w", err) + } + + var localMaybeFlex, targetMaybeFlex bool + for i, acc := range localAcc[1:] { + switch localType := local.Type.(type) { + case composite: + // For composite types acc is used to find the field in the local type, + // and then we try to find a field in target with the same name. + localMembers := localType.members() + if acc >= len(localMembers) { + return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, localType) + } + + localMember := localMembers[acc] + if localMember.Name == "" { + _, ok := localMember.Type.(composite) + if !ok { + return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported) + } + + // This is an anonymous struct or union, ignore it. + local = coreField{ + Type: localMember.Type, + offset: local.offset + localMember.Offset.Bytes(), + } + localMaybeFlex = false + continue + } + + targetType, ok := target.Type.(composite) + if !ok { + return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation) + } + + targetMember, last, err := coreFindMember(targetType, localMember.Name) + if err != nil { + return coreField{}, coreField{}, err + } + + local = coreField{ + Type: localMember.Type, + offset: local.offset, + bitfieldSize: localMember.BitfieldSize, + } + localMaybeFlex = acc == len(localMembers)-1 + + target = coreField{ + Type: targetMember.Type, + offset: target.offset, + bitfieldSize: targetMember.BitfieldSize, + } + targetMaybeFlex = last + + if local.bitfieldSize == 0 && target.bitfieldSize == 0 { + local.offset += localMember.Offset.Bytes() + target.offset += targetMember.Offset.Bytes() + break + } + + // Either of the members is a bitfield. Make sure we're at the + // end of the accessor. + if next := i + 1; next < len(localAcc[1:]) { + return coreField{}, coreField{}, fmt.Errorf("can't descend into bitfield") + } + + if err := local.adjustOffsetBits(localMember.Offset); err != nil { + return coreField{}, coreField{}, err + } + + if err := target.adjustOffsetBits(targetMember.Offset); err != nil { + return coreField{}, coreField{}, err + } + + case *Array: + // For arrays, acc is the index in the target. + targetType, ok := target.Type.(*Array) + if !ok { + return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation) + } + + if localType.Nelems == 0 && !localMaybeFlex { + return coreField{}, coreField{}, fmt.Errorf("local type has invalid flexible array") + } + if targetType.Nelems == 0 && !targetMaybeFlex { + return coreField{}, coreField{}, fmt.Errorf("target type has invalid flexible array") + } + + if localType.Nelems > 0 && acc >= int(localType.Nelems) { + return coreField{}, coreField{}, fmt.Errorf("invalid access of %s at index %d", localType, acc) + } + if targetType.Nelems > 0 && acc >= int(targetType.Nelems) { + return coreField{}, coreField{}, fmt.Errorf("out of bounds access of target: %w", errImpossibleRelocation) + } + + local = coreField{ + Type: localType.Type, + offset: local.offset, + } + localMaybeFlex = false + + if err := local.adjustOffsetToNthElement(acc); err != nil { + return coreField{}, coreField{}, err + } + + target = coreField{ + Type: targetType.Type, + offset: target.offset, + } + targetMaybeFlex = false + + if err := target.adjustOffsetToNthElement(acc); err != nil { + return coreField{}, coreField{}, err + } + + default: + return coreField{}, coreField{}, fmt.Errorf("relocate field of %T: %w", localType, ErrNotSupported) + } + + if err := coreAreMembersCompatible(local.Type, target.Type); err != nil { + return coreField{}, coreField{}, err + } + } + + return local, target, nil +} + +// coreFindMember finds a member in a composite type while handling anonymous +// structs and unions. +func coreFindMember(typ composite, name string) (Member, bool, error) { + if name == "" { + return Member{}, false, errors.New("can't search for anonymous member") + } + + type offsetTarget struct { + composite + offset Bits + } + + targets := []offsetTarget{{typ, 0}} + visited := make(map[composite]bool) + + for i := 0; i < len(targets); i++ { + target := targets[i] + + // Only visit targets once to prevent infinite recursion. + if visited[target] { + continue + } + if len(visited) >= maxTypeDepth { + // This check is different than libbpf, which restricts the entire + // path to BPF_CORE_SPEC_MAX_LEN items. + return Member{}, false, fmt.Errorf("type is nested too deep") + } + visited[target] = true + + members := target.members() + for j, member := range members { + if member.Name == name { + // NB: This is safe because member is a copy. + member.Offset += target.offset + return member, j == len(members)-1, nil + } + + // The names don't match, but this member could be an anonymous struct + // or union. + if member.Name != "" { + continue + } + + comp, ok := member.Type.(composite) + if !ok { + return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type) + } + + targets = append(targets, offsetTarget{comp, target.offset + member.Offset}) + } + } + + return Member{}, false, fmt.Errorf("no matching member: %w", errImpossibleRelocation) +} + +// coreFindEnumValue follows localAcc to find the equivalent enum value in target. +func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localValue, targetValue *EnumValue, _ error) { + localValue, err := localAcc.enumValue(local) + if err != nil { + return nil, nil, err + } + + targetEnum, ok := target.(*Enum) + if !ok { + return nil, nil, errImpossibleRelocation + } + + localName := newEssentialName(localValue.Name) + for i, targetValue := range targetEnum.Values { + if newEssentialName(targetValue.Name) != localName { + continue + } + + return localValue, &targetEnum.Values[i], nil + } + + return nil, nil, errImpossibleRelocation +} + +/* The comment below is from bpf_core_types_are_compat in libbpf.c: + * + * Check local and target types for compatibility. This check is used for + * type-based CO-RE relocations and follow slightly different rules than + * field-based relocations. This function assumes that root types were already + * checked for name match. Beyond that initial root-level name check, names + * are completely ignored. Compatibility rules are as follows: + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but + * kind should match for local and target types (i.e., STRUCT is not + * compatible with UNION); + * - for ENUMs, the size is ignored; + * - for INT, size and signedness are ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - CONST/VOLATILE/RESTRICT modifiers are ignored; + * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; + * - FUNC_PROTOs are compatible if they have compatible signature: same + * number of input args and compatible return and argument types. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + * + * Returns errImpossibleRelocation if types are not compatible. + */ +func coreAreTypesCompatible(localType Type, targetType Type) error { + var ( + localTs, targetTs typeDeque + l, t = &localType, &targetType + depth = 0 + ) + + for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() { + if depth >= maxTypeDepth { + return errors.New("types are nested too deep") + } + + localType = *l + targetType = *t + + if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { + return fmt.Errorf("type mismatch: %w", errImpossibleRelocation) + } + + switch lv := (localType).(type) { + case *Void, *Struct, *Union, *Enum, *Fwd, *Int: + // Nothing to do here + + case *Pointer, *Array: + depth++ + localType.walk(&localTs) + targetType.walk(&targetTs) + + case *FuncProto: + tv := targetType.(*FuncProto) + if len(lv.Params) != len(tv.Params) { + return fmt.Errorf("function param mismatch: %w", errImpossibleRelocation) + } + + depth++ + localType.walk(&localTs) + targetType.walk(&targetTs) + + default: + return fmt.Errorf("unsupported type %T", localType) + } + } + + if l != nil { + return fmt.Errorf("dangling local type %T", *l) + } + + if t != nil { + return fmt.Errorf("dangling target type %T", *t) + } + + return nil +} + +/* coreAreMembersCompatible checks two types for field-based relocation compatibility. + * + * The comment below is from bpf_core_fields_are_compat in libbpf.c: + * + * Check two types for compatibility for the purpose of field access + * relocation. const/volatile/restrict and typedefs are skipped to ensure we + * are relocating semantically compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible, if their names match (modulo flavor suffix); + * - any two PTRs are always compatible; + * - for ENUMs, names should be the same (ignoring flavor suffix) or at + * least one of enums should be anonymous; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and signedness are ignored; + * - any two FLOATs are always compatible; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * [ NB: coreAreMembersCompatible doesn't recurse, this check is done + * by coreFindField. ] + * - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + * + * Returns errImpossibleRelocation if the members are not compatible. + */ +func coreAreMembersCompatible(localType Type, targetType Type) error { + doNamesMatch := func(a, b string) error { + if a == "" || b == "" { + // allow anonymous and named type to match + return nil + } + + if newEssentialName(a) == newEssentialName(b) { + return nil + } + + return fmt.Errorf("names don't match: %w", errImpossibleRelocation) + } + + _, lok := localType.(composite) + _, tok := targetType.(composite) + if lok && tok { + return nil + } + + if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { + return fmt.Errorf("type mismatch: %w", errImpossibleRelocation) + } + + switch lv := localType.(type) { + case *Array, *Pointer, *Float, *Int: + return nil + + case *Enum: + tv := targetType.(*Enum) + return doNamesMatch(lv.Name, tv.Name) + + case *Fwd: + tv := targetType.(*Fwd) + return doNamesMatch(lv.Name, tv.Name) + + default: + return fmt.Errorf("type %s: %w", localType, ErrNotSupported) + } +} diff --git a/vendor/github.com/cilium/ebpf/btf/doc.go b/vendor/github.com/cilium/ebpf/btf/doc.go new file mode 100644 index 000000000..b1f4b1fc3 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/doc.go @@ -0,0 +1,5 @@ +// Package btf handles data encoded according to the BPF Type Format. +// +// The canonical documentation lives in the Linux kernel repository and is +// available at https://www.kernel.org/doc/html/latest/bpf/btf.html +package btf diff --git a/vendor/github.com/cilium/ebpf/btf/ext_info.go b/vendor/github.com/cilium/ebpf/btf/ext_info.go new file mode 100644 index 000000000..2c0e1afe2 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/ext_info.go @@ -0,0 +1,721 @@ +package btf + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "sort" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" +) + +// ExtInfos contains ELF section metadata. +type ExtInfos struct { + // The slices are sorted by offset in ascending order. + funcInfos map[string][]funcInfo + lineInfos map[string][]lineInfo + relocationInfos map[string][]coreRelocationInfo +} + +// loadExtInfosFromELF parses ext infos from the .BTF.ext section in an ELF. +// +// Returns an error wrapping ErrNotFound if no ext infos are present. +func loadExtInfosFromELF(file *internal.SafeELFFile, ts types, strings *stringTable) (*ExtInfos, error) { + section := file.Section(".BTF.ext") + if section == nil { + return nil, fmt.Errorf("btf ext infos: %w", ErrNotFound) + } + + if section.ReaderAt == nil { + return nil, fmt.Errorf("compressed ext_info is not supported") + } + + return loadExtInfos(section.ReaderAt, file.ByteOrder, ts, strings) +} + +// loadExtInfos parses bare ext infos. +func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringTable) (*ExtInfos, error) { + // Open unbuffered section reader. binary.Read() calls io.ReadFull on + // the header structs, resulting in one syscall per header. + headerRd := io.NewSectionReader(r, 0, math.MaxInt64) + extHeader, err := parseBTFExtHeader(headerRd, bo) + if err != nil { + return nil, fmt.Errorf("parsing BTF extension header: %w", err) + } + + coreHeader, err := parseBTFExtCOREHeader(headerRd, bo, extHeader) + if err != nil { + return nil, fmt.Errorf("parsing BTF CO-RE header: %w", err) + } + + buf := internal.NewBufferedSectionReader(r, extHeader.funcInfoStart(), int64(extHeader.FuncInfoLen)) + btfFuncInfos, err := parseFuncInfos(buf, bo, strings) + if err != nil { + return nil, fmt.Errorf("parsing BTF function info: %w", err) + } + + funcInfos := make(map[string][]funcInfo, len(btfFuncInfos)) + for section, bfis := range btfFuncInfos { + funcInfos[section], err = newFuncInfos(bfis, ts) + if err != nil { + return nil, fmt.Errorf("section %s: func infos: %w", section, err) + } + } + + buf = internal.NewBufferedSectionReader(r, extHeader.lineInfoStart(), int64(extHeader.LineInfoLen)) + btfLineInfos, err := parseLineInfos(buf, bo, strings) + if err != nil { + return nil, fmt.Errorf("parsing BTF line info: %w", err) + } + + lineInfos := make(map[string][]lineInfo, len(btfLineInfos)) + for section, blis := range btfLineInfos { + lineInfos[section], err = newLineInfos(blis, strings) + if err != nil { + return nil, fmt.Errorf("section %s: line infos: %w", section, err) + } + } + + if coreHeader == nil || coreHeader.COREReloLen == 0 { + return &ExtInfos{funcInfos, lineInfos, nil}, nil + } + + var btfCORERelos map[string][]bpfCORERelo + buf = internal.NewBufferedSectionReader(r, extHeader.coreReloStart(coreHeader), int64(coreHeader.COREReloLen)) + btfCORERelos, err = parseCORERelos(buf, bo, strings) + if err != nil { + return nil, fmt.Errorf("parsing CO-RE relocation info: %w", err) + } + + coreRelos := make(map[string][]coreRelocationInfo, len(btfCORERelos)) + for section, brs := range btfCORERelos { + coreRelos[section], err = newRelocationInfos(brs, ts, strings) + if err != nil { + return nil, fmt.Errorf("section %s: CO-RE relocations: %w", section, err) + } + } + + return &ExtInfos{funcInfos, lineInfos, coreRelos}, nil +} + +type funcInfoMeta struct{} +type coreRelocationMeta struct{} + +// Assign per-section metadata from BTF to a section's instructions. +func (ei *ExtInfos) Assign(insns asm.Instructions, section string) { + funcInfos := ei.funcInfos[section] + lineInfos := ei.lineInfos[section] + reloInfos := ei.relocationInfos[section] + + iter := insns.Iterate() + for iter.Next() { + if len(funcInfos) > 0 && funcInfos[0].offset == iter.Offset { + iter.Ins.Metadata.Set(funcInfoMeta{}, funcInfos[0].fn) + funcInfos = funcInfos[1:] + } + + if len(lineInfos) > 0 && lineInfos[0].offset == iter.Offset { + *iter.Ins = iter.Ins.WithSource(lineInfos[0].line) + lineInfos = lineInfos[1:] + } + + if len(reloInfos) > 0 && reloInfos[0].offset == iter.Offset { + iter.Ins.Metadata.Set(coreRelocationMeta{}, reloInfos[0].relo) + reloInfos = reloInfos[1:] + } + } +} + +// MarshalExtInfos encodes function and line info embedded in insns into kernel +// wire format. +func MarshalExtInfos(insns asm.Instructions, typeID func(Type) (TypeID, error)) (funcInfos, lineInfos []byte, _ error) { + iter := insns.Iterate() + var fiBuf, liBuf bytes.Buffer + for iter.Next() { + if fn := FuncMetadata(iter.Ins); fn != nil { + fi := &funcInfo{ + fn: fn, + offset: iter.Offset, + } + if err := fi.marshal(&fiBuf, typeID); err != nil { + return nil, nil, fmt.Errorf("write func info: %w", err) + } + } + + if line, ok := iter.Ins.Source().(*Line); ok { + li := &lineInfo{ + line: line, + offset: iter.Offset, + } + if err := li.marshal(&liBuf); err != nil { + return nil, nil, fmt.Errorf("write line info: %w", err) + } + } + } + return fiBuf.Bytes(), liBuf.Bytes(), nil +} + +// btfExtHeader is found at the start of the .BTF.ext section. +type btfExtHeader struct { + Magic uint16 + Version uint8 + Flags uint8 + + // HdrLen is larger than the size of struct btfExtHeader when it is + // immediately followed by a btfExtCOREHeader. + HdrLen uint32 + + FuncInfoOff uint32 + FuncInfoLen uint32 + LineInfoOff uint32 + LineInfoLen uint32 +} + +// parseBTFExtHeader parses the header of the .BTF.ext section. +func parseBTFExtHeader(r io.Reader, bo binary.ByteOrder) (*btfExtHeader, error) { + var header btfExtHeader + if err := binary.Read(r, bo, &header); err != nil { + return nil, fmt.Errorf("can't read header: %v", err) + } + + if header.Magic != btfMagic { + return nil, fmt.Errorf("incorrect magic value %v", header.Magic) + } + + if header.Version != 1 { + return nil, fmt.Errorf("unexpected version %v", header.Version) + } + + if header.Flags != 0 { + return nil, fmt.Errorf("unsupported flags %v", header.Flags) + } + + if int64(header.HdrLen) < int64(binary.Size(&header)) { + return nil, fmt.Errorf("header length shorter than btfExtHeader size") + } + + return &header, nil +} + +// funcInfoStart returns the offset from the beginning of the .BTF.ext section +// to the start of its func_info entries. +func (h *btfExtHeader) funcInfoStart() int64 { + return int64(h.HdrLen + h.FuncInfoOff) +} + +// lineInfoStart returns the offset from the beginning of the .BTF.ext section +// to the start of its line_info entries. +func (h *btfExtHeader) lineInfoStart() int64 { + return int64(h.HdrLen + h.LineInfoOff) +} + +// coreReloStart returns the offset from the beginning of the .BTF.ext section +// to the start of its CO-RE relocation entries. +func (h *btfExtHeader) coreReloStart(ch *btfExtCOREHeader) int64 { + return int64(h.HdrLen + ch.COREReloOff) +} + +// btfExtCOREHeader is found right after the btfExtHeader when its HdrLen +// field is larger than its size. +type btfExtCOREHeader struct { + COREReloOff uint32 + COREReloLen uint32 +} + +// parseBTFExtCOREHeader parses the tail of the .BTF.ext header. If additional +// header bytes are present, extHeader.HdrLen will be larger than the struct, +// indicating the presence of a CO-RE extension header. +func parseBTFExtCOREHeader(r io.Reader, bo binary.ByteOrder, extHeader *btfExtHeader) (*btfExtCOREHeader, error) { + extHdrSize := int64(binary.Size(&extHeader)) + remainder := int64(extHeader.HdrLen) - extHdrSize + + if remainder == 0 { + return nil, nil + } + + var coreHeader btfExtCOREHeader + if err := binary.Read(r, bo, &coreHeader); err != nil { + return nil, fmt.Errorf("can't read header: %v", err) + } + + return &coreHeader, nil +} + +type btfExtInfoSec struct { + SecNameOff uint32 + NumInfo uint32 +} + +// parseExtInfoSec parses a btf_ext_info_sec header within .BTF.ext, +// appearing within func_info and line_info sub-sections. +// These headers appear once for each program section in the ELF and are +// followed by one or more func/line_info records for the section. +func parseExtInfoSec(r io.Reader, bo binary.ByteOrder, strings *stringTable) (string, *btfExtInfoSec, error) { + var infoHeader btfExtInfoSec + if err := binary.Read(r, bo, &infoHeader); err != nil { + return "", nil, fmt.Errorf("read ext info header: %w", err) + } + + secName, err := strings.Lookup(infoHeader.SecNameOff) + if err != nil { + return "", nil, fmt.Errorf("get section name: %w", err) + } + if secName == "" { + return "", nil, fmt.Errorf("extinfo header refers to empty section name") + } + + if infoHeader.NumInfo == 0 { + return "", nil, fmt.Errorf("section %s has zero records", secName) + } + + return secName, &infoHeader, nil +} + +// parseExtInfoRecordSize parses the uint32 at the beginning of a func_infos +// or line_infos segment that describes the length of all extInfoRecords in +// that segment. +func parseExtInfoRecordSize(r io.Reader, bo binary.ByteOrder) (uint32, error) { + const maxRecordSize = 256 + + var recordSize uint32 + if err := binary.Read(r, bo, &recordSize); err != nil { + return 0, fmt.Errorf("can't read record size: %v", err) + } + + if recordSize < 4 { + // Need at least InsnOff worth of bytes per record. + return 0, errors.New("record size too short") + } + if recordSize > maxRecordSize { + return 0, fmt.Errorf("record size %v exceeds %v", recordSize, maxRecordSize) + } + + return recordSize, nil +} + +// The size of a FuncInfo in BTF wire format. +var FuncInfoSize = uint32(binary.Size(bpfFuncInfo{})) + +type funcInfo struct { + fn *Func + offset asm.RawInstructionOffset +} + +type bpfFuncInfo struct { + // Instruction offset of the function within an ELF section. + InsnOff uint32 + TypeID TypeID +} + +func newFuncInfo(fi bpfFuncInfo, ts types) (*funcInfo, error) { + typ, err := ts.ByID(fi.TypeID) + if err != nil { + return nil, err + } + + fn, ok := typ.(*Func) + if !ok { + return nil, fmt.Errorf("type ID %d is a %T, but expected a Func", fi.TypeID, typ) + } + + // C doesn't have anonymous functions, but check just in case. + if fn.Name == "" { + return nil, fmt.Errorf("func with type ID %d doesn't have a name", fi.TypeID) + } + + return &funcInfo{ + fn, + asm.RawInstructionOffset(fi.InsnOff), + }, nil +} + +func newFuncInfos(bfis []bpfFuncInfo, ts types) ([]funcInfo, error) { + fis := make([]funcInfo, 0, len(bfis)) + for _, bfi := range bfis { + fi, err := newFuncInfo(bfi, ts) + if err != nil { + return nil, fmt.Errorf("offset %d: %w", bfi.InsnOff, err) + } + fis = append(fis, *fi) + } + sort.Slice(fis, func(i, j int) bool { + return fis[i].offset <= fis[j].offset + }) + return fis, nil +} + +// marshal into the BTF wire format. +func (fi *funcInfo) marshal(w io.Writer, typeID func(Type) (TypeID, error)) error { + id, err := typeID(fi.fn) + if err != nil { + return err + } + bfi := bpfFuncInfo{ + InsnOff: uint32(fi.offset), + TypeID: id, + } + return binary.Write(w, internal.NativeEndian, &bfi) +} + +// parseLineInfos parses a func_info sub-section within .BTF.ext ito a map of +// func infos indexed by section name. +func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfFuncInfo, error) { + recordSize, err := parseExtInfoRecordSize(r, bo) + if err != nil { + return nil, err + } + + result := make(map[string][]bpfFuncInfo) + for { + secName, infoHeader, err := parseExtInfoSec(r, bo, strings) + if errors.Is(err, io.EOF) { + return result, nil + } + if err != nil { + return nil, err + } + + records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo) + if err != nil { + return nil, fmt.Errorf("section %v: %w", secName, err) + } + + result[secName] = records + } +} + +// parseFuncInfoRecords parses a stream of func_infos into a funcInfos. +// These records appear after a btf_ext_info_sec header in the func_info +// sub-section of .BTF.ext. +func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfFuncInfo, error) { + var out []bpfFuncInfo + var fi bpfFuncInfo + + if exp, got := FuncInfoSize, recordSize; exp != got { + // BTF blob's record size is longer than we know how to parse. + return nil, fmt.Errorf("expected FuncInfo record size %d, but BTF blob contains %d", exp, got) + } + + for i := uint32(0); i < recordNum; i++ { + if err := binary.Read(r, bo, &fi); err != nil { + return nil, fmt.Errorf("can't read function info: %v", err) + } + + if fi.InsnOff%asm.InstructionSize != 0 { + return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff) + } + + // ELF tracks offset in bytes, the kernel expects raw BPF instructions. + // Convert as early as possible. + fi.InsnOff /= asm.InstructionSize + + out = append(out, fi) + } + + return out, nil +} + +var LineInfoSize = uint32(binary.Size(bpfLineInfo{})) + +// Line represents the location and contents of a single line of source +// code a BPF ELF was compiled from. +type Line struct { + fileName string + line string + lineNumber uint32 + lineColumn uint32 + + // TODO: We should get rid of the fields below, but for that we need to be + // able to write BTF. + + fileNameOff uint32 + lineOff uint32 +} + +func (li *Line) FileName() string { + return li.fileName +} + +func (li *Line) Line() string { + return li.line +} + +func (li *Line) LineNumber() uint32 { + return li.lineNumber +} + +func (li *Line) LineColumn() uint32 { + return li.lineColumn +} + +func (li *Line) String() string { + return li.line +} + +type lineInfo struct { + line *Line + offset asm.RawInstructionOffset +} + +// Constants for the format of bpfLineInfo.LineCol. +const ( + bpfLineShift = 10 + bpfLineMax = (1 << (32 - bpfLineShift)) - 1 + bpfColumnMax = (1 << bpfLineShift) - 1 +) + +type bpfLineInfo struct { + // Instruction offset of the line within the whole instruction stream, in instructions. + InsnOff uint32 + FileNameOff uint32 + LineOff uint32 + LineCol uint32 +} + +func newLineInfo(li bpfLineInfo, strings *stringTable) (*lineInfo, error) { + line, err := strings.Lookup(li.LineOff) + if err != nil { + return nil, fmt.Errorf("lookup of line: %w", err) + } + + fileName, err := strings.Lookup(li.FileNameOff) + if err != nil { + return nil, fmt.Errorf("lookup of filename: %w", err) + } + + lineNumber := li.LineCol >> bpfLineShift + lineColumn := li.LineCol & bpfColumnMax + + return &lineInfo{ + &Line{ + fileName, + line, + lineNumber, + lineColumn, + li.FileNameOff, + li.LineOff, + }, + asm.RawInstructionOffset(li.InsnOff), + }, nil +} + +func newLineInfos(blis []bpfLineInfo, strings *stringTable) ([]lineInfo, error) { + lis := make([]lineInfo, 0, len(blis)) + for _, bli := range blis { + li, err := newLineInfo(bli, strings) + if err != nil { + return nil, fmt.Errorf("offset %d: %w", bli.InsnOff, err) + } + lis = append(lis, *li) + } + sort.Slice(lis, func(i, j int) bool { + return lis[i].offset <= lis[j].offset + }) + return lis, nil +} + +// marshal writes the binary representation of the LineInfo to w. +func (li *lineInfo) marshal(w io.Writer) error { + line := li.line + if line.lineNumber > bpfLineMax { + return fmt.Errorf("line %d exceeds %d", line.lineNumber, bpfLineMax) + } + + if line.lineColumn > bpfColumnMax { + return fmt.Errorf("column %d exceeds %d", line.lineColumn, bpfColumnMax) + } + + bli := bpfLineInfo{ + uint32(li.offset), + line.fileNameOff, + line.lineOff, + (line.lineNumber << bpfLineShift) | line.lineColumn, + } + return binary.Write(w, internal.NativeEndian, &bli) +} + +// parseLineInfos parses a line_info sub-section within .BTF.ext ito a map of +// line infos indexed by section name. +func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfLineInfo, error) { + recordSize, err := parseExtInfoRecordSize(r, bo) + if err != nil { + return nil, err + } + + result := make(map[string][]bpfLineInfo) + for { + secName, infoHeader, err := parseExtInfoSec(r, bo, strings) + if errors.Is(err, io.EOF) { + return result, nil + } + if err != nil { + return nil, err + } + + records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo) + if err != nil { + return nil, fmt.Errorf("section %v: %w", secName, err) + } + + result[secName] = records + } +} + +// parseLineInfoRecords parses a stream of line_infos into a lineInfos. +// These records appear after a btf_ext_info_sec header in the line_info +// sub-section of .BTF.ext. +func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfLineInfo, error) { + var out []bpfLineInfo + var li bpfLineInfo + + if exp, got := uint32(binary.Size(li)), recordSize; exp != got { + // BTF blob's record size is longer than we know how to parse. + return nil, fmt.Errorf("expected LineInfo record size %d, but BTF blob contains %d", exp, got) + } + + for i := uint32(0); i < recordNum; i++ { + if err := binary.Read(r, bo, &li); err != nil { + return nil, fmt.Errorf("can't read line info: %v", err) + } + + if li.InsnOff%asm.InstructionSize != 0 { + return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff) + } + + // ELF tracks offset in bytes, the kernel expects raw BPF instructions. + // Convert as early as possible. + li.InsnOff /= asm.InstructionSize + + out = append(out, li) + } + + return out, nil +} + +// bpfCORERelo matches the kernel's struct bpf_core_relo. +type bpfCORERelo struct { + InsnOff uint32 + TypeID TypeID + AccessStrOff uint32 + Kind coreKind +} + +type CORERelocation struct { + typ Type + accessor coreAccessor + kind coreKind +} + +func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation { + relo, _ := ins.Metadata.Get(coreRelocationMeta{}).(*CORERelocation) + return relo +} + +type coreRelocationInfo struct { + relo *CORERelocation + offset asm.RawInstructionOffset +} + +func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreRelocationInfo, error) { + typ, err := ts.ByID(relo.TypeID) + if err != nil { + return nil, err + } + + accessorStr, err := strings.Lookup(relo.AccessStrOff) + if err != nil { + return nil, err + } + + accessor, err := parseCOREAccessor(accessorStr) + if err != nil { + return nil, fmt.Errorf("accessor %q: %s", accessorStr, err) + } + + return &coreRelocationInfo{ + &CORERelocation{ + typ, + accessor, + relo.Kind, + }, + asm.RawInstructionOffset(relo.InsnOff), + }, nil +} + +func newRelocationInfos(brs []bpfCORERelo, ts types, strings *stringTable) ([]coreRelocationInfo, error) { + rs := make([]coreRelocationInfo, 0, len(brs)) + for _, br := range brs { + relo, err := newRelocationInfo(br, ts, strings) + if err != nil { + return nil, fmt.Errorf("offset %d: %w", br.InsnOff, err) + } + rs = append(rs, *relo) + } + sort.Slice(rs, func(i, j int) bool { + return rs[i].offset < rs[j].offset + }) + return rs, nil +} + +var extInfoReloSize = binary.Size(bpfCORERelo{}) + +// parseCORERelos parses a core_relos sub-section within .BTF.ext ito a map of +// CO-RE relocations indexed by section name. +func parseCORERelos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfCORERelo, error) { + recordSize, err := parseExtInfoRecordSize(r, bo) + if err != nil { + return nil, err + } + + if recordSize != uint32(extInfoReloSize) { + return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize) + } + + result := make(map[string][]bpfCORERelo) + for { + secName, infoHeader, err := parseExtInfoSec(r, bo, strings) + if errors.Is(err, io.EOF) { + return result, nil + } + if err != nil { + return nil, err + } + + records, err := parseCOREReloRecords(r, bo, recordSize, infoHeader.NumInfo) + if err != nil { + return nil, fmt.Errorf("section %v: %w", secName, err) + } + + result[secName] = records + } +} + +// parseCOREReloRecords parses a stream of CO-RE relocation entries into a +// coreRelos. These records appear after a btf_ext_info_sec header in the +// core_relos sub-section of .BTF.ext. +func parseCOREReloRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfCORERelo, error) { + var out []bpfCORERelo + + var relo bpfCORERelo + for i := uint32(0); i < recordNum; i++ { + if err := binary.Read(r, bo, &relo); err != nil { + return nil, fmt.Errorf("can't read CO-RE relocation: %v", err) + } + + if relo.InsnOff%asm.InstructionSize != 0 { + return nil, fmt.Errorf("offset %v is not aligned with instruction size", relo.InsnOff) + } + + // ELF tracks offset in bytes, the kernel expects raw BPF instructions. + // Convert as early as possible. + relo.InsnOff /= asm.InstructionSize + + out = append(out, relo) + } + + return out, nil +} diff --git a/vendor/github.com/cilium/ebpf/btf/format.go b/vendor/github.com/cilium/ebpf/btf/format.go new file mode 100644 index 000000000..e7688a2a6 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/format.go @@ -0,0 +1,319 @@ +package btf + +import ( + "errors" + "fmt" + "strings" +) + +var errNestedTooDeep = errors.New("nested too deep") + +// GoFormatter converts a Type to Go syntax. +// +// A zero GoFormatter is valid to use. +type GoFormatter struct { + w strings.Builder + + // Types present in this map are referred to using the given name if they + // are encountered when outputting another type. + Names map[Type]string + + // Identifier is called for each field of struct-like types. By default the + // field name is used as is. + Identifier func(string) string + + // EnumIdentifier is called for each element of an enum. By default the + // name of the enum type is concatenated with Identifier(element). + EnumIdentifier func(name, element string) string +} + +// TypeDeclaration generates a Go type declaration for a BTF type. +func (gf *GoFormatter) TypeDeclaration(name string, typ Type) (string, error) { + gf.w.Reset() + if err := gf.writeTypeDecl(name, typ); err != nil { + return "", err + } + return gf.w.String(), nil +} + +func (gf *GoFormatter) identifier(s string) string { + if gf.Identifier != nil { + return gf.Identifier(s) + } + + return s +} + +func (gf *GoFormatter) enumIdentifier(name, element string) string { + if gf.EnumIdentifier != nil { + return gf.EnumIdentifier(name, element) + } + + return name + gf.identifier(element) +} + +// writeTypeDecl outputs a declaration of the given type. +// +// It encodes https://golang.org/ref/spec#Type_declarations: +// +// type foo struct { bar uint32; } +// type bar int32 +func (gf *GoFormatter) writeTypeDecl(name string, typ Type) error { + if name == "" { + return fmt.Errorf("need a name for type %s", typ) + } + + switch v := skipQualifiers(typ).(type) { + case *Enum: + fmt.Fprintf(&gf.w, "type %s ", name) + switch v.Size { + case 1: + gf.w.WriteString("int8") + case 2: + gf.w.WriteString("int16") + case 4: + gf.w.WriteString("int32") + case 8: + gf.w.WriteString("int64") + default: + return fmt.Errorf("%s: invalid enum size %d", typ, v.Size) + } + + if len(v.Values) == 0 { + return nil + } + + gf.w.WriteString("; const ( ") + for _, ev := range v.Values { + id := gf.enumIdentifier(name, ev.Name) + fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, ev.Value) + } + gf.w.WriteString(")") + + return nil + + default: + fmt.Fprintf(&gf.w, "type %s ", name) + return gf.writeTypeLit(v, 0) + } +} + +// writeType outputs the name of a named type or a literal describing the type. +// +// It encodes https://golang.org/ref/spec#Types. +// +// foo (if foo is a named type) +// uint32 +func (gf *GoFormatter) writeType(typ Type, depth int) error { + typ = skipQualifiers(typ) + + name := gf.Names[typ] + if name != "" { + gf.w.WriteString(name) + return nil + } + + return gf.writeTypeLit(typ, depth) +} + +// writeTypeLit outputs a literal describing the type. +// +// The function ignores named types. +// +// It encodes https://golang.org/ref/spec#TypeLit. +// +// struct { bar uint32; } +// uint32 +func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error { + depth++ + if depth > maxTypeDepth { + return errNestedTooDeep + } + + var err error + switch v := skipQualifiers(typ).(type) { + case *Int: + gf.writeIntLit(v) + + case *Enum: + gf.w.WriteString("int32") + + case *Typedef: + err = gf.writeType(v.Type, depth) + + case *Array: + fmt.Fprintf(&gf.w, "[%d]", v.Nelems) + err = gf.writeType(v.Type, depth) + + case *Struct: + err = gf.writeStructLit(v.Size, v.Members, depth) + + case *Union: + // Always choose the first member to represent the union in Go. + err = gf.writeStructLit(v.Size, v.Members[:1], depth) + + case *Datasec: + err = gf.writeDatasecLit(v, depth) + + default: + return fmt.Errorf("type %T: %w", v, ErrNotSupported) + } + + if err != nil { + return fmt.Errorf("%s: %w", typ, err) + } + + return nil +} + +func (gf *GoFormatter) writeIntLit(i *Int) { + // NB: Encoding.IsChar is ignored. + if i.Encoding.IsBool() && i.Size == 1 { + gf.w.WriteString("bool") + return + } + + bits := i.Size * 8 + if i.Encoding.IsSigned() { + fmt.Fprintf(&gf.w, "int%d", bits) + } else { + fmt.Fprintf(&gf.w, "uint%d", bits) + } +} + +func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int) error { + gf.w.WriteString("struct { ") + + prevOffset := uint32(0) + skippedBitfield := false + for i, m := range members { + if m.BitfieldSize > 0 { + skippedBitfield = true + continue + } + + offset := m.Offset.Bytes() + if n := offset - prevOffset; skippedBitfield && n > 0 { + fmt.Fprintf(&gf.w, "_ [%d]byte /* unsupported bitfield */; ", n) + } else { + gf.writePadding(n) + } + + size, err := Sizeof(m.Type) + if err != nil { + return fmt.Errorf("field %d: %w", i, err) + } + prevOffset = offset + uint32(size) + + if err := gf.writeStructField(m, depth); err != nil { + return fmt.Errorf("field %d: %w", i, err) + } + } + + gf.writePadding(size - prevOffset) + gf.w.WriteString("}") + return nil +} + +func (gf *GoFormatter) writeStructField(m Member, depth int) error { + if m.BitfieldSize > 0 { + return fmt.Errorf("bitfields are not supported") + } + if m.Offset%8 != 0 { + return fmt.Errorf("unsupported offset %d", m.Offset) + } + + if m.Name == "" { + // Special case a nested anonymous union like + // struct foo { union { int bar; int baz }; } + // by replacing the whole union with its first member. + union, ok := m.Type.(*Union) + if !ok { + return fmt.Errorf("anonymous fields are not supported") + + } + + if len(union.Members) == 0 { + return errors.New("empty anonymous union") + } + + depth++ + if depth > maxTypeDepth { + return errNestedTooDeep + } + + m := union.Members[0] + size, err := Sizeof(m.Type) + if err != nil { + return err + } + + if err := gf.writeStructField(m, depth); err != nil { + return err + } + + gf.writePadding(union.Size - uint32(size)) + return nil + + } + + fmt.Fprintf(&gf.w, "%s ", gf.identifier(m.Name)) + + if err := gf.writeType(m.Type, depth); err != nil { + return err + } + + gf.w.WriteString("; ") + return nil +} + +func (gf *GoFormatter) writeDatasecLit(ds *Datasec, depth int) error { + gf.w.WriteString("struct { ") + + prevOffset := uint32(0) + for i, vsi := range ds.Vars { + v := vsi.Type.(*Var) + if v.Linkage != GlobalVar { + // Ignore static, extern, etc. for now. + continue + } + + if v.Name == "" { + return fmt.Errorf("variable %d: empty name", i) + } + + gf.writePadding(vsi.Offset - prevOffset) + prevOffset = vsi.Offset + vsi.Size + + fmt.Fprintf(&gf.w, "%s ", gf.identifier(v.Name)) + + if err := gf.writeType(v.Type, depth); err != nil { + return fmt.Errorf("variable %d: %w", i, err) + } + + gf.w.WriteString("; ") + } + + gf.writePadding(ds.Size - prevOffset) + gf.w.WriteString("}") + return nil +} + +func (gf *GoFormatter) writePadding(bytes uint32) { + if bytes > 0 { + fmt.Fprintf(&gf.w, "_ [%d]byte; ", bytes) + } +} + +func skipQualifiers(typ Type) Type { + result := typ + for depth := 0; depth <= maxTypeDepth; depth++ { + switch v := (result).(type) { + case qualifier: + result = v.qualify() + default: + return result + } + } + return &cycle{typ} +} diff --git a/vendor/github.com/cilium/ebpf/btf/handle.go b/vendor/github.com/cilium/ebpf/btf/handle.go new file mode 100644 index 000000000..128e9b35c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/handle.go @@ -0,0 +1,121 @@ +package btf + +import ( + "errors" + "fmt" + "os" + + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// HandleInfo describes a Handle. +type HandleInfo struct { + // ID of this handle in the kernel. The ID is only valid as long as the + // associated handle is kept alive. + ID ID + + // Name is an identifying name for the BTF, currently only used by the + // kernel. + Name string + + // IsKernel is true if the BTF originated with the kernel and not + // userspace. + IsKernel bool + + // Size of the raw BTF in bytes. + size uint32 +} + +func newHandleInfoFromFD(fd *sys.FD) (*HandleInfo, error) { + // We invoke the syscall once with a empty BTF and name buffers to get size + // information to allocate buffers. Then we invoke it a second time with + // buffers to receive the data. + var btfInfo sys.BtfInfo + if err := sys.ObjInfo(fd, &btfInfo); err != nil { + return nil, fmt.Errorf("get BTF info for fd %s: %w", fd, err) + } + + if btfInfo.NameLen > 0 { + // NameLen doesn't account for the terminating NUL. + btfInfo.NameLen++ + } + + // Don't pull raw BTF by default, since it may be quite large. + btfSize := btfInfo.BtfSize + btfInfo.BtfSize = 0 + + nameBuffer := make([]byte, btfInfo.NameLen) + btfInfo.Name, btfInfo.NameLen = sys.NewSlicePointerLen(nameBuffer) + if err := sys.ObjInfo(fd, &btfInfo); err != nil { + return nil, err + } + + return &HandleInfo{ + ID: ID(btfInfo.Id), + Name: unix.ByteSliceToString(nameBuffer), + IsKernel: btfInfo.KernelBtf != 0, + size: btfSize, + }, nil +} + +// IsModule returns true if the BTF is for the kernel itself. +func (i *HandleInfo) IsVmlinux() bool { + return i.IsKernel && i.Name == "vmlinux" +} + +// IsModule returns true if the BTF is for a kernel module. +func (i *HandleInfo) IsModule() bool { + return i.IsKernel && i.Name != "vmlinux" +} + +// HandleIterator allows enumerating BTF blobs loaded into the kernel. +type HandleIterator struct { + // The ID of the last retrieved handle. Only valid after a call to Next. + ID ID + err error +} + +// Next retrieves a handle for the next BTF blob. +// +// [Handle.Close] is called if *handle is non-nil to avoid leaking fds. +// +// Returns true if another BTF blob was found. Call [HandleIterator.Err] after +// the function returns false. +func (it *HandleIterator) Next(handle **Handle) bool { + if *handle != nil { + (*handle).Close() + *handle = nil + } + + id := it.ID + for { + attr := &sys.BtfGetNextIdAttr{Id: id} + err := sys.BtfGetNextId(attr) + if errors.Is(err, os.ErrNotExist) { + // There are no more BTF objects. + return false + } else if err != nil { + it.err = fmt.Errorf("get next BTF ID: %w", err) + return false + } + + id = attr.NextId + *handle, err = NewHandleFromID(id) + if errors.Is(err, os.ErrNotExist) { + // Try again with the next ID. + continue + } else if err != nil { + it.err = fmt.Errorf("retrieve handle for ID %d: %w", id, err) + return false + } + + it.ID = id + return true + } +} + +// Err returns an error if iteration failed for some reason. +func (it *HandleIterator) Err() error { + return it.err +} diff --git a/vendor/github.com/cilium/ebpf/btf/strings.go b/vendor/github.com/cilium/ebpf/btf/strings.go new file mode 100644 index 000000000..67626e0dd --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/strings.go @@ -0,0 +1,128 @@ +package btf + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" +) + +type stringTable struct { + base *stringTable + offsets []uint32 + strings []string +} + +// sizedReader is implemented by bytes.Reader, io.SectionReader, strings.Reader, etc. +type sizedReader interface { + io.Reader + Size() int64 +} + +func readStringTable(r sizedReader, base *stringTable) (*stringTable, error) { + // When parsing split BTF's string table, the first entry offset is derived + // from the last entry offset of the base BTF. + firstStringOffset := uint32(0) + if base != nil { + idx := len(base.offsets) - 1 + firstStringOffset = base.offsets[idx] + uint32(len(base.strings[idx])) + 1 + } + + // Derived from vmlinux BTF. + const averageStringLength = 16 + + n := int(r.Size() / averageStringLength) + offsets := make([]uint32, 0, n) + strings := make([]string, 0, n) + + offset := firstStringOffset + scanner := bufio.NewScanner(r) + scanner.Split(splitNull) + for scanner.Scan() { + str := scanner.Text() + offsets = append(offsets, offset) + strings = append(strings, str) + offset += uint32(len(str)) + 1 + } + if err := scanner.Err(); err != nil { + return nil, err + } + + if len(strings) == 0 { + return nil, errors.New("string table is empty") + } + + if firstStringOffset == 0 && strings[0] != "" { + return nil, errors.New("first item in string table is non-empty") + } + + return &stringTable{base, offsets, strings}, nil +} + +func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) { + i := bytes.IndexByte(data, 0) + if i == -1 { + if atEOF && len(data) > 0 { + return 0, nil, errors.New("string table isn't null terminated") + } + return 0, nil, nil + } + + return i + 1, data[:i], nil +} + +func (st *stringTable) Lookup(offset uint32) (string, error) { + if st.base != nil && offset <= st.base.offsets[len(st.base.offsets)-1] { + return st.base.lookup(offset) + } + return st.lookup(offset) +} + +func (st *stringTable) lookup(offset uint32) (string, error) { + i := search(st.offsets, offset) + if i == len(st.offsets) || st.offsets[i] != offset { + return "", fmt.Errorf("offset %d isn't start of a string", offset) + } + + return st.strings[i], nil +} + +func (st *stringTable) Length() int { + last := len(st.offsets) - 1 + return int(st.offsets[last]) + len(st.strings[last]) + 1 +} + +func (st *stringTable) Marshal(w io.Writer) error { + for _, str := range st.strings { + _, err := io.WriteString(w, str) + if err != nil { + return err + } + _, err = w.Write([]byte{0}) + if err != nil { + return err + } + } + return nil +} + +// search is a copy of sort.Search specialised for uint32. +// +// Licensed under https://go.dev/LICENSE +func search(ints []uint32, needle uint32) int { + // Define f(-1) == false and f(n) == true. + // Invariant: f(i-1) == false, f(j) == true. + i, j := 0, len(ints) + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + if !(ints[h] >= needle) { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + // i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i. + return i +} diff --git a/vendor/github.com/cilium/ebpf/btf/types.go b/vendor/github.com/cilium/ebpf/btf/types.go new file mode 100644 index 000000000..402a363c2 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/types.go @@ -0,0 +1,1212 @@ +package btf + +import ( + "fmt" + "io" + "math" + "reflect" + "strings" + + "github.com/cilium/ebpf/asm" +) + +const maxTypeDepth = 32 + +// TypeID identifies a type in a BTF section. +type TypeID uint32 + +// Type represents a type described by BTF. +type Type interface { + // Type can be formatted using the %s and %v verbs. %s outputs only the + // identity of the type, without any detail. %v outputs additional detail. + // + // Use the '+' flag to include the address of the type. + // + // Use the width to specify how many levels of detail to output, for example + // %1v will output detail for the root type and a short description of its + // children. %2v would output details of the root type and its children + // as well as a short description of the grandchildren. + fmt.Formatter + + // Name of the type, empty for anonymous types and types that cannot + // carry a name, like Void and Pointer. + TypeName() string + + // Make a copy of the type, without copying Type members. + copy() Type + + // Enumerate all nested Types. Repeated calls must visit nested + // types in the same order. + walk(*typeDeque) +} + +var ( + _ Type = (*Int)(nil) + _ Type = (*Struct)(nil) + _ Type = (*Union)(nil) + _ Type = (*Enum)(nil) + _ Type = (*Fwd)(nil) + _ Type = (*Func)(nil) + _ Type = (*Typedef)(nil) + _ Type = (*Var)(nil) + _ Type = (*Datasec)(nil) + _ Type = (*Float)(nil) +) + +// types is a list of Type. +// +// The order determines the ID of a type. +type types []Type + +func (ts types) ByID(id TypeID) (Type, error) { + if int(id) > len(ts) { + return nil, fmt.Errorf("type ID %d: %w", id, ErrNotFound) + } + return ts[id], nil +} + +// Void is the unit type of BTF. +type Void struct{} + +func (v *Void) Format(fs fmt.State, verb rune) { formatType(fs, verb, v) } +func (v *Void) TypeName() string { return "" } +func (v *Void) size() uint32 { return 0 } +func (v *Void) copy() Type { return (*Void)(nil) } +func (v *Void) walk(*typeDeque) {} + +type IntEncoding byte + +const ( + Signed IntEncoding = 1 << iota + Char + Bool +) + +func (ie IntEncoding) IsSigned() bool { + return ie&Signed != 0 +} + +func (ie IntEncoding) IsChar() bool { + return ie&Char != 0 +} + +func (ie IntEncoding) IsBool() bool { + return ie&Bool != 0 +} + +func (ie IntEncoding) String() string { + switch { + case ie.IsChar() && ie.IsSigned(): + return "char" + case ie.IsChar() && !ie.IsSigned(): + return "uchar" + case ie.IsBool(): + return "bool" + case ie.IsSigned(): + return "signed" + default: + return "unsigned" + } +} + +// Int is an integer of a given length. +// +// See https://www.kernel.org/doc/html/latest/bpf/btf.html#btf-kind-int +type Int struct { + Name string + + // The size of the integer in bytes. + Size uint32 + Encoding IntEncoding +} + +func (i *Int) Format(fs fmt.State, verb rune) { + formatType(fs, verb, i, i.Encoding, "size=", i.Size*8) +} + +func (i *Int) TypeName() string { return i.Name } +func (i *Int) size() uint32 { return i.Size } +func (i *Int) walk(*typeDeque) {} +func (i *Int) copy() Type { + cpy := *i + return &cpy +} + +// Pointer is a pointer to another type. +type Pointer struct { + Target Type +} + +func (p *Pointer) Format(fs fmt.State, verb rune) { + formatType(fs, verb, p, "target=", p.Target) +} + +func (p *Pointer) TypeName() string { return "" } +func (p *Pointer) size() uint32 { return 8 } +func (p *Pointer) walk(tdq *typeDeque) { tdq.push(&p.Target) } +func (p *Pointer) copy() Type { + cpy := *p + return &cpy +} + +// Array is an array with a fixed number of elements. +type Array struct { + Index Type + Type Type + Nelems uint32 +} + +func (arr *Array) Format(fs fmt.State, verb rune) { + formatType(fs, verb, arr, "index=", arr.Index, "type=", arr.Type, "n=", arr.Nelems) +} + +func (arr *Array) TypeName() string { return "" } + +func (arr *Array) walk(tdq *typeDeque) { + tdq.push(&arr.Index) + tdq.push(&arr.Type) +} + +func (arr *Array) copy() Type { + cpy := *arr + return &cpy +} + +// Struct is a compound type of consecutive members. +type Struct struct { + Name string + // The size of the struct including padding, in bytes + Size uint32 + Members []Member +} + +func (s *Struct) Format(fs fmt.State, verb rune) { + formatType(fs, verb, s, "fields=", len(s.Members)) +} + +func (s *Struct) TypeName() string { return s.Name } + +func (s *Struct) size() uint32 { return s.Size } + +func (s *Struct) walk(tdq *typeDeque) { + for i := range s.Members { + tdq.push(&s.Members[i].Type) + } +} + +func (s *Struct) copy() Type { + cpy := *s + cpy.Members = copyMembers(s.Members) + return &cpy +} + +func (s *Struct) members() []Member { + return s.Members +} + +// Union is a compound type where members occupy the same memory. +type Union struct { + Name string + // The size of the union including padding, in bytes. + Size uint32 + Members []Member +} + +func (u *Union) Format(fs fmt.State, verb rune) { + formatType(fs, verb, u, "fields=", len(u.Members)) +} + +func (u *Union) TypeName() string { return u.Name } + +func (u *Union) size() uint32 { return u.Size } + +func (u *Union) walk(tdq *typeDeque) { + for i := range u.Members { + tdq.push(&u.Members[i].Type) + } +} + +func (u *Union) copy() Type { + cpy := *u + cpy.Members = copyMembers(u.Members) + return &cpy +} + +func (u *Union) members() []Member { + return u.Members +} + +func copyMembers(orig []Member) []Member { + cpy := make([]Member, len(orig)) + copy(cpy, orig) + return cpy +} + +type composite interface { + members() []Member +} + +var ( + _ composite = (*Struct)(nil) + _ composite = (*Union)(nil) +) + +// A value in bits. +type Bits uint32 + +// Bytes converts a bit value into bytes. +func (b Bits) Bytes() uint32 { + return uint32(b / 8) +} + +// Member is part of a Struct or Union. +// +// It is not a valid Type. +type Member struct { + Name string + Type Type + Offset Bits + BitfieldSize Bits +} + +// Enum lists possible values. +type Enum struct { + Name string + // Size of the enum value in bytes. + Size uint32 + Values []EnumValue +} + +func (e *Enum) Format(fs fmt.State, verb rune) { + formatType(fs, verb, e, "size=", e.Size, "values=", len(e.Values)) +} + +func (e *Enum) TypeName() string { return e.Name } + +// EnumValue is part of an Enum +// +// Is is not a valid Type +type EnumValue struct { + Name string + Value int32 +} + +func (e *Enum) size() uint32 { return e.Size } +func (e *Enum) walk(*typeDeque) {} +func (e *Enum) copy() Type { + cpy := *e + cpy.Values = make([]EnumValue, len(e.Values)) + copy(cpy.Values, e.Values) + return &cpy +} + +// FwdKind is the type of forward declaration. +type FwdKind int + +// Valid types of forward declaration. +const ( + FwdStruct FwdKind = iota + FwdUnion +) + +func (fk FwdKind) String() string { + switch fk { + case FwdStruct: + return "struct" + case FwdUnion: + return "union" + default: + return fmt.Sprintf("%T(%d)", fk, int(fk)) + } +} + +// Fwd is a forward declaration of a Type. +type Fwd struct { + Name string + Kind FwdKind +} + +func (f *Fwd) Format(fs fmt.State, verb rune) { + formatType(fs, verb, f, f.Kind) +} + +func (f *Fwd) TypeName() string { return f.Name } + +func (f *Fwd) walk(*typeDeque) {} +func (f *Fwd) copy() Type { + cpy := *f + return &cpy +} + +// Typedef is an alias of a Type. +type Typedef struct { + Name string + Type Type +} + +func (td *Typedef) Format(fs fmt.State, verb rune) { + formatType(fs, verb, td, td.Type) +} + +func (td *Typedef) TypeName() string { return td.Name } + +func (td *Typedef) walk(tdq *typeDeque) { tdq.push(&td.Type) } +func (td *Typedef) copy() Type { + cpy := *td + return &cpy +} + +// Volatile is a qualifier. +type Volatile struct { + Type Type +} + +func (v *Volatile) Format(fs fmt.State, verb rune) { + formatType(fs, verb, v, v.Type) +} + +func (v *Volatile) TypeName() string { return "" } + +func (v *Volatile) qualify() Type { return v.Type } +func (v *Volatile) walk(tdq *typeDeque) { tdq.push(&v.Type) } +func (v *Volatile) copy() Type { + cpy := *v + return &cpy +} + +// Const is a qualifier. +type Const struct { + Type Type +} + +func (c *Const) Format(fs fmt.State, verb rune) { + formatType(fs, verb, c, c.Type) +} + +func (c *Const) TypeName() string { return "" } + +func (c *Const) qualify() Type { return c.Type } +func (c *Const) walk(tdq *typeDeque) { tdq.push(&c.Type) } +func (c *Const) copy() Type { + cpy := *c + return &cpy +} + +// Restrict is a qualifier. +type Restrict struct { + Type Type +} + +func (r *Restrict) Format(fs fmt.State, verb rune) { + formatType(fs, verb, r, r.Type) +} + +func (r *Restrict) TypeName() string { return "" } + +func (r *Restrict) qualify() Type { return r.Type } +func (r *Restrict) walk(tdq *typeDeque) { tdq.push(&r.Type) } +func (r *Restrict) copy() Type { + cpy := *r + return &cpy +} + +// Func is a function definition. +type Func struct { + Name string + Type Type + Linkage FuncLinkage +} + +func FuncMetadata(ins *asm.Instruction) *Func { + fn, _ := ins.Metadata.Get(funcInfoMeta{}).(*Func) + return fn +} + +func (f *Func) Format(fs fmt.State, verb rune) { + formatType(fs, verb, f, f.Linkage, "proto=", f.Type) +} + +func (f *Func) TypeName() string { return f.Name } + +func (f *Func) walk(tdq *typeDeque) { tdq.push(&f.Type) } +func (f *Func) copy() Type { + cpy := *f + return &cpy +} + +// FuncProto is a function declaration. +type FuncProto struct { + Return Type + Params []FuncParam +} + +func (fp *FuncProto) Format(fs fmt.State, verb rune) { + formatType(fs, verb, fp, "args=", len(fp.Params), "return=", fp.Return) +} + +func (fp *FuncProto) TypeName() string { return "" } + +func (fp *FuncProto) walk(tdq *typeDeque) { + tdq.push(&fp.Return) + for i := range fp.Params { + tdq.push(&fp.Params[i].Type) + } +} + +func (fp *FuncProto) copy() Type { + cpy := *fp + cpy.Params = make([]FuncParam, len(fp.Params)) + copy(cpy.Params, fp.Params) + return &cpy +} + +type FuncParam struct { + Name string + Type Type +} + +// Var is a global variable. +type Var struct { + Name string + Type Type + Linkage VarLinkage +} + +func (v *Var) Format(fs fmt.State, verb rune) { + formatType(fs, verb, v, v.Linkage) +} + +func (v *Var) TypeName() string { return v.Name } + +func (v *Var) walk(tdq *typeDeque) { tdq.push(&v.Type) } +func (v *Var) copy() Type { + cpy := *v + return &cpy +} + +// Datasec is a global program section containing data. +type Datasec struct { + Name string + Size uint32 + Vars []VarSecinfo +} + +func (ds *Datasec) Format(fs fmt.State, verb rune) { + formatType(fs, verb, ds) +} + +func (ds *Datasec) TypeName() string { return ds.Name } + +func (ds *Datasec) size() uint32 { return ds.Size } + +func (ds *Datasec) walk(tdq *typeDeque) { + for i := range ds.Vars { + tdq.push(&ds.Vars[i].Type) + } +} + +func (ds *Datasec) copy() Type { + cpy := *ds + cpy.Vars = make([]VarSecinfo, len(ds.Vars)) + copy(cpy.Vars, ds.Vars) + return &cpy +} + +// VarSecinfo describes variable in a Datasec. +// +// It is not a valid Type. +type VarSecinfo struct { + Type Type + Offset uint32 + Size uint32 +} + +// Float is a float of a given length. +type Float struct { + Name string + + // The size of the float in bytes. + Size uint32 +} + +func (f *Float) Format(fs fmt.State, verb rune) { + formatType(fs, verb, f, "size=", f.Size*8) +} + +func (f *Float) TypeName() string { return f.Name } +func (f *Float) size() uint32 { return f.Size } +func (f *Float) walk(*typeDeque) {} +func (f *Float) copy() Type { + cpy := *f + return &cpy +} + +// cycle is a type which had to be elided since it exceeded maxTypeDepth. +type cycle struct { + root Type +} + +func (c *cycle) ID() TypeID { return math.MaxUint32 } +func (c *cycle) Format(fs fmt.State, verb rune) { formatType(fs, verb, c, "root=", c.root) } +func (c *cycle) TypeName() string { return "" } +func (c *cycle) walk(*typeDeque) {} +func (c *cycle) copy() Type { + cpy := *c + return &cpy +} + +type sizer interface { + size() uint32 +} + +var ( + _ sizer = (*Int)(nil) + _ sizer = (*Pointer)(nil) + _ sizer = (*Struct)(nil) + _ sizer = (*Union)(nil) + _ sizer = (*Enum)(nil) + _ sizer = (*Datasec)(nil) +) + +type qualifier interface { + qualify() Type +} + +var ( + _ qualifier = (*Const)(nil) + _ qualifier = (*Restrict)(nil) + _ qualifier = (*Volatile)(nil) +) + +// Sizeof returns the size of a type in bytes. +// +// Returns an error if the size can't be computed. +func Sizeof(typ Type) (int, error) { + var ( + n = int64(1) + elem int64 + ) + + for i := 0; i < maxTypeDepth; i++ { + switch v := typ.(type) { + case *Array: + if n > 0 && int64(v.Nelems) > math.MaxInt64/n { + return 0, fmt.Errorf("type %s: overflow", typ) + } + + // Arrays may be of zero length, which allows + // n to be zero as well. + n *= int64(v.Nelems) + typ = v.Type + continue + + case sizer: + elem = int64(v.size()) + + case *Typedef: + typ = v.Type + continue + + case qualifier: + typ = v.qualify() + continue + + default: + return 0, fmt.Errorf("unsized type %T", typ) + } + + if n > 0 && elem > math.MaxInt64/n { + return 0, fmt.Errorf("type %s: overflow", typ) + } + + size := n * elem + if int64(int(size)) != size { + return 0, fmt.Errorf("type %s: overflow", typ) + } + + return int(size), nil + } + + return 0, fmt.Errorf("type %s: exceeded type depth", typ) +} + +// alignof returns the alignment of a type. +// +// Currently only supports the subset of types necessary for bitfield relocations. +func alignof(typ Type) (int, error) { + switch t := UnderlyingType(typ).(type) { + case *Enum: + return int(t.size()), nil + case *Int: + return int(t.Size), nil + default: + return 0, fmt.Errorf("can't calculate alignment of %T", t) + } +} + +// Transformer modifies a given Type and returns the result. +// +// For example, UnderlyingType removes any qualifiers or typedefs from a type. +// See the example on Copy for how to use a transform. +type Transformer func(Type) Type + +// Copy a Type recursively. +// +// typ may form a cycle. If transform is not nil, it is called with the +// to be copied type, and the returned value is copied instead. +func Copy(typ Type, transform Transformer) Type { + copies := make(copier) + copies.copy(&typ, transform) + return typ +} + +// copy a slice of Types recursively. +// +// See Copy for the semantics. +func copyTypes(types []Type, transform Transformer) []Type { + result := make([]Type, len(types)) + copy(result, types) + + copies := make(copier) + for i := range result { + copies.copy(&result[i], transform) + } + + return result +} + +type copier map[Type]Type + +func (c copier) copy(typ *Type, transform Transformer) { + var work typeDeque + for t := typ; t != nil; t = work.pop() { + // *t is the identity of the type. + if cpy := c[*t]; cpy != nil { + *t = cpy + continue + } + + var cpy Type + if transform != nil { + cpy = transform(*t).copy() + } else { + cpy = (*t).copy() + } + + c[*t] = cpy + *t = cpy + + // Mark any nested types for copying. + cpy.walk(&work) + } +} + +// typeDeque keeps track of pointers to types which still +// need to be visited. +type typeDeque struct { + types []*Type + read, write uint64 + mask uint64 +} + +func (dq *typeDeque) empty() bool { + return dq.read == dq.write +} + +// push adds a type to the stack. +func (dq *typeDeque) push(t *Type) { + if dq.write-dq.read < uint64(len(dq.types)) { + dq.types[dq.write&dq.mask] = t + dq.write++ + return + } + + new := len(dq.types) * 2 + if new == 0 { + new = 8 + } + + types := make([]*Type, new) + pivot := dq.read & dq.mask + n := copy(types, dq.types[pivot:]) + n += copy(types[n:], dq.types[:pivot]) + types[n] = t + + dq.types = types + dq.mask = uint64(new) - 1 + dq.read, dq.write = 0, uint64(n+1) +} + +// shift returns the first element or null. +func (dq *typeDeque) shift() *Type { + if dq.empty() { + return nil + } + + index := dq.read & dq.mask + t := dq.types[index] + dq.types[index] = nil + dq.read++ + return t +} + +// pop returns the last element or null. +func (dq *typeDeque) pop() *Type { + if dq.empty() { + return nil + } + + dq.write-- + index := dq.write & dq.mask + t := dq.types[index] + dq.types[index] = nil + return t +} + +// all returns all elements. +// +// The deque is empty after calling this method. +func (dq *typeDeque) all() []*Type { + length := dq.write - dq.read + types := make([]*Type, 0, length) + for t := dq.shift(); t != nil; t = dq.shift() { + types = append(types, t) + } + return types +} + +// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns +// it into a graph of Types connected via pointers. +// +// If baseTypes are provided, then the raw types are +// considered to be of a split BTF (e.g., a kernel module). +// +// Returns a slice of types indexed by TypeID. Since BTF ignores compilation +// units, multiple types may share the same name. A Type may form a cyclic graph +// by pointing at itself. +func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTable) ([]Type, error) { + types := make([]Type, 0, len(rawTypes)+1) // +1 for Void added to base types + + typeIDOffset := TypeID(1) // Void is TypeID(0), so the rest starts from TypeID(1) + + if baseTypes == nil { + // Void is defined to always be type ID 0, and is thus omitted from BTF. + types = append(types, (*Void)(nil)) + } else { + // For split BTF, the next ID is max base BTF type ID + 1 + typeIDOffset = TypeID(len(baseTypes)) + } + + type fixupDef struct { + id TypeID + typ *Type + } + + var fixups []fixupDef + fixup := func(id TypeID, typ *Type) { + if id < TypeID(len(baseTypes)) { + *typ = baseTypes[id] + return + } + + idx := id + if baseTypes != nil { + idx = id - TypeID(len(baseTypes)) + } + if idx < TypeID(len(types)) { + // We've already inflated this type, fix it up immediately. + *typ = types[idx] + return + } + fixups = append(fixups, fixupDef{id, typ}) + } + + type assertion struct { + typ *Type + want reflect.Type + } + + var assertions []assertion + assert := func(typ *Type, want reflect.Type) error { + if *typ != nil { + // The type has already been fixed up, check the type immediately. + if reflect.TypeOf(*typ) != want { + return fmt.Errorf("expected %s, got %T", want, *typ) + } + return nil + } + assertions = append(assertions, assertion{typ, want}) + return nil + } + + type bitfieldFixupDef struct { + id TypeID + m *Member + } + + var ( + legacyBitfields = make(map[TypeID][2]Bits) // offset, size + bitfieldFixups []bitfieldFixupDef + ) + convertMembers := func(raw []btfMember, kindFlag bool) ([]Member, error) { + // NB: The fixup below relies on pre-allocating this array to + // work, since otherwise append might re-allocate members. + members := make([]Member, 0, len(raw)) + for i, btfMember := range raw { + name, err := rawStrings.Lookup(btfMember.NameOff) + if err != nil { + return nil, fmt.Errorf("can't get name for member %d: %w", i, err) + } + + members = append(members, Member{ + Name: name, + Offset: Bits(btfMember.Offset), + }) + + m := &members[i] + fixup(raw[i].Type, &m.Type) + + if kindFlag { + m.BitfieldSize = Bits(btfMember.Offset >> 24) + m.Offset &= 0xffffff + // We ignore legacy bitfield definitions if the current composite + // is a new-style bitfield. This is kind of safe since offset and + // size on the type of the member must be zero if kindFlat is set + // according to spec. + continue + } + + // This may be a legacy bitfield, try to fix it up. + data, ok := legacyBitfields[raw[i].Type] + if ok { + // Bingo! + m.Offset += data[0] + m.BitfieldSize = data[1] + continue + } + + if m.Type != nil { + // We couldn't find a legacy bitfield, but we know that the member's + // type has already been inflated. Hence we know that it can't be + // a legacy bitfield and there is nothing left to do. + continue + } + + // We don't have fixup data, and the type we're pointing + // at hasn't been inflated yet. No choice but to defer + // the fixup. + bitfieldFixups = append(bitfieldFixups, bitfieldFixupDef{ + raw[i].Type, + m, + }) + } + return members, nil + } + + for i, raw := range rawTypes { + var ( + id = typeIDOffset + TypeID(i) + typ Type + ) + + name, err := rawStrings.Lookup(raw.NameOff) + if err != nil { + return nil, fmt.Errorf("get name for type id %d: %w", id, err) + } + + switch raw.Kind() { + case kindInt: + size := raw.Size() + bi := raw.data.(*btfInt) + if bi.Offset() > 0 || bi.Bits().Bytes() != size { + legacyBitfields[id] = [2]Bits{bi.Offset(), bi.Bits()} + } + typ = &Int{name, raw.Size(), bi.Encoding()} + + case kindPointer: + ptr := &Pointer{nil} + fixup(raw.Type(), &ptr.Target) + typ = ptr + + case kindArray: + btfArr := raw.data.(*btfArray) + arr := &Array{nil, nil, btfArr.Nelems} + fixup(btfArr.IndexType, &arr.Index) + fixup(btfArr.Type, &arr.Type) + typ = arr + + case kindStruct: + members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag()) + if err != nil { + return nil, fmt.Errorf("struct %s (id %d): %w", name, id, err) + } + typ = &Struct{name, raw.Size(), members} + + case kindUnion: + members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag()) + if err != nil { + return nil, fmt.Errorf("union %s (id %d): %w", name, id, err) + } + typ = &Union{name, raw.Size(), members} + + case kindEnum: + rawvals := raw.data.([]btfEnum) + vals := make([]EnumValue, 0, len(rawvals)) + for i, btfVal := range rawvals { + name, err := rawStrings.Lookup(btfVal.NameOff) + if err != nil { + return nil, fmt.Errorf("get name for enum value %d: %s", i, err) + } + vals = append(vals, EnumValue{ + Name: name, + Value: btfVal.Val, + }) + } + typ = &Enum{name, raw.Size(), vals} + + case kindForward: + if raw.KindFlag() { + typ = &Fwd{name, FwdUnion} + } else { + typ = &Fwd{name, FwdStruct} + } + + case kindTypedef: + typedef := &Typedef{name, nil} + fixup(raw.Type(), &typedef.Type) + typ = typedef + + case kindVolatile: + volatile := &Volatile{nil} + fixup(raw.Type(), &volatile.Type) + typ = volatile + + case kindConst: + cnst := &Const{nil} + fixup(raw.Type(), &cnst.Type) + typ = cnst + + case kindRestrict: + restrict := &Restrict{nil} + fixup(raw.Type(), &restrict.Type) + typ = restrict + + case kindFunc: + fn := &Func{name, nil, raw.Linkage()} + fixup(raw.Type(), &fn.Type) + if err := assert(&fn.Type, reflect.TypeOf((*FuncProto)(nil))); err != nil { + return nil, err + } + typ = fn + + case kindFuncProto: + rawparams := raw.data.([]btfParam) + params := make([]FuncParam, 0, len(rawparams)) + for i, param := range rawparams { + name, err := rawStrings.Lookup(param.NameOff) + if err != nil { + return nil, fmt.Errorf("get name for func proto parameter %d: %s", i, err) + } + params = append(params, FuncParam{ + Name: name, + }) + } + for i := range params { + fixup(rawparams[i].Type, ¶ms[i].Type) + } + + fp := &FuncProto{nil, params} + fixup(raw.Type(), &fp.Return) + typ = fp + + case kindVar: + variable := raw.data.(*btfVariable) + v := &Var{name, nil, VarLinkage(variable.Linkage)} + fixup(raw.Type(), &v.Type) + typ = v + + case kindDatasec: + btfVars := raw.data.([]btfVarSecinfo) + vars := make([]VarSecinfo, 0, len(btfVars)) + for _, btfVar := range btfVars { + vars = append(vars, VarSecinfo{ + Offset: btfVar.Offset, + Size: btfVar.Size, + }) + } + for i := range vars { + fixup(btfVars[i].Type, &vars[i].Type) + if err := assert(&vars[i].Type, reflect.TypeOf((*Var)(nil))); err != nil { + return nil, err + } + } + typ = &Datasec{name, raw.SizeType, vars} + + case kindFloat: + typ = &Float{name, raw.Size()} + + default: + return nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind()) + } + + types = append(types, typ) + } + + for _, fixup := range fixups { + i := int(fixup.id) + if i >= len(types)+len(baseTypes) { + return nil, fmt.Errorf("reference to invalid type id: %d", fixup.id) + } + if i < len(baseTypes) { + return nil, fmt.Errorf("fixup for base type id %d is not expected", i) + } + + *fixup.typ = types[i-len(baseTypes)] + } + + for _, bitfieldFixup := range bitfieldFixups { + if bitfieldFixup.id < TypeID(len(baseTypes)) { + return nil, fmt.Errorf("bitfield fixup from split to base types is not expected") + } + + data, ok := legacyBitfields[bitfieldFixup.id] + if ok { + // This is indeed a legacy bitfield, fix it up. + bitfieldFixup.m.Offset += data[0] + bitfieldFixup.m.BitfieldSize = data[1] + } + } + + for _, assertion := range assertions { + if reflect.TypeOf(*assertion.typ) != assertion.want { + return nil, fmt.Errorf("expected %s, got %T", assertion.want, *assertion.typ) + } + } + + return types, nil +} + +// essentialName represents the name of a BTF type stripped of any flavor +// suffixes after a ___ delimiter. +type essentialName string + +// newEssentialName returns name without a ___ suffix. +// +// CO-RE has the concept of 'struct flavors', which are used to deal with +// changes in kernel data structures. Anything after three underscores +// in a type name is ignored for the purpose of finding a candidate type +// in the kernel's BTF. +func newEssentialName(name string) essentialName { + if name == "" { + return "" + } + lastIdx := strings.LastIndex(name, "___") + if lastIdx > 0 { + return essentialName(name[:lastIdx]) + } + return essentialName(name) +} + +// UnderlyingType skips qualifiers and Typedefs. +func UnderlyingType(typ Type) Type { + result := typ + for depth := 0; depth <= maxTypeDepth; depth++ { + switch v := (result).(type) { + case qualifier: + result = v.qualify() + case *Typedef: + result = v.Type + default: + return result + } + } + return &cycle{typ} +} + +type formatState struct { + fmt.State + depth int +} + +// formattableType is a subset of Type, to ease unit testing of formatType. +type formattableType interface { + fmt.Formatter + TypeName() string +} + +// formatType formats a type in a canonical form. +// +// Handles cyclical types by only printing cycles up to a certain depth. Elements +// in extra are separated by spaces unless the preceding element is a string +// ending in '='. +func formatType(f fmt.State, verb rune, t formattableType, extra ...interface{}) { + if verb != 'v' && verb != 's' { + fmt.Fprintf(f, "{UNRECOGNIZED: %c}", verb) + return + } + + // This is the same as %T, but elides the package name. Assumes that + // formattableType is implemented by a pointer receiver. + goTypeName := reflect.TypeOf(t).Elem().Name() + _, _ = io.WriteString(f, goTypeName) + + if name := t.TypeName(); name != "" { + // Output BTF type name if present. + fmt.Fprintf(f, ":%q", name) + } + + if f.Flag('+') { + // Output address if requested. + fmt.Fprintf(f, ":%#p", t) + } + + if verb == 's' { + // %s omits details. + return + } + + var depth int + if ps, ok := f.(*formatState); ok { + depth = ps.depth + f = ps.State + } + + maxDepth, ok := f.Width() + if !ok { + maxDepth = 0 + } + + if depth > maxDepth { + // We've reached the maximum depth. This avoids infinite recursion even + // for cyclical types. + return + } + + if len(extra) == 0 { + return + } + + wantSpace := false + _, _ = io.WriteString(f, "[") + for _, arg := range extra { + if wantSpace { + _, _ = io.WriteString(f, " ") + } + + switch v := arg.(type) { + case string: + _, _ = io.WriteString(f, v) + wantSpace = len(v) > 0 && v[len(v)-1] != '=' + continue + + case formattableType: + v.Format(&formatState{f, depth + 1}, verb) + + default: + fmt.Fprint(f, arg) + } + + wantSpace = true + } + _, _ = io.WriteString(f, "]") +} diff --git a/vendor/github.com/cilium/ebpf/collection.go b/vendor/github.com/cilium/ebpf/collection.go new file mode 100644 index 000000000..8c2ddc380 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/collection.go @@ -0,0 +1,772 @@ +package ebpf + +import ( + "encoding/binary" + "errors" + "fmt" + "reflect" + "strings" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/btf" +) + +// CollectionOptions control loading a collection into the kernel. +// +// Maps and Programs are passed to NewMapWithOptions and NewProgramsWithOptions. +type CollectionOptions struct { + Maps MapOptions + Programs ProgramOptions + + // MapReplacements takes a set of Maps that will be used instead of + // creating new ones when loading the CollectionSpec. + // + // For each given Map, there must be a corresponding MapSpec in + // CollectionSpec.Maps, and its type, key/value size, max entries and flags + // must match the values of the MapSpec. + // + // The given Maps are Clone()d before being used in the Collection, so the + // caller can Close() them freely when they are no longer needed. + MapReplacements map[string]*Map +} + +// CollectionSpec describes a collection. +type CollectionSpec struct { + Maps map[string]*MapSpec + Programs map[string]*ProgramSpec + + // Types holds type information about Maps and Programs. + // Modifications to Types are currently undefined behaviour. + Types *btf.Spec + + // ByteOrder specifies whether the ELF was compiled for + // big-endian or little-endian architectures. + ByteOrder binary.ByteOrder +} + +// Copy returns a recursive copy of the spec. +func (cs *CollectionSpec) Copy() *CollectionSpec { + if cs == nil { + return nil + } + + cpy := CollectionSpec{ + Maps: make(map[string]*MapSpec, len(cs.Maps)), + Programs: make(map[string]*ProgramSpec, len(cs.Programs)), + ByteOrder: cs.ByteOrder, + Types: cs.Types, + } + + for name, spec := range cs.Maps { + cpy.Maps[name] = spec.Copy() + } + + for name, spec := range cs.Programs { + cpy.Programs[name] = spec.Copy() + } + + return &cpy +} + +// RewriteMaps replaces all references to specific maps. +// +// Use this function to use pre-existing maps instead of creating new ones +// when calling NewCollection. Any named maps are removed from CollectionSpec.Maps. +// +// Returns an error if a named map isn't used in at least one program. +// +// Deprecated: Pass CollectionOptions.MapReplacements when loading the Collection +// instead. +func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error { + for symbol, m := range maps { + // have we seen a program that uses this symbol / map + seen := false + for progName, progSpec := range cs.Programs { + err := progSpec.Instructions.AssociateMap(symbol, m) + + switch { + case err == nil: + seen = true + + case errors.Is(err, asm.ErrUnreferencedSymbol): + // Not all programs need to use the map + + default: + return fmt.Errorf("program %s: %w", progName, err) + } + } + + if !seen { + return fmt.Errorf("map %s not referenced by any programs", symbol) + } + + // Prevent NewCollection from creating rewritten maps + delete(cs.Maps, symbol) + } + + return nil +} + +// RewriteConstants replaces the value of multiple constants. +// +// The constant must be defined like so in the C program: +// +// volatile const type foobar; +// volatile const type foobar = default; +// +// Replacement values must be of the same length as the C sizeof(type). +// If necessary, they are marshalled according to the same rules as +// map values. +// +// From Linux 5.5 the verifier will use constants to eliminate dead code. +// +// Returns an error if a constant doesn't exist. +func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error { + replaced := make(map[string]bool) + + for name, spec := range cs.Maps { + if !strings.HasPrefix(name, ".rodata") { + continue + } + + b, ds, err := spec.dataSection() + if errors.Is(err, errMapNoBTFValue) { + // Data sections without a BTF Datasec are valid, but don't support + // constant replacements. + continue + } + if err != nil { + return fmt.Errorf("map %s: %w", name, err) + } + + // MapSpec.Copy() performs a shallow copy. Fully copy the byte slice + // to avoid any changes affecting other copies of the MapSpec. + cpy := make([]byte, len(b)) + copy(cpy, b) + + for _, v := range ds.Vars { + vname := v.Type.TypeName() + replacement, ok := consts[vname] + if !ok { + continue + } + + if replaced[vname] { + return fmt.Errorf("section %s: duplicate variable %s", name, vname) + } + + if int(v.Offset+v.Size) > len(cpy) { + return fmt.Errorf("section %s: offset %d(+%d) for variable %s is out of bounds", name, v.Offset, v.Size, vname) + } + + b, err := marshalBytes(replacement, int(v.Size)) + if err != nil { + return fmt.Errorf("marshaling constant replacement %s: %w", vname, err) + } + + copy(cpy[v.Offset:v.Offset+v.Size], b) + + replaced[vname] = true + } + + spec.Contents[0] = MapKV{Key: uint32(0), Value: cpy} + } + + var missing []string + for c := range consts { + if !replaced[c] { + missing = append(missing, c) + } + } + + if len(missing) != 0 { + return fmt.Errorf("spec is missing one or more constants: %s", strings.Join(missing, ",")) + } + + return nil +} + +// Assign the contents of a CollectionSpec to a struct. +// +// This function is a shortcut to manually checking the presence +// of maps and programs in a CollectionSpec. Consider using bpf2go +// if this sounds useful. +// +// 'to' must be a pointer to a struct. A field of the +// struct is updated with values from Programs or Maps if it +// has an `ebpf` tag and its type is *ProgramSpec or *MapSpec. +// The tag's value specifies the name of the program or map as +// found in the CollectionSpec. +// +// struct { +// Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"` +// Bar *ebpf.MapSpec `ebpf:"bar_map"` +// Ignored int +// } +// +// Returns an error if any of the eBPF objects can't be found, or +// if the same MapSpec or ProgramSpec is assigned multiple times. +func (cs *CollectionSpec) Assign(to interface{}) error { + // Assign() only supports assigning ProgramSpecs and MapSpecs, + // so doesn't load any resources into the kernel. + getValue := func(typ reflect.Type, name string) (interface{}, error) { + switch typ { + + case reflect.TypeOf((*ProgramSpec)(nil)): + if p := cs.Programs[name]; p != nil { + return p, nil + } + return nil, fmt.Errorf("missing program %q", name) + + case reflect.TypeOf((*MapSpec)(nil)): + if m := cs.Maps[name]; m != nil { + return m, nil + } + return nil, fmt.Errorf("missing map %q", name) + + default: + return nil, fmt.Errorf("unsupported type %s", typ) + } + } + + return assignValues(to, getValue) +} + +// LoadAndAssign loads Maps and Programs into the kernel and assigns them +// to a struct. +// +// Omitting Map/Program.Close() during application shutdown is an error. +// See the package documentation for details around Map and Program lifecycle. +// +// This function is a shortcut to manually checking the presence +// of maps and programs in a CollectionSpec. Consider using bpf2go +// if this sounds useful. +// +// 'to' must be a pointer to a struct. A field of the struct is updated with +// a Program or Map if it has an `ebpf` tag and its type is *Program or *Map. +// The tag's value specifies the name of the program or map as found in the +// CollectionSpec. Before updating the struct, the requested objects and their +// dependent resources are loaded into the kernel and populated with values if +// specified. +// +// struct { +// Foo *ebpf.Program `ebpf:"xdp_foo"` +// Bar *ebpf.Map `ebpf:"bar_map"` +// Ignored int +// } +// +// opts may be nil. +// +// Returns an error if any of the fields can't be found, or +// if the same Map or Program is assigned multiple times. +func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions) error { + loader, err := newCollectionLoader(cs, opts) + if err != nil { + return err + } + defer loader.close() + + // Support assigning Programs and Maps, lazy-loading the required objects. + assignedMaps := make(map[string]bool) + assignedProgs := make(map[string]bool) + + getValue := func(typ reflect.Type, name string) (interface{}, error) { + switch typ { + + case reflect.TypeOf((*Program)(nil)): + assignedProgs[name] = true + return loader.loadProgram(name) + + case reflect.TypeOf((*Map)(nil)): + assignedMaps[name] = true + return loader.loadMap(name) + + default: + return nil, fmt.Errorf("unsupported type %s", typ) + } + } + + // Load the Maps and Programs requested by the annotated struct. + if err := assignValues(to, getValue); err != nil { + return err + } + + // Populate the requested maps. Has a chance of lazy-loading other dependent maps. + if err := loader.populateMaps(); err != nil { + return err + } + + // Evaluate the loader's objects after all (lazy)loading has taken place. + for n, m := range loader.maps { + switch m.typ { + case ProgramArray: + // Require all lazy-loaded ProgramArrays to be assigned to the given object. + // The kernel empties a ProgramArray once the last user space reference + // to it closes, which leads to failed tail calls. Combined with the library + // closing map fds via GC finalizers this can lead to surprising behaviour. + // Only allow unassigned ProgramArrays when the library hasn't pre-populated + // any entries from static value declarations. At this point, we know the map + // is empty and there's no way for the caller to interact with the map going + // forward. + if !assignedMaps[n] && len(cs.Maps[n].Contents) > 0 { + return fmt.Errorf("ProgramArray %s must be assigned to prevent missed tail calls", n) + } + } + } + + // Prevent loader.cleanup() from closing assigned Maps and Programs. + for m := range assignedMaps { + delete(loader.maps, m) + } + for p := range assignedProgs { + delete(loader.programs, p) + } + + return nil +} + +// Collection is a collection of Programs and Maps associated +// with their symbols +type Collection struct { + Programs map[string]*Program + Maps map[string]*Map +} + +// NewCollection creates a Collection from the given spec, creating and +// loading its declared resources into the kernel. +// +// Omitting Collection.Close() during application shutdown is an error. +// See the package documentation for details around Map and Program lifecycle. +func NewCollection(spec *CollectionSpec) (*Collection, error) { + return NewCollectionWithOptions(spec, CollectionOptions{}) +} + +// NewCollectionWithOptions creates a Collection from the given spec using +// options, creating and loading its declared resources into the kernel. +// +// Omitting Collection.Close() during application shutdown is an error. +// See the package documentation for details around Map and Program lifecycle. +func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) { + loader, err := newCollectionLoader(spec, &opts) + if err != nil { + return nil, err + } + defer loader.close() + + // Create maps first, as their fds need to be linked into programs. + for mapName := range spec.Maps { + if _, err := loader.loadMap(mapName); err != nil { + return nil, err + } + } + + for progName, prog := range spec.Programs { + if prog.Type == UnspecifiedProgram { + continue + } + + if _, err := loader.loadProgram(progName); err != nil { + return nil, err + } + } + + // Maps can contain Program and Map stubs, so populate them after + // all Maps and Programs have been successfully loaded. + if err := loader.populateMaps(); err != nil { + return nil, err + } + + // Prevent loader.cleanup from closing maps and programs. + maps, progs := loader.maps, loader.programs + loader.maps, loader.programs = nil, nil + + return &Collection{ + progs, + maps, + }, nil +} + +type handleCache struct { + btfHandles map[*btf.Spec]*btf.Handle +} + +func newHandleCache() *handleCache { + return &handleCache{ + btfHandles: make(map[*btf.Spec]*btf.Handle), + } +} + +func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) { + if hc.btfHandles[spec] != nil { + return hc.btfHandles[spec], nil + } + + handle, err := btf.NewHandle(spec) + if err != nil { + return nil, err + } + + hc.btfHandles[spec] = handle + return handle, nil +} + +func (hc handleCache) close() { + for _, handle := range hc.btfHandles { + handle.Close() + } +} + +type collectionLoader struct { + coll *CollectionSpec + opts *CollectionOptions + maps map[string]*Map + programs map[string]*Program + handles *handleCache +} + +func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collectionLoader, error) { + if opts == nil { + opts = &CollectionOptions{} + } + + // Check for existing MapSpecs in the CollectionSpec for all provided replacement maps. + for name, m := range opts.MapReplacements { + spec, ok := coll.Maps[name] + if !ok { + return nil, fmt.Errorf("replacement map %s not found in CollectionSpec", name) + } + + if err := spec.checkCompatibility(m); err != nil { + return nil, fmt.Errorf("using replacement map %s: %w", spec.Name, err) + } + } + + return &collectionLoader{ + coll, + opts, + make(map[string]*Map), + make(map[string]*Program), + newHandleCache(), + }, nil +} + +// close all resources left over in the collectionLoader. +func (cl *collectionLoader) close() { + cl.handles.close() + for _, m := range cl.maps { + m.Close() + } + for _, p := range cl.programs { + p.Close() + } +} + +func (cl *collectionLoader) loadMap(mapName string) (*Map, error) { + if m := cl.maps[mapName]; m != nil { + return m, nil + } + + mapSpec := cl.coll.Maps[mapName] + if mapSpec == nil { + return nil, fmt.Errorf("missing map %s", mapName) + } + + if mapSpec.BTF != nil && cl.coll.Types != mapSpec.BTF { + return nil, fmt.Errorf("map %s: BTF doesn't match collection", mapName) + } + + if replaceMap, ok := cl.opts.MapReplacements[mapName]; ok { + // Clone the map to avoid closing user's map later on. + m, err := replaceMap.Clone() + if err != nil { + return nil, err + } + + cl.maps[mapName] = m + return m, nil + } + + m, err := newMapWithOptions(mapSpec, cl.opts.Maps, cl.handles) + if err != nil { + return nil, fmt.Errorf("map %s: %w", mapName, err) + } + + cl.maps[mapName] = m + return m, nil +} + +func (cl *collectionLoader) loadProgram(progName string) (*Program, error) { + if prog := cl.programs[progName]; prog != nil { + return prog, nil + } + + progSpec := cl.coll.Programs[progName] + if progSpec == nil { + return nil, fmt.Errorf("unknown program %s", progName) + } + + // Bail out early if we know the kernel is going to reject the program. + // This skips loading map dependencies, saving some cleanup work later. + if progSpec.Type == UnspecifiedProgram { + return nil, fmt.Errorf("cannot load program %s: program type is unspecified", progName) + } + + if progSpec.BTF != nil && cl.coll.Types != progSpec.BTF { + return nil, fmt.Errorf("program %s: BTF doesn't match collection", progName) + } + + progSpec = progSpec.Copy() + + // Rewrite any reference to a valid map in the program's instructions, + // which includes all of its dependencies. + for i := range progSpec.Instructions { + ins := &progSpec.Instructions[i] + + if !ins.IsLoadFromMap() || ins.Reference() == "" { + continue + } + + // Don't overwrite map loads containing non-zero map fd's, + // they can be manually included by the caller. + // Map FDs/IDs are placed in the lower 32 bits of Constant. + if int32(ins.Constant) > 0 { + continue + } + + m, err := cl.loadMap(ins.Reference()) + if err != nil { + return nil, fmt.Errorf("program %s: %w", progName, err) + } + + if err := ins.AssociateMap(m); err != nil { + return nil, fmt.Errorf("program %s: map %s: %w", progName, ins.Reference(), err) + } + } + + prog, err := newProgramWithOptions(progSpec, cl.opts.Programs, cl.handles) + if err != nil { + return nil, fmt.Errorf("program %s: %w", progName, err) + } + + cl.programs[progName] = prog + return prog, nil +} + +func (cl *collectionLoader) populateMaps() error { + for mapName, m := range cl.maps { + mapSpec, ok := cl.coll.Maps[mapName] + if !ok { + return fmt.Errorf("missing map spec %s", mapName) + } + + mapSpec = mapSpec.Copy() + + // MapSpecs that refer to inner maps or programs within the same + // CollectionSpec do so using strings. These strings are used as the key + // to look up the respective object in the Maps or Programs fields. + // Resolve those references to actual Map or Program resources that + // have been loaded into the kernel. + for i, kv := range mapSpec.Contents { + if objName, ok := kv.Value.(string); ok { + switch mapSpec.Type { + case ProgramArray: + // loadProgram is idempotent and could return an existing Program. + prog, err := cl.loadProgram(objName) + if err != nil { + return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err) + } + mapSpec.Contents[i] = MapKV{kv.Key, prog} + + case ArrayOfMaps, HashOfMaps: + // loadMap is idempotent and could return an existing Map. + innerMap, err := cl.loadMap(objName) + if err != nil { + return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err) + } + mapSpec.Contents[i] = MapKV{kv.Key, innerMap} + } + } + } + + // Populate and freeze the map if specified. + if err := m.finalize(mapSpec); err != nil { + return fmt.Errorf("populating map %s: %w", mapName, err) + } + } + + return nil +} + +// LoadCollection reads an object file and creates and loads its declared +// resources into the kernel. +// +// Omitting Collection.Close() during application shutdown is an error. +// See the package documentation for details around Map and Program lifecycle. +func LoadCollection(file string) (*Collection, error) { + spec, err := LoadCollectionSpec(file) + if err != nil { + return nil, err + } + return NewCollection(spec) +} + +// Close frees all maps and programs associated with the collection. +// +// The collection mustn't be used afterwards. +func (coll *Collection) Close() { + for _, prog := range coll.Programs { + prog.Close() + } + for _, m := range coll.Maps { + m.Close() + } +} + +// DetachMap removes the named map from the Collection. +// +// This means that a later call to Close() will not affect this map. +// +// Returns nil if no map of that name exists. +func (coll *Collection) DetachMap(name string) *Map { + m := coll.Maps[name] + delete(coll.Maps, name) + return m +} + +// DetachProgram removes the named program from the Collection. +// +// This means that a later call to Close() will not affect this program. +// +// Returns nil if no program of that name exists. +func (coll *Collection) DetachProgram(name string) *Program { + p := coll.Programs[name] + delete(coll.Programs, name) + return p +} + +// structField represents a struct field containing the ebpf struct tag. +type structField struct { + reflect.StructField + value reflect.Value +} + +// ebpfFields extracts field names tagged with 'ebpf' from a struct type. +// Keep track of visited types to avoid infinite recursion. +func ebpfFields(structVal reflect.Value, visited map[reflect.Type]bool) ([]structField, error) { + if visited == nil { + visited = make(map[reflect.Type]bool) + } + + structType := structVal.Type() + if structType.Kind() != reflect.Struct { + return nil, fmt.Errorf("%s is not a struct", structType) + } + + if visited[structType] { + return nil, fmt.Errorf("recursion on type %s", structType) + } + + fields := make([]structField, 0, structType.NumField()) + for i := 0; i < structType.NumField(); i++ { + field := structField{structType.Field(i), structVal.Field(i)} + + // If the field is tagged, gather it and move on. + name := field.Tag.Get("ebpf") + if name != "" { + fields = append(fields, field) + continue + } + + // If the field does not have an ebpf tag, but is a struct or a pointer + // to a struct, attempt to gather its fields as well. + var v reflect.Value + switch field.Type.Kind() { + case reflect.Ptr: + if field.Type.Elem().Kind() != reflect.Struct { + continue + } + + if field.value.IsNil() { + return nil, fmt.Errorf("nil pointer to %s", structType) + } + + // Obtain the destination type of the pointer. + v = field.value.Elem() + + case reflect.Struct: + // Reference the value's type directly. + v = field.value + + default: + continue + } + + inner, err := ebpfFields(v, visited) + if err != nil { + return nil, fmt.Errorf("field %s: %w", field.Name, err) + } + + fields = append(fields, inner...) + } + + return fields, nil +} + +// assignValues attempts to populate all fields of 'to' tagged with 'ebpf'. +// +// getValue is called for every tagged field of 'to' and must return the value +// to be assigned to the field with the given typ and name. +func assignValues(to interface{}, + getValue func(typ reflect.Type, name string) (interface{}, error)) error { + + toValue := reflect.ValueOf(to) + if toValue.Type().Kind() != reflect.Ptr { + return fmt.Errorf("%T is not a pointer to struct", to) + } + + if toValue.IsNil() { + return fmt.Errorf("nil pointer to %T", to) + } + + fields, err := ebpfFields(toValue.Elem(), nil) + if err != nil { + return err + } + + type elem struct { + // Either *Map or *Program + typ reflect.Type + name string + } + + assigned := make(map[elem]string) + for _, field := range fields { + // Get string value the field is tagged with. + tag := field.Tag.Get("ebpf") + if strings.Contains(tag, ",") { + return fmt.Errorf("field %s: ebpf tag contains a comma", field.Name) + } + + // Check if the eBPF object with the requested + // type and tag was already assigned elsewhere. + e := elem{field.Type, tag} + if af := assigned[e]; af != "" { + return fmt.Errorf("field %s: object %q was already assigned to %s", field.Name, tag, af) + } + + // Get the eBPF object referred to by the tag. + value, err := getValue(field.Type, tag) + if err != nil { + return fmt.Errorf("field %s: %w", field.Name, err) + } + + if !field.value.CanSet() { + return fmt.Errorf("field %s: can't set value", field.Name) + } + field.value.Set(reflect.ValueOf(value)) + + assigned[e] = field.Name + } + + return nil +} diff --git a/vendor/github.com/cilium/ebpf/doc.go b/vendor/github.com/cilium/ebpf/doc.go new file mode 100644 index 000000000..396b3394d --- /dev/null +++ b/vendor/github.com/cilium/ebpf/doc.go @@ -0,0 +1,25 @@ +// Package ebpf is a toolkit for working with eBPF programs. +// +// eBPF programs are small snippets of code which are executed directly +// in a VM in the Linux kernel, which makes them very fast and flexible. +// Many Linux subsystems now accept eBPF programs. This makes it possible +// to implement highly application specific logic inside the kernel, +// without having to modify the actual kernel itself. +// +// This package is designed for long-running processes which +// want to use eBPF to implement part of their application logic. It has no +// run-time dependencies outside of the library and the Linux kernel itself. +// eBPF code should be compiled ahead of time using clang, and shipped with +// your application as any other resource. +// +// Use the link subpackage to attach a loaded program to a hook in the kernel. +// +// Note that losing all references to Map and Program resources will cause +// their underlying file descriptors to be closed, potentially removing those +// objects from the kernel. Always retain a reference by e.g. deferring a +// Close() of a Collection or LoadAndAssign object until application exit. +// +// Special care needs to be taken when handling maps of type ProgramArray, +// as the kernel erases its contents when the last userspace or bpffs +// reference disappears, regardless of the map being in active use. +package ebpf diff --git a/vendor/github.com/cilium/ebpf/elf_reader.go b/vendor/github.com/cilium/ebpf/elf_reader.go new file mode 100644 index 000000000..df278895c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/elf_reader.go @@ -0,0 +1,1197 @@ +package ebpf + +import ( + "bufio" + "bytes" + "debug/elf" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "os" + "strings" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/unix" +) + +// elfCode is a convenience to reduce the amount of arguments that have to +// be passed around explicitly. You should treat its contents as immutable. +type elfCode struct { + *internal.SafeELFFile + sections map[elf.SectionIndex]*elfSection + license string + version uint32 + btf *btf.Spec + extInfo *btf.ExtInfos +} + +// LoadCollectionSpec parses an ELF file into a CollectionSpec. +func LoadCollectionSpec(file string) (*CollectionSpec, error) { + f, err := os.Open(file) + if err != nil { + return nil, err + } + defer f.Close() + + spec, err := LoadCollectionSpecFromReader(f) + if err != nil { + return nil, fmt.Errorf("file %s: %w", file, err) + } + return spec, nil +} + +// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec. +func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) { + f, err := internal.NewSafeELFFile(rd) + if err != nil { + return nil, err + } + + var ( + licenseSection *elf.Section + versionSection *elf.Section + sections = make(map[elf.SectionIndex]*elfSection) + relSections = make(map[elf.SectionIndex]*elf.Section) + ) + + // This is the target of relocations generated by inline assembly. + sections[elf.SHN_UNDEF] = newElfSection(new(elf.Section), undefSection) + + // Collect all the sections we're interested in. This includes relocations + // which we parse later. + for i, sec := range f.Sections { + idx := elf.SectionIndex(i) + + switch { + case strings.HasPrefix(sec.Name, "license"): + licenseSection = sec + case strings.HasPrefix(sec.Name, "version"): + versionSection = sec + case strings.HasPrefix(sec.Name, "maps"): + sections[idx] = newElfSection(sec, mapSection) + case sec.Name == ".maps": + sections[idx] = newElfSection(sec, btfMapSection) + case sec.Name == ".bss" || sec.Name == ".data" || strings.HasPrefix(sec.Name, ".rodata"): + sections[idx] = newElfSection(sec, dataSection) + case sec.Type == elf.SHT_REL: + // Store relocations under the section index of the target + relSections[elf.SectionIndex(sec.Info)] = sec + case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0: + sections[idx] = newElfSection(sec, programSection) + } + } + + license, err := loadLicense(licenseSection) + if err != nil { + return nil, fmt.Errorf("load license: %w", err) + } + + version, err := loadVersion(versionSection, f.ByteOrder) + if err != nil { + return nil, fmt.Errorf("load version: %w", err) + } + + btfSpec, btfExtInfo, err := btf.LoadSpecAndExtInfosFromReader(rd) + if err != nil && !errors.Is(err, btf.ErrNotFound) { + return nil, fmt.Errorf("load BTF: %w", err) + } + + ec := &elfCode{ + SafeELFFile: f, + sections: sections, + license: license, + version: version, + btf: btfSpec, + extInfo: btfExtInfo, + } + + symbols, err := f.Symbols() + if err != nil { + return nil, fmt.Errorf("load symbols: %v", err) + } + + ec.assignSymbols(symbols) + + if err := ec.loadRelocations(relSections, symbols); err != nil { + return nil, fmt.Errorf("load relocations: %w", err) + } + + // Collect all the various ways to define maps. + maps := make(map[string]*MapSpec) + if err := ec.loadMaps(maps); err != nil { + return nil, fmt.Errorf("load maps: %w", err) + } + + if err := ec.loadBTFMaps(maps); err != nil { + return nil, fmt.Errorf("load BTF maps: %w", err) + } + + if err := ec.loadDataSections(maps); err != nil { + return nil, fmt.Errorf("load data sections: %w", err) + } + + // Finally, collect programs and link them. + progs, err := ec.loadProgramSections() + if err != nil { + return nil, fmt.Errorf("load programs: %w", err) + } + + return &CollectionSpec{maps, progs, btfSpec, ec.ByteOrder}, nil +} + +func loadLicense(sec *elf.Section) (string, error) { + if sec == nil { + return "", nil + } + + data, err := sec.Data() + if err != nil { + return "", fmt.Errorf("section %s: %v", sec.Name, err) + } + return string(bytes.TrimRight(data, "\000")), nil +} + +func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) { + if sec == nil { + return 0, nil + } + + var version uint32 + if err := binary.Read(sec.Open(), bo, &version); err != nil { + return 0, fmt.Errorf("section %s: %v", sec.Name, err) + } + return version, nil +} + +type elfSectionKind int + +const ( + undefSection elfSectionKind = iota + mapSection + btfMapSection + programSection + dataSection +) + +type elfSection struct { + *elf.Section + kind elfSectionKind + // Offset from the start of the section to a symbol + symbols map[uint64]elf.Symbol + // Offset from the start of the section to a relocation, which points at + // a symbol in another section. + relocations map[uint64]elf.Symbol + // The number of relocations pointing at this section. + references int +} + +func newElfSection(section *elf.Section, kind elfSectionKind) *elfSection { + return &elfSection{ + section, + kind, + make(map[uint64]elf.Symbol), + make(map[uint64]elf.Symbol), + 0, + } +} + +// assignSymbols takes a list of symbols and assigns them to their +// respective sections, indexed by name. +func (ec *elfCode) assignSymbols(symbols []elf.Symbol) { + for _, symbol := range symbols { + symType := elf.ST_TYPE(symbol.Info) + symSection := ec.sections[symbol.Section] + if symSection == nil { + continue + } + + // Anonymous symbols only occur in debug sections which we don't process + // relocations for. Anonymous symbols are not referenced from other sections. + if symbol.Name == "" { + continue + } + + // Older versions of LLVM don't tag symbols correctly, so keep + // all NOTYPE ones. + switch symSection.kind { + case mapSection, btfMapSection, dataSection: + if symType != elf.STT_NOTYPE && symType != elf.STT_OBJECT { + continue + } + case programSection: + if symType != elf.STT_NOTYPE && symType != elf.STT_FUNC { + continue + } + // LLVM emits LBB_ (Local Basic Block) symbols that seem to be jump + // targets within sections, but BPF has no use for them. + if symType == elf.STT_NOTYPE && elf.ST_BIND(symbol.Info) == elf.STB_LOCAL && + strings.HasPrefix(symbol.Name, "LBB") { + continue + } + // Only collect symbols that occur in program/maps/data sections. + default: + continue + } + + symSection.symbols[symbol.Value] = symbol + } +} + +// loadRelocations iterates .rel* sections and extracts relocation entries for +// sections of interest. Makes sure relocations point at valid sections. +func (ec *elfCode) loadRelocations(relSections map[elf.SectionIndex]*elf.Section, symbols []elf.Symbol) error { + for idx, relSection := range relSections { + section := ec.sections[idx] + if section == nil { + continue + } + + rels, err := ec.loadSectionRelocations(relSection, symbols) + if err != nil { + return fmt.Errorf("relocation for section %q: %w", section.Name, err) + } + + for _, rel := range rels { + target := ec.sections[rel.Section] + if target == nil { + return fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported) + } + + if target.Flags&elf.SHF_STRINGS > 0 { + return fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported) + } + + target.references++ + } + + section.relocations = rels + } + + return nil +} + +// loadProgramSections iterates ec's sections and emits a ProgramSpec +// for each function it finds. +// +// The resulting map is indexed by function name. +func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) { + + progs := make(map[string]*ProgramSpec) + + // Generate a ProgramSpec for each function found in each program section. + var export []string + for _, sec := range ec.sections { + if sec.kind != programSection { + continue + } + + if len(sec.symbols) == 0 { + return nil, fmt.Errorf("section %v: missing symbols", sec.Name) + } + + funcs, err := ec.loadFunctions(sec) + if err != nil { + return nil, fmt.Errorf("section %v: %w", sec.Name, err) + } + + progType, attachType, progFlags, attachTo := getProgType(sec.Name) + + for name, insns := range funcs { + spec := &ProgramSpec{ + Name: name, + Type: progType, + Flags: progFlags, + AttachType: attachType, + AttachTo: attachTo, + SectionName: sec.Name, + License: ec.license, + KernelVersion: ec.version, + Instructions: insns, + ByteOrder: ec.ByteOrder, + BTF: ec.btf, + } + + // Function names must be unique within a single ELF blob. + if progs[name] != nil { + return nil, fmt.Errorf("duplicate program name %s", name) + } + progs[name] = spec + + if spec.SectionName != ".text" { + export = append(export, name) + } + } + } + + flattenPrograms(progs, export) + + // Hide programs (e.g. library functions) that were not explicitly emitted + // to an ELF section. These could be exposed in a separate CollectionSpec + // field later to allow them to be modified. + for n, p := range progs { + if p.SectionName == ".text" { + delete(progs, n) + } + } + + return progs, nil +} + +// loadFunctions extracts instruction streams from the given program section +// starting at each symbol in the section. The section's symbols must already +// be narrowed down to STT_NOTYPE (emitted by clang <8) or STT_FUNC. +// +// The resulting map is indexed by function name. +func (ec *elfCode) loadFunctions(section *elfSection) (map[string]asm.Instructions, error) { + r := bufio.NewReader(section.Open()) + + // Decode the section's instruction stream. + var insns asm.Instructions + if err := insns.Unmarshal(r, ec.ByteOrder); err != nil { + return nil, fmt.Errorf("decoding instructions for section %s: %w", section.Name, err) + } + if len(insns) == 0 { + return nil, fmt.Errorf("no instructions found in section %s", section.Name) + } + + iter := insns.Iterate() + for iter.Next() { + ins := iter.Ins + offset := iter.Offset.Bytes() + + // Tag Symbol Instructions. + if sym, ok := section.symbols[offset]; ok { + *ins = ins.WithSymbol(sym.Name) + } + + // Apply any relocations for the current instruction. + // If no relocation is present, resolve any section-relative function calls. + if rel, ok := section.relocations[offset]; ok { + if err := ec.relocateInstruction(ins, rel); err != nil { + return nil, fmt.Errorf("offset %d: relocating instruction: %w", offset, err) + } + } else { + if err := referenceRelativeJump(ins, offset, section.symbols); err != nil { + return nil, fmt.Errorf("offset %d: resolving relative jump: %w", offset, err) + } + } + } + + if ec.extInfo != nil { + ec.extInfo.Assign(insns, section.Name) + } + + return splitSymbols(insns) +} + +// referenceRelativeJump turns a relative jump to another bpf subprogram within +// the same ELF section into a Reference Instruction. +// +// Up to LLVM 9, calls to subprograms within the same ELF section are sometimes +// encoded using relative jumps instead of relocation entries. These jumps go +// out of bounds of the current program, so their targets must be memoized +// before the section's instruction stream is split. +// +// The relative jump Constant is blinded to -1 and the target Symbol is set as +// the Instruction's Reference so it can be resolved by the linker. +func referenceRelativeJump(ins *asm.Instruction, offset uint64, symbols map[uint64]elf.Symbol) error { + if !ins.IsFunctionReference() || ins.Constant == -1 { + return nil + } + + tgt := jumpTarget(offset, *ins) + sym := symbols[tgt].Name + if sym == "" { + return fmt.Errorf("no jump target found at offset %d", tgt) + } + + *ins = ins.WithReference(sym) + ins.Constant = -1 + + return nil +} + +// jumpTarget takes ins' offset within an instruction stream (in bytes) +// and returns its absolute jump destination (in bytes) within the +// instruction stream. +func jumpTarget(offset uint64, ins asm.Instruction) uint64 { + // A relative jump instruction describes the amount of raw BPF instructions + // to jump, convert the offset into bytes. + dest := ins.Constant * asm.InstructionSize + + // The starting point of the jump is the end of the current instruction. + dest += int64(offset + asm.InstructionSize) + + if dest < 0 { + return 0 + } + + return uint64(dest) +} + +func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error { + var ( + typ = elf.ST_TYPE(rel.Info) + bind = elf.ST_BIND(rel.Info) + name = rel.Name + ) + + target := ec.sections[rel.Section] + + switch target.kind { + case mapSection, btfMapSection: + if bind != elf.STB_GLOBAL { + return fmt.Errorf("possible erroneous static qualifier on map definition: found reference to %q", name) + } + + if typ != elf.STT_OBJECT && typ != elf.STT_NOTYPE { + // STT_NOTYPE is generated on clang < 8 which doesn't tag + // relocations appropriately. + return fmt.Errorf("map load: incorrect relocation type %v", typ) + } + + ins.Src = asm.PseudoMapFD + + case dataSection: + var offset uint32 + switch typ { + case elf.STT_SECTION: + if bind != elf.STB_LOCAL { + return fmt.Errorf("direct load: %s: unsupported section relocation %s", name, bind) + } + + // This is really a reference to a static symbol, which clang doesn't + // emit a symbol table entry for. Instead it encodes the offset in + // the instruction itself. + offset = uint32(uint64(ins.Constant)) + + case elf.STT_OBJECT: + // LLVM 9 emits OBJECT-LOCAL symbols for anonymous constants. + if bind != elf.STB_GLOBAL && bind != elf.STB_LOCAL { + return fmt.Errorf("direct load: %s: unsupported object relocation %s", name, bind) + } + + offset = uint32(rel.Value) + + case elf.STT_NOTYPE: + // LLVM 7 emits NOTYPE-LOCAL symbols for anonymous constants. + if bind != elf.STB_LOCAL { + return fmt.Errorf("direct load: %s: unsupported untyped relocation %s", name, bind) + } + + offset = uint32(rel.Value) + + default: + return fmt.Errorf("incorrect relocation type %v for direct map load", typ) + } + + // We rely on using the name of the data section as the reference. It + // would be nicer to keep the real name in case of an STT_OBJECT, but + // it's not clear how to encode that into Instruction. + name = target.Name + + // The kernel expects the offset in the second basic BPF instruction. + ins.Constant = int64(uint64(offset) << 32) + ins.Src = asm.PseudoMapValue + + case programSection: + switch opCode := ins.OpCode; { + case opCode.JumpOp() == asm.Call: + if ins.Src != asm.PseudoCall { + return fmt.Errorf("call: %s: incorrect source register", name) + } + + switch typ { + case elf.STT_NOTYPE, elf.STT_FUNC: + if bind != elf.STB_GLOBAL { + return fmt.Errorf("call: %s: unsupported binding: %s", name, bind) + } + + case elf.STT_SECTION: + if bind != elf.STB_LOCAL { + return fmt.Errorf("call: %s: unsupported binding: %s", name, bind) + } + + // The function we want to call is in the indicated section, + // at the offset encoded in the instruction itself. Reverse + // the calculation to find the real function we're looking for. + // A value of -1 references the first instruction in the section. + offset := int64(int32(ins.Constant)+1) * asm.InstructionSize + sym, ok := target.symbols[uint64(offset)] + if !ok { + return fmt.Errorf("call: no symbol at offset %d", offset) + } + + name = sym.Name + ins.Constant = -1 + + default: + return fmt.Errorf("call: %s: invalid symbol type %s", name, typ) + } + case opCode.IsDWordLoad(): + switch typ { + case elf.STT_FUNC: + if bind != elf.STB_GLOBAL { + return fmt.Errorf("load: %s: unsupported binding: %s", name, bind) + } + + case elf.STT_SECTION: + if bind != elf.STB_LOCAL { + return fmt.Errorf("load: %s: unsupported binding: %s", name, bind) + } + + // ins.Constant already contains the offset in bytes from the + // start of the section. This is different than a call to a + // static function. + + default: + return fmt.Errorf("load: %s: invalid symbol type %s", name, typ) + } + + sym, ok := target.symbols[uint64(ins.Constant)] + if !ok { + return fmt.Errorf("load: no symbol at offset %d", ins.Constant) + } + + name = sym.Name + ins.Constant = -1 + ins.Src = asm.PseudoFunc + + default: + return fmt.Errorf("neither a call nor a load instruction: %v", ins) + } + + case undefSection: + if bind != elf.STB_GLOBAL { + return fmt.Errorf("asm relocation: %s: unsupported binding: %s", name, bind) + } + + if typ != elf.STT_NOTYPE { + return fmt.Errorf("asm relocation: %s: unsupported type %s", name, typ) + } + + // There is nothing to do here but set ins.Reference. + + default: + return fmt.Errorf("relocation to %q: %w", target.Name, ErrNotSupported) + } + + *ins = ins.WithReference(name) + return nil +} + +func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error { + for _, sec := range ec.sections { + if sec.kind != mapSection { + continue + } + + nSym := len(sec.symbols) + if nSym == 0 { + return fmt.Errorf("section %v: no symbols", sec.Name) + } + + if sec.Size%uint64(nSym) != 0 { + return fmt.Errorf("section %v: map descriptors are not of equal size", sec.Name) + } + + var ( + r = bufio.NewReader(sec.Open()) + size = sec.Size / uint64(nSym) + ) + for i, offset := 0, uint64(0); i < nSym; i, offset = i+1, offset+size { + mapSym, ok := sec.symbols[offset] + if !ok { + return fmt.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset) + } + + mapName := mapSym.Name + if maps[mapName] != nil { + return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym) + } + + lr := io.LimitReader(r, int64(size)) + + spec := MapSpec{ + Name: SanitizeName(mapName, -1), + } + switch { + case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil: + return fmt.Errorf("map %s: missing type", mapName) + case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil: + return fmt.Errorf("map %s: missing key size", mapName) + case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil: + return fmt.Errorf("map %s: missing value size", mapName) + case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil: + return fmt.Errorf("map %s: missing max entries", mapName) + case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil: + return fmt.Errorf("map %s: missing flags", mapName) + } + + extra, err := io.ReadAll(lr) + if err != nil { + return fmt.Errorf("map %s: reading map tail: %w", mapName, err) + } + if len(extra) > 0 { + spec.Extra = bytes.NewReader(extra) + } + + if err := spec.clampPerfEventArraySize(); err != nil { + return fmt.Errorf("map %s: %w", mapName, err) + } + + maps[mapName] = &spec + } + } + + return nil +} + +// loadBTFMaps iterates over all ELF sections marked as BTF map sections +// (like .maps) and parses them into MapSpecs. Dump the .maps section and +// any relocations with `readelf -x .maps -r `. +func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error { + for _, sec := range ec.sections { + if sec.kind != btfMapSection { + continue + } + + if ec.btf == nil { + return fmt.Errorf("missing BTF") + } + + // Each section must appear as a DataSec in the ELF's BTF blob. + var ds *btf.Datasec + if err := ec.btf.TypeByName(sec.Name, &ds); err != nil { + return fmt.Errorf("cannot find section '%s' in BTF: %w", sec.Name, err) + } + + // Open a Reader to the ELF's raw section bytes so we can assert that all + // of them are zero on a per-map (per-Var) basis. For now, the section's + // sole purpose is to receive relocations, so all must be zero. + rs := sec.Open() + + for _, vs := range ds.Vars { + // BPF maps are declared as and assigned to global variables, + // so iterate over each Var in the DataSec and validate their types. + v, ok := vs.Type.(*btf.Var) + if !ok { + return fmt.Errorf("section %v: unexpected type %s", sec.Name, vs.Type) + } + name := string(v.Name) + + // The BTF metadata for each Var contains the full length of the map + // declaration, so read the corresponding amount of bytes from the ELF. + // This way, we can pinpoint which map declaration contains unexpected + // (and therefore unsupported) data. + _, err := io.Copy(internal.DiscardZeroes{}, io.LimitReader(rs, int64(vs.Size))) + if err != nil { + return fmt.Errorf("section %v: map %s: initializing BTF map definitions: %w", sec.Name, name, internal.ErrNotSupported) + } + + if maps[name] != nil { + return fmt.Errorf("section %v: map %s already exists", sec.Name, name) + } + + // Each Var representing a BTF map definition contains a Struct. + mapStruct, ok := v.Type.(*btf.Struct) + if !ok { + return fmt.Errorf("expected struct, got %s", v.Type) + } + + mapSpec, err := mapSpecFromBTF(sec, &vs, mapStruct, ec.btf, name, false) + if err != nil { + return fmt.Errorf("map %v: %w", name, err) + } + + if err := mapSpec.clampPerfEventArraySize(); err != nil { + return fmt.Errorf("map %v: %w", name, err) + } + + maps[name] = mapSpec + } + + // Drain the ELF section reader to make sure all bytes are accounted for + // with BTF metadata. + i, err := io.Copy(io.Discard, rs) + if err != nil { + return fmt.Errorf("section %v: unexpected error reading remainder of ELF section: %w", sec.Name, err) + } + if i > 0 { + return fmt.Errorf("section %v: %d unexpected remaining bytes in ELF section, invalid BTF?", sec.Name, i) + } + } + + return nil +} + +// mapSpecFromBTF produces a MapSpec based on a btf.Struct def representing +// a BTF map definition. The name and spec arguments will be copied to the +// resulting MapSpec, and inner must be true on any resursive invocations. +func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *btf.Spec, name string, inner bool) (*MapSpec, error) { + var ( + key, value btf.Type + keySize, valueSize uint32 + mapType MapType + flags, maxEntries uint32 + pinType PinType + innerMapSpec *MapSpec + contents []MapKV + err error + ) + + for i, member := range def.Members { + switch member.Name { + case "type": + mt, err := uintFromBTF(member.Type) + if err != nil { + return nil, fmt.Errorf("can't get type: %w", err) + } + mapType = MapType(mt) + + case "map_flags": + flags, err = uintFromBTF(member.Type) + if err != nil { + return nil, fmt.Errorf("can't get BTF map flags: %w", err) + } + + case "max_entries": + maxEntries, err = uintFromBTF(member.Type) + if err != nil { + return nil, fmt.Errorf("can't get BTF map max entries: %w", err) + } + + case "key": + if keySize != 0 { + return nil, errors.New("both key and key_size given") + } + + pk, ok := member.Type.(*btf.Pointer) + if !ok { + return nil, fmt.Errorf("key type is not a pointer: %T", member.Type) + } + + key = pk.Target + + size, err := btf.Sizeof(pk.Target) + if err != nil { + return nil, fmt.Errorf("can't get size of BTF key: %w", err) + } + + keySize = uint32(size) + + case "value": + if valueSize != 0 { + return nil, errors.New("both value and value_size given") + } + + vk, ok := member.Type.(*btf.Pointer) + if !ok { + return nil, fmt.Errorf("value type is not a pointer: %T", member.Type) + } + + value = vk.Target + + size, err := btf.Sizeof(vk.Target) + if err != nil { + return nil, fmt.Errorf("can't get size of BTF value: %w", err) + } + + valueSize = uint32(size) + + case "key_size": + // Key needs to be nil and keySize needs to be 0 for key_size to be + // considered a valid member. + if key != nil || keySize != 0 { + return nil, errors.New("both key and key_size given") + } + + keySize, err = uintFromBTF(member.Type) + if err != nil { + return nil, fmt.Errorf("can't get BTF key size: %w", err) + } + + case "value_size": + // Value needs to be nil and valueSize needs to be 0 for value_size to be + // considered a valid member. + if value != nil || valueSize != 0 { + return nil, errors.New("both value and value_size given") + } + + valueSize, err = uintFromBTF(member.Type) + if err != nil { + return nil, fmt.Errorf("can't get BTF value size: %w", err) + } + + case "pinning": + if inner { + return nil, errors.New("inner maps can't be pinned") + } + + pinning, err := uintFromBTF(member.Type) + if err != nil { + return nil, fmt.Errorf("can't get pinning: %w", err) + } + + pinType = PinType(pinning) + + case "values": + // The 'values' field in BTF map definitions is used for declaring map + // value types that are references to other BPF objects, like other maps + // or programs. It is always expected to be an array of pointers. + if i != len(def.Members)-1 { + return nil, errors.New("'values' must be the last member in a BTF map definition") + } + + if valueSize != 0 && valueSize != 4 { + return nil, errors.New("value_size must be 0 or 4") + } + valueSize = 4 + + valueType, err := resolveBTFArrayMacro(member.Type) + if err != nil { + return nil, fmt.Errorf("can't resolve type of member 'values': %w", err) + } + + switch t := valueType.(type) { + case *btf.Struct: + // The values member pointing to an array of structs means we're expecting + // a map-in-map declaration. + if mapType != ArrayOfMaps && mapType != HashOfMaps { + return nil, errors.New("outer map needs to be an array or a hash of maps") + } + if inner { + return nil, fmt.Errorf("nested inner maps are not supported") + } + + // This inner map spec is used as a map template, but it needs to be + // created as a traditional map before it can be used to do so. + // libbpf names the inner map template '.inner', but we + // opted for _inner to simplify validation logic. (dots only supported + // on kernels 5.2 and up) + // Pass the BTF spec from the parent object, since both parent and + // child must be created from the same BTF blob (on kernels that support BTF). + innerMapSpec, err = mapSpecFromBTF(es, vs, t, spec, name+"_inner", true) + if err != nil { + return nil, fmt.Errorf("can't parse BTF map definition of inner map: %w", err) + } + + case *btf.FuncProto: + // The values member contains an array of function pointers, meaning an + // autopopulated PROG_ARRAY. + if mapType != ProgramArray { + return nil, errors.New("map needs to be a program array") + } + + default: + return nil, fmt.Errorf("unsupported value type %q in 'values' field", t) + } + + contents, err = resolveBTFValuesContents(es, vs, member) + if err != nil { + return nil, fmt.Errorf("resolving values contents: %w", err) + } + + default: + return nil, fmt.Errorf("unrecognized field %s in BTF map definition", member.Name) + } + } + + if key == nil { + key = &btf.Void{} + } + if value == nil { + value = &btf.Void{} + } + + return &MapSpec{ + Name: SanitizeName(name, -1), + Type: MapType(mapType), + KeySize: keySize, + ValueSize: valueSize, + MaxEntries: maxEntries, + Flags: flags, + Key: key, + Value: value, + BTF: spec, + Pinning: pinType, + InnerMap: innerMapSpec, + Contents: contents, + }, nil +} + +// uintFromBTF resolves the __uint macro, which is a pointer to a sized +// array, e.g. for int (*foo)[10], this function will return 10. +func uintFromBTF(typ btf.Type) (uint32, error) { + ptr, ok := typ.(*btf.Pointer) + if !ok { + return 0, fmt.Errorf("not a pointer: %v", typ) + } + + arr, ok := ptr.Target.(*btf.Array) + if !ok { + return 0, fmt.Errorf("not a pointer to array: %v", typ) + } + + return arr.Nelems, nil +} + +// resolveBTFArrayMacro resolves the __array macro, which declares an array +// of pointers to a given type. This function returns the target Type of +// the pointers in the array. +func resolveBTFArrayMacro(typ btf.Type) (btf.Type, error) { + arr, ok := typ.(*btf.Array) + if !ok { + return nil, fmt.Errorf("not an array: %v", typ) + } + + ptr, ok := arr.Type.(*btf.Pointer) + if !ok { + return nil, fmt.Errorf("not an array of pointers: %v", typ) + } + + return ptr.Target, nil +} + +// resolveBTFValuesContents resolves relocations into ELF sections belonging +// to btf.VarSecinfo's. This can be used on the 'values' member in BTF map +// definitions to extract static declarations of map contents. +func resolveBTFValuesContents(es *elfSection, vs *btf.VarSecinfo, member btf.Member) ([]MapKV, error) { + // The elements of a .values pointer array are not encoded in BTF. + // Instead, relocations are generated into each array index. + // However, it's possible to leave certain array indices empty, so all + // indices' offsets need to be checked for emitted relocations. + + // The offset of the 'values' member within the _struct_ (in bits) + // is the starting point of the array. Convert to bytes. Add VarSecinfo + // offset to get the absolute position in the ELF blob. + start := member.Offset.Bytes() + vs.Offset + // 'values' is encoded in BTF as a zero (variable) length struct + // member, and its contents run until the end of the VarSecinfo. + // Add VarSecinfo offset to get the absolute position in the ELF blob. + end := vs.Size + vs.Offset + // The size of an address in this section. This determines the width of + // an index in the array. + align := uint32(es.SectionHeader.Addralign) + + // Check if variable-length section is aligned. + if (end-start)%align != 0 { + return nil, errors.New("unaligned static values section") + } + elems := (end - start) / align + + if elems == 0 { + return nil, nil + } + + contents := make([]MapKV, 0, elems) + + // k is the array index, off is its corresponding ELF section offset. + for k, off := uint32(0), start; k < elems; k, off = k+1, off+align { + r, ok := es.relocations[uint64(off)] + if !ok { + continue + } + + // Relocation exists for the current offset in the ELF section. + // Emit a value stub based on the type of relocation to be replaced by + // a real fd later in the pipeline before populating the map. + // Map keys are encoded in MapKV entries, so empty array indices are + // skipped here. + switch t := elf.ST_TYPE(r.Info); t { + case elf.STT_FUNC: + contents = append(contents, MapKV{uint32(k), r.Name}) + case elf.STT_OBJECT: + contents = append(contents, MapKV{uint32(k), r.Name}) + default: + return nil, fmt.Errorf("unknown relocation type %v", t) + } + } + + return contents, nil +} + +func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error { + for _, sec := range ec.sections { + if sec.kind != dataSection { + continue + } + + if sec.references == 0 { + // Prune data sections which are not referenced by any + // instructions. + continue + } + + data, err := sec.Data() + if err != nil { + return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err) + } + + if uint64(len(data)) > math.MaxUint32 { + return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name) + } + + mapSpec := &MapSpec{ + Name: SanitizeName(sec.Name, -1), + Type: Array, + KeySize: 4, + ValueSize: uint32(len(data)), + MaxEntries: 1, + Contents: []MapKV{{uint32(0), data}}, + } + + // It is possible for a data section to exist without a corresponding BTF Datasec + // if it only contains anonymous values like macro-defined arrays. + if ec.btf != nil { + var ds *btf.Datasec + if ec.btf.TypeByName(sec.Name, &ds) == nil { + // Assign the spec's key and BTF only if the Datasec lookup was successful. + mapSpec.BTF = ec.btf + mapSpec.Key = &btf.Void{} + mapSpec.Value = ds + } + } + + switch n := sec.Name; { + case strings.HasPrefix(n, ".rodata"): + mapSpec.Flags = unix.BPF_F_RDONLY_PROG + mapSpec.Freeze = true + case n == ".bss": + // The kernel already zero-initializes the map + mapSpec.Contents = nil + } + + maps[sec.Name] = mapSpec + } + return nil +} + +func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) { + types := []struct { + prefix string + progType ProgramType + attachType AttachType + progFlags uint32 + }{ + // Please update the types from libbpf.c and follow the order of it. + // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c + {"socket", SocketFilter, AttachNone, 0}, + {"sk_reuseport/migrate", SkReuseport, AttachSkReuseportSelectOrMigrate, 0}, + {"sk_reuseport", SkReuseport, AttachSkReuseportSelect, 0}, + {"kprobe/", Kprobe, AttachNone, 0}, + {"uprobe/", Kprobe, AttachNone, 0}, + {"kretprobe/", Kprobe, AttachNone, 0}, + {"uretprobe/", Kprobe, AttachNone, 0}, + {"tc", SchedCLS, AttachNone, 0}, + {"classifier", SchedCLS, AttachNone, 0}, + {"action", SchedACT, AttachNone, 0}, + {"tracepoint/", TracePoint, AttachNone, 0}, + {"tp/", TracePoint, AttachNone, 0}, + {"raw_tracepoint/", RawTracepoint, AttachNone, 0}, + {"raw_tp/", RawTracepoint, AttachNone, 0}, + {"raw_tracepoint.w/", RawTracepointWritable, AttachNone, 0}, + {"raw_tp.w/", RawTracepointWritable, AttachNone, 0}, + {"tp_btf/", Tracing, AttachTraceRawTp, 0}, + {"fentry/", Tracing, AttachTraceFEntry, 0}, + {"fmod_ret/", Tracing, AttachModifyReturn, 0}, + {"fexit/", Tracing, AttachTraceFExit, 0}, + {"fentry.s/", Tracing, AttachTraceFEntry, unix.BPF_F_SLEEPABLE}, + {"fmod_ret.s/", Tracing, AttachModifyReturn, unix.BPF_F_SLEEPABLE}, + {"fexit.s/", Tracing, AttachTraceFExit, unix.BPF_F_SLEEPABLE}, + {"freplace/", Extension, AttachNone, 0}, + {"lsm/", LSM, AttachLSMMac, 0}, + {"lsm.s/", LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE}, + {"iter/", Tracing, AttachTraceIter, 0}, + {"syscall", Syscall, AttachNone, 0}, + {"xdp_devmap/", XDP, AttachXDPDevMap, 0}, + {"xdp_cpumap/", XDP, AttachXDPCPUMap, 0}, + {"xdp", XDP, AttachNone, 0}, + {"perf_event", PerfEvent, AttachNone, 0}, + {"lwt_in", LWTIn, AttachNone, 0}, + {"lwt_out", LWTOut, AttachNone, 0}, + {"lwt_xmit", LWTXmit, AttachNone, 0}, + {"lwt_seg6local", LWTSeg6Local, AttachNone, 0}, + {"cgroup_skb/ingress", CGroupSKB, AttachCGroupInetIngress, 0}, + {"cgroup_skb/egress", CGroupSKB, AttachCGroupInetEgress, 0}, + {"cgroup/skb", CGroupSKB, AttachNone, 0}, + {"cgroup/sock_create", CGroupSock, AttachCGroupInetSockCreate, 0}, + {"cgroup/sock_release", CGroupSock, AttachCgroupInetSockRelease, 0}, + {"cgroup/sock", CGroupSock, AttachCGroupInetSockCreate, 0}, + {"cgroup/post_bind4", CGroupSock, AttachCGroupInet4PostBind, 0}, + {"cgroup/post_bind6", CGroupSock, AttachCGroupInet6PostBind, 0}, + {"cgroup/dev", CGroupDevice, AttachCGroupDevice, 0}, + {"sockops", SockOps, AttachCGroupSockOps, 0}, + {"sk_skb/stream_parser", SkSKB, AttachSkSKBStreamParser, 0}, + {"sk_skb/stream_verdict", SkSKB, AttachSkSKBStreamVerdict, 0}, + {"sk_skb", SkSKB, AttachNone, 0}, + {"sk_msg", SkMsg, AttachSkMsgVerdict, 0}, + {"lirc_mode2", LircMode2, AttachLircMode2, 0}, + {"flow_dissector", FlowDissector, AttachFlowDissector, 0}, + {"cgroup/bind4", CGroupSockAddr, AttachCGroupInet4Bind, 0}, + {"cgroup/bind6", CGroupSockAddr, AttachCGroupInet6Bind, 0}, + {"cgroup/connect4", CGroupSockAddr, AttachCGroupInet4Connect, 0}, + {"cgroup/connect6", CGroupSockAddr, AttachCGroupInet6Connect, 0}, + {"cgroup/sendmsg4", CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0}, + {"cgroup/sendmsg6", CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0}, + {"cgroup/recvmsg4", CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0}, + {"cgroup/recvmsg6", CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0}, + {"cgroup/getpeername4", CGroupSockAddr, AttachCgroupInet4GetPeername, 0}, + {"cgroup/getpeername6", CGroupSockAddr, AttachCgroupInet6GetPeername, 0}, + {"cgroup/getsockname4", CGroupSockAddr, AttachCgroupInet4GetSockname, 0}, + {"cgroup/getsockname6", CGroupSockAddr, AttachCgroupInet6GetSockname, 0}, + {"cgroup/sysctl", CGroupSysctl, AttachCGroupSysctl, 0}, + {"cgroup/getsockopt", CGroupSockopt, AttachCGroupGetsockopt, 0}, + {"cgroup/setsockopt", CGroupSockopt, AttachCGroupSetsockopt, 0}, + {"struct_ops+", StructOps, AttachNone, 0}, + {"sk_lookup/", SkLookup, AttachSkLookup, 0}, + + {"seccomp", SocketFilter, AttachNone, 0}, + } + + for _, t := range types { + if !strings.HasPrefix(sectionName, t.prefix) { + continue + } + + if !strings.HasSuffix(t.prefix, "/") { + return t.progType, t.attachType, t.progFlags, "" + } + + return t.progType, t.attachType, t.progFlags, sectionName[len(t.prefix):] + } + + return UnspecifiedProgram, AttachNone, 0, "" +} + +func (ec *elfCode) loadSectionRelocations(sec *elf.Section, symbols []elf.Symbol) (map[uint64]elf.Symbol, error) { + rels := make(map[uint64]elf.Symbol) + + if sec.Entsize < 16 { + return nil, fmt.Errorf("section %s: relocations are less than 16 bytes", sec.Name) + } + + r := bufio.NewReader(sec.Open()) + for off := uint64(0); off < sec.Size; off += sec.Entsize { + ent := io.LimitReader(r, int64(sec.Entsize)) + + var rel elf.Rel64 + if binary.Read(ent, ec.ByteOrder, &rel) != nil { + return nil, fmt.Errorf("can't parse relocation at offset %v", off) + } + + symNo := int(elf.R_SYM64(rel.Info) - 1) + if symNo >= len(symbols) { + return nil, fmt.Errorf("offset %d: symbol %d doesn't exist", off, symNo) + } + + symbol := symbols[symNo] + rels[rel.Off] = symbol + } + + return rels, nil +} diff --git a/vendor/github.com/cilium/ebpf/info.go b/vendor/github.com/cilium/ebpf/info.go new file mode 100644 index 000000000..ae77bc619 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/info.go @@ -0,0 +1,323 @@ +package ebpf + +import ( + "bufio" + "bytes" + "encoding/hex" + "errors" + "fmt" + "io" + "os" + "strings" + "syscall" + "time" + "unsafe" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// MapInfo describes a map. +type MapInfo struct { + Type MapType + id MapID + KeySize uint32 + ValueSize uint32 + MaxEntries uint32 + Flags uint32 + // Name as supplied by user space at load time. Available from 4.15. + Name string +} + +func newMapInfoFromFd(fd *sys.FD) (*MapInfo, error) { + var info sys.MapInfo + err := sys.ObjInfo(fd, &info) + if errors.Is(err, syscall.EINVAL) { + return newMapInfoFromProc(fd) + } + if err != nil { + return nil, err + } + + return &MapInfo{ + MapType(info.Type), + MapID(info.Id), + info.KeySize, + info.ValueSize, + info.MaxEntries, + info.MapFlags, + unix.ByteSliceToString(info.Name[:]), + }, nil +} + +func newMapInfoFromProc(fd *sys.FD) (*MapInfo, error) { + var mi MapInfo + err := scanFdInfo(fd, map[string]interface{}{ + "map_type": &mi.Type, + "key_size": &mi.KeySize, + "value_size": &mi.ValueSize, + "max_entries": &mi.MaxEntries, + "map_flags": &mi.Flags, + }) + if err != nil { + return nil, err + } + return &mi, nil +} + +// ID returns the map ID. +// +// Available from 4.13. +// +// The bool return value indicates whether this optional field is available. +func (mi *MapInfo) ID() (MapID, bool) { + return mi.id, mi.id > 0 +} + +// programStats holds statistics of a program. +type programStats struct { + // Total accumulated runtime of the program ins ns. + runtime time.Duration + // Total number of times the program was called. + runCount uint64 +} + +// ProgramInfo describes a program. +type ProgramInfo struct { + Type ProgramType + id ProgramID + // Truncated hash of the BPF bytecode. Available from 4.13. + Tag string + // Name as supplied by user space at load time. Available from 4.15. + Name string + + btf btf.ID + stats *programStats + + maps []MapID + insns []byte +} + +func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) { + var info sys.ProgInfo + err := sys.ObjInfo(fd, &info) + if errors.Is(err, syscall.EINVAL) { + return newProgramInfoFromProc(fd) + } + if err != nil { + return nil, err + } + + pi := ProgramInfo{ + Type: ProgramType(info.Type), + id: ProgramID(info.Id), + Tag: hex.EncodeToString(info.Tag[:]), + Name: unix.ByteSliceToString(info.Name[:]), + btf: btf.ID(info.BtfId), + stats: &programStats{ + runtime: time.Duration(info.RunTimeNs), + runCount: info.RunCnt, + }, + } + + // Start with a clean struct for the second call, otherwise we may get EFAULT. + var info2 sys.ProgInfo + + if info.NrMapIds > 0 { + pi.maps = make([]MapID, info.NrMapIds) + info2.NrMapIds = info.NrMapIds + info2.MapIds = sys.NewPointer(unsafe.Pointer(&pi.maps[0])) + } + + if info.XlatedProgLen > 0 { + pi.insns = make([]byte, info.XlatedProgLen) + info2.XlatedProgLen = info.XlatedProgLen + info2.XlatedProgInsns = sys.NewSlicePointer(pi.insns) + } + + if info.NrMapIds > 0 || info.XlatedProgLen > 0 { + if err := sys.ObjInfo(fd, &info2); err != nil { + return nil, err + } + } + + return &pi, nil +} + +func newProgramInfoFromProc(fd *sys.FD) (*ProgramInfo, error) { + var info ProgramInfo + err := scanFdInfo(fd, map[string]interface{}{ + "prog_type": &info.Type, + "prog_tag": &info.Tag, + }) + if errors.Is(err, errMissingFields) { + return nil, &internal.UnsupportedFeatureError{ + Name: "reading program info from /proc/self/fdinfo", + MinimumVersion: internal.Version{4, 10, 0}, + } + } + if err != nil { + return nil, err + } + + return &info, nil +} + +// ID returns the program ID. +// +// Available from 4.13. +// +// The bool return value indicates whether this optional field is available. +func (pi *ProgramInfo) ID() (ProgramID, bool) { + return pi.id, pi.id > 0 +} + +// BTFID returns the BTF ID associated with the program. +// +// The ID is only valid as long as the associated program is kept alive. +// Available from 5.0. +// +// The bool return value indicates whether this optional field is available and +// populated. (The field may be available but not populated if the kernel +// supports the field but the program was loaded without BTF information.) +func (pi *ProgramInfo) BTFID() (btf.ID, bool) { + return pi.btf, pi.btf > 0 +} + +// RunCount returns the total number of times the program was called. +// +// Can return 0 if the collection of statistics is not enabled. See EnableStats(). +// The bool return value indicates whether this optional field is available. +func (pi *ProgramInfo) RunCount() (uint64, bool) { + if pi.stats != nil { + return pi.stats.runCount, true + } + return 0, false +} + +// Runtime returns the total accumulated runtime of the program. +// +// Can return 0 if the collection of statistics is not enabled. See EnableStats(). +// The bool return value indicates whether this optional field is available. +func (pi *ProgramInfo) Runtime() (time.Duration, bool) { + if pi.stats != nil { + return pi.stats.runtime, true + } + return time.Duration(0), false +} + +// Instructions returns the 'xlated' instruction stream of the program +// after it has been verified and rewritten by the kernel. These instructions +// cannot be loaded back into the kernel as-is, this is mainly used for +// inspecting loaded programs for troubleshooting, dumping, etc. +// +// For example, map accesses are made to reference their kernel map IDs, +// not the FDs they had when the program was inserted. Note that before +// the introduction of bpf_insn_prepare_dump in kernel 4.16, xlated +// instructions were not sanitized, making the output even less reusable +// and less likely to round-trip or evaluate to the same program Tag. +// +// The first instruction is marked as a symbol using the Program's name. +// +// Available from 4.13. Requires CAP_BPF or equivalent. +func (pi *ProgramInfo) Instructions() (asm.Instructions, error) { + // If the calling process is not BPF-capable or if the kernel doesn't + // support getting xlated instructions, the field will be zero. + if len(pi.insns) == 0 { + return nil, fmt.Errorf("insufficient permissions or unsupported kernel: %w", ErrNotSupported) + } + + r := bytes.NewReader(pi.insns) + var insns asm.Instructions + if err := insns.Unmarshal(r, internal.NativeEndian); err != nil { + return nil, fmt.Errorf("unmarshaling instructions: %w", err) + } + + // Tag the first instruction with the name of the program, if available. + insns[0] = insns[0].WithSymbol(pi.Name) + + return insns, nil +} + +// MapIDs returns the maps related to the program. +// +// Available from 4.15. +// +// The bool return value indicates whether this optional field is available. +func (pi *ProgramInfo) MapIDs() ([]MapID, bool) { + return pi.maps, pi.maps != nil +} + +func scanFdInfo(fd *sys.FD, fields map[string]interface{}) error { + fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", fd.Int())) + if err != nil { + return err + } + defer fh.Close() + + if err := scanFdInfoReader(fh, fields); err != nil { + return fmt.Errorf("%s: %w", fh.Name(), err) + } + return nil +} + +var errMissingFields = errors.New("missing fields") + +func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error { + var ( + scanner = bufio.NewScanner(r) + scanned int + ) + + for scanner.Scan() { + parts := strings.SplitN(scanner.Text(), "\t", 2) + if len(parts) != 2 { + continue + } + + name := strings.TrimSuffix(parts[0], ":") + field, ok := fields[string(name)] + if !ok { + continue + } + + if n, err := fmt.Sscanln(parts[1], field); err != nil || n != 1 { + return fmt.Errorf("can't parse field %s: %v", name, err) + } + + scanned++ + } + + if err := scanner.Err(); err != nil { + return err + } + + if len(fields) > 0 && scanned == 0 { + return ErrNotSupported + } + + if scanned != len(fields) { + return errMissingFields + } + + return nil +} + +// EnableStats starts the measuring of the runtime +// and run counts of eBPF programs. +// +// Collecting statistics can have an impact on the performance. +// +// Requires at least 5.8. +func EnableStats(which uint32) (io.Closer, error) { + fd, err := sys.EnableStats(&sys.EnableStatsAttr{ + Type: which, + }) + if err != nil { + return nil, err + } + return fd, nil +} diff --git a/vendor/github.com/cilium/ebpf/internal/align.go b/vendor/github.com/cilium/ebpf/internal/align.go new file mode 100644 index 000000000..8b4f2658e --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/align.go @@ -0,0 +1,6 @@ +package internal + +// Align returns 'n' updated to 'alignment' boundary. +func Align(n, alignment int) int { + return (int(n) + alignment - 1) / alignment * alignment +} diff --git a/vendor/github.com/cilium/ebpf/internal/cpu.go b/vendor/github.com/cilium/ebpf/internal/cpu.go new file mode 100644 index 000000000..3affa1efb --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/cpu.go @@ -0,0 +1,62 @@ +package internal + +import ( + "fmt" + "os" + "strings" + "sync" +) + +var sysCPU struct { + once sync.Once + err error + num int +} + +// PossibleCPUs returns the max number of CPUs a system may possibly have +// Logical CPU numbers must be of the form 0-n +func PossibleCPUs() (int, error) { + sysCPU.once.Do(func() { + sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible") + }) + + return sysCPU.num, sysCPU.err +} + +func parseCPUsFromFile(path string) (int, error) { + spec, err := os.ReadFile(path) + if err != nil { + return 0, err + } + + n, err := parseCPUs(string(spec)) + if err != nil { + return 0, fmt.Errorf("can't parse %s: %v", path, err) + } + + return n, nil +} + +// parseCPUs parses the number of cpus from a string produced +// by bitmap_list_string() in the Linux kernel. +// Multiple ranges are rejected, since they can't be unified +// into a single number. +// This is the format of /sys/devices/system/cpu/possible, it +// is not suitable for /sys/devices/system/cpu/online, etc. +func parseCPUs(spec string) (int, error) { + if strings.Trim(spec, "\n") == "0" { + return 1, nil + } + + var low, high int + n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high) + if n != 2 || err != nil { + return 0, fmt.Errorf("invalid format: %s", spec) + } + if low != 0 { + return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec) + } + + // cpus is 0 indexed + return high + 1, nil +} diff --git a/vendor/github.com/cilium/ebpf/internal/elf.go b/vendor/github.com/cilium/ebpf/internal/elf.go new file mode 100644 index 000000000..011581938 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/elf.go @@ -0,0 +1,102 @@ +package internal + +import ( + "debug/elf" + "fmt" + "io" +) + +type SafeELFFile struct { + *elf.File +} + +// NewSafeELFFile reads an ELF safely. +// +// Any panic during parsing is turned into an error. This is necessary since +// there are a bunch of unfixed bugs in debug/elf. +// +// https://github.com/golang/go/issues?q=is%3Aissue+is%3Aopen+debug%2Felf+in%3Atitle +func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + safe = nil + err = fmt.Errorf("reading ELF file panicked: %s", r) + }() + + file, err := elf.NewFile(r) + if err != nil { + return nil, err + } + + return &SafeELFFile{file}, nil +} + +// OpenSafeELFFile reads an ELF from a file. +// +// It works like NewSafeELFFile, with the exception that safe.Close will +// close the underlying file. +func OpenSafeELFFile(path string) (safe *SafeELFFile, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + safe = nil + err = fmt.Errorf("reading ELF file panicked: %s", r) + }() + + file, err := elf.Open(path) + if err != nil { + return nil, err + } + + return &SafeELFFile{file}, nil +} + +// Symbols is the safe version of elf.File.Symbols. +func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + syms = nil + err = fmt.Errorf("reading ELF symbols panicked: %s", r) + }() + + syms, err = se.File.Symbols() + return +} + +// DynamicSymbols is the safe version of elf.File.DynamicSymbols. +func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + syms = nil + err = fmt.Errorf("reading ELF dynamic symbols panicked: %s", r) + }() + + syms, err = se.File.DynamicSymbols() + return +} + +// SectionsByType returns all sections in the file with the specified section type. +func (se *SafeELFFile) SectionsByType(typ elf.SectionType) []*elf.Section { + sections := make([]*elf.Section, 0, 1) + for _, section := range se.Sections { + if section.Type == typ { + sections = append(sections, section) + } + } + return sections +} diff --git a/vendor/github.com/cilium/ebpf/internal/endian_be.go b/vendor/github.com/cilium/ebpf/internal/endian_be.go new file mode 100644 index 000000000..ad33cda85 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/endian_be.go @@ -0,0 +1,13 @@ +//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64 +// +build armbe arm64be mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64 + +package internal + +import "encoding/binary" + +// NativeEndian is set to either binary.BigEndian or binary.LittleEndian, +// depending on the host's endianness. +var NativeEndian binary.ByteOrder = binary.BigEndian + +// ClangEndian is set to either "el" or "eb" depending on the host's endianness. +const ClangEndian = "eb" diff --git a/vendor/github.com/cilium/ebpf/internal/endian_le.go b/vendor/github.com/cilium/ebpf/internal/endian_le.go new file mode 100644 index 000000000..41a68224c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/endian_le.go @@ -0,0 +1,13 @@ +//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 +// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64 + +package internal + +import "encoding/binary" + +// NativeEndian is set to either binary.BigEndian or binary.LittleEndian, +// depending on the host's endianness. +var NativeEndian binary.ByteOrder = binary.LittleEndian + +// ClangEndian is set to either "el" or "eb" depending on the host's endianness. +const ClangEndian = "el" diff --git a/vendor/github.com/cilium/ebpf/internal/errors.go b/vendor/github.com/cilium/ebpf/internal/errors.go new file mode 100644 index 000000000..b5ccdd7d0 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/errors.go @@ -0,0 +1,206 @@ +package internal + +import ( + "bytes" + "fmt" + "io" + "strings" +) + +// ErrorWithLog returns an error which includes logs from the kernel verifier. +// +// The default error output is a summary of the full log. The latter can be +// accessed via VerifierError.Log or by formatting the error, see Format. +// +// A set of heuristics is used to determine whether the log has been truncated. +func ErrorWithLog(err error, log []byte) *VerifierError { + const whitespace = "\t\r\v\n " + + // Convert verifier log C string by truncating it on the first 0 byte + // and trimming trailing whitespace before interpreting as a Go string. + truncated := false + if i := bytes.IndexByte(log, 0); i != -1 { + if i == len(log)-1 && !bytes.HasSuffix(log[:i], []byte{'\n'}) { + // The null byte is at the end of the buffer and it's not preceded + // by a newline character. Most likely the buffer was too short. + truncated = true + } + + log = log[:i] + } else if len(log) > 0 { + // No null byte? Dodgy! + truncated = true + } + + log = bytes.Trim(log, whitespace) + logLines := bytes.Split(log, []byte{'\n'}) + lines := make([]string, 0, len(logLines)) + for _, line := range logLines { + // Don't remove leading white space on individual lines. We rely on it + // when outputting logs. + lines = append(lines, string(bytes.TrimRight(line, whitespace))) + } + + return &VerifierError{err, lines, truncated} +} + +// VerifierError includes information from the eBPF verifier. +// +// It summarises the log output, see Format if you want to output the full contents. +type VerifierError struct { + // The error which caused this error. + Cause error + // The verifier output split into lines. + Log []string + // Whether the log output is truncated, based on several heuristics. + Truncated bool +} + +func (le *VerifierError) Unwrap() error { + return le.Cause +} + +func (le *VerifierError) Error() string { + log := le.Log + if n := len(log); n > 0 && strings.HasPrefix(log[n-1], "processed ") { + // Get rid of "processed 39 insns (limit 1000000) ..." from summary. + log = log[:n-1] + } + + n := len(log) + if n == 0 { + return le.Cause.Error() + } + + lines := log[n-1:] + if n >= 2 && (includePreviousLine(log[n-1]) || le.Truncated) { + // Add one more line of context if it aids understanding the error. + lines = log[n-2:] + } + + var b strings.Builder + fmt.Fprintf(&b, "%s: ", le.Cause.Error()) + + for i, line := range lines { + b.WriteString(strings.TrimSpace(line)) + if i != len(lines)-1 { + b.WriteString(": ") + } + } + + omitted := len(le.Log) - len(lines) + if omitted == 0 && !le.Truncated { + return b.String() + } + + b.WriteString(" (") + if le.Truncated { + b.WriteString("truncated") + } + + if omitted > 0 { + if le.Truncated { + b.WriteString(", ") + } + fmt.Fprintf(&b, "%d line(s) omitted", omitted) + } + b.WriteString(")") + + return b.String() +} + +// includePreviousLine returns true if the given line likely is better +// understood with additional context from the preceding line. +func includePreviousLine(line string) bool { + // We need to find a good trade off between understandable error messages + // and too much complexity here. Checking the string prefix is ok, requiring + // regular expressions to do it is probably overkill. + + if strings.HasPrefix(line, "\t") { + // [13] STRUCT drm_rect size=16 vlen=4 + // \tx1 type_id=2 + return true + } + + if len(line) >= 2 && line[0] == 'R' && line[1] >= '0' && line[1] <= '9' { + // 0: (95) exit + // R0 !read_ok + return true + } + + if strings.HasPrefix(line, "invalid bpf_context access") { + // 0: (79) r6 = *(u64 *)(r1 +0) + // func '__x64_sys_recvfrom' arg0 type FWD is not a struct + // invalid bpf_context access off=0 size=8 + return true + } + + return false +} + +// Format the error. +// +// Understood verbs are %s and %v, which are equivalent to calling Error(). %v +// allows outputting additional information using the following flags: +// +// + Output the first lines, or all lines if no width is given. +// - Output the last lines, or all lines if no width is given. +// +// Use width to specify how many lines to output. Use the '-' flag to output +// lines from the end of the log instead of the beginning. +func (le *VerifierError) Format(f fmt.State, verb rune) { + switch verb { + case 's': + _, _ = io.WriteString(f, le.Error()) + + case 'v': + n, haveWidth := f.Width() + if !haveWidth || n > len(le.Log) { + n = len(le.Log) + } + + if !f.Flag('+') && !f.Flag('-') { + if haveWidth { + _, _ = io.WriteString(f, "%!v(BADWIDTH)") + return + } + + _, _ = io.WriteString(f, le.Error()) + return + } + + if f.Flag('+') && f.Flag('-') { + _, _ = io.WriteString(f, "%!v(BADFLAG)") + return + } + + fmt.Fprintf(f, "%s:", le.Cause.Error()) + + omitted := len(le.Log) - n + lines := le.Log[:n] + if f.Flag('-') { + // Print last instead of first lines. + lines = le.Log[len(le.Log)-n:] + if omitted > 0 { + fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted) + } + } + + for _, line := range lines { + fmt.Fprintf(f, "\n\t%s", line) + } + + if !f.Flag('-') { + if omitted > 0 { + fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted) + } + } + + if le.Truncated { + fmt.Fprintf(f, "\n\t(truncated)") + } + + default: + fmt.Fprintf(f, "%%!%c(BADVERB)", verb) + } +} diff --git a/vendor/github.com/cilium/ebpf/internal/feature.go b/vendor/github.com/cilium/ebpf/internal/feature.go new file mode 100644 index 000000000..0a6c2d1d5 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/feature.go @@ -0,0 +1,100 @@ +package internal + +import ( + "errors" + "fmt" + "sync" +) + +// ErrNotSupported indicates that a feature is not supported by the current kernel. +var ErrNotSupported = errors.New("not supported") + +// UnsupportedFeatureError is returned by FeatureTest() functions. +type UnsupportedFeatureError struct { + // The minimum Linux mainline version required for this feature. + // Used for the error string, and for sanity checking during testing. + MinimumVersion Version + + // The name of the feature that isn't supported. + Name string +} + +func (ufe *UnsupportedFeatureError) Error() string { + if ufe.MinimumVersion.Unspecified() { + return fmt.Sprintf("%s not supported", ufe.Name) + } + return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, ufe.MinimumVersion) +} + +// Is indicates that UnsupportedFeatureError is ErrNotSupported. +func (ufe *UnsupportedFeatureError) Is(target error) bool { + return target == ErrNotSupported +} + +type featureTest struct { + sync.RWMutex + successful bool + result error +} + +// FeatureTestFn is used to determine whether the kernel supports +// a certain feature. +// +// The return values have the following semantics: +// +// err == ErrNotSupported: the feature is not available +// err == nil: the feature is available +// err != nil: the test couldn't be executed +type FeatureTestFn func() error + +// FeatureTest wraps a function so that it is run at most once. +// +// name should identify the tested feature, while version must be in the +// form Major.Minor[.Patch]. +// +// Returns an error wrapping ErrNotSupported if the feature is not supported. +func FeatureTest(name, version string, fn FeatureTestFn) func() error { + ft := new(featureTest) + return func() error { + ft.RLock() + if ft.successful { + defer ft.RUnlock() + return ft.result + } + ft.RUnlock() + ft.Lock() + defer ft.Unlock() + // check one more time on the off + // chance that two go routines + // were able to call into the write + // lock + if ft.successful { + return ft.result + } + err := fn() + switch { + case errors.Is(err, ErrNotSupported): + v, err := NewVersion(version) + if err != nil { + return err + } + + ft.result = &UnsupportedFeatureError{ + MinimumVersion: v, + Name: name, + } + fallthrough + + case err == nil: + ft.successful = true + + default: + // We couldn't execute the feature test to a point + // where it could make a determination. + // Don't cache the result, just return it. + return fmt.Errorf("detect support for %s: %w", name, err) + } + + return ft.result + } +} diff --git a/vendor/github.com/cilium/ebpf/internal/io.go b/vendor/github.com/cilium/ebpf/internal/io.go new file mode 100644 index 000000000..30b6641f0 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/io.go @@ -0,0 +1,62 @@ +package internal + +import ( + "bufio" + "compress/gzip" + "errors" + "io" + "os" +) + +// NewBufferedSectionReader wraps an io.ReaderAt in an appropriately-sized +// buffered reader. It is a convenience function for reading subsections of +// ELF sections while minimizing the amount of read() syscalls made. +// +// Syscall overhead is non-negligible in continuous integration context +// where ELFs might be accessed over virtual filesystems with poor random +// access performance. Buffering reads makes sense because (sub)sections +// end up being read completely anyway. +// +// Use instead of the r.Seek() + io.LimitReader() pattern. +func NewBufferedSectionReader(ra io.ReaderAt, off, n int64) *bufio.Reader { + // Clamp the size of the buffer to one page to avoid slurping large parts + // of a file into memory. bufio.NewReader uses a hardcoded default buffer + // of 4096. Allow arches with larger pages to allocate more, but don't + // allocate a fixed 4k buffer if we only need to read a small segment. + buf := n + if ps := int64(os.Getpagesize()); n > ps { + buf = ps + } + + return bufio.NewReaderSize(io.NewSectionReader(ra, off, n), int(buf)) +} + +// DiscardZeroes makes sure that all written bytes are zero +// before discarding them. +type DiscardZeroes struct{} + +func (DiscardZeroes) Write(p []byte) (int, error) { + for _, b := range p { + if b != 0 { + return 0, errors.New("encountered non-zero byte") + } + } + return len(p), nil +} + +// ReadAllCompressed decompresses a gzipped file into memory. +func ReadAllCompressed(file string) ([]byte, error) { + fh, err := os.Open(file) + if err != nil { + return nil, err + } + defer fh.Close() + + gz, err := gzip.NewReader(fh) + if err != nil { + return nil, err + } + defer gz.Close() + + return io.ReadAll(gz) +} diff --git a/vendor/github.com/cilium/ebpf/internal/output.go b/vendor/github.com/cilium/ebpf/internal/output.go new file mode 100644 index 000000000..aeab37fcf --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/output.go @@ -0,0 +1,84 @@ +package internal + +import ( + "bytes" + "errors" + "go/format" + "go/scanner" + "io" + "strings" + "unicode" +) + +// Identifier turns a C style type or field name into an exportable Go equivalent. +func Identifier(str string) string { + prev := rune(-1) + return strings.Map(func(r rune) rune { + // See https://golang.org/ref/spec#Identifiers + switch { + case unicode.IsLetter(r): + if prev == -1 { + r = unicode.ToUpper(r) + } + + case r == '_': + switch { + // The previous rune was deleted, or we are at the + // beginning of the string. + case prev == -1: + fallthrough + + // The previous rune is a lower case letter or a digit. + case unicode.IsDigit(prev) || (unicode.IsLetter(prev) && unicode.IsLower(prev)): + // delete the current rune, and force the + // next character to be uppercased. + r = -1 + } + + case unicode.IsDigit(r): + + default: + // Delete the current rune. prev is unchanged. + return -1 + } + + prev = r + return r + }, str) +} + +// WriteFormatted outputs a formatted src into out. +// +// If formatting fails it returns an informative error message. +func WriteFormatted(src []byte, out io.Writer) error { + formatted, err := format.Source(src) + if err == nil { + _, err = out.Write(formatted) + return err + } + + var el scanner.ErrorList + if !errors.As(err, &el) { + return err + } + + var nel scanner.ErrorList + for _, err := range el { + if !err.Pos.IsValid() { + nel = append(nel, err) + continue + } + + buf := src[err.Pos.Offset:] + nl := bytes.IndexRune(buf, '\n') + if nl == -1 { + nel = append(nel, err) + continue + } + + err.Msg += ": " + string(buf[:nl]) + nel = append(nel, err) + } + + return nel +} diff --git a/vendor/github.com/cilium/ebpf/internal/pinning.go b/vendor/github.com/cilium/ebpf/internal/pinning.go new file mode 100644 index 000000000..c711353c3 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/pinning.go @@ -0,0 +1,77 @@ +package internal + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "unsafe" + + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +func Pin(currentPath, newPath string, fd *sys.FD) error { + const bpfFSType = 0xcafe4a11 + + if newPath == "" { + return errors.New("given pinning path cannot be empty") + } + if currentPath == newPath { + return nil + } + + var statfs unix.Statfs_t + if err := unix.Statfs(filepath.Dir(newPath), &statfs); err != nil { + return err + } + + fsType := int64(statfs.Type) + if unsafe.Sizeof(statfs.Type) == 4 { + // We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a + // negative number when interpreted as int32 so we need to cast via + // uint32 to avoid sign extension. + fsType = int64(uint32(statfs.Type)) + } + + if fsType != bpfFSType { + return fmt.Errorf("%s is not on a bpf filesystem", newPath) + } + + defer runtime.KeepAlive(fd) + + if currentPath == "" { + return sys.ObjPin(&sys.ObjPinAttr{ + Pathname: sys.NewStringPointer(newPath), + BpfFd: fd.Uint(), + }) + } + + // Renameat2 is used instead of os.Rename to disallow the new path replacing + // an existing path. + err := unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE) + if err == nil { + // Object is now moved to the new pinning path. + return nil + } + if !os.IsNotExist(err) { + return fmt.Errorf("unable to move pinned object to new path %v: %w", newPath, err) + } + // Internal state not in sync with the file system so let's fix it. + return sys.ObjPin(&sys.ObjPinAttr{ + Pathname: sys.NewStringPointer(newPath), + BpfFd: fd.Uint(), + }) +} + +func Unpin(pinnedPath string) error { + if pinnedPath == "" { + return nil + } + err := os.Remove(pinnedPath) + if err == nil || os.IsNotExist(err) { + return nil + } + return err +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/doc.go b/vendor/github.com/cilium/ebpf/internal/sys/doc.go new file mode 100644 index 000000000..dfe174448 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/doc.go @@ -0,0 +1,6 @@ +// Package sys contains bindings for the BPF syscall. +package sys + +// Regenerate types.go by invoking go generate in the current directory. + +//go:generate go run github.com/cilium/ebpf/internal/cmd/gentypes ../../btf/testdata/vmlinux.btf.gz diff --git a/vendor/github.com/cilium/ebpf/internal/sys/fd.go b/vendor/github.com/cilium/ebpf/internal/sys/fd.go new file mode 100644 index 000000000..65517d45e --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/fd.go @@ -0,0 +1,96 @@ +package sys + +import ( + "fmt" + "math" + "os" + "runtime" + "strconv" + + "github.com/cilium/ebpf/internal/unix" +) + +var ErrClosedFd = unix.EBADF + +type FD struct { + raw int +} + +func newFD(value int) *FD { + fd := &FD{value} + runtime.SetFinalizer(fd, (*FD).Close) + return fd +} + +// NewFD wraps a raw fd with a finalizer. +// +// You must not use the raw fd after calling this function, since the underlying +// file descriptor number may change. This is because the BPF UAPI assumes that +// zero is not a valid fd value. +func NewFD(value int) (*FD, error) { + if value < 0 { + return nil, fmt.Errorf("invalid fd %d", value) + } + + fd := newFD(value) + if value != 0 { + return fd, nil + } + + dup, err := fd.Dup() + _ = fd.Close() + return dup, err +} + +func (fd *FD) String() string { + return strconv.FormatInt(int64(fd.raw), 10) +} + +func (fd *FD) Int() int { + return fd.raw +} + +func (fd *FD) Uint() uint32 { + if fd.raw < 0 || int64(fd.raw) > math.MaxUint32 { + // Best effort: this is the number most likely to be an invalid file + // descriptor. It is equal to -1 (on two's complement arches). + return math.MaxUint32 + } + return uint32(fd.raw) +} + +func (fd *FD) Close() error { + if fd.raw < 0 { + return nil + } + + value := int(fd.raw) + fd.raw = -1 + + fd.Forget() + return unix.Close(value) +} + +func (fd *FD) Forget() { + runtime.SetFinalizer(fd, nil) +} + +func (fd *FD) Dup() (*FD, error) { + if fd.raw < 0 { + return nil, ErrClosedFd + } + + // Always require the fd to be larger than zero: the BPF API treats the value + // as "no argument provided". + dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 1) + if err != nil { + return nil, fmt.Errorf("can't dup fd: %v", err) + } + + return newFD(dup), nil +} + +func (fd *FD) File(name string) *os.File { + fd.Forget() + return os.NewFile(uintptr(fd.raw), name) +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr.go new file mode 100644 index 000000000..a22100688 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr.go @@ -0,0 +1,38 @@ +package sys + +import ( + "unsafe" + + "github.com/cilium/ebpf/internal/unix" +) + +// NewPointer creates a 64-bit pointer from an unsafe Pointer. +func NewPointer(ptr unsafe.Pointer) Pointer { + return Pointer{ptr: ptr} +} + +// NewSlicePointer creates a 64-bit pointer from a byte slice. +func NewSlicePointer(buf []byte) Pointer { + if len(buf) == 0 { + return Pointer{} + } + + return Pointer{ptr: unsafe.Pointer(&buf[0])} +} + +// NewSlicePointer creates a 64-bit pointer from a byte slice. +// +// Useful to assign both the pointer and the length in one go. +func NewSlicePointerLen(buf []byte) (Pointer, uint32) { + return NewSlicePointer(buf), uint32(len(buf)) +} + +// NewStringPointer creates a 64-bit pointer from a string. +func NewStringPointer(str string) Pointer { + p, err := unix.BytePtrFromString(str) + if err != nil { + return Pointer{} + } + + return Pointer{ptr: unsafe.Pointer(p)} +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_be.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_be.go new file mode 100644 index 000000000..df903d780 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_be.go @@ -0,0 +1,15 @@ +//go:build armbe || mips || mips64p32 +// +build armbe mips mips64p32 + +package sys + +import ( + "unsafe" +) + +// Pointer wraps an unsafe.Pointer to be 64bit to +// conform to the syscall specification. +type Pointer struct { + pad uint32 + ptr unsafe.Pointer +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_le.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_le.go new file mode 100644 index 000000000..a6a51edb6 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_le.go @@ -0,0 +1,15 @@ +//go:build 386 || amd64p32 || arm || mipsle || mips64p32le +// +build 386 amd64p32 arm mipsle mips64p32le + +package sys + +import ( + "unsafe" +) + +// Pointer wraps an unsafe.Pointer to be 64bit to +// conform to the syscall specification. +type Pointer struct { + ptr unsafe.Pointer + pad uint32 +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr_64.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr_64.go new file mode 100644 index 000000000..7c0279e48 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr_64.go @@ -0,0 +1,14 @@ +//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32 +// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le,!armbe,!mips,!mips64p32 + +package sys + +import ( + "unsafe" +) + +// Pointer wraps an unsafe.Pointer to be 64bit to +// conform to the syscall specification. +type Pointer struct { + ptr unsafe.Pointer +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/syscall.go b/vendor/github.com/cilium/ebpf/internal/sys/syscall.go new file mode 100644 index 000000000..2a5935dc9 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/syscall.go @@ -0,0 +1,126 @@ +package sys + +import ( + "runtime" + "syscall" + "unsafe" + + "github.com/cilium/ebpf/internal/unix" +) + +// BPF wraps SYS_BPF. +// +// Any pointers contained in attr must use the Pointer type from this package. +func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) { + for { + r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size) + runtime.KeepAlive(attr) + + // As of ~4.20 the verifier can be interrupted by a signal, + // and returns EAGAIN in that case. + if errNo == unix.EAGAIN && cmd == BPF_PROG_LOAD { + continue + } + + var err error + if errNo != 0 { + err = wrappedErrno{errNo} + } + + return r1, err + } +} + +// Info is implemented by all structs that can be passed to the ObjInfo syscall. +// +// MapInfo +// ProgInfo +// LinkInfo +// BtfInfo +type Info interface { + info() (unsafe.Pointer, uint32) +} + +var _ Info = (*MapInfo)(nil) + +func (i *MapInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +var _ Info = (*ProgInfo)(nil) + +func (i *ProgInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +var _ Info = (*LinkInfo)(nil) + +func (i *LinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +var _ Info = (*BtfInfo)(nil) + +func (i *BtfInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +// ObjInfo retrieves information about a BPF Fd. +// +// info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo. +func ObjInfo(fd *FD, info Info) error { + ptr, len := info.info() + err := ObjGetInfoByFd(&ObjGetInfoByFdAttr{ + BpfFd: fd.Uint(), + InfoLen: len, + Info: NewPointer(ptr), + }) + runtime.KeepAlive(fd) + return err +} + +// BPFObjName is a null-terminated string made up of +// 'A-Za-z0-9_' characters. +type ObjName [unix.BPF_OBJ_NAME_LEN]byte + +// NewObjName truncates the result if it is too long. +func NewObjName(name string) ObjName { + var result ObjName + copy(result[:unix.BPF_OBJ_NAME_LEN-1], name) + return result +} + +// LinkID uniquely identifies a bpf_link. +type LinkID uint32 + +// BTFID uniquely identifies a BTF blob loaded into the kernel. +type BTFID uint32 + +// wrappedErrno wraps syscall.Errno to prevent direct comparisons with +// syscall.E* or unix.E* constants. +// +// You should never export an error of this type. +type wrappedErrno struct { + syscall.Errno +} + +func (we wrappedErrno) Unwrap() error { + return we.Errno +} + +type syscallError struct { + error + errno syscall.Errno +} + +func Error(err error, errno syscall.Errno) error { + return &syscallError{err, errno} +} + +func (se *syscallError) Is(target error) bool { + return target == se.error +} + +func (se *syscallError) Unwrap() error { + return se.errno +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/types.go b/vendor/github.com/cilium/ebpf/internal/sys/types.go new file mode 100644 index 000000000..291e3a619 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/types.go @@ -0,0 +1,1052 @@ +// Code generated by internal/cmd/gentypes; DO NOT EDIT. + +package sys + +import ( + "unsafe" +) + +type AdjRoomMode int32 + +const ( + BPF_ADJ_ROOM_NET AdjRoomMode = 0 + BPF_ADJ_ROOM_MAC AdjRoomMode = 1 +) + +type AttachType int32 + +const ( + BPF_CGROUP_INET_INGRESS AttachType = 0 + BPF_CGROUP_INET_EGRESS AttachType = 1 + BPF_CGROUP_INET_SOCK_CREATE AttachType = 2 + BPF_CGROUP_SOCK_OPS AttachType = 3 + BPF_SK_SKB_STREAM_PARSER AttachType = 4 + BPF_SK_SKB_STREAM_VERDICT AttachType = 5 + BPF_CGROUP_DEVICE AttachType = 6 + BPF_SK_MSG_VERDICT AttachType = 7 + BPF_CGROUP_INET4_BIND AttachType = 8 + BPF_CGROUP_INET6_BIND AttachType = 9 + BPF_CGROUP_INET4_CONNECT AttachType = 10 + BPF_CGROUP_INET6_CONNECT AttachType = 11 + BPF_CGROUP_INET4_POST_BIND AttachType = 12 + BPF_CGROUP_INET6_POST_BIND AttachType = 13 + BPF_CGROUP_UDP4_SENDMSG AttachType = 14 + BPF_CGROUP_UDP6_SENDMSG AttachType = 15 + BPF_LIRC_MODE2 AttachType = 16 + BPF_FLOW_DISSECTOR AttachType = 17 + BPF_CGROUP_SYSCTL AttachType = 18 + BPF_CGROUP_UDP4_RECVMSG AttachType = 19 + BPF_CGROUP_UDP6_RECVMSG AttachType = 20 + BPF_CGROUP_GETSOCKOPT AttachType = 21 + BPF_CGROUP_SETSOCKOPT AttachType = 22 + BPF_TRACE_RAW_TP AttachType = 23 + BPF_TRACE_FENTRY AttachType = 24 + BPF_TRACE_FEXIT AttachType = 25 + BPF_MODIFY_RETURN AttachType = 26 + BPF_LSM_MAC AttachType = 27 + BPF_TRACE_ITER AttachType = 28 + BPF_CGROUP_INET4_GETPEERNAME AttachType = 29 + BPF_CGROUP_INET6_GETPEERNAME AttachType = 30 + BPF_CGROUP_INET4_GETSOCKNAME AttachType = 31 + BPF_CGROUP_INET6_GETSOCKNAME AttachType = 32 + BPF_XDP_DEVMAP AttachType = 33 + BPF_CGROUP_INET_SOCK_RELEASE AttachType = 34 + BPF_XDP_CPUMAP AttachType = 35 + BPF_SK_LOOKUP AttachType = 36 + BPF_XDP AttachType = 37 + BPF_SK_SKB_VERDICT AttachType = 38 + BPF_SK_REUSEPORT_SELECT AttachType = 39 + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE AttachType = 40 + BPF_PERF_EVENT AttachType = 41 + BPF_TRACE_KPROBE_MULTI AttachType = 42 + __MAX_BPF_ATTACH_TYPE AttachType = 43 +) + +type Cmd int32 + +const ( + BPF_MAP_CREATE Cmd = 0 + BPF_MAP_LOOKUP_ELEM Cmd = 1 + BPF_MAP_UPDATE_ELEM Cmd = 2 + BPF_MAP_DELETE_ELEM Cmd = 3 + BPF_MAP_GET_NEXT_KEY Cmd = 4 + BPF_PROG_LOAD Cmd = 5 + BPF_OBJ_PIN Cmd = 6 + BPF_OBJ_GET Cmd = 7 + BPF_PROG_ATTACH Cmd = 8 + BPF_PROG_DETACH Cmd = 9 + BPF_PROG_TEST_RUN Cmd = 10 + BPF_PROG_RUN Cmd = 10 + BPF_PROG_GET_NEXT_ID Cmd = 11 + BPF_MAP_GET_NEXT_ID Cmd = 12 + BPF_PROG_GET_FD_BY_ID Cmd = 13 + BPF_MAP_GET_FD_BY_ID Cmd = 14 + BPF_OBJ_GET_INFO_BY_FD Cmd = 15 + BPF_PROG_QUERY Cmd = 16 + BPF_RAW_TRACEPOINT_OPEN Cmd = 17 + BPF_BTF_LOAD Cmd = 18 + BPF_BTF_GET_FD_BY_ID Cmd = 19 + BPF_TASK_FD_QUERY Cmd = 20 + BPF_MAP_LOOKUP_AND_DELETE_ELEM Cmd = 21 + BPF_MAP_FREEZE Cmd = 22 + BPF_BTF_GET_NEXT_ID Cmd = 23 + BPF_MAP_LOOKUP_BATCH Cmd = 24 + BPF_MAP_LOOKUP_AND_DELETE_BATCH Cmd = 25 + BPF_MAP_UPDATE_BATCH Cmd = 26 + BPF_MAP_DELETE_BATCH Cmd = 27 + BPF_LINK_CREATE Cmd = 28 + BPF_LINK_UPDATE Cmd = 29 + BPF_LINK_GET_FD_BY_ID Cmd = 30 + BPF_LINK_GET_NEXT_ID Cmd = 31 + BPF_ENABLE_STATS Cmd = 32 + BPF_ITER_CREATE Cmd = 33 + BPF_LINK_DETACH Cmd = 34 + BPF_PROG_BIND_MAP Cmd = 35 +) + +type FunctionId int32 + +const ( + BPF_FUNC_unspec FunctionId = 0 + BPF_FUNC_map_lookup_elem FunctionId = 1 + BPF_FUNC_map_update_elem FunctionId = 2 + BPF_FUNC_map_delete_elem FunctionId = 3 + BPF_FUNC_probe_read FunctionId = 4 + BPF_FUNC_ktime_get_ns FunctionId = 5 + BPF_FUNC_trace_printk FunctionId = 6 + BPF_FUNC_get_prandom_u32 FunctionId = 7 + BPF_FUNC_get_smp_processor_id FunctionId = 8 + BPF_FUNC_skb_store_bytes FunctionId = 9 + BPF_FUNC_l3_csum_replace FunctionId = 10 + BPF_FUNC_l4_csum_replace FunctionId = 11 + BPF_FUNC_tail_call FunctionId = 12 + BPF_FUNC_clone_redirect FunctionId = 13 + BPF_FUNC_get_current_pid_tgid FunctionId = 14 + BPF_FUNC_get_current_uid_gid FunctionId = 15 + BPF_FUNC_get_current_comm FunctionId = 16 + BPF_FUNC_get_cgroup_classid FunctionId = 17 + BPF_FUNC_skb_vlan_push FunctionId = 18 + BPF_FUNC_skb_vlan_pop FunctionId = 19 + BPF_FUNC_skb_get_tunnel_key FunctionId = 20 + BPF_FUNC_skb_set_tunnel_key FunctionId = 21 + BPF_FUNC_perf_event_read FunctionId = 22 + BPF_FUNC_redirect FunctionId = 23 + BPF_FUNC_get_route_realm FunctionId = 24 + BPF_FUNC_perf_event_output FunctionId = 25 + BPF_FUNC_skb_load_bytes FunctionId = 26 + BPF_FUNC_get_stackid FunctionId = 27 + BPF_FUNC_csum_diff FunctionId = 28 + BPF_FUNC_skb_get_tunnel_opt FunctionId = 29 + BPF_FUNC_skb_set_tunnel_opt FunctionId = 30 + BPF_FUNC_skb_change_proto FunctionId = 31 + BPF_FUNC_skb_change_type FunctionId = 32 + BPF_FUNC_skb_under_cgroup FunctionId = 33 + BPF_FUNC_get_hash_recalc FunctionId = 34 + BPF_FUNC_get_current_task FunctionId = 35 + BPF_FUNC_probe_write_user FunctionId = 36 + BPF_FUNC_current_task_under_cgroup FunctionId = 37 + BPF_FUNC_skb_change_tail FunctionId = 38 + BPF_FUNC_skb_pull_data FunctionId = 39 + BPF_FUNC_csum_update FunctionId = 40 + BPF_FUNC_set_hash_invalid FunctionId = 41 + BPF_FUNC_get_numa_node_id FunctionId = 42 + BPF_FUNC_skb_change_head FunctionId = 43 + BPF_FUNC_xdp_adjust_head FunctionId = 44 + BPF_FUNC_probe_read_str FunctionId = 45 + BPF_FUNC_get_socket_cookie FunctionId = 46 + BPF_FUNC_get_socket_uid FunctionId = 47 + BPF_FUNC_set_hash FunctionId = 48 + BPF_FUNC_setsockopt FunctionId = 49 + BPF_FUNC_skb_adjust_room FunctionId = 50 + BPF_FUNC_redirect_map FunctionId = 51 + BPF_FUNC_sk_redirect_map FunctionId = 52 + BPF_FUNC_sock_map_update FunctionId = 53 + BPF_FUNC_xdp_adjust_meta FunctionId = 54 + BPF_FUNC_perf_event_read_value FunctionId = 55 + BPF_FUNC_perf_prog_read_value FunctionId = 56 + BPF_FUNC_getsockopt FunctionId = 57 + BPF_FUNC_override_return FunctionId = 58 + BPF_FUNC_sock_ops_cb_flags_set FunctionId = 59 + BPF_FUNC_msg_redirect_map FunctionId = 60 + BPF_FUNC_msg_apply_bytes FunctionId = 61 + BPF_FUNC_msg_cork_bytes FunctionId = 62 + BPF_FUNC_msg_pull_data FunctionId = 63 + BPF_FUNC_bind FunctionId = 64 + BPF_FUNC_xdp_adjust_tail FunctionId = 65 + BPF_FUNC_skb_get_xfrm_state FunctionId = 66 + BPF_FUNC_get_stack FunctionId = 67 + BPF_FUNC_skb_load_bytes_relative FunctionId = 68 + BPF_FUNC_fib_lookup FunctionId = 69 + BPF_FUNC_sock_hash_update FunctionId = 70 + BPF_FUNC_msg_redirect_hash FunctionId = 71 + BPF_FUNC_sk_redirect_hash FunctionId = 72 + BPF_FUNC_lwt_push_encap FunctionId = 73 + BPF_FUNC_lwt_seg6_store_bytes FunctionId = 74 + BPF_FUNC_lwt_seg6_adjust_srh FunctionId = 75 + BPF_FUNC_lwt_seg6_action FunctionId = 76 + BPF_FUNC_rc_repeat FunctionId = 77 + BPF_FUNC_rc_keydown FunctionId = 78 + BPF_FUNC_skb_cgroup_id FunctionId = 79 + BPF_FUNC_get_current_cgroup_id FunctionId = 80 + BPF_FUNC_get_local_storage FunctionId = 81 + BPF_FUNC_sk_select_reuseport FunctionId = 82 + BPF_FUNC_skb_ancestor_cgroup_id FunctionId = 83 + BPF_FUNC_sk_lookup_tcp FunctionId = 84 + BPF_FUNC_sk_lookup_udp FunctionId = 85 + BPF_FUNC_sk_release FunctionId = 86 + BPF_FUNC_map_push_elem FunctionId = 87 + BPF_FUNC_map_pop_elem FunctionId = 88 + BPF_FUNC_map_peek_elem FunctionId = 89 + BPF_FUNC_msg_push_data FunctionId = 90 + BPF_FUNC_msg_pop_data FunctionId = 91 + BPF_FUNC_rc_pointer_rel FunctionId = 92 + BPF_FUNC_spin_lock FunctionId = 93 + BPF_FUNC_spin_unlock FunctionId = 94 + BPF_FUNC_sk_fullsock FunctionId = 95 + BPF_FUNC_tcp_sock FunctionId = 96 + BPF_FUNC_skb_ecn_set_ce FunctionId = 97 + BPF_FUNC_get_listener_sock FunctionId = 98 + BPF_FUNC_skc_lookup_tcp FunctionId = 99 + BPF_FUNC_tcp_check_syncookie FunctionId = 100 + BPF_FUNC_sysctl_get_name FunctionId = 101 + BPF_FUNC_sysctl_get_current_value FunctionId = 102 + BPF_FUNC_sysctl_get_new_value FunctionId = 103 + BPF_FUNC_sysctl_set_new_value FunctionId = 104 + BPF_FUNC_strtol FunctionId = 105 + BPF_FUNC_strtoul FunctionId = 106 + BPF_FUNC_sk_storage_get FunctionId = 107 + BPF_FUNC_sk_storage_delete FunctionId = 108 + BPF_FUNC_send_signal FunctionId = 109 + BPF_FUNC_tcp_gen_syncookie FunctionId = 110 + BPF_FUNC_skb_output FunctionId = 111 + BPF_FUNC_probe_read_user FunctionId = 112 + BPF_FUNC_probe_read_kernel FunctionId = 113 + BPF_FUNC_probe_read_user_str FunctionId = 114 + BPF_FUNC_probe_read_kernel_str FunctionId = 115 + BPF_FUNC_tcp_send_ack FunctionId = 116 + BPF_FUNC_send_signal_thread FunctionId = 117 + BPF_FUNC_jiffies64 FunctionId = 118 + BPF_FUNC_read_branch_records FunctionId = 119 + BPF_FUNC_get_ns_current_pid_tgid FunctionId = 120 + BPF_FUNC_xdp_output FunctionId = 121 + BPF_FUNC_get_netns_cookie FunctionId = 122 + BPF_FUNC_get_current_ancestor_cgroup_id FunctionId = 123 + BPF_FUNC_sk_assign FunctionId = 124 + BPF_FUNC_ktime_get_boot_ns FunctionId = 125 + BPF_FUNC_seq_printf FunctionId = 126 + BPF_FUNC_seq_write FunctionId = 127 + BPF_FUNC_sk_cgroup_id FunctionId = 128 + BPF_FUNC_sk_ancestor_cgroup_id FunctionId = 129 + BPF_FUNC_ringbuf_output FunctionId = 130 + BPF_FUNC_ringbuf_reserve FunctionId = 131 + BPF_FUNC_ringbuf_submit FunctionId = 132 + BPF_FUNC_ringbuf_discard FunctionId = 133 + BPF_FUNC_ringbuf_query FunctionId = 134 + BPF_FUNC_csum_level FunctionId = 135 + BPF_FUNC_skc_to_tcp6_sock FunctionId = 136 + BPF_FUNC_skc_to_tcp_sock FunctionId = 137 + BPF_FUNC_skc_to_tcp_timewait_sock FunctionId = 138 + BPF_FUNC_skc_to_tcp_request_sock FunctionId = 139 + BPF_FUNC_skc_to_udp6_sock FunctionId = 140 + BPF_FUNC_get_task_stack FunctionId = 141 + BPF_FUNC_load_hdr_opt FunctionId = 142 + BPF_FUNC_store_hdr_opt FunctionId = 143 + BPF_FUNC_reserve_hdr_opt FunctionId = 144 + BPF_FUNC_inode_storage_get FunctionId = 145 + BPF_FUNC_inode_storage_delete FunctionId = 146 + BPF_FUNC_d_path FunctionId = 147 + BPF_FUNC_copy_from_user FunctionId = 148 + BPF_FUNC_snprintf_btf FunctionId = 149 + BPF_FUNC_seq_printf_btf FunctionId = 150 + BPF_FUNC_skb_cgroup_classid FunctionId = 151 + BPF_FUNC_redirect_neigh FunctionId = 152 + BPF_FUNC_per_cpu_ptr FunctionId = 153 + BPF_FUNC_this_cpu_ptr FunctionId = 154 + BPF_FUNC_redirect_peer FunctionId = 155 + BPF_FUNC_task_storage_get FunctionId = 156 + BPF_FUNC_task_storage_delete FunctionId = 157 + BPF_FUNC_get_current_task_btf FunctionId = 158 + BPF_FUNC_bprm_opts_set FunctionId = 159 + BPF_FUNC_ktime_get_coarse_ns FunctionId = 160 + BPF_FUNC_ima_inode_hash FunctionId = 161 + BPF_FUNC_sock_from_file FunctionId = 162 + BPF_FUNC_check_mtu FunctionId = 163 + BPF_FUNC_for_each_map_elem FunctionId = 164 + BPF_FUNC_snprintf FunctionId = 165 + BPF_FUNC_sys_bpf FunctionId = 166 + BPF_FUNC_btf_find_by_name_kind FunctionId = 167 + BPF_FUNC_sys_close FunctionId = 168 + BPF_FUNC_timer_init FunctionId = 169 + BPF_FUNC_timer_set_callback FunctionId = 170 + BPF_FUNC_timer_start FunctionId = 171 + BPF_FUNC_timer_cancel FunctionId = 172 + BPF_FUNC_get_func_ip FunctionId = 173 + BPF_FUNC_get_attach_cookie FunctionId = 174 + BPF_FUNC_task_pt_regs FunctionId = 175 + BPF_FUNC_get_branch_snapshot FunctionId = 176 + BPF_FUNC_trace_vprintk FunctionId = 177 + BPF_FUNC_skc_to_unix_sock FunctionId = 178 + BPF_FUNC_kallsyms_lookup_name FunctionId = 179 + BPF_FUNC_find_vma FunctionId = 180 + BPF_FUNC_loop FunctionId = 181 + BPF_FUNC_strncmp FunctionId = 182 + BPF_FUNC_get_func_arg FunctionId = 183 + BPF_FUNC_get_func_ret FunctionId = 184 + BPF_FUNC_get_func_arg_cnt FunctionId = 185 + BPF_FUNC_get_retval FunctionId = 186 + BPF_FUNC_set_retval FunctionId = 187 + BPF_FUNC_xdp_get_buff_len FunctionId = 188 + BPF_FUNC_xdp_load_bytes FunctionId = 189 + BPF_FUNC_xdp_store_bytes FunctionId = 190 + BPF_FUNC_copy_from_user_task FunctionId = 191 + BPF_FUNC_skb_set_tstamp FunctionId = 192 + BPF_FUNC_ima_file_hash FunctionId = 193 + __BPF_FUNC_MAX_ID FunctionId = 194 +) + +type HdrStartOff int32 + +const ( + BPF_HDR_START_MAC HdrStartOff = 0 + BPF_HDR_START_NET HdrStartOff = 1 +) + +type LinkType int32 + +const ( + BPF_LINK_TYPE_UNSPEC LinkType = 0 + BPF_LINK_TYPE_RAW_TRACEPOINT LinkType = 1 + BPF_LINK_TYPE_TRACING LinkType = 2 + BPF_LINK_TYPE_CGROUP LinkType = 3 + BPF_LINK_TYPE_ITER LinkType = 4 + BPF_LINK_TYPE_NETNS LinkType = 5 + BPF_LINK_TYPE_XDP LinkType = 6 + BPF_LINK_TYPE_PERF_EVENT LinkType = 7 + BPF_LINK_TYPE_KPROBE_MULTI LinkType = 8 + MAX_BPF_LINK_TYPE LinkType = 9 +) + +type MapType int32 + +const ( + BPF_MAP_TYPE_UNSPEC MapType = 0 + BPF_MAP_TYPE_HASH MapType = 1 + BPF_MAP_TYPE_ARRAY MapType = 2 + BPF_MAP_TYPE_PROG_ARRAY MapType = 3 + BPF_MAP_TYPE_PERF_EVENT_ARRAY MapType = 4 + BPF_MAP_TYPE_PERCPU_HASH MapType = 5 + BPF_MAP_TYPE_PERCPU_ARRAY MapType = 6 + BPF_MAP_TYPE_STACK_TRACE MapType = 7 + BPF_MAP_TYPE_CGROUP_ARRAY MapType = 8 + BPF_MAP_TYPE_LRU_HASH MapType = 9 + BPF_MAP_TYPE_LRU_PERCPU_HASH MapType = 10 + BPF_MAP_TYPE_LPM_TRIE MapType = 11 + BPF_MAP_TYPE_ARRAY_OF_MAPS MapType = 12 + BPF_MAP_TYPE_HASH_OF_MAPS MapType = 13 + BPF_MAP_TYPE_DEVMAP MapType = 14 + BPF_MAP_TYPE_SOCKMAP MapType = 15 + BPF_MAP_TYPE_CPUMAP MapType = 16 + BPF_MAP_TYPE_XSKMAP MapType = 17 + BPF_MAP_TYPE_SOCKHASH MapType = 18 + BPF_MAP_TYPE_CGROUP_STORAGE MapType = 19 + BPF_MAP_TYPE_REUSEPORT_SOCKARRAY MapType = 20 + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE MapType = 21 + BPF_MAP_TYPE_QUEUE MapType = 22 + BPF_MAP_TYPE_STACK MapType = 23 + BPF_MAP_TYPE_SK_STORAGE MapType = 24 + BPF_MAP_TYPE_DEVMAP_HASH MapType = 25 + BPF_MAP_TYPE_STRUCT_OPS MapType = 26 + BPF_MAP_TYPE_RINGBUF MapType = 27 + BPF_MAP_TYPE_INODE_STORAGE MapType = 28 + BPF_MAP_TYPE_TASK_STORAGE MapType = 29 + BPF_MAP_TYPE_BLOOM_FILTER MapType = 30 +) + +type ProgType int32 + +const ( + BPF_PROG_TYPE_UNSPEC ProgType = 0 + BPF_PROG_TYPE_SOCKET_FILTER ProgType = 1 + BPF_PROG_TYPE_KPROBE ProgType = 2 + BPF_PROG_TYPE_SCHED_CLS ProgType = 3 + BPF_PROG_TYPE_SCHED_ACT ProgType = 4 + BPF_PROG_TYPE_TRACEPOINT ProgType = 5 + BPF_PROG_TYPE_XDP ProgType = 6 + BPF_PROG_TYPE_PERF_EVENT ProgType = 7 + BPF_PROG_TYPE_CGROUP_SKB ProgType = 8 + BPF_PROG_TYPE_CGROUP_SOCK ProgType = 9 + BPF_PROG_TYPE_LWT_IN ProgType = 10 + BPF_PROG_TYPE_LWT_OUT ProgType = 11 + BPF_PROG_TYPE_LWT_XMIT ProgType = 12 + BPF_PROG_TYPE_SOCK_OPS ProgType = 13 + BPF_PROG_TYPE_SK_SKB ProgType = 14 + BPF_PROG_TYPE_CGROUP_DEVICE ProgType = 15 + BPF_PROG_TYPE_SK_MSG ProgType = 16 + BPF_PROG_TYPE_RAW_TRACEPOINT ProgType = 17 + BPF_PROG_TYPE_CGROUP_SOCK_ADDR ProgType = 18 + BPF_PROG_TYPE_LWT_SEG6LOCAL ProgType = 19 + BPF_PROG_TYPE_LIRC_MODE2 ProgType = 20 + BPF_PROG_TYPE_SK_REUSEPORT ProgType = 21 + BPF_PROG_TYPE_FLOW_DISSECTOR ProgType = 22 + BPF_PROG_TYPE_CGROUP_SYSCTL ProgType = 23 + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE ProgType = 24 + BPF_PROG_TYPE_CGROUP_SOCKOPT ProgType = 25 + BPF_PROG_TYPE_TRACING ProgType = 26 + BPF_PROG_TYPE_STRUCT_OPS ProgType = 27 + BPF_PROG_TYPE_EXT ProgType = 28 + BPF_PROG_TYPE_LSM ProgType = 29 + BPF_PROG_TYPE_SK_LOOKUP ProgType = 30 + BPF_PROG_TYPE_SYSCALL ProgType = 31 +) + +type RetCode int32 + +const ( + BPF_OK RetCode = 0 + BPF_DROP RetCode = 2 + BPF_REDIRECT RetCode = 7 + BPF_LWT_REROUTE RetCode = 128 +) + +type SkAction int32 + +const ( + SK_DROP SkAction = 0 + SK_PASS SkAction = 1 +) + +type StackBuildIdStatus int32 + +const ( + BPF_STACK_BUILD_ID_EMPTY StackBuildIdStatus = 0 + BPF_STACK_BUILD_ID_VALID StackBuildIdStatus = 1 + BPF_STACK_BUILD_ID_IP StackBuildIdStatus = 2 +) + +type StatsType int32 + +const ( + BPF_STATS_RUN_TIME StatsType = 0 +) + +type XdpAction int32 + +const ( + XDP_ABORTED XdpAction = 0 + XDP_DROP XdpAction = 1 + XDP_PASS XdpAction = 2 + XDP_TX XdpAction = 3 + XDP_REDIRECT XdpAction = 4 +) + +type BtfInfo struct { + Btf Pointer + BtfSize uint32 + Id BTFID + Name Pointer + NameLen uint32 + KernelBtf uint32 +} + +type FuncInfo struct { + InsnOff uint32 + TypeId uint32 +} + +type LineInfo struct { + InsnOff uint32 + FileNameOff uint32 + LineOff uint32 + LineCol uint32 +} + +type LinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + Extra [16]uint8 +} + +type MapInfo struct { + Type uint32 + Id uint32 + KeySize uint32 + ValueSize uint32 + MaxEntries uint32 + MapFlags uint32 + Name ObjName + Ifindex uint32 + BtfVmlinuxValueTypeId uint32 + NetnsDev uint64 + NetnsIno uint64 + BtfId uint32 + BtfKeyTypeId uint32 + BtfValueTypeId uint32 + _ [4]byte + MapExtra uint64 +} + +type ProgInfo struct { + Type uint32 + Id uint32 + Tag [8]uint8 + JitedProgLen uint32 + XlatedProgLen uint32 + JitedProgInsns uint64 + XlatedProgInsns Pointer + LoadTime uint64 + CreatedByUid uint32 + NrMapIds uint32 + MapIds Pointer + Name ObjName + Ifindex uint32 + _ [4]byte /* unsupported bitfield */ + NetnsDev uint64 + NetnsIno uint64 + NrJitedKsyms uint32 + NrJitedFuncLens uint32 + JitedKsyms uint64 + JitedFuncLens uint64 + BtfId uint32 + FuncInfoRecSize uint32 + FuncInfo uint64 + NrFuncInfo uint32 + NrLineInfo uint32 + LineInfo uint64 + JitedLineInfo uint64 + NrJitedLineInfo uint32 + LineInfoRecSize uint32 + JitedLineInfoRecSize uint32 + NrProgTags uint32 + ProgTags uint64 + RunTimeNs uint64 + RunCnt uint64 + RecursionMisses uint64 + VerifiedInsns uint32 + _ [4]byte +} + +type SkLookup struct { + Cookie uint64 + Family uint32 + Protocol uint32 + RemoteIp4 [4]uint8 + RemoteIp6 [16]uint8 + RemotePort uint16 + _ [2]byte + LocalIp4 [4]uint8 + LocalIp6 [16]uint8 + LocalPort uint32 + IngressIfindex uint32 + _ [4]byte +} + +type XdpMd struct { + Data uint32 + DataEnd uint32 + DataMeta uint32 + IngressIfindex uint32 + RxQueueIndex uint32 + EgressIfindex uint32 +} + +type BtfGetFdByIdAttr struct{ Id uint32 } + +func BtfGetFdById(attr *BtfGetFdByIdAttr) (*FD, error) { + fd, err := BPF(BPF_BTF_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type BtfGetNextIdAttr struct { + Id BTFID + NextId BTFID +} + +func BtfGetNextId(attr *BtfGetNextIdAttr) error { + _, err := BPF(BPF_BTF_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type BtfLoadAttr struct { + Btf Pointer + BtfLogBuf Pointer + BtfSize uint32 + BtfLogSize uint32 + BtfLogLevel uint32 + _ [4]byte +} + +func BtfLoad(attr *BtfLoadAttr) (*FD, error) { + fd, err := BPF(BPF_BTF_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type EnableStatsAttr struct{ Type uint32 } + +func EnableStats(attr *EnableStatsAttr) (*FD, error) { + fd, err := BPF(BPF_ENABLE_STATS, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type IterCreateAttr struct { + LinkFd uint32 + Flags uint32 +} + +func IterCreate(attr *IterCreateAttr) (*FD, error) { + fd, err := BPF(BPF_ITER_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkCreateAttr struct { + ProgFd uint32 + TargetFd uint32 + AttachType AttachType + Flags uint32 + TargetBtfId uint32 + _ [28]byte +} + +func LinkCreate(attr *LinkCreateAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkCreateIterAttr struct { + ProgFd uint32 + TargetFd uint32 + AttachType AttachType + Flags uint32 + IterInfo Pointer + IterInfoLen uint32 + _ [20]byte +} + +func LinkCreateIter(attr *LinkCreateIterAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkCreatePerfEventAttr struct { + ProgFd uint32 + TargetFd uint32 + AttachType AttachType + Flags uint32 + BpfCookie uint64 + _ [24]byte +} + +func LinkCreatePerfEvent(attr *LinkCreatePerfEventAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkUpdateAttr struct { + LinkFd uint32 + NewProgFd uint32 + Flags uint32 + OldProgFd uint32 +} + +func LinkUpdate(attr *LinkUpdateAttr) error { + _, err := BPF(BPF_LINK_UPDATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapCreateAttr struct { + MapType MapType + KeySize uint32 + ValueSize uint32 + MaxEntries uint32 + MapFlags uint32 + InnerMapFd uint32 + NumaNode uint32 + MapName ObjName + MapIfindex uint32 + BtfFd uint32 + BtfKeyTypeId uint32 + BtfValueTypeId uint32 + BtfVmlinuxValueTypeId uint32 + MapExtra uint64 +} + +func MapCreate(attr *MapCreateAttr) (*FD, error) { + fd, err := BPF(BPF_MAP_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type MapDeleteBatchAttr struct { + InBatch Pointer + OutBatch Pointer + Keys Pointer + Values Pointer + Count uint32 + MapFd uint32 + ElemFlags uint64 + Flags uint64 +} + +func MapDeleteBatch(attr *MapDeleteBatchAttr) error { + _, err := BPF(BPF_MAP_DELETE_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapDeleteElemAttr struct { + MapFd uint32 + _ [4]byte + Key Pointer + Value Pointer + Flags uint64 +} + +func MapDeleteElem(attr *MapDeleteElemAttr) error { + _, err := BPF(BPF_MAP_DELETE_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapFreezeAttr struct{ MapFd uint32 } + +func MapFreeze(attr *MapFreezeAttr) error { + _, err := BPF(BPF_MAP_FREEZE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapGetFdByIdAttr struct{ Id uint32 } + +func MapGetFdById(attr *MapGetFdByIdAttr) (*FD, error) { + fd, err := BPF(BPF_MAP_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type MapGetNextIdAttr struct { + Id uint32 + NextId uint32 +} + +func MapGetNextId(attr *MapGetNextIdAttr) error { + _, err := BPF(BPF_MAP_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapGetNextKeyAttr struct { + MapFd uint32 + _ [4]byte + Key Pointer + NextKey Pointer +} + +func MapGetNextKey(attr *MapGetNextKeyAttr) error { + _, err := BPF(BPF_MAP_GET_NEXT_KEY, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapLookupAndDeleteBatchAttr struct { + InBatch Pointer + OutBatch Pointer + Keys Pointer + Values Pointer + Count uint32 + MapFd uint32 + ElemFlags uint64 + Flags uint64 +} + +func MapLookupAndDeleteBatch(attr *MapLookupAndDeleteBatchAttr) error { + _, err := BPF(BPF_MAP_LOOKUP_AND_DELETE_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapLookupAndDeleteElemAttr struct { + MapFd uint32 + _ [4]byte + Key Pointer + Value Pointer + Flags uint64 +} + +func MapLookupAndDeleteElem(attr *MapLookupAndDeleteElemAttr) error { + _, err := BPF(BPF_MAP_LOOKUP_AND_DELETE_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapLookupBatchAttr struct { + InBatch Pointer + OutBatch Pointer + Keys Pointer + Values Pointer + Count uint32 + MapFd uint32 + ElemFlags uint64 + Flags uint64 +} + +func MapLookupBatch(attr *MapLookupBatchAttr) error { + _, err := BPF(BPF_MAP_LOOKUP_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapLookupElemAttr struct { + MapFd uint32 + _ [4]byte + Key Pointer + Value Pointer + Flags uint64 +} + +func MapLookupElem(attr *MapLookupElemAttr) error { + _, err := BPF(BPF_MAP_LOOKUP_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapUpdateBatchAttr struct { + InBatch Pointer + OutBatch Pointer + Keys Pointer + Values Pointer + Count uint32 + MapFd uint32 + ElemFlags uint64 + Flags uint64 +} + +func MapUpdateBatch(attr *MapUpdateBatchAttr) error { + _, err := BPF(BPF_MAP_UPDATE_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type MapUpdateElemAttr struct { + MapFd uint32 + _ [4]byte + Key Pointer + Value Pointer + Flags uint64 +} + +func MapUpdateElem(attr *MapUpdateElemAttr) error { + _, err := BPF(BPF_MAP_UPDATE_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type ObjGetAttr struct { + Pathname Pointer + BpfFd uint32 + FileFlags uint32 +} + +func ObjGet(attr *ObjGetAttr) (*FD, error) { + fd, err := BPF(BPF_OBJ_GET, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type ObjGetInfoByFdAttr struct { + BpfFd uint32 + InfoLen uint32 + Info Pointer +} + +func ObjGetInfoByFd(attr *ObjGetInfoByFdAttr) error { + _, err := BPF(BPF_OBJ_GET_INFO_BY_FD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type ObjPinAttr struct { + Pathname Pointer + BpfFd uint32 + FileFlags uint32 +} + +func ObjPin(attr *ObjPinAttr) error { + _, err := BPF(BPF_OBJ_PIN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type ProgAttachAttr struct { + TargetFd uint32 + AttachBpfFd uint32 + AttachType uint32 + AttachFlags uint32 + ReplaceBpfFd uint32 +} + +func ProgAttach(attr *ProgAttachAttr) error { + _, err := BPF(BPF_PROG_ATTACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type ProgBindMapAttr struct { + ProgFd uint32 + MapFd uint32 + Flags uint32 +} + +func ProgBindMap(attr *ProgBindMapAttr) error { + _, err := BPF(BPF_PROG_BIND_MAP, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type ProgDetachAttr struct { + TargetFd uint32 + AttachBpfFd uint32 + AttachType uint32 +} + +func ProgDetach(attr *ProgDetachAttr) error { + _, err := BPF(BPF_PROG_DETACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type ProgGetFdByIdAttr struct{ Id uint32 } + +func ProgGetFdById(attr *ProgGetFdByIdAttr) (*FD, error) { + fd, err := BPF(BPF_PROG_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type ProgGetNextIdAttr struct { + Id uint32 + NextId uint32 +} + +func ProgGetNextId(attr *ProgGetNextIdAttr) error { + _, err := BPF(BPF_PROG_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type ProgLoadAttr struct { + ProgType ProgType + InsnCnt uint32 + Insns Pointer + License Pointer + LogLevel uint32 + LogSize uint32 + LogBuf Pointer + KernVersion uint32 + ProgFlags uint32 + ProgName ObjName + ProgIfindex uint32 + ExpectedAttachType AttachType + ProgBtfFd uint32 + FuncInfoRecSize uint32 + FuncInfo Pointer + FuncInfoCnt uint32 + LineInfoRecSize uint32 + LineInfo Pointer + LineInfoCnt uint32 + AttachBtfId uint32 + AttachProgFd uint32 + CoreReloCnt uint32 + FdArray Pointer + CoreRelos Pointer + CoreReloRecSize uint32 + _ [4]byte +} + +func ProgLoad(attr *ProgLoadAttr) (*FD, error) { + fd, err := BPF(BPF_PROG_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type ProgRunAttr struct { + ProgFd uint32 + Retval uint32 + DataSizeIn uint32 + DataSizeOut uint32 + DataIn Pointer + DataOut Pointer + Repeat uint32 + Duration uint32 + CtxSizeIn uint32 + CtxSizeOut uint32 + CtxIn Pointer + CtxOut Pointer + Flags uint32 + Cpu uint32 + BatchSize uint32 + _ [4]byte +} + +func ProgRun(attr *ProgRunAttr) error { + _, err := BPF(BPF_PROG_TEST_RUN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +type RawTracepointOpenAttr struct { + Name Pointer + ProgFd uint32 + _ [4]byte +} + +func RawTracepointOpen(attr *RawTracepointOpenAttr) (*FD, error) { + fd, err := BPF(BPF_RAW_TRACEPOINT_OPEN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type CgroupLinkInfo struct { + CgroupId uint64 + AttachType AttachType + _ [4]byte +} + +type IterLinkInfo struct { + TargetName Pointer + TargetNameLen uint32 +} + +type NetNsLinkInfo struct { + NetnsIno uint32 + AttachType AttachType +} + +type RawTracepointLinkInfo struct { + TpName Pointer + TpNameLen uint32 + _ [4]byte +} + +type TracingLinkInfo struct { + AttachType AttachType + TargetObjId uint32 + TargetBtfId uint32 +} + +type XDPLinkInfo struct{ Ifindex uint32 } diff --git a/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go b/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go new file mode 100644 index 000000000..db4a1f5bf --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go @@ -0,0 +1,210 @@ +//go:build linux +// +build linux + +package unix + +import ( + "syscall" + + linux "golang.org/x/sys/unix" +) + +const ( + ENOENT = linux.ENOENT + EEXIST = linux.EEXIST + EAGAIN = linux.EAGAIN + ENOSPC = linux.ENOSPC + EINVAL = linux.EINVAL + EPOLLIN = linux.EPOLLIN + EINTR = linux.EINTR + EPERM = linux.EPERM + ESRCH = linux.ESRCH + ENODEV = linux.ENODEV + EBADF = linux.EBADF + E2BIG = linux.E2BIG + EFAULT = linux.EFAULT + EACCES = linux.EACCES + // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP + ENOTSUPP = syscall.Errno(0x20c) + + BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC + BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE + BPF_F_RDONLY = linux.BPF_F_RDONLY + BPF_F_WRONLY = linux.BPF_F_WRONLY + BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG + BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG + BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE + BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE + BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP + BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN + BPF_TAG_SIZE = linux.BPF_TAG_SIZE + BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT + BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT + BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ + SYS_BPF = linux.SYS_BPF + F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC + EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD + EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC + O_CLOEXEC = linux.O_CLOEXEC + O_NONBLOCK = linux.O_NONBLOCK + PROT_READ = linux.PROT_READ + PROT_WRITE = linux.PROT_WRITE + MAP_SHARED = linux.MAP_SHARED + PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1 + PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE + PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT + PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT + PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE + PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE + PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF + PerfBitWatermark = linux.PerfBitWatermark + PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW + PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC + RLIM_INFINITY = linux.RLIM_INFINITY + RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK + BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME + PERF_RECORD_LOST = linux.PERF_RECORD_LOST + PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE + AT_FDCWD = linux.AT_FDCWD + RENAME_NOREPLACE = linux.RENAME_NOREPLACE + SO_ATTACH_BPF = linux.SO_ATTACH_BPF + SO_DETACH_BPF = linux.SO_DETACH_BPF + SOL_SOCKET = linux.SOL_SOCKET +) + +// Statfs_t is a wrapper +type Statfs_t = linux.Statfs_t + +type Stat_t = linux.Stat_t + +// Rlimit is a wrapper +type Rlimit = linux.Rlimit + +// Syscall is a wrapper +func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { + return linux.Syscall(trap, a1, a2, a3) +} + +// FcntlInt is a wrapper +func FcntlInt(fd uintptr, cmd, arg int) (int, error) { + return linux.FcntlInt(fd, cmd, arg) +} + +// IoctlSetInt is a wrapper +func IoctlSetInt(fd int, req uint, value int) error { + return linux.IoctlSetInt(fd, req, value) +} + +// Statfs is a wrapper +func Statfs(path string, buf *Statfs_t) (err error) { + return linux.Statfs(path, buf) +} + +// Close is a wrapper +func Close(fd int) (err error) { + return linux.Close(fd) +} + +// EpollEvent is a wrapper +type EpollEvent = linux.EpollEvent + +// EpollWait is a wrapper +func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { + return linux.EpollWait(epfd, events, msec) +} + +// EpollCtl is a wrapper +func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { + return linux.EpollCtl(epfd, op, fd, event) +} + +// Eventfd is a wrapper +func Eventfd(initval uint, flags int) (fd int, err error) { + return linux.Eventfd(initval, flags) +} + +// Write is a wrapper +func Write(fd int, p []byte) (n int, err error) { + return linux.Write(fd, p) +} + +// EpollCreate1 is a wrapper +func EpollCreate1(flag int) (fd int, err error) { + return linux.EpollCreate1(flag) +} + +// PerfEventMmapPage is a wrapper +type PerfEventMmapPage linux.PerfEventMmapPage + +// SetNonblock is a wrapper +func SetNonblock(fd int, nonblocking bool) (err error) { + return linux.SetNonblock(fd, nonblocking) +} + +// Mmap is a wrapper +func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { + return linux.Mmap(fd, offset, length, prot, flags) +} + +// Munmap is a wrapper +func Munmap(b []byte) (err error) { + return linux.Munmap(b) +} + +// PerfEventAttr is a wrapper +type PerfEventAttr = linux.PerfEventAttr + +// PerfEventOpen is a wrapper +func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags) +} + +// Utsname is a wrapper +type Utsname = linux.Utsname + +// Uname is a wrapper +func Uname(buf *Utsname) (err error) { + return linux.Uname(buf) +} + +// Getpid is a wrapper +func Getpid() int { + return linux.Getpid() +} + +// Gettid is a wrapper +func Gettid() int { + return linux.Gettid() +} + +// Tgkill is a wrapper +func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { + return linux.Tgkill(tgid, tid, sig) +} + +// BytePtrFromString is a wrapper +func BytePtrFromString(s string) (*byte, error) { + return linux.BytePtrFromString(s) +} + +// ByteSliceToString is a wrapper +func ByteSliceToString(s []byte) string { + return linux.ByteSliceToString(s) +} + +// Renameat2 is a wrapper +func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { + return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags) +} + +func Prlimit(pid, resource int, new, old *Rlimit) error { + return linux.Prlimit(pid, resource, new, old) +} + +func Open(path string, mode int, perm uint32) (int, error) { + return linux.Open(path, mode, perm) +} + +func Fstat(fd int, stat *Stat_t) error { + return linux.Fstat(fd, stat) +} diff --git a/vendor/github.com/cilium/ebpf/internal/unix/types_other.go b/vendor/github.com/cilium/ebpf/internal/unix/types_other.go new file mode 100644 index 000000000..133c267db --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/unix/types_other.go @@ -0,0 +1,278 @@ +//go:build !linux +// +build !linux + +package unix + +import ( + "fmt" + "runtime" + "syscall" +) + +var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) + +const ( + ENOENT = syscall.ENOENT + EEXIST = syscall.EEXIST + EAGAIN = syscall.EAGAIN + ENOSPC = syscall.ENOSPC + EINVAL = syscall.EINVAL + EINTR = syscall.EINTR + EPERM = syscall.EPERM + ESRCH = syscall.ESRCH + ENODEV = syscall.ENODEV + EBADF = syscall.Errno(0) + E2BIG = syscall.Errno(0) + EFAULT = syscall.EFAULT + EACCES = syscall.Errno(0) + // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP + ENOTSUPP = syscall.Errno(0x20c) + + BPF_F_NO_PREALLOC = 0 + BPF_F_NUMA_NODE = 0 + BPF_F_RDONLY = 0 + BPF_F_WRONLY = 0 + BPF_F_RDONLY_PROG = 0 + BPF_F_WRONLY_PROG = 0 + BPF_F_SLEEPABLE = 0 + BPF_F_MMAPABLE = 0 + BPF_F_INNER_MAP = 0 + BPF_OBJ_NAME_LEN = 0x10 + BPF_TAG_SIZE = 0x8 + BPF_RINGBUF_BUSY_BIT = 0 + BPF_RINGBUF_DISCARD_BIT = 0 + BPF_RINGBUF_HDR_SZ = 0 + SYS_BPF = 321 + F_DUPFD_CLOEXEC = 0x406 + EPOLLIN = 0x1 + EPOLL_CTL_ADD = 0x1 + EPOLL_CLOEXEC = 0x80000 + O_CLOEXEC = 0x80000 + O_NONBLOCK = 0x800 + PROT_READ = 0x1 + PROT_WRITE = 0x2 + MAP_SHARED = 0x1 + PERF_ATTR_SIZE_VER1 = 0 + PERF_TYPE_SOFTWARE = 0x1 + PERF_TYPE_TRACEPOINT = 0 + PERF_COUNT_SW_BPF_OUTPUT = 0xa + PERF_EVENT_IOC_DISABLE = 0 + PERF_EVENT_IOC_ENABLE = 0 + PERF_EVENT_IOC_SET_BPF = 0 + PerfBitWatermark = 0x4000 + PERF_SAMPLE_RAW = 0x400 + PERF_FLAG_FD_CLOEXEC = 0x8 + RLIM_INFINITY = 0x7fffffffffffffff + RLIMIT_MEMLOCK = 8 + BPF_STATS_RUN_TIME = 0 + PERF_RECORD_LOST = 2 + PERF_RECORD_SAMPLE = 9 + AT_FDCWD = -0x2 + RENAME_NOREPLACE = 0x1 + SO_ATTACH_BPF = 0x32 + SO_DETACH_BPF = 0x1b + SOL_SOCKET = 0x1 +) + +// Statfs_t is a wrapper +type Statfs_t struct { + Type int64 + Bsize int64 + Blocks uint64 + Bfree uint64 + Bavail uint64 + Files uint64 + Ffree uint64 + Fsid [2]int32 + Namelen int64 + Frsize int64 + Flags int64 + Spare [4]int64 +} + +type Stat_t struct{} + +// Rlimit is a wrapper +type Rlimit struct { + Cur uint64 + Max uint64 +} + +// Syscall is a wrapper +func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { + return 0, 0, syscall.Errno(1) +} + +// FcntlInt is a wrapper +func FcntlInt(fd uintptr, cmd, arg int) (int, error) { + return -1, errNonLinux +} + +// IoctlSetInt is a wrapper +func IoctlSetInt(fd int, req uint, value int) error { + return errNonLinux +} + +// Statfs is a wrapper +func Statfs(path string, buf *Statfs_t) error { + return errNonLinux +} + +// Close is a wrapper +func Close(fd int) (err error) { + return errNonLinux +} + +// EpollEvent is a wrapper +type EpollEvent struct { + Events uint32 + Fd int32 + Pad int32 +} + +// EpollWait is a wrapper +func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { + return 0, errNonLinux +} + +// EpollCtl is a wrapper +func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { + return errNonLinux +} + +// Eventfd is a wrapper +func Eventfd(initval uint, flags int) (fd int, err error) { + return 0, errNonLinux +} + +// Write is a wrapper +func Write(fd int, p []byte) (n int, err error) { + return 0, errNonLinux +} + +// EpollCreate1 is a wrapper +func EpollCreate1(flag int) (fd int, err error) { + return 0, errNonLinux +} + +// PerfEventMmapPage is a wrapper +type PerfEventMmapPage struct { + Version uint32 + Compat_version uint32 + Lock uint32 + Index uint32 + Offset int64 + Time_enabled uint64 + Time_running uint64 + Capabilities uint64 + Pmc_width uint16 + Time_shift uint16 + Time_mult uint32 + Time_offset uint64 + Time_zero uint64 + Size uint32 + + Data_head uint64 + Data_tail uint64 + Data_offset uint64 + Data_size uint64 + Aux_head uint64 + Aux_tail uint64 + Aux_offset uint64 + Aux_size uint64 +} + +// SetNonblock is a wrapper +func SetNonblock(fd int, nonblocking bool) (err error) { + return errNonLinux +} + +// Mmap is a wrapper +func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { + return []byte{}, errNonLinux +} + +// Munmap is a wrapper +func Munmap(b []byte) (err error) { + return errNonLinux +} + +// PerfEventAttr is a wrapper +type PerfEventAttr struct { + Type uint32 + Size uint32 + Config uint64 + Sample uint64 + Sample_type uint64 + Read_format uint64 + Bits uint64 + Wakeup uint32 + Bp_type uint32 + Ext1 uint64 + Ext2 uint64 + Branch_sample_type uint64 + Sample_regs_user uint64 + Sample_stack_user uint32 + Clockid int32 + Sample_regs_intr uint64 + Aux_watermark uint32 + Sample_max_stack uint16 +} + +// PerfEventOpen is a wrapper +func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return 0, errNonLinux +} + +// Utsname is a wrapper +type Utsname struct { + Release [65]byte + Version [65]byte +} + +// Uname is a wrapper +func Uname(buf *Utsname) (err error) { + return errNonLinux +} + +// Getpid is a wrapper +func Getpid() int { + return -1 +} + +// Gettid is a wrapper +func Gettid() int { + return -1 +} + +// Tgkill is a wrapper +func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { + return errNonLinux +} + +// BytePtrFromString is a wrapper +func BytePtrFromString(s string) (*byte, error) { + return nil, errNonLinux +} + +// ByteSliceToString is a wrapper +func ByteSliceToString(s []byte) string { + return "" +} + +// Renameat2 is a wrapper +func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { + return errNonLinux +} + +func Prlimit(pid, resource int, new, old *Rlimit) error { + return errNonLinux +} + +func Open(path string, mode int, perm uint32) (int, error) { + return -1, errNonLinux +} + +func Fstat(fd int, stat *Stat_t) error { + return errNonLinux +} diff --git a/vendor/github.com/cilium/ebpf/internal/vdso.go b/vendor/github.com/cilium/ebpf/internal/vdso.go new file mode 100644 index 000000000..ae4821de2 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/vdso.go @@ -0,0 +1,150 @@ +package internal + +import ( + "debug/elf" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "os" + + "github.com/cilium/ebpf/internal/unix" +) + +var ( + errAuxvNoVDSO = errors.New("no vdso address found in auxv") +) + +// vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library +// linked into the current process image. +func vdsoVersion() (uint32, error) { + // Read data from the auxiliary vector, which is normally passed directly + // to the process. Go does not expose that data, so we must read it from procfs. + // https://man7.org/linux/man-pages/man3/getauxval.3.html + av, err := os.Open("/proc/self/auxv") + if err != nil { + return 0, fmt.Errorf("opening auxv: %w", err) + } + defer av.Close() + + vdsoAddr, err := vdsoMemoryAddress(av) + if err != nil { + return 0, fmt.Errorf("finding vDSO memory address: %w", err) + } + + // Use /proc/self/mem rather than unsafe.Pointer tricks. + mem, err := os.Open("/proc/self/mem") + if err != nil { + return 0, fmt.Errorf("opening mem: %w", err) + } + defer mem.Close() + + // Open ELF at provided memory address, as offset into /proc/self/mem. + c, err := vdsoLinuxVersionCode(io.NewSectionReader(mem, int64(vdsoAddr), math.MaxInt64)) + if err != nil { + return 0, fmt.Errorf("reading linux version code: %w", err) + } + + return c, nil +} + +// vdsoMemoryAddress returns the memory address of the vDSO library +// linked into the current process image. r is an io.Reader into an auxv blob. +func vdsoMemoryAddress(r io.Reader) (uint64, error) { + const ( + _AT_NULL = 0 // End of vector + _AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image + ) + + // Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`, + // the address of a page containing the virtual Dynamic Shared Object (vDSO). + aux := struct{ Tag, Val uint64 }{} + for { + if err := binary.Read(r, NativeEndian, &aux); err != nil { + return 0, fmt.Errorf("reading auxv entry: %w", err) + } + + switch aux.Tag { + case _AT_SYSINFO_EHDR: + if aux.Val != 0 { + return aux.Val, nil + } + return 0, fmt.Errorf("invalid vDSO address in auxv") + // _AT_NULL is always the last tag/val pair in the aux vector + // and can be treated like EOF. + case _AT_NULL: + return 0, errAuxvNoVDSO + } + } +} + +// format described at https://www.man7.org/linux/man-pages/man5/elf.5.html in section 'Notes (Nhdr)' +type elfNoteHeader struct { + NameSize int32 + DescSize int32 + Type int32 +} + +// vdsoLinuxVersionCode returns the LINUX_VERSION_CODE embedded in +// the ELF notes section of the binary provided by the reader. +func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) { + hdr, err := NewSafeELFFile(r) + if err != nil { + return 0, fmt.Errorf("reading vDSO ELF: %w", err) + } + + sections := hdr.SectionsByType(elf.SHT_NOTE) + if len(sections) == 0 { + return 0, fmt.Errorf("no note section found in vDSO ELF") + } + + for _, sec := range sections { + sr := sec.Open() + var n elfNoteHeader + + // Read notes until we find one named 'Linux'. + for { + if err := binary.Read(sr, hdr.ByteOrder, &n); err != nil { + if errors.Is(err, io.EOF) { + // We looked at all the notes in this section + break + } + return 0, fmt.Errorf("reading note header: %w", err) + } + + // If a note name is defined, it follows the note header. + var name string + if n.NameSize > 0 { + // Read the note name, aligned to 4 bytes. + buf := make([]byte, Align(int(n.NameSize), 4)) + if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil { + return 0, fmt.Errorf("reading note name: %w", err) + } + + // Read nul-terminated string. + name = unix.ByteSliceToString(buf[:n.NameSize]) + } + + // If a note descriptor is defined, it follows the name. + // It is possible for a note to have a descriptor but not a name. + if n.DescSize > 0 { + // LINUX_VERSION_CODE is a uint32 value. + if name == "Linux" && n.DescSize == 4 && n.Type == 0 { + var version uint32 + if err := binary.Read(sr, hdr.ByteOrder, &version); err != nil { + return 0, fmt.Errorf("reading note descriptor: %w", err) + } + return version, nil + } + + // Discard the note descriptor if it exists but we're not interested in it. + if _, err := io.CopyN(io.Discard, sr, int64(Align(int(n.DescSize), 4))); err != nil { + return 0, err + } + } + } + } + + return 0, fmt.Errorf("no Linux note in ELF") +} diff --git a/vendor/github.com/cilium/ebpf/internal/version.go b/vendor/github.com/cilium/ebpf/internal/version.go new file mode 100644 index 000000000..370e01e44 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/version.go @@ -0,0 +1,122 @@ +package internal + +import ( + "fmt" + "sync" + + "github.com/cilium/ebpf/internal/unix" +) + +const ( + // Version constant used in ELF binaries indicating that the loader needs to + // substitute the eBPF program's version with the value of the kernel's + // KERNEL_VERSION compile-time macro. Used for compatibility with BCC, gobpf + // and RedSift. + MagicKernelVersion = 0xFFFFFFFE +) + +var ( + kernelVersion = struct { + once sync.Once + version Version + err error + }{} +) + +// A Version in the form Major.Minor.Patch. +type Version [3]uint16 + +// NewVersion creates a version from a string like "Major.Minor.Patch". +// +// Patch is optional. +func NewVersion(ver string) (Version, error) { + var major, minor, patch uint16 + n, _ := fmt.Sscanf(ver, "%d.%d.%d", &major, &minor, &patch) + if n < 2 { + return Version{}, fmt.Errorf("invalid version: %s", ver) + } + return Version{major, minor, patch}, nil +} + +// NewVersionFromCode creates a version from a LINUX_VERSION_CODE. +func NewVersionFromCode(code uint32) Version { + return Version{ + uint16(uint8(code >> 16)), + uint16(uint8(code >> 8)), + uint16(uint8(code)), + } +} + +func (v Version) String() string { + if v[2] == 0 { + return fmt.Sprintf("v%d.%d", v[0], v[1]) + } + return fmt.Sprintf("v%d.%d.%d", v[0], v[1], v[2]) +} + +// Less returns true if the version is less than another version. +func (v Version) Less(other Version) bool { + for i, a := range v { + if a == other[i] { + continue + } + return a < other[i] + } + return false +} + +// Unspecified returns true if the version is all zero. +func (v Version) Unspecified() bool { + return v[0] == 0 && v[1] == 0 && v[2] == 0 +} + +// Kernel implements the kernel's KERNEL_VERSION macro from linux/version.h. +// It represents the kernel version and patch level as a single value. +func (v Version) Kernel() uint32 { + + // Kernels 4.4 and 4.9 have their SUBLEVEL clamped to 255 to avoid + // overflowing into PATCHLEVEL. + // See kernel commit 9b82f13e7ef3 ("kbuild: clamp SUBLEVEL to 255"). + s := v[2] + if s > 255 { + s = 255 + } + + // Truncate members to uint8 to prevent them from spilling over into + // each other when overflowing 8 bits. + return uint32(uint8(v[0]))<<16 | uint32(uint8(v[1]))<<8 | uint32(uint8(s)) +} + +// KernelVersion returns the version of the currently running kernel. +func KernelVersion() (Version, error) { + kernelVersion.once.Do(func() { + kernelVersion.version, kernelVersion.err = detectKernelVersion() + }) + + if kernelVersion.err != nil { + return Version{}, kernelVersion.err + } + return kernelVersion.version, nil +} + +// detectKernelVersion returns the version of the running kernel. +func detectKernelVersion() (Version, error) { + vc, err := vdsoVersion() + if err != nil { + return Version{}, err + } + return NewVersionFromCode(vc), nil +} + +// KernelRelease returns the release string of the running kernel. +// Its format depends on the Linux distribution and corresponds to directory +// names in /lib/modules by convention. Some examples are 5.15.17-1-lts and +// 4.19.0-16-amd64. +func KernelRelease() (string, error) { + var uname unix.Utsname + if err := unix.Uname(&uname); err != nil { + return "", fmt.Errorf("uname failed: %w", err) + } + + return unix.ByteSliceToString(uname.Release[:]), nil +} diff --git a/vendor/github.com/cilium/ebpf/link/cgroup.go b/vendor/github.com/cilium/ebpf/link/cgroup.go new file mode 100644 index 000000000..003b0638e --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/cgroup.go @@ -0,0 +1,165 @@ +package link + +import ( + "errors" + "fmt" + "os" + + "github.com/cilium/ebpf" +) + +type cgroupAttachFlags uint32 + +// cgroup attach flags +const ( + flagAllowOverride cgroupAttachFlags = 1 << iota + flagAllowMulti + flagReplace +) + +type CgroupOptions struct { + // Path to a cgroupv2 folder. + Path string + // One of the AttachCgroup* constants + Attach ebpf.AttachType + // Program must be of type CGroup*, and the attach type must match Attach. + Program *ebpf.Program +} + +// AttachCgroup links a BPF program to a cgroup. +func AttachCgroup(opts CgroupOptions) (Link, error) { + cgroup, err := os.Open(opts.Path) + if err != nil { + return nil, fmt.Errorf("can't open cgroup: %s", err) + } + + clone, err := opts.Program.Clone() + if err != nil { + cgroup.Close() + return nil, err + } + + var cg Link + cg, err = newLinkCgroup(cgroup, opts.Attach, clone) + if errors.Is(err, ErrNotSupported) { + cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowMulti) + } + if errors.Is(err, ErrNotSupported) { + cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowOverride) + } + if err != nil { + cgroup.Close() + clone.Close() + return nil, err + } + + return cg, nil +} + +type progAttachCgroup struct { + cgroup *os.File + current *ebpf.Program + attachType ebpf.AttachType + flags cgroupAttachFlags +} + +var _ Link = (*progAttachCgroup)(nil) + +func (cg *progAttachCgroup) isLink() {} + +func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) { + if flags&flagAllowMulti > 0 { + if err := haveProgAttachReplace(); err != nil { + return nil, fmt.Errorf("can't support multiple programs: %w", err) + } + } + + err := RawAttachProgram(RawAttachProgramOptions{ + Target: int(cgroup.Fd()), + Program: prog, + Flags: uint32(flags), + Attach: attach, + }) + if err != nil { + return nil, fmt.Errorf("cgroup: %w", err) + } + + return &progAttachCgroup{cgroup, prog, attach, flags}, nil +} + +func (cg *progAttachCgroup) Close() error { + defer cg.cgroup.Close() + defer cg.current.Close() + + err := RawDetachProgram(RawDetachProgramOptions{ + Target: int(cg.cgroup.Fd()), + Program: cg.current, + Attach: cg.attachType, + }) + if err != nil { + return fmt.Errorf("close cgroup: %s", err) + } + return nil +} + +func (cg *progAttachCgroup) Update(prog *ebpf.Program) error { + new, err := prog.Clone() + if err != nil { + return err + } + + args := RawAttachProgramOptions{ + Target: int(cg.cgroup.Fd()), + Program: prog, + Attach: cg.attachType, + Flags: uint32(cg.flags), + } + + if cg.flags&flagAllowMulti > 0 { + // Atomically replacing multiple programs requires at least + // 5.5 (commit 7dd68b3279f17921 "bpf: Support replacing cgroup-bpf + // program in MULTI mode") + args.Flags |= uint32(flagReplace) + args.Replace = cg.current + } + + if err := RawAttachProgram(args); err != nil { + new.Close() + return fmt.Errorf("can't update cgroup: %s", err) + } + + cg.current.Close() + cg.current = new + return nil +} + +func (cg *progAttachCgroup) Pin(string) error { + return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported) +} + +func (cg *progAttachCgroup) Unpin() error { + return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported) +} + +func (cg *progAttachCgroup) Info() (*Info, error) { + return nil, fmt.Errorf("can't get cgroup info: %w", ErrNotSupported) +} + +type linkCgroup struct { + RawLink +} + +var _ Link = (*linkCgroup)(nil) + +func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) { + link, err := AttachRawLink(RawLinkOptions{ + Target: int(cgroup.Fd()), + Program: prog, + Attach: attach, + }) + if err != nil { + return nil, err + } + + return &linkCgroup{*link}, err +} diff --git a/vendor/github.com/cilium/ebpf/link/doc.go b/vendor/github.com/cilium/ebpf/link/doc.go new file mode 100644 index 000000000..2bde35ed7 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/doc.go @@ -0,0 +1,2 @@ +// Package link allows attaching eBPF programs to various kernel hooks. +package link diff --git a/vendor/github.com/cilium/ebpf/link/iter.go b/vendor/github.com/cilium/ebpf/link/iter.go new file mode 100644 index 000000000..d2b32ef33 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/iter.go @@ -0,0 +1,85 @@ +package link + +import ( + "fmt" + "io" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +type IterOptions struct { + // Program must be of type Tracing with attach type + // AttachTraceIter. The kind of iterator to attach to is + // determined at load time via the AttachTo field. + // + // AttachTo requires the kernel to include BTF of itself, + // and it to be compiled with a recent pahole (>= 1.16). + Program *ebpf.Program + + // Map specifies the target map for bpf_map_elem and sockmap iterators. + // It may be nil. + Map *ebpf.Map +} + +// AttachIter attaches a BPF seq_file iterator. +func AttachIter(opts IterOptions) (*Iter, error) { + if err := haveBPFLink(); err != nil { + return nil, err + } + + progFd := opts.Program.FD() + if progFd < 0 { + return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd) + } + + var info bpfIterLinkInfoMap + if opts.Map != nil { + mapFd := opts.Map.FD() + if mapFd < 0 { + return nil, fmt.Errorf("invalid map: %w", sys.ErrClosedFd) + } + info.map_fd = uint32(mapFd) + } + + attr := sys.LinkCreateIterAttr{ + ProgFd: uint32(progFd), + AttachType: sys.AttachType(ebpf.AttachTraceIter), + IterInfo: sys.NewPointer(unsafe.Pointer(&info)), + IterInfoLen: uint32(unsafe.Sizeof(info)), + } + + fd, err := sys.LinkCreateIter(&attr) + if err != nil { + return nil, fmt.Errorf("can't link iterator: %w", err) + } + + return &Iter{RawLink{fd, ""}}, err +} + +// Iter represents an attached bpf_iter. +type Iter struct { + RawLink +} + +// Open creates a new instance of the iterator. +// +// Reading from the returned reader triggers the BPF program. +func (it *Iter) Open() (io.ReadCloser, error) { + attr := &sys.IterCreateAttr{ + LinkFd: it.fd.Uint(), + } + + fd, err := sys.IterCreate(attr) + if err != nil { + return nil, fmt.Errorf("can't create iterator: %w", err) + } + + return fd.File("bpf_iter"), nil +} + +// union bpf_iter_link_info.map +type bpfIterLinkInfoMap struct { + map_fd uint32 +} diff --git a/vendor/github.com/cilium/ebpf/link/kprobe.go b/vendor/github.com/cilium/ebpf/link/kprobe.go new file mode 100644 index 000000000..fdf622a0c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/kprobe.go @@ -0,0 +1,568 @@ +package link + +import ( + "bytes" + "crypto/rand" + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "syscall" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +var ( + kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events") + + kprobeRetprobeBit = struct { + once sync.Once + value uint64 + err error + }{} +) + +type probeType uint8 + +type probeArgs struct { + symbol, group, path string + offset, refCtrOffset, cookie uint64 + pid int + ret bool +} + +// KprobeOptions defines additional parameters that will be used +// when loading Kprobes. +type KprobeOptions struct { + // Arbitrary value that can be fetched from an eBPF program + // via `bpf_get_attach_cookie()`. + // + // Needs kernel 5.15+. + Cookie uint64 + // Offset of the kprobe relative to the traced symbol. + // Can be used to insert kprobes at arbitrary offsets in kernel functions, + // e.g. in places where functions have been inlined. + Offset uint64 +} + +const ( + kprobeType probeType = iota + uprobeType +) + +func (pt probeType) String() string { + if pt == kprobeType { + return "kprobe" + } + return "uprobe" +} + +func (pt probeType) EventsPath() string { + if pt == kprobeType { + return kprobeEventsPath + } + return uprobeEventsPath +} + +func (pt probeType) PerfEventType(ret bool) perfEventType { + if pt == kprobeType { + if ret { + return kretprobeEvent + } + return kprobeEvent + } + if ret { + return uretprobeEvent + } + return uprobeEvent +} + +func (pt probeType) RetprobeBit() (uint64, error) { + if pt == kprobeType { + return kretprobeBit() + } + return uretprobeBit() +} + +// Kprobe attaches the given eBPF program to a perf event that fires when the +// given kernel symbol starts executing. See /proc/kallsyms for available +// symbols. For example, printk(): +// +// kp, err := Kprobe("printk", prog, nil) +// +// Losing the reference to the resulting Link (kp) will close the Kprobe +// and prevent further execution of prog. The Link must be Closed during +// program shutdown to avoid leaking system resources. +func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { + k, err := kprobe(symbol, prog, opts, false) + if err != nil { + return nil, err + } + + lnk, err := attachPerfEvent(k, prog) + if err != nil { + k.Close() + return nil, err + } + + return lnk, nil +} + +// Kretprobe attaches the given eBPF program to a perf event that fires right +// before the given kernel symbol exits, with the function stack left intact. +// See /proc/kallsyms for available symbols. For example, printk(): +// +// kp, err := Kretprobe("printk", prog, nil) +// +// Losing the reference to the resulting Link (kp) will close the Kretprobe +// and prevent further execution of prog. The Link must be Closed during +// program shutdown to avoid leaking system resources. +func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { + k, err := kprobe(symbol, prog, opts, true) + if err != nil { + return nil, err + } + + lnk, err := attachPerfEvent(k, prog) + if err != nil { + k.Close() + return nil, err + } + + return lnk, nil +} + +// isValidKprobeSymbol implements the equivalent of a regex match +// against "^[a-zA-Z_][0-9a-zA-Z_.]*$". +func isValidKprobeSymbol(s string) bool { + if len(s) < 1 { + return false + } + + for i, c := range []byte(s) { + switch { + case c >= 'a' && c <= 'z': + case c >= 'A' && c <= 'Z': + case c == '_': + case i > 0 && c >= '0' && c <= '9': + + // Allow `.` in symbol name. GCC-compiled kernel may change symbol name + // to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`. + // See: https://gcc.gnu.org/gcc-10/changes.html + case i > 0 && c == '.': + + default: + return false + } + } + + return true +} + +// kprobe opens a perf event on the given symbol and attaches prog to it. +// If ret is true, create a kretprobe. +func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) { + if symbol == "" { + return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput) + } + if prog == nil { + return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) + } + if !isValidKprobeSymbol(symbol) { + return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput) + } + if prog.Type() != ebpf.Kprobe { + return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) + } + + args := probeArgs{ + pid: perfAllThreads, + symbol: symbol, + ret: ret, + } + + if opts != nil { + args.cookie = opts.Cookie + args.offset = opts.Offset + } + + // Use kprobe PMU if the kernel has it available. + tp, err := pmuKprobe(args) + if errors.Is(err, os.ErrNotExist) { + args.symbol = platformPrefix(symbol) + tp, err = pmuKprobe(args) + } + if err == nil { + return tp, nil + } + if err != nil && !errors.Is(err, ErrNotSupported) { + return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err) + } + + // Use tracefs if kprobe PMU is missing. + args.symbol = symbol + tp, err = tracefsKprobe(args) + if errors.Is(err, os.ErrNotExist) { + args.symbol = platformPrefix(symbol) + tp, err = tracefsKprobe(args) + } + if err != nil { + return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err) + } + + return tp, nil +} + +// pmuKprobe opens a perf event based on the kprobe PMU. +// Returns os.ErrNotExist if the given symbol does not exist in the kernel. +func pmuKprobe(args probeArgs) (*perfEvent, error) { + return pmuProbe(kprobeType, args) +} + +// pmuProbe opens a perf event based on a Performance Monitoring Unit. +// +// Requires at least a 4.17 kernel. +// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU" +// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU" +// +// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU +func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) { + // Getting the PMU type will fail if the kernel doesn't support + // the perf_[k,u]probe PMU. + et, err := getPMUEventType(typ) + if err != nil { + return nil, err + } + + var config uint64 + if args.ret { + bit, err := typ.RetprobeBit() + if err != nil { + return nil, err + } + config |= 1 << bit + } + + var ( + attr unix.PerfEventAttr + sp unsafe.Pointer + ) + switch typ { + case kprobeType: + // Create a pointer to a NUL-terminated string for the kernel. + sp, err = unsafeStringPtr(args.symbol) + if err != nil { + return nil, err + } + + attr = unix.PerfEventAttr{ + // The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1, + // since it added the config2 (Ext2) field. Use Ext2 as probe_offset. + Size: unix.PERF_ATTR_SIZE_VER1, + Type: uint32(et), // PMU event type read from sysfs + Ext1: uint64(uintptr(sp)), // Kernel symbol to trace + Ext2: args.offset, // Kernel symbol offset + Config: config, // Retprobe flag + } + case uprobeType: + sp, err = unsafeStringPtr(args.path) + if err != nil { + return nil, err + } + + if args.refCtrOffset != 0 { + config |= args.refCtrOffset << uprobeRefCtrOffsetShift + } + + attr = unix.PerfEventAttr{ + // The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1, + // since it added the config2 (Ext2) field. The Size field controls the + // size of the internal buffer the kernel allocates for reading the + // perf_event_attr argument from userspace. + Size: unix.PERF_ATTR_SIZE_VER1, + Type: uint32(et), // PMU event type read from sysfs + Ext1: uint64(uintptr(sp)), // Uprobe path + Ext2: args.offset, // Uprobe offset + Config: config, // RefCtrOffset, Retprobe flag + } + } + + rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) + + // On some old kernels, kprobe PMU doesn't allow `.` in symbol names and + // return -EINVAL. Return ErrNotSupported to allow falling back to tracefs. + // https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343 + if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") { + return nil, fmt.Errorf("symbol '%s+%#x': older kernels don't accept dots: %w", args.symbol, args.offset, ErrNotSupported) + } + // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL + // when trying to create a kretprobe for a missing symbol. Make sure ENOENT + // is returned to the caller. + if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { + return nil, fmt.Errorf("symbol '%s+%#x' not found: %w", args.symbol, args.offset, os.ErrNotExist) + } + // Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved + // to an invalid insn boundary. + if errors.Is(err, syscall.EILSEQ) { + return nil, fmt.Errorf("symbol '%s+%#x' not found (bad insn boundary): %w", args.symbol, args.offset, os.ErrNotExist) + } + // Since at least commit cb9a19fe4aa51, ENOTSUPP is returned + // when attempting to set a uprobe on a trap instruction. + if errors.Is(err, unix.ENOTSUPP) { + return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", args.offset, err) + } + if err != nil { + return nil, fmt.Errorf("opening perf event: %w", err) + } + + // Ensure the string pointer is not collected before PerfEventOpen returns. + runtime.KeepAlive(sp) + + fd, err := sys.NewFD(rawFd) + if err != nil { + return nil, err + } + + // Kernel has perf_[k,u]probe PMU available, initialize perf event. + return &perfEvent{ + typ: typ.PerfEventType(args.ret), + name: args.symbol, + pmuID: et, + cookie: args.cookie, + fd: fd, + }, nil +} + +// tracefsKprobe creates a Kprobe tracefs entry. +func tracefsKprobe(args probeArgs) (*perfEvent, error) { + return tracefsProbe(kprobeType, args) +} + +// tracefsProbe creates a trace event by writing an entry to /[k,u]probe_events. +// A new trace event group name is generated on every call to support creating +// multiple trace events for the same kernel or userspace symbol. +// Path and offset are only set in the case of uprobe(s) and are used to set +// the executable/library path on the filesystem and the offset where the probe is inserted. +// A perf event is then opened on the newly-created trace event and returned to the caller. +func tracefsProbe(typ probeType, args probeArgs) (_ *perfEvent, err error) { + // Generate a random string for each trace event we attempt to create. + // This value is used as the 'group' token in tracefs to allow creating + // multiple kprobe trace events with the same name. + group, err := randomGroup("ebpf") + if err != nil { + return nil, fmt.Errorf("randomizing group name: %w", err) + } + args.group = group + + // Before attempting to create a trace event through tracefs, + // check if an event with the same group and name already exists. + // Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate + // entry, so we need to rely on reads for detecting uniqueness. + _, err = getTraceEventID(group, args.symbol) + if err == nil { + return nil, fmt.Errorf("trace event already exists: %s/%s", group, args.symbol) + } + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("checking trace event %s/%s: %w", group, args.symbol, err) + } + + // Create the [k,u]probe trace event using tracefs. + if err := createTraceFSProbeEvent(typ, args); err != nil { + return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) + } + defer func() { + if err != nil { + // Make sure we clean up the created tracefs event when we return error. + // If a livepatch handler is already active on the symbol, the write to + // tracefs will succeed, a trace event will show up, but creating the + // perf event will fail with EBUSY. + _ = closeTraceFSProbeEvent(typ, args.group, args.symbol) + } + }() + + // Get the newly-created trace event's id. + tid, err := getTraceEventID(group, args.symbol) + if err != nil { + return nil, fmt.Errorf("getting trace event id: %w", err) + } + + // Kprobes are ephemeral tracepoints and share the same perf event type. + fd, err := openTracepointPerfEvent(tid, args.pid) + if err != nil { + return nil, err + } + + return &perfEvent{ + typ: typ.PerfEventType(args.ret), + group: group, + name: args.symbol, + tracefsID: tid, + cookie: args.cookie, + fd: fd, + }, nil +} + +// createTraceFSProbeEvent creates a new ephemeral trace event by writing to +// /[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid +// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist +// if a probe with the same group and symbol already exists. +func createTraceFSProbeEvent(typ probeType, args probeArgs) error { + // Open the kprobe_events file in tracefs. + f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) + if err != nil { + return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err) + } + defer f.Close() + + var pe, token string + switch typ { + case kprobeType: + // The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt): + // p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe + // r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe + // -:[GRP/]EVENT : Clear a probe + // + // Some examples: + // r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy + // p:ebpf_5678/p_my_kprobe __x64_sys_execve + // + // Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the + // kernel default to NR_CPUS. This is desired in most eBPF cases since + // subsampling or rate limiting logic can be more accurately implemented in + // the eBPF program itself. + // See Documentation/kprobes.txt for more details. + token = kprobeToken(args) + pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, sanitizeSymbol(args.symbol), token) + case uprobeType: + // The uprobe_events syntax is as follows: + // p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe + // r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe + // -:[GRP/]EVENT : Clear a probe + // + // Some examples: + // r:ebpf_1234/readline /bin/bash:0x12345 + // p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123) + // + // See Documentation/trace/uprobetracer.txt for more details. + token = uprobeToken(args) + pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, args.symbol, token) + } + _, err = f.WriteString(pe) + // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL + // when trying to create a kretprobe for a missing symbol. Make sure ENOENT + // is returned to the caller. + // EINVAL is also returned on pre-5.2 kernels when the `SYM[+offs]` token + // is resolved to an invalid insn boundary. + if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { + return fmt.Errorf("token %s: %w", token, os.ErrNotExist) + } + // Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved + // to an invalid insn boundary. + if errors.Is(err, syscall.EILSEQ) { + return fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist) + } + // ERANGE is returned when the `SYM[+offs]` token is too big and cannot + // be resolved. + if errors.Is(err, syscall.ERANGE) { + return fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist) + } + if err != nil { + return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) + } + + return nil +} + +// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol +// from /[k,u]probe_events. +func closeTraceFSProbeEvent(typ probeType, group, symbol string) error { + f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) + if err != nil { + return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err) + } + defer f.Close() + + // See [k,u]probe_events syntax above. The probe type does not need to be specified + // for removals. + pe := fmt.Sprintf("-:%s/%s", group, sanitizeSymbol(symbol)) + if _, err = f.WriteString(pe); err != nil { + return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) + } + + return nil +} + +// randomGroup generates a pseudorandom string for use as a tracefs group name. +// Returns an error when the output string would exceed 63 characters (kernel +// limitation), when rand.Read() fails or when prefix contains characters not +// allowed by isValidTraceID. +func randomGroup(prefix string) (string, error) { + if !isValidTraceID(prefix) { + return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput) + } + + b := make([]byte, 8) + if _, err := rand.Read(b); err != nil { + return "", fmt.Errorf("reading random bytes: %w", err) + } + + group := fmt.Sprintf("%s_%x", prefix, b) + if len(group) > 63 { + return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, errInvalidInput) + } + + return group, nil +} + +func probePrefix(ret bool) string { + if ret { + return "r" + } + return "p" +} + +// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit +// from /sys/bus/event_source/devices//format/retprobe. +func determineRetprobeBit(typ probeType) (uint64, error) { + p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe") + + data, err := os.ReadFile(p) + if err != nil { + return 0, err + } + + var rp uint64 + n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp) + if err != nil { + return 0, fmt.Errorf("parse retprobe bit: %w", err) + } + if n != 1 { + return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n) + } + + return rp, nil +} + +func kretprobeBit() (uint64, error) { + kprobeRetprobeBit.once.Do(func() { + kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType) + }) + return kprobeRetprobeBit.value, kprobeRetprobeBit.err +} + +// kprobeToken creates the SYM[+offs] token for the tracefs api. +func kprobeToken(args probeArgs) string { + po := args.symbol + + if args.offset != 0 { + po += fmt.Sprintf("+%#x", args.offset) + } + + return po +} diff --git a/vendor/github.com/cilium/ebpf/link/link.go b/vendor/github.com/cilium/ebpf/link/link.go new file mode 100644 index 000000000..067d0101a --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/link.go @@ -0,0 +1,313 @@ +package link + +import ( + "bytes" + "encoding/binary" + "fmt" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" +) + +var ErrNotSupported = internal.ErrNotSupported + +// Link represents a Program attached to a BPF hook. +type Link interface { + // Replace the current program with a new program. + // + // Passing a nil program is an error. May return an error wrapping ErrNotSupported. + Update(*ebpf.Program) error + + // Persist a link by pinning it into a bpffs. + // + // May return an error wrapping ErrNotSupported. + Pin(string) error + + // Undo a previous call to Pin. + // + // May return an error wrapping ErrNotSupported. + Unpin() error + + // Close frees resources. + // + // The link will be broken unless it has been successfully pinned. + // A link may continue past the lifetime of the process if Close is + // not called. + Close() error + + // Info returns metadata on a link. + // + // May return an error wrapping ErrNotSupported. + Info() (*Info, error) + + // Prevent external users from implementing this interface. + isLink() +} + +// LoadPinnedLink loads a link that was persisted into a bpffs. +func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) { + raw, err := loadPinnedRawLink(fileName, opts) + if err != nil { + return nil, err + } + + return wrapRawLink(raw) +} + +// wrap a RawLink in a more specific type if possible. +// +// The function takes ownership of raw and closes it on error. +func wrapRawLink(raw *RawLink) (Link, error) { + info, err := raw.Info() + if err != nil { + raw.Close() + return nil, err + } + + switch info.Type { + case RawTracepointType: + return &rawTracepoint{*raw}, nil + case TracingType: + return &tracing{*raw}, nil + case CgroupType: + return &linkCgroup{*raw}, nil + case IterType: + return &Iter{*raw}, nil + case NetNsType: + return &NetNsLink{*raw}, nil + default: + return raw, nil + } +} + +// ID uniquely identifies a BPF link. +type ID = sys.LinkID + +// RawLinkOptions control the creation of a raw link. +type RawLinkOptions struct { + // File descriptor to attach to. This differs for each attach type. + Target int + // Program to attach. + Program *ebpf.Program + // Attach must match the attach type of Program. + Attach ebpf.AttachType + // BTF is the BTF of the attachment target. + BTF btf.TypeID + // Flags control the attach behaviour. + Flags uint32 +} + +// Info contains metadata on a link. +type Info struct { + Type Type + ID ID + Program ebpf.ProgramID + extra interface{} +} + +type TracingInfo sys.TracingLinkInfo +type CgroupInfo sys.CgroupLinkInfo +type NetNsInfo sys.NetNsLinkInfo +type XDPInfo sys.XDPLinkInfo + +// Tracing returns tracing type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) Tracing() *TracingInfo { + e, _ := r.extra.(*TracingInfo) + return e +} + +// Cgroup returns cgroup type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) Cgroup() *CgroupInfo { + e, _ := r.extra.(*CgroupInfo) + return e +} + +// NetNs returns netns type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) NetNs() *NetNsInfo { + e, _ := r.extra.(*NetNsInfo) + return e +} + +// ExtraNetNs returns XDP type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) XDP() *XDPInfo { + e, _ := r.extra.(*XDPInfo) + return e +} + +// RawLink is the low-level API to bpf_link. +// +// You should consider using the higher level interfaces in this +// package instead. +type RawLink struct { + fd *sys.FD + pinnedPath string +} + +// AttachRawLink creates a raw link. +func AttachRawLink(opts RawLinkOptions) (*RawLink, error) { + if err := haveBPFLink(); err != nil { + return nil, err + } + + if opts.Target < 0 { + return nil, fmt.Errorf("invalid target: %s", sys.ErrClosedFd) + } + + progFd := opts.Program.FD() + if progFd < 0 { + return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd) + } + + attr := sys.LinkCreateAttr{ + TargetFd: uint32(opts.Target), + ProgFd: uint32(progFd), + AttachType: sys.AttachType(opts.Attach), + TargetBtfId: uint32(opts.BTF), + Flags: opts.Flags, + } + fd, err := sys.LinkCreate(&attr) + if err != nil { + return nil, fmt.Errorf("can't create link: %s", err) + } + + return &RawLink{fd, ""}, nil +} + +func loadPinnedRawLink(fileName string, opts *ebpf.LoadPinOptions) (*RawLink, error) { + fd, err := sys.ObjGet(&sys.ObjGetAttr{ + Pathname: sys.NewStringPointer(fileName), + FileFlags: opts.Marshal(), + }) + if err != nil { + return nil, fmt.Errorf("load pinned link: %w", err) + } + + return &RawLink{fd, fileName}, nil +} + +func (l *RawLink) isLink() {} + +// FD returns the raw file descriptor. +func (l *RawLink) FD() int { + return l.fd.Int() +} + +// Close breaks the link. +// +// Use Pin if you want to make the link persistent. +func (l *RawLink) Close() error { + return l.fd.Close() +} + +// Pin persists a link past the lifetime of the process. +// +// Calling Close on a pinned Link will not break the link +// until the pin is removed. +func (l *RawLink) Pin(fileName string) error { + if err := internal.Pin(l.pinnedPath, fileName, l.fd); err != nil { + return err + } + l.pinnedPath = fileName + return nil +} + +// Unpin implements the Link interface. +func (l *RawLink) Unpin() error { + if err := internal.Unpin(l.pinnedPath); err != nil { + return err + } + l.pinnedPath = "" + return nil +} + +// Update implements the Link interface. +func (l *RawLink) Update(new *ebpf.Program) error { + return l.UpdateArgs(RawLinkUpdateOptions{ + New: new, + }) +} + +// RawLinkUpdateOptions control the behaviour of RawLink.UpdateArgs. +type RawLinkUpdateOptions struct { + New *ebpf.Program + Old *ebpf.Program + Flags uint32 +} + +// UpdateArgs updates a link based on args. +func (l *RawLink) UpdateArgs(opts RawLinkUpdateOptions) error { + newFd := opts.New.FD() + if newFd < 0 { + return fmt.Errorf("invalid program: %s", sys.ErrClosedFd) + } + + var oldFd int + if opts.Old != nil { + oldFd = opts.Old.FD() + if oldFd < 0 { + return fmt.Errorf("invalid replacement program: %s", sys.ErrClosedFd) + } + } + + attr := sys.LinkUpdateAttr{ + LinkFd: l.fd.Uint(), + NewProgFd: uint32(newFd), + OldProgFd: uint32(oldFd), + Flags: opts.Flags, + } + return sys.LinkUpdate(&attr) +} + +// Info returns metadata about the link. +func (l *RawLink) Info() (*Info, error) { + var info sys.LinkInfo + + if err := sys.ObjInfo(l.fd, &info); err != nil { + return nil, fmt.Errorf("link info: %s", err) + } + + var extra interface{} + switch info.Type { + case CgroupType: + extra = &CgroupInfo{} + case IterType: + // not supported + case NetNsType: + extra = &NetNsInfo{} + case RawTracepointType: + // not supported + case TracingType: + extra = &TracingInfo{} + case XDPType: + extra = &XDPInfo{} + case PerfEventType: + // no extra + default: + return nil, fmt.Errorf("unknown link info type: %d", info.Type) + } + + if info.Type != RawTracepointType && info.Type != IterType && info.Type != PerfEventType { + buf := bytes.NewReader(info.Extra[:]) + err := binary.Read(buf, internal.NativeEndian, extra) + if err != nil { + return nil, fmt.Errorf("can not read extra link info: %w", err) + } + } + + return &Info{ + info.Type, + info.Id, + ebpf.ProgramID(info.ProgId), + extra, + }, nil +} diff --git a/vendor/github.com/cilium/ebpf/link/netns.go b/vendor/github.com/cilium/ebpf/link/netns.go new file mode 100644 index 000000000..344ecced6 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/netns.go @@ -0,0 +1,36 @@ +package link + +import ( + "fmt" + + "github.com/cilium/ebpf" +) + +// NetNsLink is a program attached to a network namespace. +type NetNsLink struct { + RawLink +} + +// AttachNetNs attaches a program to a network namespace. +func AttachNetNs(ns int, prog *ebpf.Program) (*NetNsLink, error) { + var attach ebpf.AttachType + switch t := prog.Type(); t { + case ebpf.FlowDissector: + attach = ebpf.AttachFlowDissector + case ebpf.SkLookup: + attach = ebpf.AttachSkLookup + default: + return nil, fmt.Errorf("can't attach %v to network namespace", t) + } + + link, err := AttachRawLink(RawLinkOptions{ + Target: ns, + Program: prog, + Attach: attach, + }) + if err != nil { + return nil, err + } + + return &NetNsLink{*link}, nil +} diff --git a/vendor/github.com/cilium/ebpf/link/perf_event.go b/vendor/github.com/cilium/ebpf/link/perf_event.go new file mode 100644 index 000000000..0e5bd4791 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/perf_event.go @@ -0,0 +1,394 @@ +package link + +import ( + "bytes" + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// Getting the terminology right is usually the hardest part. For posterity and +// for staying sane during implementation: +// +// - trace event: Representation of a kernel runtime hook. Filesystem entries +// under /events. Can be tracepoints (static), kprobes or uprobes. +// Can be instantiated into perf events (see below). +// - tracepoint: A predetermined hook point in the kernel. Exposed as trace +// events in (sub)directories under /events. Cannot be closed or +// removed, they are static. +// - k(ret)probe: Ephemeral trace events based on entry or exit points of +// exported kernel symbols. kprobe-based (tracefs) trace events can be +// created system-wide by writing to the /kprobe_events file, or +// they can be scoped to the current process by creating PMU perf events. +// - u(ret)probe: Ephemeral trace events based on user provides ELF binaries +// and offsets. uprobe-based (tracefs) trace events can be +// created system-wide by writing to the /uprobe_events file, or +// they can be scoped to the current process by creating PMU perf events. +// - perf event: An object instantiated based on an existing trace event or +// kernel symbol. Referred to by fd in userspace. +// Exactly one eBPF program can be attached to a perf event. Multiple perf +// events can be created from a single trace event. Closing a perf event +// stops any further invocations of the attached eBPF program. + +var ( + tracefsPath = "/sys/kernel/debug/tracing" + + errInvalidInput = errors.New("invalid input") +) + +const ( + perfAllThreads = -1 +) + +type perfEventType uint8 + +const ( + tracepointEvent perfEventType = iota + kprobeEvent + kretprobeEvent + uprobeEvent + uretprobeEvent +) + +// A perfEvent represents a perf event kernel object. Exactly one eBPF program +// can be attached to it. It is created based on a tracefs trace event or a +// Performance Monitoring Unit (PMU). +type perfEvent struct { + // The event type determines the types of programs that can be attached. + typ perfEventType + + // Group and name of the tracepoint/kprobe/uprobe. + group string + name string + + // PMU event ID read from sysfs. Valid IDs are non-zero. + pmuID uint64 + // ID of the trace event read from tracefs. Valid IDs are non-zero. + tracefsID uint64 + + // User provided arbitrary value. + cookie uint64 + + // This is the perf event FD. + fd *sys.FD +} + +func (pe *perfEvent) Close() error { + if err := pe.fd.Close(); err != nil { + return fmt.Errorf("closing perf event fd: %w", err) + } + + switch pe.typ { + case kprobeEvent, kretprobeEvent: + // Clean up kprobe tracefs entry. + if pe.tracefsID != 0 { + return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name) + } + case uprobeEvent, uretprobeEvent: + // Clean up uprobe tracefs entry. + if pe.tracefsID != 0 { + return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name) + } + case tracepointEvent: + // Tracepoint trace events don't hold any extra resources. + return nil + } + + return nil +} + +// perfEventLink represents a bpf perf link. +type perfEventLink struct { + RawLink + pe *perfEvent +} + +func (pl *perfEventLink) isLink() {} + +// Pinning requires the underlying perf event FD to stay open. +// +// | PerfEvent FD | BpfLink FD | Works | +// |--------------|------------|-------| +// | Open | Open | Yes | +// | Closed | Open | No | +// | Open | Closed | No (Pin() -> EINVAL) | +// | Closed | Closed | No (Pin() -> EINVAL) | +// +// There is currently no pretty way to recover the perf event FD +// when loading a pinned link, so leave as not supported for now. +func (pl *perfEventLink) Pin(string) error { + return fmt.Errorf("perf event link pin: %w", ErrNotSupported) +} + +func (pl *perfEventLink) Unpin() error { + return fmt.Errorf("perf event link unpin: %w", ErrNotSupported) +} + +func (pl *perfEventLink) Close() error { + if err := pl.pe.Close(); err != nil { + return fmt.Errorf("perf event link close: %w", err) + } + return pl.fd.Close() +} + +func (pl *perfEventLink) Update(prog *ebpf.Program) error { + return fmt.Errorf("perf event link update: %w", ErrNotSupported) +} + +// perfEventIoctl implements Link and handles the perf event lifecycle +// via ioctl(). +type perfEventIoctl struct { + *perfEvent +} + +func (pi *perfEventIoctl) isLink() {} + +// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"), +// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array +// owned by the perf event, which means multiple programs can be attached +// simultaneously. +// +// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event +// returns EEXIST. +// +// Detaching a program from a perf event is currently not possible, so a +// program replacement mechanism cannot be implemented for perf events. +func (pi *perfEventIoctl) Update(prog *ebpf.Program) error { + return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported) +} + +func (pi *perfEventIoctl) Pin(string) error { + return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported) +} + +func (pi *perfEventIoctl) Unpin() error { + return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported) +} + +func (pi *perfEventIoctl) Info() (*Info, error) { + return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported) +} + +// attach the given eBPF prog to the perf event stored in pe. +// pe must contain a valid perf event fd. +// prog's type must match the program type stored in pe. +func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) { + if prog == nil { + return nil, errors.New("cannot attach a nil program") + } + if prog.FD() < 0 { + return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) + } + + switch pe.typ { + case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent: + if t := prog.Type(); t != ebpf.Kprobe { + return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t) + } + case tracepointEvent: + if t := prog.Type(); t != ebpf.TracePoint { + return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t) + } + default: + return nil, fmt.Errorf("unknown perf event type: %d", pe.typ) + } + + if err := haveBPFLinkPerfEvent(); err == nil { + return attachPerfEventLink(pe, prog) + } + return attachPerfEventIoctl(pe, prog) +} + +func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) { + if pe.cookie != 0 { + return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported) + } + + // Assign the eBPF program to the perf event. + err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD()) + if err != nil { + return nil, fmt.Errorf("setting perf event bpf program: %w", err) + } + + // PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values. + if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil { + return nil, fmt.Errorf("enable perf event: %s", err) + } + + pi := &perfEventIoctl{pe} + + // Close the perf event when its reference is lost to avoid leaking system resources. + runtime.SetFinalizer(pi, (*perfEventIoctl).Close) + return pi, nil +} + +// Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+). +// +// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e +func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) { + fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ + ProgFd: uint32(prog.FD()), + TargetFd: pe.fd.Uint(), + AttachType: sys.BPF_PERF_EVENT, + BpfCookie: pe.cookie, + }) + if err != nil { + return nil, fmt.Errorf("cannot create bpf perf link: %v", err) + } + + pl := &perfEventLink{RawLink{fd: fd}, pe} + + // Close the perf event when its reference is lost to avoid leaking system resources. + runtime.SetFinalizer(pl, (*perfEventLink).Close) + return pl, nil +} + +// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str. +func unsafeStringPtr(str string) (unsafe.Pointer, error) { + p, err := unix.BytePtrFromString(str) + if err != nil { + return nil, err + } + return unsafe.Pointer(p), nil +} + +// getTraceEventID reads a trace event's ID from tracefs given its group and name. +// The kernel requires group and name to be alphanumeric or underscore. +// +// name automatically has its invalid symbols converted to underscores so the caller +// can pass a raw symbol name, e.g. a kernel symbol containing dots. +func getTraceEventID(group, name string) (uint64, error) { + name = sanitizeSymbol(name) + tid, err := uint64FromFile(tracefsPath, "events", group, name, "id") + if errors.Is(err, os.ErrNotExist) { + return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist) + } + if err != nil { + return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err) + } + + return tid, nil +} + +// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier) +// from /sys/bus/event_source/devices//type. +// +// Returns ErrNotSupported if the pmu type is not supported. +func getPMUEventType(typ probeType) (uint64, error) { + et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type") + if errors.Is(err, os.ErrNotExist) { + return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported) + } + if err != nil { + return 0, fmt.Errorf("reading pmu type %s: %w", typ, err) + } + + return et, nil +} + +// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide +// [k,u]probes created by writing to /[k,u]probe_events are tracepoints +// behind the scenes, and can be attached to using these perf events. +func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) { + attr := unix.PerfEventAttr{ + Type: unix.PERF_TYPE_TRACEPOINT, + Config: tid, + Sample_type: unix.PERF_SAMPLE_RAW, + Sample: 1, + Wakeup: 1, + } + + fd, err := unix.PerfEventOpen(&attr, pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) + if err != nil { + return nil, fmt.Errorf("opening tracepoint perf event: %w", err) + } + + return sys.NewFD(fd) +} + +// uint64FromFile reads a uint64 from a file. All elements of path are sanitized +// and joined onto base. Returns error if base no longer prefixes the path after +// joining all components. +func uint64FromFile(base string, path ...string) (uint64, error) { + l := filepath.Join(path...) + p := filepath.Join(base, l) + if !strings.HasPrefix(p, base) { + return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput) + } + + data, err := os.ReadFile(p) + if err != nil { + return 0, fmt.Errorf("reading file %s: %w", p, err) + } + + et := bytes.TrimSpace(data) + return strconv.ParseUint(string(et), 10, 64) +} + +// Probe BPF perf link. +// +// https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307 +// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e +var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error { + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Name: "probe_bpf_perf_link", + Type: ebpf.Kprobe, + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + License: "MIT", + }) + if err != nil { + return err + } + defer prog.Close() + + _, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ + ProgFd: uint32(prog.FD()), + AttachType: sys.BPF_PERF_EVENT, + }) + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + if errors.Is(err, unix.EBADF) { + return nil + } + return err +}) + +// isValidTraceID implements the equivalent of a regex match +// against "^[a-zA-Z_][0-9a-zA-Z_]*$". +// +// Trace event groups, names and kernel symbols must adhere to this set +// of characters. Non-empty, first character must not be a number, all +// characters must be alphanumeric or underscore. +func isValidTraceID(s string) bool { + if len(s) < 1 { + return false + } + for i, c := range []byte(s) { + switch { + case c >= 'a' && c <= 'z': + case c >= 'A' && c <= 'Z': + case c == '_': + case i > 0 && c >= '0' && c <= '9': + + default: + return false + } + } + + return true +} diff --git a/vendor/github.com/cilium/ebpf/link/platform.go b/vendor/github.com/cilium/ebpf/link/platform.go new file mode 100644 index 000000000..eb6f7b7a3 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/platform.go @@ -0,0 +1,25 @@ +package link + +import ( + "fmt" + "runtime" +) + +func platformPrefix(symbol string) string { + + prefix := runtime.GOARCH + + // per https://github.com/golang/go/blob/master/src/go/build/syslist.go + switch prefix { + case "386": + prefix = "ia32" + case "amd64", "amd64p32": + prefix = "x64" + case "arm64", "arm64be": + prefix = "arm64" + default: + return symbol + } + + return fmt.Sprintf("__%s_%s", prefix, symbol) +} diff --git a/vendor/github.com/cilium/ebpf/link/program.go b/vendor/github.com/cilium/ebpf/link/program.go new file mode 100644 index 000000000..ea3181737 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/program.go @@ -0,0 +1,76 @@ +package link + +import ( + "fmt" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +type RawAttachProgramOptions struct { + // File descriptor to attach to. This differs for each attach type. + Target int + // Program to attach. + Program *ebpf.Program + // Program to replace (cgroups). + Replace *ebpf.Program + // Attach must match the attach type of Program (and Replace). + Attach ebpf.AttachType + // Flags control the attach behaviour. This differs for each attach type. + Flags uint32 +} + +// RawAttachProgram is a low level wrapper around BPF_PROG_ATTACH. +// +// You should use one of the higher level abstractions available in this +// package if possible. +func RawAttachProgram(opts RawAttachProgramOptions) error { + if err := haveProgAttach(); err != nil { + return err + } + + var replaceFd uint32 + if opts.Replace != nil { + replaceFd = uint32(opts.Replace.FD()) + } + + attr := sys.ProgAttachAttr{ + TargetFd: uint32(opts.Target), + AttachBpfFd: uint32(opts.Program.FD()), + ReplaceBpfFd: replaceFd, + AttachType: uint32(opts.Attach), + AttachFlags: uint32(opts.Flags), + } + + if err := sys.ProgAttach(&attr); err != nil { + return fmt.Errorf("can't attach program: %w", err) + } + return nil +} + +type RawDetachProgramOptions struct { + Target int + Program *ebpf.Program + Attach ebpf.AttachType +} + +// RawDetachProgram is a low level wrapper around BPF_PROG_DETACH. +// +// You should use one of the higher level abstractions available in this +// package if possible. +func RawDetachProgram(opts RawDetachProgramOptions) error { + if err := haveProgAttach(); err != nil { + return err + } + + attr := sys.ProgDetachAttr{ + TargetFd: uint32(opts.Target), + AttachBpfFd: uint32(opts.Program.FD()), + AttachType: uint32(opts.Attach), + } + if err := sys.ProgDetach(&attr); err != nil { + return fmt.Errorf("can't detach program: %w", err) + } + + return nil +} diff --git a/vendor/github.com/cilium/ebpf/link/raw_tracepoint.go b/vendor/github.com/cilium/ebpf/link/raw_tracepoint.go new file mode 100644 index 000000000..925e621cb --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/raw_tracepoint.go @@ -0,0 +1,87 @@ +package link + +import ( + "errors" + "fmt" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +type RawTracepointOptions struct { + // Tracepoint name. + Name string + // Program must be of type RawTracepoint* + Program *ebpf.Program +} + +// AttachRawTracepoint links a BPF program to a raw_tracepoint. +// +// Requires at least Linux 4.17. +func AttachRawTracepoint(opts RawTracepointOptions) (Link, error) { + if t := opts.Program.Type(); t != ebpf.RawTracepoint && t != ebpf.RawTracepointWritable { + return nil, fmt.Errorf("invalid program type %s, expected RawTracepoint(Writable)", t) + } + if opts.Program.FD() < 0 { + return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) + } + + fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{ + Name: sys.NewStringPointer(opts.Name), + ProgFd: uint32(opts.Program.FD()), + }) + if err != nil { + return nil, err + } + + err = haveBPFLink() + if errors.Is(err, ErrNotSupported) { + // Prior to commit 70ed506c3bbc ("bpf: Introduce pinnable bpf_link abstraction") + // raw_tracepoints are just a plain fd. + return &simpleRawTracepoint{fd}, nil + } + + if err != nil { + return nil, err + } + + return &rawTracepoint{RawLink{fd: fd}}, nil +} + +type simpleRawTracepoint struct { + fd *sys.FD +} + +var _ Link = (*simpleRawTracepoint)(nil) + +func (frt *simpleRawTracepoint) isLink() {} + +func (frt *simpleRawTracepoint) Close() error { + return frt.fd.Close() +} + +func (frt *simpleRawTracepoint) Update(_ *ebpf.Program) error { + return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported) +} + +func (frt *simpleRawTracepoint) Pin(string) error { + return fmt.Errorf("pin raw_tracepoint: %w", ErrNotSupported) +} + +func (frt *simpleRawTracepoint) Unpin() error { + return fmt.Errorf("unpin raw_tracepoint: %w", ErrNotSupported) +} + +func (frt *simpleRawTracepoint) Info() (*Info, error) { + return nil, fmt.Errorf("can't get raw_tracepoint info: %w", ErrNotSupported) +} + +type rawTracepoint struct { + RawLink +} + +var _ Link = (*rawTracepoint)(nil) + +func (rt *rawTracepoint) Update(_ *ebpf.Program) error { + return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported) +} diff --git a/vendor/github.com/cilium/ebpf/link/socket_filter.go b/vendor/github.com/cilium/ebpf/link/socket_filter.go new file mode 100644 index 000000000..94f3958cc --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/socket_filter.go @@ -0,0 +1,40 @@ +package link + +import ( + "syscall" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/unix" +) + +// AttachSocketFilter attaches a SocketFilter BPF program to a socket. +func AttachSocketFilter(conn syscall.Conn, program *ebpf.Program) error { + rawConn, err := conn.SyscallConn() + if err != nil { + return err + } + var ssoErr error + err = rawConn.Control(func(fd uintptr) { + ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD()) + }) + if ssoErr != nil { + return ssoErr + } + return err +} + +// DetachSocketFilter detaches a SocketFilter BPF program from a socket. +func DetachSocketFilter(conn syscall.Conn) error { + rawConn, err := conn.SyscallConn() + if err != nil { + return err + } + var ssoErr error + err = rawConn.Control(func(fd uintptr) { + ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0) + }) + if ssoErr != nil { + return ssoErr + } + return err +} diff --git a/vendor/github.com/cilium/ebpf/link/syscalls.go b/vendor/github.com/cilium/ebpf/link/syscalls.go new file mode 100644 index 000000000..a661395b3 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/syscalls.go @@ -0,0 +1,103 @@ +package link + +import ( + "errors" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// Type is the kind of link. +type Type = sys.LinkType + +// Valid link types. +const ( + UnspecifiedType = sys.BPF_LINK_TYPE_UNSPEC + RawTracepointType = sys.BPF_LINK_TYPE_RAW_TRACEPOINT + TracingType = sys.BPF_LINK_TYPE_TRACING + CgroupType = sys.BPF_LINK_TYPE_CGROUP + IterType = sys.BPF_LINK_TYPE_ITER + NetNsType = sys.BPF_LINK_TYPE_NETNS + XDPType = sys.BPF_LINK_TYPE_XDP + PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT +) + +var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error { + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Type: ebpf.CGroupSKB, + License: "MIT", + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + }) + if err != nil { + return internal.ErrNotSupported + } + + // BPF_PROG_ATTACH was introduced at the same time as CGgroupSKB, + // so being able to load the program is enough to infer that we + // have the syscall. + prog.Close() + return nil +}) + +var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replacement", "5.5", func() error { + if err := haveProgAttach(); err != nil { + return err + } + + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Type: ebpf.CGroupSKB, + AttachType: ebpf.AttachCGroupInetIngress, + License: "MIT", + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + }) + if err != nil { + return internal.ErrNotSupported + } + defer prog.Close() + + // We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs. + // If passing BPF_F_REPLACE gives us EINVAL we know that the feature isn't + // present. + attr := sys.ProgAttachAttr{ + // We rely on this being checked after attachFlags. + TargetFd: ^uint32(0), + AttachBpfFd: uint32(prog.FD()), + AttachType: uint32(ebpf.AttachCGroupInetIngress), + AttachFlags: uint32(flagReplace), + } + + err = sys.ProgAttach(&attr) + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + if errors.Is(err, unix.EBADF) { + return nil + } + return err +}) + +var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error { + attr := sys.LinkCreateAttr{ + // This is a hopefully invalid file descriptor, which triggers EBADF. + TargetFd: ^uint32(0), + ProgFd: ^uint32(0), + AttachType: sys.AttachType(ebpf.AttachCGroupInetIngress), + } + _, err := sys.LinkCreate(&attr) + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + if errors.Is(err, unix.EBADF) { + return nil + } + return err +}) diff --git a/vendor/github.com/cilium/ebpf/link/tracepoint.go b/vendor/github.com/cilium/ebpf/link/tracepoint.go new file mode 100644 index 000000000..a59ef9d1c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/tracepoint.go @@ -0,0 +1,77 @@ +package link + +import ( + "fmt" + + "github.com/cilium/ebpf" +) + +// TracepointOptions defines additional parameters that will be used +// when loading Tracepoints. +type TracepointOptions struct { + // Arbitrary value that can be fetched from an eBPF program + // via `bpf_get_attach_cookie()`. + // + // Needs kernel 5.15+. + Cookie uint64 +} + +// Tracepoint attaches the given eBPF program to the tracepoint with the given +// group and name. See /sys/kernel/debug/tracing/events to find available +// tracepoints. The top-level directory is the group, the event's subdirectory +// is the name. Example: +// +// tp, err := Tracepoint("syscalls", "sys_enter_fork", prog, nil) +// +// Losing the reference to the resulting Link (tp) will close the Tracepoint +// and prevent further execution of prog. The Link must be Closed during +// program shutdown to avoid leaking system resources. +// +// Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is +// only possible as of kernel 4.14 (commit cf5f5ce). +func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions) (Link, error) { + if group == "" || name == "" { + return nil, fmt.Errorf("group and name cannot be empty: %w", errInvalidInput) + } + if prog == nil { + return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) + } + if !isValidTraceID(group) || !isValidTraceID(name) { + return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput) + } + if prog.Type() != ebpf.TracePoint { + return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput) + } + + tid, err := getTraceEventID(group, name) + if err != nil { + return nil, err + } + + fd, err := openTracepointPerfEvent(tid, perfAllThreads) + if err != nil { + return nil, err + } + + var cookie uint64 + if opts != nil { + cookie = opts.Cookie + } + + pe := &perfEvent{ + typ: tracepointEvent, + group: group, + name: name, + tracefsID: tid, + cookie: cookie, + fd: fd, + } + + lnk, err := attachPerfEvent(pe, prog) + if err != nil { + pe.Close() + return nil, err + } + + return lnk, nil +} diff --git a/vendor/github.com/cilium/ebpf/link/tracing.go b/vendor/github.com/cilium/ebpf/link/tracing.go new file mode 100644 index 000000000..e47e61a3b --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/tracing.go @@ -0,0 +1,141 @@ +package link + +import ( + "fmt" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal/sys" +) + +type tracing struct { + RawLink +} + +func (f *tracing) Update(new *ebpf.Program) error { + return fmt.Errorf("tracing update: %w", ErrNotSupported) +} + +// AttachFreplace attaches the given eBPF program to the function it replaces. +// +// The program and name can either be provided at link time, or can be provided +// at program load time. If they were provided at load time, they should be nil +// and empty respectively here, as they will be ignored by the kernel. +// Examples: +// +// AttachFreplace(dispatcher, "function", replacement) +// AttachFreplace(nil, "", replacement) +func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) (Link, error) { + if (name == "") != (targetProg == nil) { + return nil, fmt.Errorf("must provide both or neither of name and targetProg: %w", errInvalidInput) + } + if prog == nil { + return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) + } + if prog.Type() != ebpf.Extension { + return nil, fmt.Errorf("eBPF program type %s is not an Extension: %w", prog.Type(), errInvalidInput) + } + + var ( + target int + typeID btf.TypeID + ) + if targetProg != nil { + btfHandle, err := targetProg.Handle() + if err != nil { + return nil, err + } + defer btfHandle.Close() + + spec, err := btfHandle.Spec(nil) + if err != nil { + return nil, err + } + + var function *btf.Func + if err := spec.TypeByName(name, &function); err != nil { + return nil, err + } + + target = targetProg.FD() + typeID, err = spec.TypeID(function) + if err != nil { + return nil, err + } + } + + link, err := AttachRawLink(RawLinkOptions{ + Target: target, + Program: prog, + Attach: ebpf.AttachNone, + BTF: typeID, + }) + if err != nil { + return nil, err + } + + return &tracing{*link}, nil +} + +type TracingOptions struct { + // Program must be of type Tracing with attach type + // AttachTraceFEntry/AttachTraceFExit/AttachModifyReturn or + // AttachTraceRawTp. + Program *ebpf.Program +} + +type LSMOptions struct { + // Program must be of type LSM with attach type + // AttachLSMMac. + Program *ebpf.Program +} + +// attachBTFID links all BPF program types (Tracing/LSM) that they attach to a btf_id. +func attachBTFID(program *ebpf.Program) (Link, error) { + if program.FD() < 0 { + return nil, fmt.Errorf("invalid program %w", sys.ErrClosedFd) + } + + fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{ + ProgFd: uint32(program.FD()), + }) + if err != nil { + return nil, err + } + + raw := RawLink{fd: fd} + info, err := raw.Info() + if err != nil { + raw.Close() + return nil, err + } + + if info.Type == RawTracepointType { + // Sadness upon sadness: a Tracing program with AttachRawTp returns + // a raw_tracepoint link. Other types return a tracing link. + return &rawTracepoint{raw}, nil + } + + return &tracing{RawLink: RawLink{fd: fd}}, nil +} + +// AttachTracing links a tracing (fentry/fexit/fmod_ret) BPF program or +// a BTF-powered raw tracepoint (tp_btf) BPF Program to a BPF hook defined +// in kernel modules. +func AttachTracing(opts TracingOptions) (Link, error) { + if t := opts.Program.Type(); t != ebpf.Tracing { + return nil, fmt.Errorf("invalid program type %s, expected Tracing", t) + } + + return attachBTFID(opts.Program) +} + +// AttachLSM links a Linux security module (LSM) BPF Program to a BPF +// hook defined in kernel modules. +func AttachLSM(opts LSMOptions) (Link, error) { + if t := opts.Program.Type(); t != ebpf.LSM { + return nil, fmt.Errorf("invalid program type %s, expected LSM", t) + } + + return attachBTFID(opts.Program) +} diff --git a/vendor/github.com/cilium/ebpf/link/uprobe.go b/vendor/github.com/cilium/ebpf/link/uprobe.go new file mode 100644 index 000000000..edf925b57 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/uprobe.go @@ -0,0 +1,373 @@ +package link + +import ( + "debug/elf" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal" +) + +var ( + uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events") + + uprobeRetprobeBit = struct { + once sync.Once + value uint64 + err error + }{} + + uprobeRefCtrOffsetPMUPath = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset" + // elixir.bootlin.com/linux/v5.15-rc7/source/kernel/events/core.c#L9799 + uprobeRefCtrOffsetShift = 32 + haveRefCtrOffsetPMU = internal.FeatureTest("RefCtrOffsetPMU", "4.20", func() error { + _, err := os.Stat(uprobeRefCtrOffsetPMUPath) + if err != nil { + return internal.ErrNotSupported + } + return nil + }) + + // ErrNoSymbol indicates that the given symbol was not found + // in the ELF symbols table. + ErrNoSymbol = errors.New("not found") +) + +// Executable defines an executable program on the filesystem. +type Executable struct { + // Path of the executable on the filesystem. + path string + // Parsed ELF and dynamic symbols' addresses. + addresses map[string]uint64 +} + +// UprobeOptions defines additional parameters that will be used +// when loading Uprobes. +type UprobeOptions struct { + // Symbol address. Must be provided in case of external symbols (shared libs). + // If set, overrides the address eventually parsed from the executable. + Address uint64 + // The offset relative to given symbol. Useful when tracing an arbitrary point + // inside the frame of given symbol. + // + // Note: this field changed from being an absolute offset to being relative + // to Address. + Offset uint64 + // Only set the uprobe on the given process ID. Useful when tracing + // shared library calls or programs that have many running instances. + PID int + // Automatically manage SDT reference counts (semaphores). + // + // If this field is set, the Kernel will increment/decrement the + // semaphore located in the process memory at the provided address on + // probe attach/detach. + // + // See also: + // sourceware.org/systemtap/wiki/UserSpaceProbeImplementation (Semaphore Handling) + // github.com/torvalds/linux/commit/1cc33161a83d + // github.com/torvalds/linux/commit/a6ca88b241d5 + RefCtrOffset uint64 + // Arbitrary value that can be fetched from an eBPF program + // via `bpf_get_attach_cookie()`. + // + // Needs kernel 5.15+. + Cookie uint64 +} + +// To open a new Executable, use: +// +// OpenExecutable("/bin/bash") +// +// The returned value can then be used to open Uprobe(s). +func OpenExecutable(path string) (*Executable, error) { + if path == "" { + return nil, fmt.Errorf("path cannot be empty") + } + + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("open file '%s': %w", path, err) + } + defer f.Close() + + se, err := internal.NewSafeELFFile(f) + if err != nil { + return nil, fmt.Errorf("parse ELF file: %w", err) + } + + if se.Type != elf.ET_EXEC && se.Type != elf.ET_DYN { + // ELF is not an executable or a shared object. + return nil, errors.New("the given file is not an executable or a shared object") + } + + ex := Executable{ + path: path, + addresses: make(map[string]uint64), + } + + if err := ex.load(se); err != nil { + return nil, err + } + + return &ex, nil +} + +func (ex *Executable) load(f *internal.SafeELFFile) error { + syms, err := f.Symbols() + if err != nil && !errors.Is(err, elf.ErrNoSymbols) { + return err + } + + dynsyms, err := f.DynamicSymbols() + if err != nil && !errors.Is(err, elf.ErrNoSymbols) { + return err + } + + syms = append(syms, dynsyms...) + + for _, s := range syms { + if elf.ST_TYPE(s.Info) != elf.STT_FUNC { + // Symbol not associated with a function or other executable code. + continue + } + + address := s.Value + + // Loop over ELF segments. + for _, prog := range f.Progs { + // Skip uninteresting segments. + if prog.Type != elf.PT_LOAD || (prog.Flags&elf.PF_X) == 0 { + continue + } + + if prog.Vaddr <= s.Value && s.Value < (prog.Vaddr+prog.Memsz) { + // If the symbol value is contained in the segment, calculate + // the symbol offset. + // + // fn symbol offset = fn symbol VA - .text VA + .text offset + // + // stackoverflow.com/a/40249502 + address = s.Value - prog.Vaddr + prog.Off + break + } + } + + ex.addresses[s.Name] = address + } + + return nil +} + +// address calculates the address of a symbol in the executable. +// +// opts must not be nil. +func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error) { + if opts.Address > 0 { + return opts.Address + opts.Offset, nil + } + + address, ok := ex.addresses[symbol] + if !ok { + return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol) + } + + // Symbols with location 0 from section undef are shared library calls and + // are relocated before the binary is executed. Dynamic linking is not + // implemented by the library, so mark this as unsupported for now. + // + // Since only offset values are stored and not elf.Symbol, if the value is 0, + // assume it's an external symbol. + if address == 0 { + return 0, fmt.Errorf("cannot resolve %s library call '%s': %w "+ + "(consider providing UprobeOptions.Address)", ex.path, symbol, ErrNotSupported) + } + + return address + opts.Offset, nil +} + +// Uprobe attaches the given eBPF program to a perf event that fires when the +// given symbol starts executing in the given Executable. +// For example, /bin/bash::main(): +// +// ex, _ = OpenExecutable("/bin/bash") +// ex.Uprobe("main", prog, nil) +// +// When using symbols which belongs to shared libraries, +// an offset must be provided via options: +// +// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123}) +// +// Note: Setting the Offset field in the options supersedes the symbol's offset. +// +// Losing the reference to the resulting Link (up) will close the Uprobe +// and prevent further execution of prog. The Link must be Closed during +// program shutdown to avoid leaking system resources. +// +// Functions provided by shared libraries can currently not be traced and +// will result in an ErrNotSupported. +func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) { + u, err := ex.uprobe(symbol, prog, opts, false) + if err != nil { + return nil, err + } + + lnk, err := attachPerfEvent(u, prog) + if err != nil { + u.Close() + return nil, err + } + + return lnk, nil +} + +// Uretprobe attaches the given eBPF program to a perf event that fires right +// before the given symbol exits. For example, /bin/bash::main(): +// +// ex, _ = OpenExecutable("/bin/bash") +// ex.Uretprobe("main", prog, nil) +// +// When using symbols which belongs to shared libraries, +// an offset must be provided via options: +// +// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123}) +// +// Note: Setting the Offset field in the options supersedes the symbol's offset. +// +// Losing the reference to the resulting Link (up) will close the Uprobe +// and prevent further execution of prog. The Link must be Closed during +// program shutdown to avoid leaking system resources. +// +// Functions provided by shared libraries can currently not be traced and +// will result in an ErrNotSupported. +func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) { + u, err := ex.uprobe(symbol, prog, opts, true) + if err != nil { + return nil, err + } + + lnk, err := attachPerfEvent(u, prog) + if err != nil { + u.Close() + return nil, err + } + + return lnk, nil +} + +// uprobe opens a perf event for the given binary/symbol and attaches prog to it. +// If ret is true, create a uretprobe. +func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions, ret bool) (*perfEvent, error) { + if prog == nil { + return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) + } + if prog.Type() != ebpf.Kprobe { + return nil, fmt.Errorf("eBPF program type %s is not Kprobe: %w", prog.Type(), errInvalidInput) + } + if opts == nil { + opts = &UprobeOptions{} + } + + offset, err := ex.address(symbol, opts) + if err != nil { + return nil, err + } + + pid := opts.PID + if pid == 0 { + pid = perfAllThreads + } + + if opts.RefCtrOffset != 0 { + if err := haveRefCtrOffsetPMU(); err != nil { + return nil, fmt.Errorf("uprobe ref_ctr_offset: %w", err) + } + } + + args := probeArgs{ + symbol: symbol, + path: ex.path, + offset: offset, + pid: pid, + refCtrOffset: opts.RefCtrOffset, + ret: ret, + cookie: opts.Cookie, + } + + // Use uprobe PMU if the kernel has it available. + tp, err := pmuUprobe(args) + if err == nil { + return tp, nil + } + if err != nil && !errors.Is(err, ErrNotSupported) { + return nil, fmt.Errorf("creating perf_uprobe PMU: %w", err) + } + + // Use tracefs if uprobe PMU is missing. + args.symbol = sanitizeSymbol(symbol) + tp, err = tracefsUprobe(args) + if err != nil { + return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err) + } + + return tp, nil +} + +// pmuUprobe opens a perf event based on the uprobe PMU. +func pmuUprobe(args probeArgs) (*perfEvent, error) { + return pmuProbe(uprobeType, args) +} + +// tracefsUprobe creates a Uprobe tracefs entry. +func tracefsUprobe(args probeArgs) (*perfEvent, error) { + return tracefsProbe(uprobeType, args) +} + +// sanitizeSymbol replaces every invalid character for the tracefs api with an underscore. +// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_"). +func sanitizeSymbol(s string) string { + var b strings.Builder + b.Grow(len(s)) + var skip bool + for _, c := range []byte(s) { + switch { + case c >= 'a' && c <= 'z', + c >= 'A' && c <= 'Z', + c >= '0' && c <= '9': + skip = false + b.WriteByte(c) + + default: + if !skip { + b.WriteByte('_') + skip = true + } + } + } + + return b.String() +} + +// uprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api. +func uprobeToken(args probeArgs) string { + po := fmt.Sprintf("%s:%#x", args.path, args.offset) + + if args.refCtrOffset != 0 { + // This is not documented in Documentation/trace/uprobetracer.txt. + // elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564 + po += fmt.Sprintf("(%#x)", args.refCtrOffset) + } + + return po +} + +func uretprobeBit() (uint64, error) { + uprobeRetprobeBit.once.Do(func() { + uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType) + }) + return uprobeRetprobeBit.value, uprobeRetprobeBit.err +} diff --git a/vendor/github.com/cilium/ebpf/link/xdp.go b/vendor/github.com/cilium/ebpf/link/xdp.go new file mode 100644 index 000000000..aa8dd3a4c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/xdp.go @@ -0,0 +1,54 @@ +package link + +import ( + "fmt" + + "github.com/cilium/ebpf" +) + +// XDPAttachFlags represents how XDP program will be attached to interface. +type XDPAttachFlags uint32 + +const ( + // XDPGenericMode (SKB) links XDP BPF program for drivers which do + // not yet support native XDP. + XDPGenericMode XDPAttachFlags = 1 << (iota + 1) + // XDPDriverMode links XDP BPF program into the driver’s receive path. + XDPDriverMode + // XDPOffloadMode offloads the entire XDP BPF program into hardware. + XDPOffloadMode +) + +type XDPOptions struct { + // Program must be an XDP BPF program. + Program *ebpf.Program + + // Interface is the interface index to attach program to. + Interface int + + // Flags is one of XDPAttachFlags (optional). + // + // Only one XDP mode should be set, without flag defaults + // to driver/generic mode (best effort). + Flags XDPAttachFlags +} + +// AttachXDP links an XDP BPF program to an XDP hook. +func AttachXDP(opts XDPOptions) (Link, error) { + if t := opts.Program.Type(); t != ebpf.XDP { + return nil, fmt.Errorf("invalid program type %s, expected XDP", t) + } + + if opts.Interface < 1 { + return nil, fmt.Errorf("invalid interface index: %d", opts.Interface) + } + + rawLink, err := AttachRawLink(RawLinkOptions{ + Program: opts.Program, + Attach: ebpf.AttachXDP, + Target: opts.Interface, + Flags: uint32(opts.Flags), + }) + + return rawLink, err +} diff --git a/vendor/github.com/cilium/ebpf/linker.go b/vendor/github.com/cilium/ebpf/linker.go new file mode 100644 index 000000000..e6276b182 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/linker.go @@ -0,0 +1,238 @@ +package ebpf + +import ( + "errors" + "fmt" + "sync" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/btf" +) + +// splitSymbols splits insns into subsections delimited by Symbol Instructions. +// insns cannot be empty and must start with a Symbol Instruction. +// +// The resulting map is indexed by Symbol name. +func splitSymbols(insns asm.Instructions) (map[string]asm.Instructions, error) { + if len(insns) == 0 { + return nil, errors.New("insns is empty") + } + + if insns[0].Symbol() == "" { + return nil, errors.New("insns must start with a Symbol") + } + + var name string + progs := make(map[string]asm.Instructions) + for _, ins := range insns { + if sym := ins.Symbol(); sym != "" { + if progs[sym] != nil { + return nil, fmt.Errorf("insns contains duplicate Symbol %s", sym) + } + name = sym + } + + progs[name] = append(progs[name], ins) + } + + return progs, nil +} + +// The linker is responsible for resolving bpf-to-bpf calls between programs +// within an ELF. Each BPF program must be a self-contained binary blob, +// so when an instruction in one ELF program section wants to jump to +// a function in another, the linker needs to pull in the bytecode +// (and BTF info) of the target function and concatenate the instruction +// streams. +// +// Later on in the pipeline, all call sites are fixed up with relative jumps +// within this newly-created instruction stream to then finally hand off to +// the kernel with BPF_PROG_LOAD. +// +// Each function is denoted by an ELF symbol and the compiler takes care of +// register setup before each jump instruction. + +// hasFunctionReferences returns true if insns contains one or more bpf2bpf +// function references. +func hasFunctionReferences(insns asm.Instructions) bool { + for _, i := range insns { + if i.IsFunctionReference() { + return true + } + } + return false +} + +// applyRelocations collects and applies any CO-RE relocations in insns. +// +// Passing a nil target will relocate against the running kernel. insns are +// modified in place. +func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error { + var relos []*btf.CORERelocation + var reloInsns []*asm.Instruction + iter := insns.Iterate() + for iter.Next() { + if relo := btf.CORERelocationMetadata(iter.Ins); relo != nil { + relos = append(relos, relo) + reloInsns = append(reloInsns, iter.Ins) + } + } + + if len(relos) == 0 { + return nil + } + + target, err := maybeLoadKernelBTF(target) + if err != nil { + return err + } + + fixups, err := btf.CORERelocate(local, target, relos) + if err != nil { + return err + } + + for i, fixup := range fixups { + if err := fixup.Apply(reloInsns[i]); err != nil { + return fmt.Errorf("apply fixup %s: %w", &fixup, err) + } + } + + return nil +} + +// flattenPrograms resolves bpf-to-bpf calls for a set of programs. +// +// Links all programs in names by modifying their ProgramSpec in progs. +func flattenPrograms(progs map[string]*ProgramSpec, names []string) { + // Pre-calculate all function references. + refs := make(map[*ProgramSpec][]string) + for _, prog := range progs { + refs[prog] = prog.Instructions.FunctionReferences() + } + + // Create a flattened instruction stream, but don't modify progs yet to + // avoid linking multiple times. + flattened := make([]asm.Instructions, 0, len(names)) + for _, name := range names { + flattened = append(flattened, flattenInstructions(name, progs, refs)) + } + + // Finally, assign the flattened instructions. + for i, name := range names { + progs[name].Instructions = flattened[i] + } +} + +// flattenInstructions resolves bpf-to-bpf calls for a single program. +// +// Flattens the instructions of prog by concatenating the instructions of all +// direct and indirect dependencies. +// +// progs contains all referenceable programs, while refs contain the direct +// dependencies of each program. +func flattenInstructions(name string, progs map[string]*ProgramSpec, refs map[*ProgramSpec][]string) asm.Instructions { + prog := progs[name] + + insns := make(asm.Instructions, len(prog.Instructions)) + copy(insns, prog.Instructions) + + // Add all direct references of prog to the list of to be linked programs. + pending := make([]string, len(refs[prog])) + copy(pending, refs[prog]) + + // All references for which we've appended instructions. + linked := make(map[string]bool) + + // Iterate all pending references. We can't use a range since pending is + // modified in the body below. + for len(pending) > 0 { + var ref string + ref, pending = pending[0], pending[1:] + + if linked[ref] { + // We've already linked this ref, don't append instructions again. + continue + } + + progRef := progs[ref] + if progRef == nil { + // We don't have instructions that go with this reference. This + // happens when calling extern functions. + continue + } + + insns = append(insns, progRef.Instructions...) + linked[ref] = true + + // Make sure we link indirect references. + pending = append(pending, refs[progRef]...) + } + + return insns +} + +// fixupAndValidate is called by the ELF reader right before marshaling the +// instruction stream. It performs last-minute adjustments to the program and +// runs some sanity checks before sending it off to the kernel. +func fixupAndValidate(insns asm.Instructions) error { + iter := insns.Iterate() + for iter.Next() { + ins := iter.Ins + + // Map load was tagged with a Reference, but does not contain a Map pointer. + if ins.IsLoadFromMap() && ins.Reference() != "" && ins.Map() == nil { + return fmt.Errorf("instruction %d: map %s: %w", iter.Index, ins.Reference(), asm.ErrUnsatisfiedMapReference) + } + + fixupProbeReadKernel(ins) + } + + return nil +} + +// fixupProbeReadKernel replaces calls to bpf_probe_read_{kernel,user}(_str) +// with bpf_probe_read(_str) on kernels that don't support it yet. +func fixupProbeReadKernel(ins *asm.Instruction) { + if !ins.IsBuiltinCall() { + return + } + + // Kernel supports bpf_probe_read_kernel, nothing to do. + if haveProbeReadKernel() == nil { + return + } + + switch asm.BuiltinFunc(ins.Constant) { + case asm.FnProbeReadKernel, asm.FnProbeReadUser: + ins.Constant = int64(asm.FnProbeRead) + case asm.FnProbeReadKernelStr, asm.FnProbeReadUserStr: + ins.Constant = int64(asm.FnProbeReadStr) + } +} + +var kernelBTF struct { + sync.Mutex + spec *btf.Spec +} + +// maybeLoadKernelBTF loads the current kernel's BTF if spec is nil, otherwise +// it returns spec unchanged. +// +// The kernel BTF is cached for the lifetime of the process. +func maybeLoadKernelBTF(spec *btf.Spec) (*btf.Spec, error) { + if spec != nil { + return spec, nil + } + + kernelBTF.Lock() + defer kernelBTF.Unlock() + + if kernelBTF.spec != nil { + return kernelBTF.spec, nil + } + + var err error + kernelBTF.spec, err = btf.LoadKernelSpec() + return kernelBTF.spec, err +} diff --git a/vendor/github.com/cilium/ebpf/map.go b/vendor/github.com/cilium/ebpf/map.go new file mode 100644 index 000000000..e4a6c87e9 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/map.go @@ -0,0 +1,1424 @@ +package ebpf + +import ( + "bytes" + "errors" + "fmt" + "io" + "math/rand" + "path/filepath" + "reflect" + "time" + "unsafe" + + "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// Errors returned by Map and MapIterator methods. +var ( + ErrKeyNotExist = errors.New("key does not exist") + ErrKeyExist = errors.New("key already exists") + ErrIterationAborted = errors.New("iteration aborted") + ErrMapIncompatible = errors.New("map spec is incompatible with existing map") + errMapNoBTFValue = errors.New("map spec does not contain a BTF Value") +) + +// MapOptions control loading a map into the kernel. +type MapOptions struct { + // The base path to pin maps in if requested via PinByName. + // Existing maps will be re-used if they are compatible, otherwise an + // error is returned. + PinPath string + LoadPinOptions LoadPinOptions +} + +// MapID represents the unique ID of an eBPF map +type MapID uint32 + +// MapSpec defines a Map. +type MapSpec struct { + // Name is passed to the kernel as a debug aid. Must only contain + // alpha numeric and '_' characters. + Name string + Type MapType + KeySize uint32 + ValueSize uint32 + MaxEntries uint32 + + // Flags is passed to the kernel and specifies additional map + // creation attributes. + Flags uint32 + + // Automatically pin and load a map from MapOptions.PinPath. + // Generates an error if an existing pinned map is incompatible with the MapSpec. + Pinning PinType + + // Specify numa node during map creation + // (effective only if unix.BPF_F_NUMA_NODE flag is set, + // which can be imported from golang.org/x/sys/unix) + NumaNode uint32 + + // The initial contents of the map. May be nil. + Contents []MapKV + + // Whether to freeze a map after setting its initial contents. + Freeze bool + + // InnerMap is used as a template for ArrayOfMaps and HashOfMaps + InnerMap *MapSpec + + // Extra trailing bytes found in the ELF map definition when using structs + // larger than libbpf's bpf_map_def. nil if no trailing bytes were present. + // Must be nil or empty before instantiating the MapSpec into a Map. + Extra *bytes.Reader + + // The key and value type of this map. May be nil. + Key, Value btf.Type + + // The BTF associated with this map. + BTF *btf.Spec +} + +func (ms *MapSpec) String() string { + return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags) +} + +// Copy returns a copy of the spec. +// +// MapSpec.Contents is a shallow copy. +func (ms *MapSpec) Copy() *MapSpec { + if ms == nil { + return nil + } + + cpy := *ms + + cpy.Contents = make([]MapKV, len(ms.Contents)) + copy(cpy.Contents, ms.Contents) + + cpy.InnerMap = ms.InnerMap.Copy() + + return &cpy +} + +// hasBTF returns true if the MapSpec has a valid BTF spec and if its +// map type supports associated BTF metadata in the kernel. +func (ms *MapSpec) hasBTF() bool { + return ms.BTF != nil && ms.Type.hasBTF() +} + +func (ms *MapSpec) clampPerfEventArraySize() error { + if ms.Type != PerfEventArray { + return nil + } + + n, err := internal.PossibleCPUs() + if err != nil { + return fmt.Errorf("perf event array: %w", err) + } + + if n := uint32(n); ms.MaxEntries > n { + ms.MaxEntries = n + } + + return nil +} + +// dataSection returns the contents and BTF Datasec descriptor of the spec. +func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) { + + if ms.Value == nil { + return nil, nil, errMapNoBTFValue + } + + ds, ok := ms.Value.(*btf.Datasec) + if !ok { + return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value) + } + + if n := len(ms.Contents); n != 1 { + return nil, nil, fmt.Errorf("expected one key, found %d", n) + } + + kv := ms.Contents[0] + value, ok := kv.Value.([]byte) + if !ok { + return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value) + } + + return value, ds, nil +} + +// MapKV is used to initialize the contents of a Map. +type MapKV struct { + Key interface{} + Value interface{} +} + +func (ms *MapSpec) checkCompatibility(m *Map) error { + switch { + case m.typ != ms.Type: + return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible) + + case m.keySize != ms.KeySize: + return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible) + + case m.valueSize != ms.ValueSize: + return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible) + + case !(ms.Type == PerfEventArray && ms.MaxEntries == 0) && + m.maxEntries != ms.MaxEntries: + return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible) + + case m.flags != ms.Flags: + return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible) + } + return nil +} + +// Map represents a Map file descriptor. +// +// It is not safe to close a map which is used by other goroutines. +// +// Methods which take interface{} arguments by default encode +// them using binary.Read/Write in the machine's native endianness. +// +// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler +// if you require custom encoding. +type Map struct { + name string + fd *sys.FD + typ MapType + keySize uint32 + valueSize uint32 + maxEntries uint32 + flags uint32 + pinnedPath string + // Per CPU maps return values larger than the size in the spec + fullValueSize int +} + +// NewMapFromFD creates a map from a raw fd. +// +// You should not use fd after calling this function. +func NewMapFromFD(fd int) (*Map, error) { + f, err := sys.NewFD(fd) + if err != nil { + return nil, err + } + + return newMapFromFD(f) +} + +func newMapFromFD(fd *sys.FD) (*Map, error) { + info, err := newMapInfoFromFd(fd) + if err != nil { + fd.Close() + return nil, fmt.Errorf("get map info: %w", err) + } + + return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags) +} + +// NewMap creates a new Map. +// +// It's equivalent to calling NewMapWithOptions with default options. +func NewMap(spec *MapSpec) (*Map, error) { + return NewMapWithOptions(spec, MapOptions{}) +} + +// NewMapWithOptions creates a new Map. +// +// Creating a map for the first time will perform feature detection +// by creating small, temporary maps. +// +// The caller is responsible for ensuring the process' rlimit is set +// sufficiently high for locking memory during map creation. This can be done +// by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions. +// +// May return an error wrapping ErrMapIncompatible. +func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) { + handles := newHandleCache() + defer handles.close() + + m, err := newMapWithOptions(spec, opts, handles) + if err != nil { + return nil, fmt.Errorf("creating map: %w", err) + } + + if err := m.finalize(spec); err != nil { + m.Close() + return nil, fmt.Errorf("populating map: %w", err) + } + + return m, nil +} + +func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) { + closeOnError := func(c io.Closer) { + if err != nil { + c.Close() + } + } + + switch spec.Pinning { + case PinByName: + if spec.Name == "" { + return nil, fmt.Errorf("pin by name: missing Name") + } + + if opts.PinPath == "" { + return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath") + } + + path := filepath.Join(opts.PinPath, spec.Name) + m, err := LoadPinnedMap(path, &opts.LoadPinOptions) + if errors.Is(err, unix.ENOENT) { + break + } + if err != nil { + return nil, fmt.Errorf("load pinned map: %w", err) + } + defer closeOnError(m) + + if err := spec.checkCompatibility(m); err != nil { + return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err) + } + + return m, nil + + case PinNone: + // Nothing to do here + + default: + return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported) + } + + var innerFd *sys.FD + if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps { + if spec.InnerMap == nil { + return nil, fmt.Errorf("%s requires InnerMap", spec.Type) + } + + if spec.InnerMap.Pinning != PinNone { + return nil, errors.New("inner maps cannot be pinned") + } + + template, err := spec.InnerMap.createMap(nil, opts, handles) + if err != nil { + return nil, fmt.Errorf("inner map: %w", err) + } + defer template.Close() + + // Intentionally skip populating and freezing (finalizing) + // the inner map template since it will be removed shortly. + + innerFd = template.fd + } + + m, err := spec.createMap(innerFd, opts, handles) + if err != nil { + return nil, err + } + defer closeOnError(m) + + if spec.Pinning == PinByName { + path := filepath.Join(opts.PinPath, spec.Name) + if err := m.Pin(path); err != nil { + return nil, fmt.Errorf("pin map: %w", err) + } + } + + return m, nil +} + +// createMap validates the spec's properties and creates the map in the kernel +// using the given opts. It does not populate or freeze the map. +func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) { + closeOnError := func(closer io.Closer) { + if err != nil { + closer.Close() + } + } + + spec = spec.Copy() + + // Kernels 4.13 through 5.4 used a struct bpf_map_def that contained + // additional 'inner_map_idx' and later 'numa_node' fields. + // In order to support loading these definitions, tolerate the presence of + // extra bytes, but require them to be zeroes. + if spec.Extra != nil { + if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil { + return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map") + } + } + + switch spec.Type { + case ArrayOfMaps, HashOfMaps: + if err := haveNestedMaps(); err != nil { + return nil, err + } + + if spec.ValueSize != 0 && spec.ValueSize != 4 { + return nil, errors.New("ValueSize must be zero or four for map of map") + } + spec.ValueSize = 4 + + case PerfEventArray: + if spec.KeySize != 0 && spec.KeySize != 4 { + return nil, errors.New("KeySize must be zero or four for perf event array") + } + spec.KeySize = 4 + + if spec.ValueSize != 0 && spec.ValueSize != 4 { + return nil, errors.New("ValueSize must be zero or four for perf event array") + } + spec.ValueSize = 4 + + if spec.MaxEntries == 0 { + n, err := internal.PossibleCPUs() + if err != nil { + return nil, fmt.Errorf("perf event array: %w", err) + } + spec.MaxEntries = uint32(n) + } + } + + if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze { + if err := haveMapMutabilityModifiers(); err != nil { + return nil, fmt.Errorf("map create: %w", err) + } + } + if spec.Flags&unix.BPF_F_MMAPABLE > 0 { + if err := haveMmapableMaps(); err != nil { + return nil, fmt.Errorf("map create: %w", err) + } + } + if spec.Flags&unix.BPF_F_INNER_MAP > 0 { + if err := haveInnerMaps(); err != nil { + return nil, fmt.Errorf("map create: %w", err) + } + } + if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 { + if err := haveNoPreallocMaps(); err != nil { + return nil, fmt.Errorf("map create: %w", err) + } + } + + attr := sys.MapCreateAttr{ + MapType: sys.MapType(spec.Type), + KeySize: spec.KeySize, + ValueSize: spec.ValueSize, + MaxEntries: spec.MaxEntries, + MapFlags: spec.Flags, + NumaNode: spec.NumaNode, + } + + if inner != nil { + attr.InnerMapFd = inner.Uint() + } + + if haveObjName() == nil { + attr.MapName = sys.NewObjName(spec.Name) + } + + if spec.hasBTF() { + handle, err := handles.btfHandle(spec.BTF) + if err != nil && !errors.Is(err, btf.ErrNotSupported) { + return nil, fmt.Errorf("load BTF: %w", err) + } + + if handle != nil { + keyTypeID, err := spec.BTF.TypeID(spec.Key) + if err != nil { + return nil, err + } + + valueTypeID, err := spec.BTF.TypeID(spec.Value) + if err != nil { + return nil, err + } + + attr.BtfFd = uint32(handle.FD()) + attr.BtfKeyTypeId = uint32(keyTypeID) + attr.BtfValueTypeId = uint32(valueTypeID) + } + } + + fd, err := sys.MapCreate(&attr) + if err != nil { + if errors.Is(err, unix.EPERM) { + return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) + } + if !spec.hasBTF() { + return nil, fmt.Errorf("map create without BTF: %w", err) + } + if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 { + return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err) + } + return nil, fmt.Errorf("map create: %w", err) + } + defer closeOnError(fd) + + m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags) + if err != nil { + return nil, fmt.Errorf("map create: %w", err) + } + + return m, nil +} + +// newMap allocates and returns a new Map structure. +// Sets the fullValueSize on per-CPU maps. +func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) { + m := &Map{ + name, + fd, + typ, + keySize, + valueSize, + maxEntries, + flags, + "", + int(valueSize), + } + + if !typ.hasPerCPUValue() { + return m, nil + } + + possibleCPUs, err := internal.PossibleCPUs() + if err != nil { + return nil, err + } + + m.fullValueSize = internal.Align(int(valueSize), 8) * possibleCPUs + return m, nil +} + +func (m *Map) String() string { + if m.name != "" { + return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd) + } + return fmt.Sprintf("%s#%v", m.typ, m.fd) +} + +// Type returns the underlying type of the map. +func (m *Map) Type() MapType { + return m.typ +} + +// KeySize returns the size of the map key in bytes. +func (m *Map) KeySize() uint32 { + return m.keySize +} + +// ValueSize returns the size of the map value in bytes. +func (m *Map) ValueSize() uint32 { + return m.valueSize +} + +// MaxEntries returns the maximum number of elements the map can hold. +func (m *Map) MaxEntries() uint32 { + return m.maxEntries +} + +// Flags returns the flags of the map. +func (m *Map) Flags() uint32 { + return m.flags +} + +// Info returns metadata about the map. +func (m *Map) Info() (*MapInfo, error) { + return newMapInfoFromFd(m.fd) +} + +// MapLookupFlags controls the behaviour of the map lookup calls. +type MapLookupFlags uint64 + +// LookupLock look up the value of a spin-locked map. +const LookupLock MapLookupFlags = 4 + +// Lookup retrieves a value from a Map. +// +// Calls Close() on valueOut if it is of type **Map or **Program, +// and *valueOut is not nil. +// +// Returns an error if the key doesn't exist, see ErrKeyNotExist. +func (m *Map) Lookup(key, valueOut interface{}) error { + valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) + if err := m.lookup(key, valuePtr, 0); err != nil { + return err + } + + return m.unmarshalValue(valueOut, valueBytes) +} + +// LookupWithFlags retrieves a value from a Map with flags. +// +// Passing LookupLock flag will look up the value of a spin-locked +// map without returning the lock. This must be specified if the +// elements contain a spinlock. +// +// Calls Close() on valueOut if it is of type **Map or **Program, +// and *valueOut is not nil. +// +// Returns an error if the key doesn't exist, see ErrKeyNotExist. +func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { + valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) + if err := m.lookup(key, valuePtr, flags); err != nil { + return err + } + + return m.unmarshalValue(valueOut, valueBytes) +} + +// LookupAndDelete retrieves and deletes a value from a Map. +// +// Returns ErrKeyNotExist if the key doesn't exist. +func (m *Map) LookupAndDelete(key, valueOut interface{}) error { + return m.lookupAndDelete(key, valueOut, 0) +} + +// LookupAndDeleteWithFlags retrieves and deletes a value from a Map. +// +// Passing LookupLock flag will look up and delete the value of a spin-locked +// map without returning the lock. This must be specified if the elements +// contain a spinlock. +// +// Returns ErrKeyNotExist if the key doesn't exist. +func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { + return m.lookupAndDelete(key, valueOut, flags) +} + +// LookupBytes gets a value from Map. +// +// Returns a nil value if a key doesn't exist. +func (m *Map) LookupBytes(key interface{}) ([]byte, error) { + valueBytes := make([]byte, m.fullValueSize) + valuePtr := sys.NewSlicePointer(valueBytes) + + err := m.lookup(key, valuePtr, 0) + if errors.Is(err, ErrKeyNotExist) { + return nil, nil + } + + return valueBytes, err +} + +func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error { + keyPtr, err := m.marshalKey(key) + if err != nil { + return fmt.Errorf("can't marshal key: %w", err) + } + + attr := sys.MapLookupElemAttr{ + MapFd: m.fd.Uint(), + Key: keyPtr, + Value: valueOut, + Flags: uint64(flags), + } + + if err = sys.MapLookupElem(&attr); err != nil { + return fmt.Errorf("lookup: %w", wrapMapError(err)) + } + return nil +} + +func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) error { + valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) + + keyPtr, err := m.marshalKey(key) + if err != nil { + return fmt.Errorf("can't marshal key: %w", err) + } + + attr := sys.MapLookupAndDeleteElemAttr{ + MapFd: m.fd.Uint(), + Key: keyPtr, + Value: valuePtr, + Flags: uint64(flags), + } + + if err := sys.MapLookupAndDeleteElem(&attr); err != nil { + return fmt.Errorf("lookup and delete: %w", wrapMapError(err)) + } + + return m.unmarshalValue(valueOut, valueBytes) +} + +// MapUpdateFlags controls the behaviour of the Map.Update call. +// +// The exact semantics depend on the specific MapType. +type MapUpdateFlags uint64 + +const ( + // UpdateAny creates a new element or update an existing one. + UpdateAny MapUpdateFlags = iota + // UpdateNoExist creates a new element. + UpdateNoExist MapUpdateFlags = 1 << (iota - 1) + // UpdateExist updates an existing element. + UpdateExist + // UpdateLock updates elements under bpf_spin_lock. + UpdateLock +) + +// Put replaces or creates a value in map. +// +// It is equivalent to calling Update with UpdateAny. +func (m *Map) Put(key, value interface{}) error { + return m.Update(key, value, UpdateAny) +} + +// Update changes the value of a key. +func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error { + keyPtr, err := m.marshalKey(key) + if err != nil { + return fmt.Errorf("can't marshal key: %w", err) + } + + valuePtr, err := m.marshalValue(value) + if err != nil { + return fmt.Errorf("can't marshal value: %w", err) + } + + attr := sys.MapUpdateElemAttr{ + MapFd: m.fd.Uint(), + Key: keyPtr, + Value: valuePtr, + Flags: uint64(flags), + } + + if err = sys.MapUpdateElem(&attr); err != nil { + return fmt.Errorf("update: %w", wrapMapError(err)) + } + + return nil +} + +// Delete removes a value. +// +// Returns ErrKeyNotExist if the key does not exist. +func (m *Map) Delete(key interface{}) error { + keyPtr, err := m.marshalKey(key) + if err != nil { + return fmt.Errorf("can't marshal key: %w", err) + } + + attr := sys.MapDeleteElemAttr{ + MapFd: m.fd.Uint(), + Key: keyPtr, + } + + if err = sys.MapDeleteElem(&attr); err != nil { + return fmt.Errorf("delete: %w", wrapMapError(err)) + } + return nil +} + +// NextKey finds the key following an initial key. +// +// See NextKeyBytes for details. +// +// Returns ErrKeyNotExist if there is no next key. +func (m *Map) NextKey(key, nextKeyOut interface{}) error { + nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.keySize)) + + if err := m.nextKey(key, nextKeyPtr); err != nil { + return err + } + + if err := m.unmarshalKey(nextKeyOut, nextKeyBytes); err != nil { + return fmt.Errorf("can't unmarshal next key: %w", err) + } + return nil +} + +// NextKeyBytes returns the key following an initial key as a byte slice. +// +// Passing nil will return the first key. +// +// Use Iterate if you want to traverse all entries in the map. +// +// Returns nil if there are no more keys. +func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) { + nextKey := make([]byte, m.keySize) + nextKeyPtr := sys.NewSlicePointer(nextKey) + + err := m.nextKey(key, nextKeyPtr) + if errors.Is(err, ErrKeyNotExist) { + return nil, nil + } + + return nextKey, err +} + +func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error { + var ( + keyPtr sys.Pointer + err error + ) + + if key != nil { + keyPtr, err = m.marshalKey(key) + if err != nil { + return fmt.Errorf("can't marshal key: %w", err) + } + } + + attr := sys.MapGetNextKeyAttr{ + MapFd: m.fd.Uint(), + Key: keyPtr, + NextKey: nextKeyOut, + } + + if err = sys.MapGetNextKey(&attr); err != nil { + // Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the + // first map element when a nil key pointer is specified. + if key == nil && errors.Is(err, unix.EFAULT) { + var guessKey []byte + guessKey, err = m.guessNonExistentKey() + if err != nil { + return err + } + + // Retry the syscall with a valid non-existing key. + attr.Key = sys.NewSlicePointer(guessKey) + if err = sys.MapGetNextKey(&attr); err == nil { + return nil + } + } + + return fmt.Errorf("next key: %w", wrapMapError(err)) + } + + return nil +} + +// guessNonExistentKey attempts to perform a map lookup that returns ENOENT. +// This is necessary on kernels before 4.4.132, since those don't support +// iterating maps from the start by providing an invalid key pointer. +func (m *Map) guessNonExistentKey() ([]byte, error) { + // Provide an invalid value pointer to prevent a copy on the kernel side. + valuePtr := sys.NewPointer(unsafe.Pointer(^uintptr(0))) + randKey := make([]byte, int(m.keySize)) + + for i := 0; i < 4; i++ { + switch i { + // For hash maps, the 0 key is less likely to be occupied. They're often + // used for storing data related to pointers, and their access pattern is + // generally scattered across the keyspace. + case 0: + // An all-0xff key is guaranteed to be out of bounds of any array, since + // those have a fixed key size of 4 bytes. The only corner case being + // arrays with 2^32 max entries, but those are prohibitively expensive + // in many environments. + case 1: + for r := range randKey { + randKey[r] = 0xff + } + // Inspired by BCC, 0x55 is an alternating binary pattern (0101), so + // is unlikely to be taken. + case 2: + for r := range randKey { + randKey[r] = 0x55 + } + // Last ditch effort, generate a random key. + case 3: + rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey) + } + + err := m.lookup(randKey, valuePtr, 0) + if errors.Is(err, ErrKeyNotExist) { + return randKey, nil + } + } + + return nil, errors.New("couldn't find non-existing key") +} + +// BatchLookup looks up many elements in a map at once. +// +// "keysOut" and "valuesOut" must be of type slice, a pointer +// to a slice or buffer will not work. +// "prevKey" is the key to start the batch lookup from, it will +// *not* be included in the results. Use nil to start at the first key. +// +// ErrKeyNotExist is returned when the batch lookup has reached +// the end of all possible results, even when partial results +// are returned. It should be used to evaluate when lookup is "done". +func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { + return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) +} + +// BatchLookupAndDelete looks up many elements in a map at once, +// +// It then deletes all those elements. +// "keysOut" and "valuesOut" must be of type slice, a pointer +// to a slice or buffer will not work. +// "prevKey" is the key to start the batch lookup from, it will +// *not* be included in the results. Use nil to start at the first key. +// +// ErrKeyNotExist is returned when the batch lookup has reached +// the end of all possible results, even when partial results +// are returned. It should be used to evaluate when lookup is "done". +func (m *Map) BatchLookupAndDelete(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { + return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) +} + +func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { + if err := haveBatchAPI(); err != nil { + return 0, err + } + if m.typ.hasPerCPUValue() { + return 0, ErrNotSupported + } + keysValue := reflect.ValueOf(keysOut) + if keysValue.Kind() != reflect.Slice { + return 0, fmt.Errorf("keys must be a slice") + } + valuesValue := reflect.ValueOf(valuesOut) + if valuesValue.Kind() != reflect.Slice { + return 0, fmt.Errorf("valuesOut must be a slice") + } + count := keysValue.Len() + if count != valuesValue.Len() { + return 0, fmt.Errorf("keysOut and valuesOut must be the same length") + } + keyBuf := make([]byte, count*int(m.keySize)) + keyPtr := sys.NewSlicePointer(keyBuf) + valueBuf := make([]byte, count*int(m.fullValueSize)) + valuePtr := sys.NewSlicePointer(valueBuf) + nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize)) + + attr := sys.MapLookupBatchAttr{ + MapFd: m.fd.Uint(), + Keys: keyPtr, + Values: valuePtr, + Count: uint32(count), + OutBatch: nextPtr, + } + + if opts != nil { + attr.ElemFlags = opts.ElemFlags + attr.Flags = opts.Flags + } + + var err error + if startKey != nil { + attr.InBatch, err = marshalPtr(startKey, int(m.keySize)) + if err != nil { + return 0, err + } + } + + _, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + sysErr = wrapMapError(sysErr) + if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { + return 0, sysErr + } + + err = m.unmarshalKey(nextKeyOut, nextBuf) + if err != nil { + return 0, err + } + err = unmarshalBytes(keysOut, keyBuf) + if err != nil { + return 0, err + } + err = unmarshalBytes(valuesOut, valueBuf) + if err != nil { + return 0, err + } + + return int(attr.Count), sysErr +} + +// BatchUpdate updates the map with multiple keys and values +// simultaneously. +// "keys" and "values" must be of type slice, a pointer +// to a slice or buffer will not work. +func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) { + if err := haveBatchAPI(); err != nil { + return 0, err + } + if m.typ.hasPerCPUValue() { + return 0, ErrNotSupported + } + keysValue := reflect.ValueOf(keys) + if keysValue.Kind() != reflect.Slice { + return 0, fmt.Errorf("keys must be a slice") + } + valuesValue := reflect.ValueOf(values) + if valuesValue.Kind() != reflect.Slice { + return 0, fmt.Errorf("values must be a slice") + } + var ( + count = keysValue.Len() + valuePtr sys.Pointer + err error + ) + if count != valuesValue.Len() { + return 0, fmt.Errorf("keys and values must be the same length") + } + keyPtr, err := marshalPtr(keys, count*int(m.keySize)) + if err != nil { + return 0, err + } + valuePtr, err = marshalPtr(values, count*int(m.valueSize)) + if err != nil { + return 0, err + } + + attr := sys.MapUpdateBatchAttr{ + MapFd: m.fd.Uint(), + Keys: keyPtr, + Values: valuePtr, + Count: uint32(count), + } + if opts != nil { + attr.ElemFlags = opts.ElemFlags + attr.Flags = opts.Flags + } + + err = sys.MapUpdateBatch(&attr) + if err != nil { + return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err)) + } + + return int(attr.Count), nil +} + +// BatchDelete batch deletes entries in the map by keys. +// "keys" must be of type slice, a pointer to a slice or buffer will not work. +func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { + if err := haveBatchAPI(); err != nil { + return 0, err + } + if m.typ.hasPerCPUValue() { + return 0, ErrNotSupported + } + keysValue := reflect.ValueOf(keys) + if keysValue.Kind() != reflect.Slice { + return 0, fmt.Errorf("keys must be a slice") + } + count := keysValue.Len() + keyPtr, err := marshalPtr(keys, count*int(m.keySize)) + if err != nil { + return 0, fmt.Errorf("cannot marshal keys: %v", err) + } + + attr := sys.MapDeleteBatchAttr{ + MapFd: m.fd.Uint(), + Keys: keyPtr, + Count: uint32(count), + } + + if opts != nil { + attr.ElemFlags = opts.ElemFlags + attr.Flags = opts.Flags + } + + if err = sys.MapDeleteBatch(&attr); err != nil { + return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err)) + } + + return int(attr.Count), nil +} + +// Iterate traverses a map. +// +// It's safe to create multiple iterators at the same time. +// +// It's not possible to guarantee that all keys in a map will be +// returned if there are concurrent modifications to the map. +func (m *Map) Iterate() *MapIterator { + return newMapIterator(m) +} + +// Close the Map's underlying file descriptor, which could unload the +// Map from the kernel if it is not pinned or in use by a loaded Program. +func (m *Map) Close() error { + if m == nil { + // This makes it easier to clean up when iterating maps + // of maps / programs. + return nil + } + + return m.fd.Close() +} + +// FD gets the file descriptor of the Map. +// +// Calling this function is invalid after Close has been called. +func (m *Map) FD() int { + return m.fd.Int() +} + +// Clone creates a duplicate of the Map. +// +// Closing the duplicate does not affect the original, and vice versa. +// Changes made to the map are reflected by both instances however. +// If the original map was pinned, the cloned map will not be pinned by default. +// +// Cloning a nil Map returns nil. +func (m *Map) Clone() (*Map, error) { + if m == nil { + return nil, nil + } + + dup, err := m.fd.Dup() + if err != nil { + return nil, fmt.Errorf("can't clone map: %w", err) + } + + return &Map{ + m.name, + dup, + m.typ, + m.keySize, + m.valueSize, + m.maxEntries, + m.flags, + "", + m.fullValueSize, + }, nil +} + +// Pin persists the map on the BPF virtual file system past the lifetime of +// the process that created it . +// +// Calling Pin on a previously pinned map will overwrite the path, except when +// the new path already exists. Re-pinning across filesystems is not supported. +// You can Clone a map to pin it to a different path. +// +// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs +func (m *Map) Pin(fileName string) error { + if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil { + return err + } + m.pinnedPath = fileName + return nil +} + +// Unpin removes the persisted state for the map from the BPF virtual filesystem. +// +// Failed calls to Unpin will not alter the state returned by IsPinned. +// +// Unpinning an unpinned Map returns nil. +func (m *Map) Unpin() error { + if err := internal.Unpin(m.pinnedPath); err != nil { + return err + } + m.pinnedPath = "" + return nil +} + +// IsPinned returns true if the map has a non-empty pinned path. +func (m *Map) IsPinned() bool { + return m.pinnedPath != "" +} + +// Freeze prevents a map to be modified from user space. +// +// It makes no changes to kernel-side restrictions. +func (m *Map) Freeze() error { + if err := haveMapMutabilityModifiers(); err != nil { + return fmt.Errorf("can't freeze map: %w", err) + } + + attr := sys.MapFreezeAttr{ + MapFd: m.fd.Uint(), + } + + if err := sys.MapFreeze(&attr); err != nil { + return fmt.Errorf("can't freeze map: %w", err) + } + return nil +} + +// finalize populates the Map according to the Contents specified +// in spec and freezes the Map if requested by spec. +func (m *Map) finalize(spec *MapSpec) error { + for _, kv := range spec.Contents { + if err := m.Put(kv.Key, kv.Value); err != nil { + return fmt.Errorf("putting value: key %v: %w", kv.Key, err) + } + } + + if spec.Freeze { + if err := m.Freeze(); err != nil { + return fmt.Errorf("freezing map: %w", err) + } + } + + return nil +} + +func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) { + if data == nil { + if m.keySize == 0 { + // Queues have a key length of zero, so passing nil here is valid. + return sys.NewPointer(nil), nil + } + return sys.Pointer{}, errors.New("can't use nil as key of map") + } + + return marshalPtr(data, int(m.keySize)) +} + +func (m *Map) unmarshalKey(data interface{}, buf []byte) error { + if buf == nil { + // This is from a makeBuffer call, nothing do do here. + return nil + } + + return unmarshalBytes(data, buf) +} + +func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { + if m.typ.hasPerCPUValue() { + return marshalPerCPUValue(data, int(m.valueSize)) + } + + var ( + buf []byte + err error + ) + + switch value := data.(type) { + case *Map: + if !m.typ.canStoreMap() { + return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ) + } + buf, err = marshalMap(value, int(m.valueSize)) + + case *Program: + if !m.typ.canStoreProgram() { + return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ) + } + buf, err = marshalProgram(value, int(m.valueSize)) + + default: + return marshalPtr(data, int(m.valueSize)) + } + + if err != nil { + return sys.Pointer{}, err + } + + return sys.NewSlicePointer(buf), nil +} + +func (m *Map) unmarshalValue(value interface{}, buf []byte) error { + if buf == nil { + // This is from a makeBuffer call, nothing do do here. + return nil + } + + if m.typ.hasPerCPUValue() { + return unmarshalPerCPUValue(value, int(m.valueSize), buf) + } + + switch value := value.(type) { + case **Map: + if !m.typ.canStoreMap() { + return fmt.Errorf("can't read a map from %s", m.typ) + } + + other, err := unmarshalMap(buf) + if err != nil { + return err + } + + // The caller might close the map externally, so ignore errors. + _ = (*value).Close() + + *value = other + return nil + + case *Map: + if !m.typ.canStoreMap() { + return fmt.Errorf("can't read a map from %s", m.typ) + } + return errors.New("require pointer to *Map") + + case **Program: + if !m.typ.canStoreProgram() { + return fmt.Errorf("can't read a program from %s", m.typ) + } + + other, err := unmarshalProgram(buf) + if err != nil { + return err + } + + // The caller might close the program externally, so ignore errors. + _ = (*value).Close() + + *value = other + return nil + + case *Program: + if !m.typ.canStoreProgram() { + return fmt.Errorf("can't read a program from %s", m.typ) + } + return errors.New("require pointer to *Program") + } + + return unmarshalBytes(value, buf) +} + +// LoadPinnedMap loads a Map from a BPF file. +func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) { + fd, err := sys.ObjGet(&sys.ObjGetAttr{ + Pathname: sys.NewStringPointer(fileName), + FileFlags: opts.Marshal(), + }) + if err != nil { + return nil, err + } + + m, err := newMapFromFD(fd) + if err == nil { + m.pinnedPath = fileName + } + + return m, err +} + +// unmarshalMap creates a map from a map ID encoded in host endianness. +func unmarshalMap(buf []byte) (*Map, error) { + if len(buf) != 4 { + return nil, errors.New("map id requires 4 byte value") + } + + id := internal.NativeEndian.Uint32(buf) + return NewMapFromID(MapID(id)) +} + +// marshalMap marshals the fd of a map into a buffer in host endianness. +func marshalMap(m *Map, length int) ([]byte, error) { + if length != 4 { + return nil, fmt.Errorf("can't marshal map to %d bytes", length) + } + + buf := make([]byte, 4) + internal.NativeEndian.PutUint32(buf, m.fd.Uint()) + return buf, nil +} + +// MapIterator iterates a Map. +// +// See Map.Iterate. +type MapIterator struct { + target *Map + prevKey interface{} + prevBytes []byte + count, maxEntries uint32 + done bool + err error +} + +func newMapIterator(target *Map) *MapIterator { + return &MapIterator{ + target: target, + maxEntries: target.maxEntries, + prevBytes: make([]byte, target.keySize), + } +} + +// Next decodes the next key and value. +// +// Iterating a hash map from which keys are being deleted is not +// safe. You may see the same key multiple times. Iteration may +// also abort with an error, see IsIterationAborted. +// +// Returns false if there are no more entries. You must check +// the result of Err afterwards. +// +// See Map.Get for further caveats around valueOut. +func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { + if mi.err != nil || mi.done { + return false + } + + // For array-like maps NextKeyBytes returns nil only on after maxEntries + // iterations. + for mi.count <= mi.maxEntries { + var nextBytes []byte + nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey) + if mi.err != nil { + return false + } + + if nextBytes == nil { + mi.done = true + return false + } + + // The user can get access to nextBytes since unmarshalBytes + // does not copy when unmarshaling into a []byte. + // Make a copy to prevent accidental corruption of + // iterator state. + copy(mi.prevBytes, nextBytes) + mi.prevKey = mi.prevBytes + + mi.count++ + mi.err = mi.target.Lookup(nextBytes, valueOut) + if errors.Is(mi.err, ErrKeyNotExist) { + // Even though the key should be valid, we couldn't look up + // its value. If we're iterating a hash map this is probably + // because a concurrent delete removed the value before we + // could get it. This means that the next call to NextKeyBytes + // is very likely to restart iteration. + // If we're iterating one of the fd maps like + // ProgramArray it means that a given slot doesn't have + // a valid fd associated. It's OK to continue to the next slot. + continue + } + if mi.err != nil { + return false + } + + mi.err = mi.target.unmarshalKey(keyOut, nextBytes) + return mi.err == nil + } + + mi.err = fmt.Errorf("%w", ErrIterationAborted) + return false +} + +// Err returns any encountered error. +// +// The method must be called after Next returns nil. +// +// Returns ErrIterationAborted if it wasn't possible to do a full iteration. +func (mi *MapIterator) Err() error { + return mi.err +} + +// MapGetNextID returns the ID of the next eBPF map. +// +// Returns ErrNotExist, if there is no next eBPF map. +func MapGetNextID(startID MapID) (MapID, error) { + attr := &sys.MapGetNextIdAttr{Id: uint32(startID)} + return MapID(attr.NextId), sys.MapGetNextId(attr) +} + +// NewMapFromID returns the map for a given id. +// +// Returns ErrNotExist, if there is no eBPF map with the given id. +func NewMapFromID(id MapID) (*Map, error) { + fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{ + Id: uint32(id), + }) + if err != nil { + return nil, err + } + + return newMapFromFD(fd) +} diff --git a/vendor/github.com/cilium/ebpf/marshalers.go b/vendor/github.com/cilium/ebpf/marshalers.go new file mode 100644 index 000000000..544d17f35 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/marshalers.go @@ -0,0 +1,247 @@ +package ebpf + +import ( + "bytes" + "encoding" + "encoding/binary" + "errors" + "fmt" + "reflect" + "runtime" + "sync" + "unsafe" + + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" +) + +// marshalPtr converts an arbitrary value into a pointer suitable +// to be passed to the kernel. +// +// As an optimization, it returns the original value if it is an +// unsafe.Pointer. +func marshalPtr(data interface{}, length int) (sys.Pointer, error) { + if ptr, ok := data.(unsafe.Pointer); ok { + return sys.NewPointer(ptr), nil + } + + buf, err := marshalBytes(data, length) + if err != nil { + return sys.Pointer{}, err + } + + return sys.NewSlicePointer(buf), nil +} + +// marshalBytes converts an arbitrary value into a byte buffer. +// +// Prefer using Map.marshalKey and Map.marshalValue if possible, since +// those have special cases that allow more types to be encoded. +// +// Returns an error if the given value isn't representable in exactly +// length bytes. +func marshalBytes(data interface{}, length int) (buf []byte, err error) { + if data == nil { + return nil, errors.New("can't marshal a nil value") + } + + switch value := data.(type) { + case encoding.BinaryMarshaler: + buf, err = value.MarshalBinary() + case string: + buf = []byte(value) + case []byte: + buf = value + case unsafe.Pointer: + err = errors.New("can't marshal from unsafe.Pointer") + case Map, *Map, Program, *Program: + err = fmt.Errorf("can't marshal %T", value) + default: + var wr bytes.Buffer + err = binary.Write(&wr, internal.NativeEndian, value) + if err != nil { + err = fmt.Errorf("encoding %T: %v", value, err) + } + buf = wr.Bytes() + } + if err != nil { + return nil, err + } + + if len(buf) != length { + return nil, fmt.Errorf("%T doesn't marshal to %d bytes", data, length) + } + return buf, nil +} + +func makeBuffer(dst interface{}, length int) (sys.Pointer, []byte) { + if ptr, ok := dst.(unsafe.Pointer); ok { + return sys.NewPointer(ptr), nil + } + + buf := make([]byte, length) + return sys.NewSlicePointer(buf), buf +} + +var bytesReaderPool = sync.Pool{ + New: func() interface{} { + return new(bytes.Reader) + }, +} + +// unmarshalBytes converts a byte buffer into an arbitrary value. +// +// Prefer using Map.unmarshalKey and Map.unmarshalValue if possible, since +// those have special cases that allow more types to be encoded. +// +// The common int32 and int64 types are directly handled to avoid +// unnecessary heap allocations as happening in the default case. +func unmarshalBytes(data interface{}, buf []byte) error { + switch value := data.(type) { + case unsafe.Pointer: + dst := unsafe.Slice((*byte)(value), len(buf)) + copy(dst, buf) + runtime.KeepAlive(value) + return nil + case Map, *Map, Program, *Program: + return fmt.Errorf("can't unmarshal into %T", value) + case encoding.BinaryUnmarshaler: + return value.UnmarshalBinary(buf) + case *string: + *value = string(buf) + return nil + case *[]byte: + *value = buf + return nil + case *int32: + if len(buf) < 4 { + return errors.New("int32 requires 4 bytes") + } + *value = int32(internal.NativeEndian.Uint32(buf)) + return nil + case *uint32: + if len(buf) < 4 { + return errors.New("uint32 requires 4 bytes") + } + *value = internal.NativeEndian.Uint32(buf) + return nil + case *int64: + if len(buf) < 8 { + return errors.New("int64 requires 8 bytes") + } + *value = int64(internal.NativeEndian.Uint64(buf)) + return nil + case *uint64: + if len(buf) < 8 { + return errors.New("uint64 requires 8 bytes") + } + *value = internal.NativeEndian.Uint64(buf) + return nil + case string: + return errors.New("require pointer to string") + case []byte: + return errors.New("require pointer to []byte") + default: + rd := bytesReaderPool.Get().(*bytes.Reader) + rd.Reset(buf) + defer bytesReaderPool.Put(rd) + if err := binary.Read(rd, internal.NativeEndian, value); err != nil { + return fmt.Errorf("decoding %T: %v", value, err) + } + return nil + } +} + +// marshalPerCPUValue encodes a slice containing one value per +// possible CPU into a buffer of bytes. +// +// Values are initialized to zero if the slice has less elements than CPUs. +// +// slice must have a type like []elementType. +func marshalPerCPUValue(slice interface{}, elemLength int) (sys.Pointer, error) { + sliceType := reflect.TypeOf(slice) + if sliceType.Kind() != reflect.Slice { + return sys.Pointer{}, errors.New("per-CPU value requires slice") + } + + possibleCPUs, err := internal.PossibleCPUs() + if err != nil { + return sys.Pointer{}, err + } + + sliceValue := reflect.ValueOf(slice) + sliceLen := sliceValue.Len() + if sliceLen > possibleCPUs { + return sys.Pointer{}, fmt.Errorf("per-CPU value exceeds number of CPUs") + } + + alignedElemLength := internal.Align(elemLength, 8) + buf := make([]byte, alignedElemLength*possibleCPUs) + + for i := 0; i < sliceLen; i++ { + elem := sliceValue.Index(i).Interface() + elemBytes, err := marshalBytes(elem, elemLength) + if err != nil { + return sys.Pointer{}, err + } + + offset := i * alignedElemLength + copy(buf[offset:offset+elemLength], elemBytes) + } + + return sys.NewSlicePointer(buf), nil +} + +// unmarshalPerCPUValue decodes a buffer into a slice containing one value per +// possible CPU. +// +// valueOut must have a type like *[]elementType +func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error { + slicePtrType := reflect.TypeOf(slicePtr) + if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice { + return fmt.Errorf("per-cpu value requires pointer to slice") + } + + possibleCPUs, err := internal.PossibleCPUs() + if err != nil { + return err + } + + sliceType := slicePtrType.Elem() + slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs) + + sliceElemType := sliceType.Elem() + sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr + if sliceElemIsPointer { + sliceElemType = sliceElemType.Elem() + } + + step := len(buf) / possibleCPUs + if step < elemLength { + return fmt.Errorf("per-cpu element length is larger than available data") + } + for i := 0; i < possibleCPUs; i++ { + var elem interface{} + if sliceElemIsPointer { + newElem := reflect.New(sliceElemType) + slice.Index(i).Set(newElem) + elem = newElem.Interface() + } else { + elem = slice.Index(i).Addr().Interface() + } + + // Make a copy, since unmarshal can hold on to itemBytes + elemBytes := make([]byte, elemLength) + copy(elemBytes, buf[:elemLength]) + + err := unmarshalBytes(elem, elemBytes) + if err != nil { + return fmt.Errorf("cpu %d: %w", i, err) + } + + buf = buf[step:] + } + + reflect.ValueOf(slicePtr).Elem().Set(slice) + return nil +} diff --git a/vendor/github.com/cilium/ebpf/prog.go b/vendor/github.com/cilium/ebpf/prog.go new file mode 100644 index 000000000..675edc711 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/prog.go @@ -0,0 +1,875 @@ +package ebpf + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "math" + "path/filepath" + "runtime" + "strings" + "time" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// ErrNotSupported is returned whenever the kernel doesn't support a feature. +var ErrNotSupported = internal.ErrNotSupported + +// ProgramID represents the unique ID of an eBPF program. +type ProgramID uint32 + +const ( + // Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN. + // This is currently the maximum of spare space allocated for SKB + // and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN. + outputPad = 256 + 2 +) + +// DefaultVerifierLogSize is the default number of bytes allocated for the +// verifier log. +const DefaultVerifierLogSize = 64 * 1024 + +// ProgramOptions control loading a program into the kernel. +type ProgramOptions struct { + // Controls the detail emitted by the kernel verifier. Set to non-zero + // to enable logging. + LogLevel uint32 + // Controls the output buffer size for the verifier. Defaults to + // DefaultVerifierLogSize. + LogSize int + // Type information used for CO-RE relocations and when attaching to + // kernel functions. + // + // This is useful in environments where the kernel BTF is not available + // (containers) or where it is in a non-standard location. Defaults to + // use the kernel BTF from a well-known location if nil. + KernelTypes *btf.Spec +} + +// ProgramSpec defines a Program. +type ProgramSpec struct { + // Name is passed to the kernel as a debug aid. Must only contain + // alpha numeric and '_' characters. + Name string + + // Type determines at which hook in the kernel a program will run. + Type ProgramType + + // AttachType of the program, needed to differentiate allowed context + // accesses in some newer program types like CGroupSockAddr. + // + // Available on kernels 4.17 and later. + AttachType AttachType + + // Name of a kernel data structure or function to attach to. Its + // interpretation depends on Type and AttachType. + AttachTo string + + // The program to attach to. Must be provided manually. + AttachTarget *Program + + // The name of the ELF section this program orininated from. + SectionName string + + Instructions asm.Instructions + + // Flags is passed to the kernel and specifies additional program + // load attributes. + Flags uint32 + + // License of the program. Some helpers are only available if + // the license is deemed compatible with the GPL. + // + // See https://www.kernel.org/doc/html/latest/process/license-rules.html#id1 + License string + + // Version used by Kprobe programs. + // + // Deprecated on kernels 5.0 and later. Leave empty to let the library + // detect this value automatically. + KernelVersion uint32 + + // The BTF associated with this program. Changing Instructions + // will most likely invalidate the contained data, and may + // result in errors when attempting to load it into the kernel. + BTF *btf.Spec + + // The byte order this program was compiled for, may be nil. + ByteOrder binary.ByteOrder +} + +// Copy returns a copy of the spec. +func (ps *ProgramSpec) Copy() *ProgramSpec { + if ps == nil { + return nil + } + + cpy := *ps + cpy.Instructions = make(asm.Instructions, len(ps.Instructions)) + copy(cpy.Instructions, ps.Instructions) + return &cpy +} + +// Tag calculates the kernel tag for a series of instructions. +// +// Use asm.Instructions.Tag if you need to calculate for non-native endianness. +func (ps *ProgramSpec) Tag() (string, error) { + return ps.Instructions.Tag(internal.NativeEndian) +} + +type VerifierError = internal.VerifierError + +// Program represents BPF program loaded into the kernel. +// +// It is not safe to close a Program which is used by other goroutines. +type Program struct { + // Contains the output of the kernel verifier if enabled, + // otherwise it is empty. + VerifierLog string + + fd *sys.FD + name string + pinnedPath string + typ ProgramType +} + +// NewProgram creates a new Program. +// +// See NewProgramWithOptions for details. +func NewProgram(spec *ProgramSpec) (*Program, error) { + return NewProgramWithOptions(spec, ProgramOptions{}) +} + +// NewProgramWithOptions creates a new Program. +// +// Loading a program for the first time will perform +// feature detection by loading small, temporary programs. +// +// Returns an error wrapping VerifierError if the program or its BTF is rejected +// by the kernel. +func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) { + if spec == nil { + return nil, errors.New("can't load a program from a nil spec") + } + + handles := newHandleCache() + defer handles.close() + + prog, err := newProgramWithOptions(spec, opts, handles) + if errors.Is(err, asm.ErrUnsatisfiedMapReference) { + return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err) + } + return prog, err +} + +func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *handleCache) (*Program, error) { + if len(spec.Instructions) == 0 { + return nil, errors.New("instructions cannot be empty") + } + + if spec.Type == UnspecifiedProgram { + return nil, errors.New("can't load program of unspecified type") + } + + if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian { + return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian) + } + + // Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load") + // require the version field to be set to the value of the KERNEL_VERSION + // macro for kprobe-type programs. + // Overwrite Kprobe program version if set to zero or the magic version constant. + kv := spec.KernelVersion + if spec.Type == Kprobe && (kv == 0 || kv == internal.MagicKernelVersion) { + v, err := internal.KernelVersion() + if err != nil { + return nil, fmt.Errorf("detecting kernel version: %w", err) + } + kv = v.Kernel() + } + + attr := &sys.ProgLoadAttr{ + ProgType: sys.ProgType(spec.Type), + ProgFlags: spec.Flags, + ExpectedAttachType: sys.AttachType(spec.AttachType), + License: sys.NewStringPointer(spec.License), + KernVersion: kv, + } + + if haveObjName() == nil { + attr.ProgName = sys.NewObjName(spec.Name) + } + + kernelTypes := opts.KernelTypes + + insns := make(asm.Instructions, len(spec.Instructions)) + copy(insns, spec.Instructions) + + var btfDisabled bool + if spec.BTF != nil { + if err := applyRelocations(insns, spec.BTF, kernelTypes); err != nil { + return nil, fmt.Errorf("apply CO-RE relocations: %w", err) + } + + handle, err := handles.btfHandle(spec.BTF) + btfDisabled = errors.Is(err, btf.ErrNotSupported) + if err != nil && !btfDisabled { + return nil, fmt.Errorf("load BTF: %w", err) + } + + if handle != nil { + attr.ProgBtfFd = uint32(handle.FD()) + + fib, lib, err := btf.MarshalExtInfos(insns, spec.BTF.TypeID) + if err != nil { + return nil, err + } + + attr.FuncInfoRecSize = btf.FuncInfoSize + attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize + attr.FuncInfo = sys.NewSlicePointer(fib) + + attr.LineInfoRecSize = btf.LineInfoSize + attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize + attr.LineInfo = sys.NewSlicePointer(lib) + } + } + + if err := fixupAndValidate(insns); err != nil { + return nil, err + } + + buf := bytes.NewBuffer(make([]byte, 0, insns.Size())) + err := insns.Marshal(buf, internal.NativeEndian) + if err != nil { + return nil, err + } + + bytecode := buf.Bytes() + attr.Insns = sys.NewSlicePointer(bytecode) + attr.InsnCnt = uint32(len(bytecode) / asm.InstructionSize) + + if spec.AttachTarget != nil { + targetID, err := findTargetInProgram(spec.AttachTarget, spec.AttachTo, spec.Type, spec.AttachType) + if err != nil { + return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err) + } + + attr.AttachBtfId = uint32(targetID) + attr.AttachProgFd = uint32(spec.AttachTarget.FD()) + defer runtime.KeepAlive(spec.AttachTarget) + } else if spec.AttachTo != "" { + targetID, err := findTargetInKernel(kernelTypes, spec.AttachTo, spec.Type, spec.AttachType) + if err != nil && !errors.Is(err, errUnrecognizedAttachType) { + // We ignore errUnrecognizedAttachType since AttachTo may be non-empty + // for programs that don't attach anywhere. + return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err) + } + + attr.AttachBtfId = uint32(targetID) + } + + logSize := DefaultVerifierLogSize + if opts.LogSize > 0 { + logSize = opts.LogSize + } + + var logBuf []byte + if opts.LogLevel > 0 { + logBuf = make([]byte, logSize) + attr.LogLevel = opts.LogLevel + attr.LogSize = uint32(len(logBuf)) + attr.LogBuf = sys.NewSlicePointer(logBuf) + } + + fd, err := sys.ProgLoad(attr) + if err == nil { + return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil + } + + if opts.LogLevel == 0 && opts.LogSize >= 0 { + // Re-run with the verifier enabled to get better error messages. + logBuf = make([]byte, logSize) + attr.LogLevel = 1 + attr.LogSize = uint32(len(logBuf)) + attr.LogBuf = sys.NewSlicePointer(logBuf) + _, _ = sys.ProgLoad(attr) + } + + switch { + case errors.Is(err, unix.EPERM): + if len(logBuf) > 0 && logBuf[0] == 0 { + // EPERM due to RLIMIT_MEMLOCK happens before the verifier, so we can + // check that the log is empty to reduce false positives. + return nil, fmt.Errorf("load program: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) + } + + fallthrough + + case errors.Is(err, unix.EINVAL): + if hasFunctionReferences(spec.Instructions) { + if err := haveBPFToBPFCalls(); err != nil { + return nil, fmt.Errorf("load program: %w", err) + } + } + } + + err = internal.ErrorWithLog(err, logBuf) + if btfDisabled { + return nil, fmt.Errorf("load program: %w (BTF disabled)", err) + } + return nil, fmt.Errorf("load program: %w", err) +} + +// NewProgramFromFD creates a program from a raw fd. +// +// You should not use fd after calling this function. +// +// Requires at least Linux 4.10. +func NewProgramFromFD(fd int) (*Program, error) { + f, err := sys.NewFD(fd) + if err != nil { + return nil, err + } + + return newProgramFromFD(f) +} + +// NewProgramFromID returns the program for a given id. +// +// Returns ErrNotExist, if there is no eBPF program with the given id. +func NewProgramFromID(id ProgramID) (*Program, error) { + fd, err := sys.ProgGetFdById(&sys.ProgGetFdByIdAttr{ + Id: uint32(id), + }) + if err != nil { + return nil, fmt.Errorf("get program by id: %w", err) + } + + return newProgramFromFD(fd) +} + +func newProgramFromFD(fd *sys.FD) (*Program, error) { + info, err := newProgramInfoFromFd(fd) + if err != nil { + fd.Close() + return nil, fmt.Errorf("discover program type: %w", err) + } + + return &Program{"", fd, "", "", info.Type}, nil +} + +func (p *Program) String() string { + if p.name != "" { + return fmt.Sprintf("%s(%s)#%v", p.typ, p.name, p.fd) + } + return fmt.Sprintf("%s(%v)", p.typ, p.fd) +} + +// Type returns the underlying type of the program. +func (p *Program) Type() ProgramType { + return p.typ +} + +// Info returns metadata about the program. +// +// Requires at least 4.10. +func (p *Program) Info() (*ProgramInfo, error) { + return newProgramInfoFromFd(p.fd) +} + +// Handle returns a reference to the program's type information in the kernel. +// +// Returns ErrNotSupported if the kernel has no BTF support, or if there is no +// BTF associated with the program. +func (p *Program) Handle() (*btf.Handle, error) { + info, err := p.Info() + if err != nil { + return nil, err + } + + id, ok := info.BTFID() + if !ok { + return nil, fmt.Errorf("program %s: retrieve BTF ID: %w", p, ErrNotSupported) + } + + return btf.NewHandleFromID(id) +} + +// FD gets the file descriptor of the Program. +// +// It is invalid to call this function after Close has been called. +func (p *Program) FD() int { + return p.fd.Int() +} + +// Clone creates a duplicate of the Program. +// +// Closing the duplicate does not affect the original, and vice versa. +// +// Cloning a nil Program returns nil. +func (p *Program) Clone() (*Program, error) { + if p == nil { + return nil, nil + } + + dup, err := p.fd.Dup() + if err != nil { + return nil, fmt.Errorf("can't clone program: %w", err) + } + + return &Program{p.VerifierLog, dup, p.name, "", p.typ}, nil +} + +// Pin persists the Program on the BPF virtual file system past the lifetime of +// the process that created it +// +// Calling Pin on a previously pinned program will overwrite the path, except when +// the new path already exists. Re-pinning across filesystems is not supported. +// +// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs +func (p *Program) Pin(fileName string) error { + if err := internal.Pin(p.pinnedPath, fileName, p.fd); err != nil { + return err + } + p.pinnedPath = fileName + return nil +} + +// Unpin removes the persisted state for the Program from the BPF virtual filesystem. +// +// Failed calls to Unpin will not alter the state returned by IsPinned. +// +// Unpinning an unpinned Program returns nil. +func (p *Program) Unpin() error { + if err := internal.Unpin(p.pinnedPath); err != nil { + return err + } + p.pinnedPath = "" + return nil +} + +// IsPinned returns true if the Program has a non-empty pinned path. +func (p *Program) IsPinned() bool { + return p.pinnedPath != "" +} + +// Close the Program's underlying file descriptor, which could unload +// the program from the kernel if it is not pinned or attached to a +// kernel hook. +func (p *Program) Close() error { + if p == nil { + return nil + } + + return p.fd.Close() +} + +// Various options for Run'ing a Program +type RunOptions struct { + // Program's data input. Required field. + Data []byte + // Program's data after Program has run. Caller must allocate. Optional field. + DataOut []byte + // Program's context input. Optional field. + Context interface{} + // Program's context after Program has run. Must be a pointer or slice. Optional field. + ContextOut interface{} + // Number of times to run Program. Optional field. Defaults to 1. + Repeat uint32 + // Optional flags. + Flags uint32 + // CPU to run Program on. Optional field. + // Note not all program types support this field. + CPU uint32 + // Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer + // or similar. Typically used during benchmarking. Optional field. + Reset func() +} + +// Test runs the Program in the kernel with the given input and returns the +// value returned by the eBPF program. outLen may be zero. +// +// Note: the kernel expects at least 14 bytes input for an ethernet header for +// XDP and SKB programs. +// +// This function requires at least Linux 4.12. +func (p *Program) Test(in []byte) (uint32, []byte, error) { + // Older kernels ignore the dataSizeOut argument when copying to user space. + // Combined with things like bpf_xdp_adjust_head() we don't really know what the final + // size will be. Hence we allocate an output buffer which we hope will always be large + // enough, and panic if the kernel wrote past the end of the allocation. + // See https://patchwork.ozlabs.org/cover/1006822/ + var out []byte + if len(in) > 0 { + out = make([]byte, len(in)+outputPad) + } + + opts := RunOptions{ + Data: in, + DataOut: out, + Repeat: 1, + } + + ret, _, err := p.testRun(&opts) + if err != nil { + return ret, nil, fmt.Errorf("can't test program: %w", err) + } + return ret, opts.DataOut, nil +} + +// Run runs the Program in kernel with given RunOptions. +// +// Note: the same restrictions from Test apply. +func (p *Program) Run(opts *RunOptions) (uint32, error) { + ret, _, err := p.testRun(opts) + if err != nil { + return ret, fmt.Errorf("can't test program: %w", err) + } + return ret, nil +} + +// Benchmark runs the Program with the given input for a number of times +// and returns the time taken per iteration. +// +// Returns the result of the last execution of the program and the time per +// run or an error. reset is called whenever the benchmark syscall is +// interrupted, and should be set to testing.B.ResetTimer or similar. +// +// Note: profiling a call to this function will skew it's results, see +// https://github.com/cilium/ebpf/issues/24 +// +// This function requires at least Linux 4.12. +func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) { + if uint(repeat) > math.MaxUint32 { + return 0, 0, fmt.Errorf("repeat is too high") + } + + opts := RunOptions{ + Data: in, + Repeat: uint32(repeat), + Reset: reset, + } + + ret, total, err := p.testRun(&opts) + if err != nil { + return ret, total, fmt.Errorf("can't benchmark program: %w", err) + } + return ret, total, nil +} + +var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() error { + prog, err := NewProgram(&ProgramSpec{ + // SocketFilter does not require privileges on newer kernels. + Type: SocketFilter, + Instructions: asm.Instructions{ + asm.LoadImm(asm.R0, 0, asm.DWord), + asm.Return(), + }, + License: "MIT", + }) + if err != nil { + // This may be because we lack sufficient permissions, etc. + return err + } + defer prog.Close() + + // Programs require at least 14 bytes input + in := make([]byte, 14) + attr := sys.ProgRunAttr{ + ProgFd: uint32(prog.FD()), + DataSizeIn: uint32(len(in)), + DataIn: sys.NewSlicePointer(in), + } + + err = sys.ProgRun(&attr) + switch { + case errors.Is(err, unix.EINVAL): + // Check for EINVAL specifically, rather than err != nil since we + // otherwise misdetect due to insufficient permissions. + return internal.ErrNotSupported + + case errors.Is(err, unix.EINTR): + // We know that PROG_TEST_RUN is supported if we get EINTR. + return nil + + case errors.Is(err, unix.ENOTSUPP): + // The first PROG_TEST_RUN patches shipped in 4.12 didn't include + // a test runner for SocketFilter. ENOTSUPP means PROG_TEST_RUN is + // supported, but not for the program type used in the probe. + return nil + } + + return err +}) + +func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) { + if uint(len(opts.Data)) > math.MaxUint32 { + return 0, 0, fmt.Errorf("input is too long") + } + + if err := haveProgTestRun(); err != nil { + return 0, 0, err + } + + var ctxBytes []byte + if opts.Context != nil { + ctx := new(bytes.Buffer) + if err := binary.Write(ctx, internal.NativeEndian, opts.Context); err != nil { + return 0, 0, fmt.Errorf("cannot serialize context: %v", err) + } + ctxBytes = ctx.Bytes() + } + + var ctxOut []byte + if opts.ContextOut != nil { + ctxOut = make([]byte, binary.Size(opts.ContextOut)) + } + + attr := sys.ProgRunAttr{ + ProgFd: p.fd.Uint(), + DataSizeIn: uint32(len(opts.Data)), + DataSizeOut: uint32(len(opts.DataOut)), + DataIn: sys.NewSlicePointer(opts.Data), + DataOut: sys.NewSlicePointer(opts.DataOut), + Repeat: uint32(opts.Repeat), + CtxSizeIn: uint32(len(ctxBytes)), + CtxSizeOut: uint32(len(ctxOut)), + CtxIn: sys.NewSlicePointer(ctxBytes), + CtxOut: sys.NewSlicePointer(ctxOut), + Flags: opts.Flags, + Cpu: opts.CPU, + } + + for { + err := sys.ProgRun(&attr) + if err == nil { + break + } + + if errors.Is(err, unix.EINTR) { + if opts.Reset != nil { + opts.Reset() + } + continue + } + + if errors.Is(err, unix.ENOTSUPP) { + return 0, 0, fmt.Errorf("kernel doesn't support testing program type %s: %w", p.Type(), ErrNotSupported) + } + + return 0, 0, fmt.Errorf("can't run test: %w", err) + } + + if opts.DataOut != nil { + if int(attr.DataSizeOut) > cap(opts.DataOut) { + // Houston, we have a problem. The program created more data than we allocated, + // and the kernel wrote past the end of our buffer. + panic("kernel wrote past end of output buffer") + } + opts.DataOut = opts.DataOut[:int(attr.DataSizeOut)] + } + + if len(ctxOut) != 0 { + b := bytes.NewReader(ctxOut) + if err := binary.Read(b, internal.NativeEndian, opts.ContextOut); err != nil { + return 0, 0, fmt.Errorf("failed to decode ContextOut: %v", err) + } + } + + total := time.Duration(attr.Duration) * time.Nanosecond + return attr.Retval, total, nil +} + +func unmarshalProgram(buf []byte) (*Program, error) { + if len(buf) != 4 { + return nil, errors.New("program id requires 4 byte value") + } + + // Looking up an entry in a nested map or prog array returns an id, + // not an fd. + id := internal.NativeEndian.Uint32(buf) + return NewProgramFromID(ProgramID(id)) +} + +func marshalProgram(p *Program, length int) ([]byte, error) { + if length != 4 { + return nil, fmt.Errorf("can't marshal program to %d bytes", length) + } + + buf := make([]byte, 4) + internal.NativeEndian.PutUint32(buf, p.fd.Uint()) + return buf, nil +} + +// LoadPinnedProgram loads a Program from a BPF file. +// +// Requires at least Linux 4.11. +func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error) { + fd, err := sys.ObjGet(&sys.ObjGetAttr{ + Pathname: sys.NewStringPointer(fileName), + FileFlags: opts.Marshal(), + }) + if err != nil { + return nil, err + } + + info, err := newProgramInfoFromFd(fd) + if err != nil { + _ = fd.Close() + return nil, fmt.Errorf("info for %s: %w", fileName, err) + } + + return &Program{"", fd, filepath.Base(fileName), fileName, info.Type}, nil +} + +// SanitizeName replaces all invalid characters in name with replacement. +// Passing a negative value for replacement will delete characters instead +// of replacing them. Use this to automatically generate valid names for maps +// and programs at runtime. +// +// The set of allowed characters depends on the running kernel version. +// Dots are only allowed as of kernel 5.2. +func SanitizeName(name string, replacement rune) string { + return strings.Map(func(char rune) rune { + if invalidBPFObjNameChar(char) { + return replacement + } + return char + }, name) +} + +// ProgramGetNextID returns the ID of the next eBPF program. +// +// Returns ErrNotExist, if there is no next eBPF program. +func ProgramGetNextID(startID ProgramID) (ProgramID, error) { + attr := &sys.ProgGetNextIdAttr{Id: uint32(startID)} + return ProgramID(attr.NextId), sys.ProgGetNextId(attr) +} + +// BindMap binds map to the program and is only released once program is released. +// +// This may be used in cases where metadata should be associated with the program +// which otherwise does not contain any references to the map. +func (p *Program) BindMap(m *Map) error { + attr := &sys.ProgBindMapAttr{ + ProgFd: uint32(p.FD()), + MapFd: uint32(m.FD()), + } + + return sys.ProgBindMap(attr) +} + +var errUnrecognizedAttachType = errors.New("unrecognized attach type") + +// find an attach target type in the kernel. +// +// spec may be nil and defaults to the canonical kernel BTF. name together with +// progType and attachType determine which type we need to attach to. +// +// Returns errUnrecognizedAttachType. +func findTargetInKernel(spec *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) { + type match struct { + p ProgramType + a AttachType + } + + var ( + typeName, featureName string + isBTFTypeFunc = true + ) + + switch (match{progType, attachType}) { + case match{LSM, AttachLSMMac}: + typeName = "bpf_lsm_" + name + featureName = name + " LSM hook" + case match{Tracing, AttachTraceIter}: + typeName = "bpf_iter_" + name + featureName = name + " iterator" + case match{Tracing, AttachTraceFEntry}: + typeName = name + featureName = fmt.Sprintf("fentry %s", name) + case match{Tracing, AttachTraceFExit}: + typeName = name + featureName = fmt.Sprintf("fexit %s", name) + case match{Tracing, AttachModifyReturn}: + typeName = name + featureName = fmt.Sprintf("fmod_ret %s", name) + case match{Tracing, AttachTraceRawTp}: + typeName = fmt.Sprintf("btf_trace_%s", name) + featureName = fmt.Sprintf("raw_tp %s", name) + isBTFTypeFunc = false + default: + return 0, errUnrecognizedAttachType + } + + spec, err := maybeLoadKernelBTF(spec) + if err != nil { + return 0, fmt.Errorf("load kernel spec: %w", err) + } + + var target btf.Type + if isBTFTypeFunc { + var targetFunc *btf.Func + err = spec.TypeByName(typeName, &targetFunc) + target = targetFunc + } else { + var targetTypedef *btf.Typedef + err = spec.TypeByName(typeName, &targetTypedef) + target = targetTypedef + } + + if err != nil { + if errors.Is(err, btf.ErrNotFound) { + return 0, &internal.UnsupportedFeatureError{ + Name: featureName, + } + } + return 0, fmt.Errorf("find target for %s: %w", featureName, err) + } + + return spec.TypeID(target) +} + +// find an attach target type in a program. +// +// Returns errUnrecognizedAttachType. +func findTargetInProgram(prog *Program, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) { + type match struct { + p ProgramType + a AttachType + } + + var typeName string + switch (match{progType, attachType}) { + case match{Extension, AttachNone}: + typeName = name + default: + return 0, errUnrecognizedAttachType + } + + btfHandle, err := prog.Handle() + if err != nil { + return 0, fmt.Errorf("load target BTF: %w", err) + } + defer btfHandle.Close() + + spec, err := btfHandle.Spec(nil) + if err != nil { + return 0, err + } + + var targetFunc *btf.Func + err = spec.TypeByName(typeName, &targetFunc) + if err != nil { + return 0, fmt.Errorf("find target %s: %w", typeName, err) + } + + return spec.TypeID(targetFunc) +} diff --git a/vendor/github.com/cilium/ebpf/run-tests.sh b/vendor/github.com/cilium/ebpf/run-tests.sh new file mode 100644 index 000000000..c21cca9e5 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/run-tests.sh @@ -0,0 +1,145 @@ +#!/usr/bin/env bash +# Test the current package under a different kernel. +# Requires virtme and qemu to be installed. +# Examples: +# Run all tests on a 5.4 kernel +# $ ./run-tests.sh 5.4 +# Run a subset of tests: +# $ ./run-tests.sh 5.4 ./link + +set -euo pipefail + +script="$(realpath "$0")" +readonly script + +# This script is a bit like a Matryoshka doll since it keeps re-executing itself +# in various different contexts: +# +# 1. invoked by the user like run-tests.sh 5.4 +# 2. invoked by go test like run-tests.sh --exec-vm +# 3. invoked by init in the vm like run-tests.sh --exec-test +# +# This allows us to use all available CPU on the host machine to compile our +# code, and then only use the VM to execute the test. This is because the VM +# is usually slower at compiling than the host. +if [[ "${1:-}" = "--exec-vm" ]]; then + shift + + input="$1" + shift + + # Use sudo if /dev/kvm isn't accessible by the current user. + sudo="" + if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then + sudo="sudo" + fi + readonly sudo + + testdir="$(dirname "$1")" + output="$(mktemp -d)" + printf -v cmd "%q " "$@" + + if [[ "$(stat -c '%t:%T' -L /proc/$$/fd/0)" == "1:3" ]]; then + # stdin is /dev/null, which doesn't play well with qemu. Use a fifo as a + # blocking substitute. + mkfifo "${output}/fake-stdin" + # Open for reading and writing to avoid blocking. + exec 0<> "${output}/fake-stdin" + rm "${output}/fake-stdin" + fi + + for ((i = 0; i < 3; i++)); do + if ! $sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \ + --rwdir="${testdir}=${testdir}" \ + --rodir=/run/input="${input}" \ + --rwdir=/run/output="${output}" \ + --script-sh "PATH=\"$PATH\" CI_MAX_KERNEL_VERSION="${CI_MAX_KERNEL_VERSION:-}" \"$script\" --exec-test $cmd" \ + --kopt possible_cpus=2; then # need at least two CPUs for some tests + exit 23 + fi + + if [[ -e "${output}/status" ]]; then + break + fi + + if [[ -v CI ]]; then + echo "Retrying test run due to qemu crash" + continue + fi + + exit 42 + done + + rc=$(<"${output}/status") + $sudo rm -r "$output" + exit $rc +elif [[ "${1:-}" = "--exec-test" ]]; then + shift + + mount -t bpf bpf /sys/fs/bpf + mount -t tracefs tracefs /sys/kernel/debug/tracing + + if [[ -d "/run/input/bpf" ]]; then + export KERNEL_SELFTESTS="/run/input/bpf" + fi + + if [[ -f "/run/input/bpf/bpf_testmod/bpf_testmod.ko" ]]; then + insmod "/run/input/bpf/bpf_testmod/bpf_testmod.ko" + fi + + dmesg --clear + rc=0 + "$@" || rc=$? + dmesg + echo $rc > "/run/output/status" + exit $rc # this return code is "swallowed" by qemu +fi + +readonly kernel_version="${1:-}" +if [[ -z "${kernel_version}" ]]; then + echo "Expecting kernel version as first argument" + exit 1 +fi +shift + +readonly kernel="linux-${kernel_version}.bz" +readonly selftests="linux-${kernel_version}-selftests-bpf.tgz" +readonly input="$(mktemp -d)" +readonly tmp_dir="${TMPDIR:-/tmp}" +readonly branch="${BRANCH:-master}" + +fetch() { + echo Fetching "${1}" + pushd "${tmp_dir}" > /dev/null + curl -s -L -O --fail --etag-compare "${1}.etag" --etag-save "${1}.etag" "https://github.com/cilium/ci-kernels/raw/${branch}/${1}" + local ret=$? + popd > /dev/null + return $ret +} + +fetch "${kernel}" +cp "${tmp_dir}/${kernel}" "${input}/bzImage" + +if fetch "${selftests}"; then + echo "Decompressing selftests" + mkdir "${input}/bpf" + tar --strip-components=4 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf" +else + echo "No selftests found, disabling" +fi + +args=(-short -coverpkg=./... -coverprofile=coverage.out -count 1 ./...) +if (( $# > 0 )); then + args=("$@") +fi + +export GOFLAGS=-mod=readonly +export CGO_ENABLED=0 +# LINUX_VERSION_CODE test compares this to discovered value. +export KERNEL_VERSION="${kernel_version}" + +echo Testing on "${kernel_version}" +go test -exec "$script --exec-vm $input" "${args[@]}" +echo "Test successful on ${kernel_version}" + +rm -r "${input}" diff --git a/vendor/github.com/cilium/ebpf/syscalls.go b/vendor/github.com/cilium/ebpf/syscalls.go new file mode 100644 index 000000000..e5c270a55 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/syscalls.go @@ -0,0 +1,264 @@ +package ebpf + +import ( + "bytes" + "errors" + "fmt" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// invalidBPFObjNameChar returns true if char may not appear in +// a BPF object name. +func invalidBPFObjNameChar(char rune) bool { + dotAllowed := objNameAllowsDot() == nil + + switch { + case char >= 'A' && char <= 'Z': + return false + case char >= 'a' && char <= 'z': + return false + case char >= '0' && char <= '9': + return false + case dotAllowed && char == '.': + return false + case char == '_': + return false + default: + return true + } +} + +func progLoad(insns asm.Instructions, typ ProgramType, license string) (*sys.FD, error) { + buf := bytes.NewBuffer(make([]byte, 0, insns.Size())) + if err := insns.Marshal(buf, internal.NativeEndian); err != nil { + return nil, err + } + bytecode := buf.Bytes() + + return sys.ProgLoad(&sys.ProgLoadAttr{ + ProgType: sys.ProgType(typ), + License: sys.NewStringPointer(license), + Insns: sys.NewSlicePointer(bytecode), + InsnCnt: uint32(len(bytecode) / asm.InstructionSize), + }) +} + +var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error { + _, err := sys.MapCreate(&sys.MapCreateAttr{ + MapType: sys.MapType(ArrayOfMaps), + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + // Invalid file descriptor. + InnerMapFd: ^uint32(0), + }) + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + if errors.Is(err, unix.EBADF) { + return nil + } + return err +}) + +var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() error { + // This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since + // BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check. + m, err := sys.MapCreate(&sys.MapCreateAttr{ + MapType: sys.MapType(Array), + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + MapFlags: unix.BPF_F_RDONLY_PROG, + }) + if err != nil { + return internal.ErrNotSupported + } + _ = m.Close() + return nil +}) + +var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error { + // This checks BPF_F_MMAPABLE, which appeared in 5.5 for array maps. + m, err := sys.MapCreate(&sys.MapCreateAttr{ + MapType: sys.MapType(Array), + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + MapFlags: unix.BPF_F_MMAPABLE, + }) + if err != nil { + return internal.ErrNotSupported + } + _ = m.Close() + return nil +}) + +var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error { + // This checks BPF_F_INNER_MAP, which appeared in 5.10. + m, err := sys.MapCreate(&sys.MapCreateAttr{ + MapType: sys.MapType(Array), + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + MapFlags: unix.BPF_F_INNER_MAP, + }) + if err != nil { + return internal.ErrNotSupported + } + _ = m.Close() + return nil +}) + +var haveNoPreallocMaps = internal.FeatureTest("prealloc maps", "4.6", func() error { + // This checks BPF_F_NO_PREALLOC, which appeared in 4.6. + m, err := sys.MapCreate(&sys.MapCreateAttr{ + MapType: sys.MapType(Hash), + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + MapFlags: unix.BPF_F_NO_PREALLOC, + }) + if err != nil { + return internal.ErrNotSupported + } + _ = m.Close() + return nil +}) + +func wrapMapError(err error) error { + if err == nil { + return nil + } + + if errors.Is(err, unix.ENOENT) { + return sys.Error(ErrKeyNotExist, unix.ENOENT) + } + + if errors.Is(err, unix.EEXIST) { + return sys.Error(ErrKeyExist, unix.EEXIST) + } + + if errors.Is(err, unix.ENOTSUPP) { + return sys.Error(ErrNotSupported, unix.ENOTSUPP) + } + + if errors.Is(err, unix.E2BIG) { + return fmt.Errorf("key too big for map: %w", err) + } + + return err +} + +var haveObjName = internal.FeatureTest("object names", "4.15", func() error { + attr := sys.MapCreateAttr{ + MapType: sys.MapType(Array), + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + MapName: sys.NewObjName("feature_test"), + } + + fd, err := sys.MapCreate(&attr) + if err != nil { + return internal.ErrNotSupported + } + + _ = fd.Close() + return nil +}) + +var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() error { + if err := haveObjName(); err != nil { + return err + } + + attr := sys.MapCreateAttr{ + MapType: sys.MapType(Array), + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + MapName: sys.NewObjName(".test"), + } + + fd, err := sys.MapCreate(&attr) + if err != nil { + return internal.ErrNotSupported + } + + _ = fd.Close() + return nil +}) + +var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error { + var maxEntries uint32 = 2 + attr := sys.MapCreateAttr{ + MapType: sys.MapType(Hash), + KeySize: 4, + ValueSize: 4, + MaxEntries: maxEntries, + } + + fd, err := sys.MapCreate(&attr) + if err != nil { + return internal.ErrNotSupported + } + defer fd.Close() + + keys := []uint32{1, 2} + values := []uint32{3, 4} + kp, _ := marshalPtr(keys, 8) + vp, _ := marshalPtr(values, 8) + + err = sys.MapUpdateBatch(&sys.MapUpdateBatchAttr{ + MapFd: fd.Uint(), + Keys: kp, + Values: vp, + Count: maxEntries, + }) + if err != nil { + return internal.ErrNotSupported + } + return nil +}) + +var haveProbeReadKernel = internal.FeatureTest("bpf_probe_read_kernel", "5.5", func() error { + insns := asm.Instructions{ + asm.Mov.Reg(asm.R1, asm.R10), + asm.Add.Imm(asm.R1, -8), + asm.Mov.Imm(asm.R2, 8), + asm.Mov.Imm(asm.R3, 0), + asm.FnProbeReadKernel.Call(), + asm.Return(), + } + + fd, err := progLoad(insns, Kprobe, "GPL") + if err != nil { + return internal.ErrNotSupported + } + _ = fd.Close() + return nil +}) + +var haveBPFToBPFCalls = internal.FeatureTest("bpf2bpf calls", "4.16", func() error { + insns := asm.Instructions{ + asm.Call.Label("prog2").WithSymbol("prog1"), + asm.Return(), + asm.Mov.Imm(asm.R0, 0).WithSymbol("prog2"), + asm.Return(), + } + + fd, err := progLoad(insns, SocketFilter, "MIT") + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + if err != nil { + return err + } + _ = fd.Close() + return nil +}) diff --git a/vendor/github.com/cilium/ebpf/types.go b/vendor/github.com/cilium/ebpf/types.go new file mode 100644 index 000000000..a27b44247 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/types.go @@ -0,0 +1,284 @@ +package ebpf + +import ( + "github.com/cilium/ebpf/internal/unix" +) + +//go:generate stringer -output types_string.go -type=MapType,ProgramType,PinType + +// MapType indicates the type map structure +// that will be initialized in the kernel. +type MapType uint32 + +// Max returns the latest supported MapType. +func (MapType) Max() MapType { + return maxMapType - 1 +} + +// All the various map types that can be created +const ( + UnspecifiedMap MapType = iota + // Hash is a hash map + Hash + // Array is an array map + Array + // ProgramArray - A program array map is a special kind of array map whose map + // values contain only file descriptors referring to other eBPF + // programs. Thus, both the key_size and value_size must be + // exactly four bytes. This map is used in conjunction with the + // TailCall helper. + ProgramArray + // PerfEventArray - A perf event array is used in conjunction with PerfEventRead + // and PerfEventOutput calls, to read the raw bpf_perf_data from the registers. + PerfEventArray + // PerCPUHash - This data structure is useful for people who have high performance + // network needs and can reconcile adds at the end of some cycle, so that + // hashes can be lock free without the use of XAdd, which can be costly. + PerCPUHash + // PerCPUArray - This data structure is useful for people who have high performance + // network needs and can reconcile adds at the end of some cycle, so that + // hashes can be lock free without the use of XAdd, which can be costly. + // Each CPU gets a copy of this hash, the contents of all of which can be reconciled + // later. + PerCPUArray + // StackTrace - This holds whole user and kernel stack traces, it can be retrieved with + // GetStackID + StackTrace + // CGroupArray - This is a very niche structure used to help SKBInCGroup determine + // if an skb is from a socket belonging to a specific cgroup + CGroupArray + // LRUHash - This allows you to create a small hash structure that will purge the + // least recently used items rather than thow an error when you run out of memory + LRUHash + // LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs, + // it has more to do with including the CPU id with the LRU calculation so that if a + // particular CPU is using a value over-and-over again, then it will be saved, but if + // a value is being retrieved a lot but sparsely across CPUs it is not as important, basically + // giving weight to CPU locality over overall usage. + LRUCPUHash + // LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful, + // for storing things like IP addresses which can be bit masked allowing for keys of differing + // values to refer to the same reference based on their masks. See wikipedia for more details. + LPMTrie + // ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps + // itself. + ArrayOfMaps + // HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps + // itself. + HashOfMaps + // DevMap - Specialized map to store references to network devices. + DevMap + // SockMap - Specialized map to store references to sockets. + SockMap + // CPUMap - Specialized map to store references to CPUs. + CPUMap + // XSKMap - Specialized map for XDP programs to store references to open sockets. + XSKMap + // SockHash - Specialized hash to store references to sockets. + SockHash + // CGroupStorage - Special map for CGroups. + CGroupStorage + // ReusePortSockArray - Specialized map to store references to sockets that can be reused. + ReusePortSockArray + // PerCPUCGroupStorage - Special per CPU map for CGroups. + PerCPUCGroupStorage + // Queue - FIFO storage for BPF programs. + Queue + // Stack - LIFO storage for BPF programs. + Stack + // SkStorage - Specialized map for local storage at SK for BPF programs. + SkStorage + // DevMapHash - Hash-based indexing scheme for references to network devices. + DevMapHash + // StructOpsMap - This map holds a kernel struct with its function pointer implemented in a BPF + // program. + StructOpsMap + // RingBuf - Similar to PerfEventArray, but shared across all CPUs. + RingBuf + // InodeStorage - Specialized local storage map for inodes. + InodeStorage + // TaskStorage - Specialized local storage map for task_struct. + TaskStorage + // maxMapType - Bound enum of MapTypes, has to be last in enum. + maxMapType +) + +// hasPerCPUValue returns true if the Map stores a value per CPU. +func (mt MapType) hasPerCPUValue() bool { + return mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash || mt == PerCPUCGroupStorage +} + +// canStoreMap returns true if the map type accepts a map fd +// for update and returns a map id for lookup. +func (mt MapType) canStoreMap() bool { + return mt == ArrayOfMaps || mt == HashOfMaps +} + +// canStoreProgram returns true if the map type accepts a program fd +// for update and returns a program id for lookup. +func (mt MapType) canStoreProgram() bool { + return mt == ProgramArray +} + +// hasBTF returns true if the map type supports BTF key/value metadata. +func (mt MapType) hasBTF() bool { + switch mt { + case PerfEventArray, CGroupArray, StackTrace, ArrayOfMaps, HashOfMaps, DevMap, + DevMapHash, CPUMap, XSKMap, SockMap, SockHash, Queue, Stack, RingBuf: + return false + default: + return true + } +} + +// ProgramType of the eBPF program +type ProgramType uint32 + +// Max return the latest supported ProgramType. +func (ProgramType) Max() ProgramType { + return maxProgramType - 1 +} + +// eBPF program types +const ( + UnspecifiedProgram ProgramType = iota + SocketFilter + Kprobe + SchedCLS + SchedACT + TracePoint + XDP + PerfEvent + CGroupSKB + CGroupSock + LWTIn + LWTOut + LWTXmit + SockOps + SkSKB + CGroupDevice + SkMsg + RawTracepoint + CGroupSockAddr + LWTSeg6Local + LircMode2 + SkReuseport + FlowDissector + CGroupSysctl + RawTracepointWritable + CGroupSockopt + Tracing + StructOps + Extension + LSM + SkLookup + Syscall + maxProgramType +) + +// AttachType of the eBPF program, needed to differentiate allowed context accesses in +// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required. +// Will cause invalid argument (EINVAL) at program load time if set incorrectly. +type AttachType uint32 + +//go:generate stringer -type AttachType -trimprefix Attach + +// AttachNone is an alias for AttachCGroupInetIngress for readability reasons. +const AttachNone AttachType = 0 + +const ( + AttachCGroupInetIngress AttachType = iota + AttachCGroupInetEgress + AttachCGroupInetSockCreate + AttachCGroupSockOps + AttachSkSKBStreamParser + AttachSkSKBStreamVerdict + AttachCGroupDevice + AttachSkMsgVerdict + AttachCGroupInet4Bind + AttachCGroupInet6Bind + AttachCGroupInet4Connect + AttachCGroupInet6Connect + AttachCGroupInet4PostBind + AttachCGroupInet6PostBind + AttachCGroupUDP4Sendmsg + AttachCGroupUDP6Sendmsg + AttachLircMode2 + AttachFlowDissector + AttachCGroupSysctl + AttachCGroupUDP4Recvmsg + AttachCGroupUDP6Recvmsg + AttachCGroupGetsockopt + AttachCGroupSetsockopt + AttachTraceRawTp + AttachTraceFEntry + AttachTraceFExit + AttachModifyReturn + AttachLSMMac + AttachTraceIter + AttachCgroupInet4GetPeername + AttachCgroupInet6GetPeername + AttachCgroupInet4GetSockname + AttachCgroupInet6GetSockname + AttachXDPDevMap + AttachCgroupInetSockRelease + AttachXDPCPUMap + AttachSkLookup + AttachXDP + AttachSkSKBVerdict + AttachSkReuseportSelect + AttachSkReuseportSelectOrMigrate + AttachPerfEvent +) + +// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command +type AttachFlags uint32 + +// PinType determines whether a map is pinned into a BPFFS. +type PinType int + +// Valid pin types. +// +// Mirrors enum libbpf_pin_type. +const ( + PinNone PinType = iota + // Pin an object by using its name as the filename. + PinByName +) + +// LoadPinOptions control how a pinned object is loaded. +type LoadPinOptions struct { + // Request a read-only or write-only object. The default is a read-write + // object. Only one of the flags may be set. + ReadOnly bool + WriteOnly bool + + // Raw flags for the syscall. Other fields of this struct take precedence. + Flags uint32 +} + +// Marshal returns a value suitable for BPF_OBJ_GET syscall file_flags parameter. +func (lpo *LoadPinOptions) Marshal() uint32 { + if lpo == nil { + return 0 + } + + flags := lpo.Flags + if lpo.ReadOnly { + flags |= unix.BPF_F_RDONLY + } + if lpo.WriteOnly { + flags |= unix.BPF_F_WRONLY + } + return flags +} + +// BatchOptions batch map operations options +// +// Mirrors libbpf struct bpf_map_batch_opts +// Currently BPF_F_FLAG is the only supported +// flag (for ElemFlags). +type BatchOptions struct { + ElemFlags uint64 + Flags uint64 +} diff --git a/vendor/github.com/cilium/ebpf/types_string.go b/vendor/github.com/cilium/ebpf/types_string.go new file mode 100644 index 000000000..e80b948b0 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/types_string.go @@ -0,0 +1,120 @@ +// Code generated by "stringer -output types_string.go -type=MapType,ProgramType,PinType"; DO NOT EDIT. + +package ebpf + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[UnspecifiedMap-0] + _ = x[Hash-1] + _ = x[Array-2] + _ = x[ProgramArray-3] + _ = x[PerfEventArray-4] + _ = x[PerCPUHash-5] + _ = x[PerCPUArray-6] + _ = x[StackTrace-7] + _ = x[CGroupArray-8] + _ = x[LRUHash-9] + _ = x[LRUCPUHash-10] + _ = x[LPMTrie-11] + _ = x[ArrayOfMaps-12] + _ = x[HashOfMaps-13] + _ = x[DevMap-14] + _ = x[SockMap-15] + _ = x[CPUMap-16] + _ = x[XSKMap-17] + _ = x[SockHash-18] + _ = x[CGroupStorage-19] + _ = x[ReusePortSockArray-20] + _ = x[PerCPUCGroupStorage-21] + _ = x[Queue-22] + _ = x[Stack-23] + _ = x[SkStorage-24] + _ = x[DevMapHash-25] + _ = x[StructOpsMap-26] + _ = x[RingBuf-27] + _ = x[InodeStorage-28] + _ = x[TaskStorage-29] + _ = x[maxMapType-30] +} + +const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOpsMapRingBufInodeStorageTaskStoragemaxMapType" + +var _MapType_index = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 260, 267, 279, 290, 300} + +func (i MapType) String() string { + if i >= MapType(len(_MapType_index)-1) { + return "MapType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _MapType_name[_MapType_index[i]:_MapType_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[UnspecifiedProgram-0] + _ = x[SocketFilter-1] + _ = x[Kprobe-2] + _ = x[SchedCLS-3] + _ = x[SchedACT-4] + _ = x[TracePoint-5] + _ = x[XDP-6] + _ = x[PerfEvent-7] + _ = x[CGroupSKB-8] + _ = x[CGroupSock-9] + _ = x[LWTIn-10] + _ = x[LWTOut-11] + _ = x[LWTXmit-12] + _ = x[SockOps-13] + _ = x[SkSKB-14] + _ = x[CGroupDevice-15] + _ = x[SkMsg-16] + _ = x[RawTracepoint-17] + _ = x[CGroupSockAddr-18] + _ = x[LWTSeg6Local-19] + _ = x[LircMode2-20] + _ = x[SkReuseport-21] + _ = x[FlowDissector-22] + _ = x[CGroupSysctl-23] + _ = x[RawTracepointWritable-24] + _ = x[CGroupSockopt-25] + _ = x[Tracing-26] + _ = x[StructOps-27] + _ = x[Extension-28] + _ = x[LSM-29] + _ = x[SkLookup-30] + _ = x[Syscall-31] + _ = x[maxProgramType-32] +} + +const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscallmaxProgramType" + +var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301, 315} + +func (i ProgramType) String() string { + if i >= ProgramType(len(_ProgramType_index)-1) { + return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[PinNone-0] + _ = x[PinByName-1] +} + +const _PinType_name = "PinNonePinByName" + +var _PinType_index = [...]uint8{0, 7, 16} + +func (i PinType) String() string { + if i < 0 || i >= PinType(len(_PinType_index)-1) { + return "PinType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _PinType_name[_PinType_index[i]:_PinType_index[i+1]] +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go index b9ba889b7..ba2b2266c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -1,24 +1,9 @@ package cgroups import ( - "errors" - "github.com/opencontainers/runc/libcontainer/configs" ) -var ( - // ErrDevicesUnsupported is an error returned when a cgroup manager - // is not configured to set device rules. - ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules") - - // DevicesSetV1 and DevicesSetV2 are functions to set devices for - // cgroup v1 and v2, respectively. Unless libcontainer/cgroups/devices - // package is imported, it is set to nil, so cgroup managers can't - // manage devices. - DevicesSetV1 func(path string, r *configs.Resources) error - DevicesSetV2 func(path string, r *configs.Resources) error -) - type Manager interface { // Apply creates a cgroup, if not yet created, and adds a process // with the specified pid into that cgroup. A special value of -1 diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go new file mode 100644 index 000000000..6c61ee4c0 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2020 Aleksa Sarai + * Copyright (C) 2020 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package devices + +import ( + "bufio" + "fmt" + "io" + "sort" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/devices" +) + +// deviceMeta is a Rule without the Allow or Permissions fields, and no +// wildcard-type support. It's effectively the "match" portion of a metadata +// rule, for the purposes of our emulation. +type deviceMeta struct { + node devices.Type + major int64 + minor int64 +} + +// deviceRule is effectively the tuple (deviceMeta, Permissions). +type deviceRule struct { + meta deviceMeta + perms devices.Permissions +} + +// deviceRules is a mapping of device metadata rules to the associated +// permissions in the ruleset. +type deviceRules map[deviceMeta]devices.Permissions + +func (r deviceRules) orderedEntries() []deviceRule { + var rules []deviceRule + for meta, perms := range r { + rules = append(rules, deviceRule{meta: meta, perms: perms}) + } + sort.Slice(rules, func(i, j int) bool { + // Sort by (major, minor, type). + a, b := rules[i].meta, rules[j].meta + return a.major < b.major || + (a.major == b.major && a.minor < b.minor) || + (a.major == b.major && a.minor == b.minor && a.node < b.node) + }) + return rules +} + +type Emulator struct { + defaultAllow bool + rules deviceRules +} + +func (e *Emulator) IsBlacklist() bool { + return e.defaultAllow +} + +func (e *Emulator) IsAllowAll() bool { + return e.IsBlacklist() && len(e.rules) == 0 +} + +func parseLine(line string) (*deviceRule, error) { + // Input: node major:minor perms. + fields := strings.FieldsFunc(line, func(r rune) bool { + return r == ' ' || r == ':' + }) + if len(fields) != 4 { + return nil, fmt.Errorf("malformed devices.list rule %s", line) + } + + var ( + rule deviceRule + node = fields[0] + major = fields[1] + minor = fields[2] + perms = fields[3] + ) + + // Parse the node type. + switch node { + case "a": + // Super-special case -- "a" always means every device with every + // access mode. In fact, for devices.list this actually indicates that + // the cgroup is in black-list mode. + // TODO: Double-check that the entire file is "a *:* rwm". + return nil, nil + case "b": + rule.meta.node = devices.BlockDevice + case "c": + rule.meta.node = devices.CharDevice + default: + return nil, fmt.Errorf("unknown device type %q", node) + } + + // Parse the major number. + if major == "*" { + rule.meta.major = devices.Wildcard + } else { + val, err := strconv.ParseUint(major, 10, 32) + if err != nil { + return nil, fmt.Errorf("invalid major number: %w", err) + } + rule.meta.major = int64(val) + } + + // Parse the minor number. + if minor == "*" { + rule.meta.minor = devices.Wildcard + } else { + val, err := strconv.ParseUint(minor, 10, 32) + if err != nil { + return nil, fmt.Errorf("invalid minor number: %w", err) + } + rule.meta.minor = int64(val) + } + + // Parse the access permissions. + rule.perms = devices.Permissions(perms) + if !rule.perms.IsValid() || rule.perms.IsEmpty() { + return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms) + } + return &rule, nil +} + +func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam + if e.rules == nil { + e.rules = make(map[deviceMeta]devices.Permissions) + } + + // Merge with any pre-existing permissions. + oldPerms := e.rules[rule.meta] + newPerms := rule.perms.Union(oldPerms) + e.rules[rule.meta] = newPerms + return nil +} + +func (e *Emulator) rmRule(rule deviceRule) error { + // Give an error if any of the permissions requested to be removed are + // present in a partially-matching wildcard rule, because such rules will + // be ignored by cgroupv1. + // + // This is a diversion from cgroupv1, but is necessary to avoid leading + // users into a false sense of security. cgroupv1 will silently(!) ignore + // requests to remove partial exceptions, but we really shouldn't do that. + // + // It may seem like we could just "split" wildcard rules which hit this + // issue, but unfortunately there are 2^32 possible major and minor + // numbers, which would exhaust kernel memory quickly if we did this. Not + // to mention it'd be really slow (the kernel side is implemented as a + // linked-list of exceptions). + for _, partialMeta := range []deviceMeta{ + {node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor}, + {node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard}, + {node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard}, + } { + // This wildcard rule is equivalent to the requested rule, so skip it. + if rule.meta == partialMeta { + continue + } + // Only give an error if the set of permissions overlap. + partialPerms := e.rules[partialMeta] + if !partialPerms.Intersection(rule.perms).IsEmpty() { + return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms) + } + } + + // Subtract all of the permissions listed from the full match rule. If the + // rule didn't exist, all of this is a no-op. + newPerms := e.rules[rule.meta].Difference(rule.perms) + if newPerms.IsEmpty() { + delete(e.rules, rule.meta) + } else { + e.rules[rule.meta] = newPerms + } + // TODO: The actual cgroup code doesn't care if an exception didn't exist + // during removal, so not erroring out here is /accurate/ but quite + // worrying. Maybe we should do additional validation, but again we + // have to worry about backwards-compatibility. + return nil +} + +func (e *Emulator) allow(rule *deviceRule) error { + // This cgroup is configured as a black-list. Reset the entire emulator, + // and put is into black-list mode. + if rule == nil || rule.meta.node == devices.WildcardDevice { + *e = Emulator{ + defaultAllow: true, + rules: nil, + } + return nil + } + + var err error + if e.defaultAllow { + err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception") + } else { + err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception") + } + return err +} + +func (e *Emulator) deny(rule *deviceRule) error { + // This cgroup is configured as a white-list. Reset the entire emulator, + // and put is into white-list mode. + if rule == nil || rule.meta.node == devices.WildcardDevice { + *e = Emulator{ + defaultAllow: false, + rules: nil, + } + return nil + } + + var err error + if e.defaultAllow { + err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception") + } else { + err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception") + } + return err +} + +func (e *Emulator) Apply(rule devices.Rule) error { + if !rule.Type.CanCgroup() { + return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type) + } + + innerRule := &deviceRule{ + meta: deviceMeta{ + node: rule.Type, + major: rule.Major, + minor: rule.Minor, + }, + perms: rule.Permissions, + } + if innerRule.meta.node == devices.WildcardDevice { + innerRule = nil + } + + if rule.Allow { + return e.allow(innerRule) + } + + return e.deny(innerRule) +} + +// EmulatorFromList takes a reader to a "devices.list"-like source, and returns +// a new Emulator that represents the state of the devices cgroup. Note that +// black-list devices cgroups cannot be fully reconstructed, due to limitations +// in the devices cgroup API. Instead, such cgroups are always treated as +// "allow all" cgroups. +func EmulatorFromList(list io.Reader) (*Emulator, error) { + // Normally cgroups are in black-list mode by default, but the way we + // figure out the current mode is whether or not devices.list has an + // allow-all rule. So we default to a white-list, and the existence of an + // "a *:* rwm" entry will tell us otherwise. + e := &Emulator{ + defaultAllow: false, + } + + // Parse the "devices.list". + s := bufio.NewScanner(list) + for s.Scan() { + line := s.Text() + deviceRule, err := parseLine(line) + if err != nil { + return nil, fmt.Errorf("error parsing line %q: %w", line, err) + } + // "devices.list" is an allow list. Note that this means that in + // black-list mode, we have no idea what rules are in play. As a + // result, we need to be very careful in Transition(). + if err := e.allow(deviceRule); err != nil { + return nil, fmt.Errorf("error adding devices.list rule: %w", err) + } + } + if err := s.Err(); err != nil { + return nil, fmt.Errorf("error reading devices.list lines: %w", err) + } + return e, nil +} + +// Transition calculates what is the minimally-disruptive set of rules need to +// be applied to a devices cgroup in order to transition to the given target. +// This means that any already-existing rules will not be applied, and +// disruptive rules (like denying all device access) will only be applied if +// necessary. +// +// This function is the sole reason for all of Emulator -- to allow us +// to figure out how to update a containers' cgroups without causing spurious +// device errors (if possible). +func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) { + var transitionRules []*devices.Rule + oldRules := source.rules + + // If the default policy doesn't match, we need to include a "disruptive" + // rule (either allow-all or deny-all) in order to switch the cgroup to the + // correct default policy. + // + // However, due to a limitation in "devices.list" we cannot be sure what + // deny rules are in place in a black-list cgroup. Thus if the source is a + // black-list we also have to include a disruptive rule. + if source.IsBlacklist() || source.defaultAllow != target.defaultAllow { + transitionRules = append(transitionRules, &devices.Rule{ + Type: 'a', + Major: -1, + Minor: -1, + Permissions: devices.Permissions("rwm"), + Allow: target.defaultAllow, + }) + // The old rules are only relevant if we aren't starting out with a + // disruptive rule. + oldRules = nil + } + + // NOTE: We traverse through the rules in a sorted order so we always write + // the same set of rules (this is to aid testing). + + // First, we create inverse rules for any old rules not in the new set. + // This includes partial-inverse rules for specific permissions. This is a + // no-op if we added a disruptive rule, since oldRules will be empty. + for _, rule := range oldRules.orderedEntries() { + meta, oldPerms := rule.meta, rule.perms + newPerms := target.rules[meta] + droppedPerms := oldPerms.Difference(newPerms) + if !droppedPerms.IsEmpty() { + transitionRules = append(transitionRules, &devices.Rule{ + Type: meta.node, + Major: meta.major, + Minor: meta.minor, + Permissions: droppedPerms, + Allow: target.defaultAllow, + }) + } + } + + // Add any additional rules which weren't in the old set. We happen to + // filter out rules which are present in both sets, though this isn't + // strictly necessary. + for _, rule := range target.rules.orderedEntries() { + meta, newPerms := rule.meta, rule.perms + oldPerms := oldRules[meta] + gainedPerms := newPerms.Difference(oldPerms) + if !gainedPerms.IsEmpty() { + transitionRules = append(transitionRules, &devices.Rule{ + Type: meta.node, + Major: meta.major, + Minor: meta.minor, + Permissions: gainedPerms, + Allow: !target.defaultAllow, + }) + } + } + return transitionRules, nil +} + +// Rules returns the minimum set of rules necessary to convert a *deny-all* +// cgroup to the emulated filter state (note that this is not the same as a +// default cgroupv1 cgroup -- which is allow-all). This is effectively just a +// wrapper around Transition() with the source emulator being an empty cgroup. +func (e *Emulator) Rules() ([]*devices.Rule, error) { + defaultCgroup := &Emulator{defaultAllow: false} + return defaultCgroup.Transition(e) +} + +func wrapErr(err error, text string) error { + if err == nil { + return nil + } + return fmt.Errorf(text+": %w", err) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go new file mode 100644 index 000000000..4e69b35bc --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go @@ -0,0 +1,208 @@ +// Package devicefilter contains eBPF device filter program +// +// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c +// +// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) +// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 +package devicefilter + +import ( + "errors" + "fmt" + "math" + "strconv" + + "github.com/cilium/ebpf/asm" + devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices" + "github.com/opencontainers/runc/libcontainer/devices" + "golang.org/x/sys/unix" +) + +const ( + // license string format is same as kernel MODULE_LICENSE macro + license = "Apache" +) + +// DeviceFilter returns eBPF device filter program and its license string +func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { + // Generate the minimum ruleset for the device rules we are given. While we + // don't care about minimum transitions in cgroupv2, using the emulator + // gives us a guarantee that the behaviour of devices filtering is the same + // as cgroupv1, including security hardenings to avoid misconfiguration + // (such as punching holes in wildcard rules). + emu := new(devicesemulator.Emulator) + for _, rule := range rules { + if err := emu.Apply(*rule); err != nil { + return nil, "", err + } + } + cleanRules, err := emu.Rules() + if err != nil { + return nil, "", err + } + + p := &program{ + defaultAllow: emu.IsBlacklist(), + } + p.init() + + for idx, rule := range cleanRules { + if rule.Type == devices.WildcardDevice { + // We can safely skip over wildcard entries because there should + // only be one (at most) at the very start to instruct cgroupv1 to + // go into allow-list mode. However we do double-check this here. + if idx != 0 || rule.Allow != emu.IsBlacklist() { + return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) + } + continue + } + if rule.Allow == p.defaultAllow { + // There should be no rules which have an action equal to the + // default action, the emulator removes those. + return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) + } + if err := p.appendRule(rule); err != nil { + return nil, "", err + } + } + return p.finalize(), license, nil +} + +type program struct { + insts asm.Instructions + defaultAllow bool + blockID int +} + +func (p *program) init() { + // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 + /* + u32 access_type + u32 major + u32 minor + */ + // R2 <- type (lower 16 bit of u32 access_type at R1[0]) + p.insts = append(p.insts, + asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), + asm.And.Imm32(asm.R2, 0xFFFF)) + + // R3 <- access (upper 16 bit of u32 access_type at R1[0]) + p.insts = append(p.insts, + asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), + // RSh: bitwise shift right + asm.RSh.Imm32(asm.R3, 16)) + + // R4 <- major (u32 major at R1[4]) + p.insts = append(p.insts, + asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) + + // R5 <- minor (u32 minor at R1[8]) + p.insts = append(p.insts, + asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) +} + +// appendRule rule converts an OCI rule to the relevant eBPF block and adds it +// to the in-progress filter program. In order to operate properly, it must be +// called with a "clean" rule list (generated by devices.Emulator.Rules() -- +// with any "a" rules removed). +func (p *program) appendRule(rule *devices.Rule) error { + if p.blockID < 0 { + return errors.New("the program is finalized") + } + + var bpfType int32 + switch rule.Type { + case devices.CharDevice: + bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) + case devices.BlockDevice: + bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) + default: + // We do not permit 'a', nor any other types we don't know about. + return fmt.Errorf("invalid type %q", string(rule.Type)) + } + if rule.Major > math.MaxUint32 { + return fmt.Errorf("invalid major %d", rule.Major) + } + if rule.Minor > math.MaxUint32 { + return fmt.Errorf("invalid minor %d", rule.Major) + } + hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 + hasMinor := rule.Minor >= 0 + bpfAccess := int32(0) + for _, r := range rule.Permissions { + switch r { + case 'r': + bpfAccess |= unix.BPF_DEVCG_ACC_READ + case 'w': + bpfAccess |= unix.BPF_DEVCG_ACC_WRITE + case 'm': + bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD + default: + return fmt.Errorf("unknown device access %v", r) + } + } + // If the access is rwm, skip the check. + hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) + + var ( + blockSym = "block-" + strconv.Itoa(p.blockID) + nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) + prevBlockLastIdx = len(p.insts) - 1 + ) + p.insts = append(p.insts, + // if (R2 != bpfType) goto next + asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), + ) + if hasAccess { + p.insts = append(p.insts, + // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next + asm.Mov.Reg32(asm.R1, asm.R3), + asm.And.Imm32(asm.R1, bpfAccess), + asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), + ) + } + if hasMajor { + p.insts = append(p.insts, + // if (R4 != major) goto next + asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), + ) + } + if hasMinor { + p.insts = append(p.insts, + // if (R5 != minor) goto next + asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), + ) + } + p.insts = append(p.insts, acceptBlock(rule.Allow)...) + // set blockSym to the first instruction we added in this iteration + p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) + p.blockID++ + return nil +} + +func (p *program) finalize() asm.Instructions { + var v int32 + if p.defaultAllow { + v = 1 + } + blockSym := "block-" + strconv.Itoa(p.blockID) + p.insts = append(p.insts, + // R0 <- v + asm.Mov.Imm32(asm.R0, v).Sym(blockSym), + asm.Return(), + ) + p.blockID = -1 + return p.insts +} + +func acceptBlock(accept bool) asm.Instructions { + var v int32 + if accept { + v = 1 + } + return []asm.Instruction{ + // R0 <- v + asm.Mov.Imm32(asm.R0, v), + asm.Return(), + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go new file mode 100644 index 000000000..35b00aaf0 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go @@ -0,0 +1,253 @@ +package ebpf + +import ( + "errors" + "fmt" + "os" + "runtime" + "sync" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/link" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func nilCloser() error { + return nil +} + +func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) { + type bpfAttrQuery struct { + TargetFd uint32 + AttachType uint32 + QueryType uint32 + AttachFlags uint32 + ProgIds uint64 // __aligned_u64 + ProgCnt uint32 + } + + // Currently you can only have 64 eBPF programs attached to a cgroup. + size := 64 + retries := 0 + for retries < 10 { + progIds := make([]uint32, size) + query := bpfAttrQuery{ + TargetFd: uint32(dirFd), + AttachType: uint32(unix.BPF_CGROUP_DEVICE), + ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))), + ProgCnt: uint32(len(progIds)), + } + + // Fetch the list of program ids. + _, _, errno := unix.Syscall(unix.SYS_BPF, + uintptr(unix.BPF_PROG_QUERY), + uintptr(unsafe.Pointer(&query)), + unsafe.Sizeof(query)) + size = int(query.ProgCnt) + runtime.KeepAlive(query) + if errno != 0 { + // On ENOSPC we get the correct number of programs. + if errno == unix.ENOSPC { + retries++ + continue + } + return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno) + } + + // Convert the ids to program handles. + progIds = progIds[:size] + programs := make([]*ebpf.Program, 0, len(progIds)) + for _, progId := range progIds { + program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId)) + if err != nil { + // We skip over programs that give us -EACCES or -EPERM. This + // is necessary because there may be BPF programs that have + // been attached (such as with --systemd-cgroup) which have an + // LSM label that blocks us from interacting with the program. + // + // Because additional BPF_CGROUP_DEVICE programs only can add + // restrictions, there's no real issue with just ignoring these + // programs (and stops runc from breaking on distributions with + // very strict SELinux policies). + if errors.Is(err, os.ErrPermission) { + logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err) + continue + } + return nil, fmt.Errorf("cannot fetch program from id: %w", err) + } + programs = append(programs, program) + } + runtime.KeepAlive(progIds) + return programs, nil + } + + return nil, errors.New("could not get complete list of CGROUP_DEVICE programs") +} + +var ( + haveBpfProgReplaceBool bool + haveBpfProgReplaceOnce sync.Once +) + +// Loosely based on the BPF_F_REPLACE support check in +// https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go. +// +// TODO: move this logic to cilium/ebpf +func haveBpfProgReplace() bool { + haveBpfProgReplaceOnce.Do(func() { + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Type: ebpf.CGroupDevice, + License: "MIT", + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + }) + if err != nil { + logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err) + return + } + defer prog.Close() + + devnull, err := os.Open("/dev/null") + if err != nil { + logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err) + return + } + defer devnull.Close() + + // We know that we have BPF_PROG_ATTACH since we can load + // BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL + // we know that the feature isn't present. + err = link.RawAttachProgram(link.RawAttachProgramOptions{ + // We rely on this fd being checked after attachFlags. + Target: int(devnull.Fd()), + // Attempt to "replace" bad fds with this program. + Program: prog, + Attach: ebpf.AttachCGroupDevice, + Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE, + }) + if errors.Is(err, unix.EINVAL) { + // not supported + return + } + // attach_flags test succeeded. + if !errors.Is(err, unix.EBADF) { + logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err) + } + haveBpfProgReplaceBool = true + }) + return haveBpfProgReplaceBool +} + +// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/ directory. +// +// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 . +// +// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 +func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) { + // Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167). + // This limit is not inherited into the container. + memlockLimit := &unix.Rlimit{ + Cur: unix.RLIM_INFINITY, + Max: unix.RLIM_INFINITY, + } + _ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit) + + // Get the list of existing programs. + oldProgs, err := findAttachedCgroupDeviceFilters(dirFd) + if err != nil { + return nilCloser, err + } + useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1 + + // Generate new program. + spec := &ebpf.ProgramSpec{ + Type: ebpf.CGroupDevice, + Instructions: insts, + License: license, + } + prog, err := ebpf.NewProgram(spec) + if err != nil { + return nilCloser, err + } + + // If there is only one old program, we can just replace it directly. + var ( + replaceProg *ebpf.Program + attachFlags uint32 = unix.BPF_F_ALLOW_MULTI + ) + if useReplaceProg { + replaceProg = oldProgs[0] + attachFlags |= unix.BPF_F_REPLACE + } + err = link.RawAttachProgram(link.RawAttachProgramOptions{ + Target: dirFd, + Program: prog, + Replace: replaceProg, + Attach: ebpf.AttachCGroupDevice, + Flags: attachFlags, + }) + if err != nil { + return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err) + } + closer := func() error { + err = link.RawDetachProgram(link.RawDetachProgramOptions{ + Target: dirFd, + Program: prog, + Attach: ebpf.AttachCGroupDevice, + }) + if err != nil { + return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err) + } + // TODO: Should we attach the old filters back in this case? Otherwise + // we fail-open on a security feature, which is a bit scary. + return nil + } + if !useReplaceProg { + logLevel := logrus.DebugLevel + // If there was more than one old program, give a warning (since this + // really shouldn't happen with runc-managed cgroups) and then detach + // all the old programs. + if len(oldProgs) > 1 { + // NOTE: Ideally this should be a warning but it turns out that + // systemd-managed cgroups trigger this warning (apparently + // systemd doesn't delete old non-systemd programs when + // setting properties). + logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs)) + logLevel = logrus.InfoLevel + } + for idx, oldProg := range oldProgs { + // Output some extra debug info. + if info, err := oldProg.Info(); err == nil { + fields := logrus.Fields{ + "type": info.Type.String(), + "tag": info.Tag, + "name": info.Name, + } + if id, ok := info.ID(); ok { + fields["id"] = id + } + if runCount, ok := info.RunCount(); ok { + fields["run_count"] = runCount + } + if runtime, ok := info.Runtime(); ok { + fields["runtime"] = runtime.String() + } + logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx) + } + err = link.RawDetachProgram(link.RawDetachProgramOptions{ + Target: dirFd, + Program: oldProg, + Attach: ebpf.AttachCGroupDevice, + }) + if err != nil { + return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err) + } + } + } + return closer, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go index 0bf3d9deb..4527a70eb 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go @@ -1,11 +1,20 @@ package fs import ( + "bytes" + "errors" + "reflect" + "github.com/opencontainers/runc/libcontainer/cgroups" + cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/opencontainers/runc/libcontainer/userns" ) -type DevicesGroup struct{} +type DevicesGroup struct { + TestingSkipFinalCheck bool +} func (s *DevicesGroup) Name() string { return "devices" @@ -24,14 +33,75 @@ func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error { return apply(path, pid) } +func loadEmulator(path string) (*cgroupdevices.Emulator, error) { + list, err := cgroups.ReadFile(path, "devices.list") + if err != nil { + return nil, err + } + return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list)) +} + +func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) { + // This defaults to a white-list -- which is what we want! + emu := &cgroupdevices.Emulator{} + for _, rule := range rules { + if err := emu.Apply(*rule); err != nil { + return nil, err + } + } + return emu, nil +} + func (s *DevicesGroup) Set(path string, r *configs.Resources) error { - if cgroups.DevicesSetV1 == nil { - if len(r.Devices) == 0 { - return nil + if userns.RunningInUserNS() || r.SkipDevices { + return nil + } + + // Generate two emulators, one for the current state of the cgroup and one + // for the requested state by the user. + current, err := loadEmulator(path) + if err != nil { + return err + } + target, err := buildEmulator(r.Devices) + if err != nil { + return err + } + + // Compute the minimal set of transition rules needed to achieve the + // requested state. + transitionRules, err := current.Transition(target) + if err != nil { + return err + } + for _, rule := range transitionRules { + file := "devices.deny" + if rule.Allow { + file = "devices.allow" + } + if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil { + return err } - return cgroups.ErrDevicesUnsupported } - return cgroups.DevicesSetV1(path, r) + + // Final safety check -- ensure that the resulting state is what was + // requested. This is only really correct for white-lists, but for + // black-lists we can at least check that the cgroup is in the right mode. + // + // This safety-check is skipped for the unit tests because we cannot + // currently mock devices.list correctly. + if !s.TestingSkipFinalCheck { + currentAfter, err := loadEmulator(path) + if err != nil { + return err + } + if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) { + return errors.New("resulting devices cgroup doesn't precisely match target") + } else if target.IsBlacklist() != currentAfter.IsBlacklist() { + return errors.New("resulting devices cgroup doesn't match target mode") + } + } + return nil } func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go index be4dcc341..9e2f0ec04 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go @@ -28,6 +28,7 @@ var subsystems = []subsystem{ &FreezerGroup{}, &RdmaGroup{}, &NameGroup{GroupName: "name=systemd", Join: true}, + &NameGroup{GroupName: "misc", Join: true}, } var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") @@ -182,7 +183,7 @@ func (m *manager) Set(r *configs.Resources) error { if err := sys.Set(path, r); err != nil { // When rootless is true, errors from the device subsystem // are ignored, as it is really not expected to work. - if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) { + if m.cgroups.Rootless && sys.Name() == "devices" { continue } // However, errors from other subsystems are not ignored. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go new file mode 100644 index 000000000..0d2345607 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go @@ -0,0 +1,75 @@ +package fs2 + +import ( + "fmt" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/cgroups/ebpf" + "github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/opencontainers/runc/libcontainer/userns" +) + +func isRWM(perms devices.Permissions) bool { + var r, w, m bool + for _, perm := range perms { + switch perm { + case 'r': + r = true + case 'w': + w = true + case 'm': + m = true + } + } + return r && w && m +} + +// This is similar to the logic applied in crun for handling errors from bpf(2) +// . +func canSkipEBPFError(r *configs.Resources) bool { + // If we're running in a user namespace we can ignore eBPF rules because we + // usually cannot use bpf(2), as well as rootless containers usually don't + // have the necessary privileges to mknod(2) device inodes or access + // host-level instances (though ideally we would be blocking device access + // for rootless containers anyway). + if userns.RunningInUserNS() { + return true + } + + // We cannot ignore an eBPF load error if any rule if is a block rule or it + // doesn't permit all access modes. + // + // NOTE: This will sometimes trigger in cases where access modes are split + // between different rules but to handle this correctly would require + // using ".../libcontainer/cgroup/devices".Emulator. + for _, dev := range r.Devices { + if !dev.Allow || !isRWM(dev.Permissions) { + return false + } + } + return true +} + +func setDevices(dirPath string, r *configs.Resources) error { + if r.SkipDevices { + return nil + } + insts, license, err := devicefilter.DeviceFilter(r.Devices) + if err != nil { + return err + } + dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600) + if err != nil { + return fmt.Errorf("cannot get dir FD for %s", dirPath) + } + defer unix.Close(dirFD) + if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { + if !canSkipEBPFError(r) { + return err + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go index d5208d778..492778e31 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go @@ -175,10 +175,8 @@ func (m *manager) Set(r *configs.Resources) error { // When rootless is true, errors from the device subsystem are ignored because it is really not expected to work. // However, errors from other subsystems are not ignored. // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" - if err := setDevices(m.dirPath, r); err != nil { - if !m.config.Rootless || errors.Is(err, cgroups.ErrDevicesUnsupported) { - return err - } + if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless { + return err } // cpuset (since kernel 5.0) if err := setCpuset(m.dirPath, r); err != nil { @@ -203,16 +201,6 @@ func (m *manager) Set(r *configs.Resources) error { return nil } -func setDevices(dirPath string, r *configs.Resources) error { - if cgroups.DevicesSetV2 == nil { - if len(r.Devices) > 0 { - return cgroups.ErrDevicesUnsupported - } - return nil - } - return cgroups.DevicesSetV2(dirPath, r) -} - func (m *manager) setUnified(res map[string]string) error { for k, v := range res { if strings.Contains(k, "/") { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index b32af4ee5..fc4ae44a4 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -162,8 +162,10 @@ func readProcsFile(dir string) ([]int, error) { // ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup // or /proc//cgroup, into a map of subsystems to cgroup paths, e.g. -// "cpu": "/user.slice/user-1000.slice" -// "pids": "/user.slice/user-1000.slice" +// +// "cpu": "/user.slice/user-1000.slice" +// "pids": "/user.slice/user-1000.slice" +// // etc. // // Note that for cgroup v2 unified hierarchy, there are no per-controller diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go index 865344f99..fa195bf90 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go @@ -2,8 +2,8 @@ package configs import "fmt" -// BlockIODevice holds major:minor format supported in blkio cgroup. -type BlockIODevice struct { +// blockIODevice holds major:minor format supported in blkio cgroup +type blockIODevice struct { // Major is the device's major number Major int64 `json:"major"` // Minor is the device's minor number @@ -12,7 +12,7 @@ type BlockIODevice struct { // WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair type WeightDevice struct { - BlockIODevice + blockIODevice // Weight is the bandwidth rate for the device, range is from 10 to 1000 Weight uint16 `json:"weight"` // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only @@ -41,7 +41,7 @@ func (wd *WeightDevice) LeafWeightString() string { // ThrottleDevice struct holds a `major:minor rate_per_second` pair type ThrottleDevice struct { - BlockIODevice + blockIODevice // Rate is the IO rate limit per cgroup per device Rate uint64 `json:"rate"` } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index 7cf2fb657..c1b4a0041 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -83,6 +83,9 @@ type Syscall struct { Args []*Arg `json:"args"` } +// TODO Windows. Many of these fields should be factored out into those parts +// which are common across platforms, and those which are platform specific. + // Config defines configuration options for executing a process inside a contained environment. type Config struct { // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go index b4c616d55..784c61820 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -35,6 +35,12 @@ type Mount struct { // Extensions are additional flags that are specific to runc. Extensions int `json:"extensions"` + + // Optional Command to be run before Source is mounted. + PremountCmds []Command `json:"premount_cmds"` + + // Optional Command to be run after Source is mounted. + PostmountCmds []Command `json:"postmount_cmds"` } func (m *Mount) IsBind() bool { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go index 2473c5ead..a1e216683 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go @@ -280,13 +280,13 @@ func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath // found in any entry in passwd and group respectively. // // Examples of valid user specifications are: -// * "" -// * "user" -// * "uid" -// * "user:group" -// * "uid:gid -// * "user:gid" -// * "uid:group" +// - "" +// - "user" +// - "uid" +// - "user:group" +// - "uid:gid +// - "user:gid" +// - "uid:group" // // It should be noted that if you specify a numeric user or group id, they will // not be evaluated as usernames (only the metadata will be filled). So attempting diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go index dbd435341..6b9fc3435 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go @@ -132,16 +132,19 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { return fn(procfd) } -// SearchLabels searches through a list of key=value pairs for a given key, -// returning its value, and the binary flag telling whether the key exist. -func SearchLabels(labels []string, key string) (string, bool) { - key += "=" - for _, s := range labels { - if strings.HasPrefix(s, key) { - return s[len(key):], true +// SearchLabels searches a list of key-value pairs for the provided key and +// returns the corresponding value. The pairs must be separated with '='. +func SearchLabels(labels []string, query string) string { + for _, l := range labels { + parts := strings.SplitN(l, "=", 2) + if len(parts) < 2 { + continue + } + if parts[0] == query { + return parts[1] } } - return "", false + return "" } // Annotations returns the bundle path and user defined annotations from the diff --git a/vendor/modules.txt b/vendor/modules.txt index 78b6a4849..ba144f6f3 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -56,6 +56,15 @@ github.com/asaskevich/govalidator # github.com/chzyer/readline v1.5.1 ## explicit; go 1.15 github.com/chzyer/readline +# github.com/cilium/ebpf v0.9.1 +## explicit; go 1.17 +github.com/cilium/ebpf +github.com/cilium/ebpf/asm +github.com/cilium/ebpf/btf +github.com/cilium/ebpf/internal +github.com/cilium/ebpf/internal/sys +github.com/cilium/ebpf/internal/unix +github.com/cilium/ebpf/link # github.com/container-orchestrated-devices/container-device-interface v0.6.0 ## explicit; go 1.17 github.com/container-orchestrated-devices/container-device-interface/internal/multierror @@ -487,10 +496,13 @@ github.com/opencontainers/go-digest ## explicit; go 1.18 github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.1.7 => github.com/opencontainers/runc v1.1.1-0.20220617142545-8b9452f75cbc +# github.com/opencontainers/runc v1.1.8 => github.com/opencontainers/runc v1.1.8 ## explicit; go 1.17 github.com/opencontainers/runc/libcontainer/apparmor github.com/opencontainers/runc/libcontainer/cgroups +github.com/opencontainers/runc/libcontainer/cgroups/devices +github.com/opencontainers/runc/libcontainer/cgroups/ebpf +github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter github.com/opencontainers/runc/libcontainer/cgroups/fs github.com/opencontainers/runc/libcontainer/cgroups/fs2 github.com/opencontainers/runc/libcontainer/cgroups/fscommon @@ -837,4 +849,4 @@ gopkg.in/yaml.v3 # sigs.k8s.io/yaml v1.3.0 ## explicit; go 1.12 sigs.k8s.io/yaml -# github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.1-0.20220617142545-8b9452f75cbc +# github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.8