From 603c08c78129bcd455373b66477658e86926e891 Mon Sep 17 00:00:00 2001 From: Tim Ramlot <42113979+inteon@users.noreply.github.com> Date: Mon, 21 Aug 2023 14:39:46 +0200 Subject: [PATCH] add support for setting POSIX capabilities on the binary generated by Go Signed-off-by: Tim Ramlot <42113979+inteon@users.noreply.github.com> --- pkg/build/gobuild.go | 52 +++++- pkg/build/options.go | 8 + pkg/build/posixcapability.go | 307 ++++++++++++++++++++++++++++++++++ pkg/commands/options/build.go | 3 + pkg/commands/resolver.go | 12 ++ 5 files changed, 378 insertions(+), 4 deletions(-) create mode 100644 pkg/build/posixcapability.go diff --git a/pkg/build/gobuild.go b/pkg/build/gobuild.go index f289e6cf85..1b7ade3977 100644 --- a/pkg/build/gobuild.go +++ b/pkg/build/gobuild.go @@ -18,6 +18,7 @@ import ( "archive/tar" "bytes" "context" + "encoding/binary" "errors" "fmt" gb "go/build" @@ -82,6 +83,7 @@ type gobuild struct { disableOptimizations bool trimpath bool buildConfigs map[string]Config + capabilities []Cap platformMatcher *platformMatcher dir string labels map[string]string @@ -104,6 +106,7 @@ type gobuildOpener struct { disableOptimizations bool trimpath bool buildConfigs map[string]Config + capabilities []Cap platforms []string labels map[string]string dir string @@ -132,6 +135,7 @@ func (gbo *gobuildOpener) Open() (Interface, error) { disableOptimizations: gbo.disableOptimizations, trimpath: gbo.trimpath, buildConfigs: gbo.buildConfigs, + capabilities: gbo.capabilities, labels: gbo.labels, dir: gbo.dir, platformMatcher: matcher, @@ -489,7 +493,7 @@ func appFilename(importpath string) string { // owner: BUILTIN/Users group: BUILTIN/Users ($sddlValue="O:BUG:BU") const userOwnerAndGroupSID = "AQAAgBQAAAAkAAAAAAAAAAAAAAABAgAAAAAABSAAAAAhAgAAAQIAAAAAAAUgAAAAIQIAAA==" -func tarBinary(name, binary string, platform *v1.Platform) (*bytes.Buffer, error) { +func tarBinary(name, binary string, platform *v1.Platform, caps []Cap) (*bytes.Buffer, error) { buf := 
bytes.NewBuffer(nil) tw := tar.NewWriter(buf) defer tw.Close() @@ -544,6 +548,12 @@ func tarBinary(name, binary string, platform *v1.Platform) (*bytes.Buffer, error header.PAXRecords = map[string]string{ "MSWINDOWS.rawsd": userOwnerAndGroupSID, } + } else if len(caps) > 0 { + // see: https://github.com/testwill/moby/blob/master/pkg/archive/archive.go#L503-L504 + header.PAXRecords = map[string]string{ + "SCHILY.xattr.security.capability": string(capabilityValue(caps)), + } + header.Format = tar.FormatPAX } // write the header to the tarball archive if err := tw.WriteHeader(header); err != nil { @@ -557,6 +567,40 @@ func tarBinary(name, binary string, platform *v1.Platform) (*bytes.Buffer, error return buf, nil } +func capabilityValue(caps []Cap) []byte { + vfsCapVer2 := uint32(0x02000000) + vfsCapFlageffective := uint32(0x000001) + + // Encode caps as a version 2 security.capability value with the effective flag set; each word is little-endian: + // 01 00 00 02 (header word: version 2 | effective flag) + // XX XX XX XX (permitted_v1: capabilities 0-31) + // 00 00 00 00 (inheritable_v1: 0) + // XX XX XX XX (permitted_v2: capabilities 32-63) + // 00 00 00 00 (inheritable_v2: 0) + + permitted_v1 := uint32(0) + inheritable_v1 := uint32(0) + permitted_v2 := uint32(0) + inheritable_v2 := uint32(0) + + for _, cap := range caps { + if cap >= 32 { // capability 32 is bit 0 of the second word; 1 << 32 on a uint32 would be 0 + permitted_v2 |= 1 << (cap - 32) + } else { + permitted_v1 |= 1 << cap + } + } + + capability := make([]byte, 0, 20) + capability = binary.LittleEndian.AppendUint32(capability, vfsCapVer2|vfsCapFlageffective) + capability = binary.LittleEndian.AppendUint32(capability, permitted_v1) + capability = binary.LittleEndian.AppendUint32(capability, inheritable_v1) + capability = binary.LittleEndian.AppendUint32(capability, permitted_v2) + capability = binary.LittleEndian.AppendUint32(capability, inheritable_v2) + + return capability +} + func (g *gobuild) kodataPath(ref reference) (string, error) { dir := filepath.Clean(g.dir) if dir == "."
{ @@ -866,7 +910,7 @@ func (g *gobuild) buildOne(ctx context.Context, refStr string, base v1.Image, pl appPath := path.Join(appDir, appFileName) miss := func() (v1.Layer, error) { - return buildLayer(appPath, file, platform, layerMediaType) + return buildLayer(appPath, file, platform, layerMediaType, g.capabilities) } binaryLayer, err := g.cache.get(ctx, file, miss) @@ -949,9 +993,9 @@ func (g *gobuild) buildOne(ctx context.Context, refStr string, base v1.Image, pl return si, nil } -func buildLayer(appPath, file string, platform *v1.Platform, layerMediaType types.MediaType) (v1.Layer, error) { +func buildLayer(appPath, file string, platform *v1.Platform, layerMediaType types.MediaType, caps []Cap) (v1.Layer, error) { // Construct a tarball with the binary and produce a layer. - binaryLayerBuf, err := tarBinary(appPath, file, platform) + binaryLayerBuf, err := tarBinary(appPath, file, platform, caps) if err != nil { return nil, fmt.Errorf("tarring binary: %w", err) } diff --git a/pkg/build/options.go b/pkg/build/options.go index 4cedf4d0fb..5aaf5a37c9 100644 --- a/pkg/build/options.go +++ b/pkg/build/options.go @@ -177,3 +177,11 @@ func WithSBOMDir(dir string) Option { return nil } } + +// WithPOSIXCapabilities is a functional option for overriding the POSIX capabilities encoded in the binary file. +func WithPOSIXCapabilities(capabilities []Cap) Option { + return func(gbo *gobuildOpener) error { + gbo.capabilities = capabilities + return nil + } +} diff --git a/pkg/build/posixcapability.go b/pkg/build/posixcapability.go new file mode 100644 index 0000000000..3ac3c676f4 --- /dev/null +++ b/pkg/build/posixcapability.go @@ -0,0 +1,307 @@ +package build + +import "strings" + +type Cap int + +// POSIX-draft defined capabilities. +const ( + // In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this + // overrides the restriction of changing file ownership and group + // ownership. 
+ CAP_CHOWN = Cap(0) + + // Override all DAC access, including ACL execute access if + // [_POSIX_ACL] is defined. Excluding DAC access covered by + // CAP_LINUX_IMMUTABLE. + CAP_DAC_OVERRIDE = Cap(1) + + // Overrides all DAC restrictions regarding read and search on files + // and directories, including ACL restrictions if [_POSIX_ACL] is + // defined. Excluding DAC access covered by CAP_LINUX_IMMUTABLE. + CAP_DAC_READ_SEARCH = Cap(2) + + // Overrides all restrictions about allowed operations on files, where + // file owner ID must be equal to the user ID, except where CAP_FSETID + // is applicable. It doesn't override MAC and DAC restrictions. + CAP_FOWNER = Cap(3) + + // Overrides the following restrictions that the effective user ID + // shall match the file owner ID when setting the S_ISUID and S_ISGID + // bits on that file; that the effective group ID (or one of the + // supplementary group IDs) shall match the file owner ID when setting + // the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are + // cleared on successful return from chown(2) (not implemented). + CAP_FSETID = Cap(4) + + // Overrides the restriction that the real or effective user ID of a + // process sending a signal must match the real or effective user ID + // of the process receiving the signal. + CAP_KILL = Cap(5) + + // Allows setgid(2) manipulation + // Allows setgroups(2) + // Allows forged gids on socket credentials passing. + CAP_SETGID = Cap(6) + + // Allows set*uid(2) manipulation (including fsuid). + // Allows forged pids on socket credentials passing. 
+ CAP_SETUID = Cap(7) + + // Linux-specific capabilities + + // Without VFS support for capabilities: + // Transfer any capability in your permitted set to any pid, + // remove any capability in your permitted set from any pid + // With VFS support for capabilities (neither of above, but) + // Add any capability from current's capability bounding set + // to the current process' inheritable set + // Allow taking bits out of capability bounding set + // Allow modification of the securebits for a process + CAP_SETPCAP = Cap(8) + + // Allow modification of S_IMMUTABLE and S_APPEND file attributes + CAP_LINUX_IMMUTABLE = Cap(9) + + // Allows binding to TCP/UDP sockets below 1024 + // Allows binding to ATM VCIs below 32 + CAP_NET_BIND_SERVICE = Cap(10) + + // Allow broadcasting, listen to multicast + CAP_NET_BROADCAST = Cap(11) + + // Allow interface configuration + // Allow administration of IP firewall, masquerading and accounting + // Allow setting debug option on sockets + // Allow modification of routing tables + // Allow setting arbitrary process / process group ownership on + // sockets + // Allow binding to any address for transparent proxying (also via NET_RAW) + // Allow setting TOS (type of service) + // Allow setting promiscuous mode + // Allow clearing driver statistics + // Allow multicasting + // Allow read/write of device-specific registers + // Allow activation of ATM control sockets + CAP_NET_ADMIN = Cap(12) + + // Allow use of RAW sockets + // Allow use of PACKET sockets + // Allow binding to any address for transparent proxying (also via NET_ADMIN) + CAP_NET_RAW = Cap(13) + + // Allow locking of shared memory segments + // Allow mlock and mlockall (which doesn't really have anything to do + // with IPC) + CAP_IPC_LOCK = Cap(14) + + // Override IPC ownership checks + CAP_IPC_OWNER = Cap(15) + + // Insert and remove kernel modules - modify kernel without limit + CAP_SYS_MODULE = Cap(16) + + // Allow ioperm/iopl access + // Allow sending USB messages to 
any device via /proc/bus/usb + CAP_SYS_RAWIO = Cap(17) + + // Allow use of chroot() + CAP_SYS_CHROOT = Cap(18) + + // Allow ptrace() of any process + CAP_SYS_PTRACE = Cap(19) + + // Allow configuration of process accounting + CAP_SYS_PACCT = Cap(20) + + // Allow configuration of the secure attention key + // Allow administration of the random device + // Allow examination and configuration of disk quotas + // Allow setting the domainname + // Allow setting the hostname + // Allow calling bdflush() + // Allow mount() and umount(), setting up new smb connection + // Allow some autofs root ioctls + // Allow nfsservctl + // Allow VM86_REQUEST_IRQ + // Allow to read/write pci config on alpha + // Allow irix_prctl on mips (setstacksize) + // Allow flushing all cache on m68k (sys_cacheflush) + // Allow removing semaphores + // Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores + // and shared memory + // Allow locking/unlocking of shared memory segment + // Allow turning swap on/off + // Allow forged pids on socket credentials passing + // Allow setting readahead and flushing buffers on block devices + // Allow setting geometry in floppy driver + // Allow turning DMA on/off in xd driver + // Allow administration of md devices (mostly the above, but some + // extra ioctls) + // Allow tuning the ide driver + // Allow access to the nvram device + // Allow administration of apm_bios, serial and bttv (TV) device + // Allow manufacturer commands in isdn CAPI support driver + // Allow reading non-standardized portions of pci configuration space + // Allow DDI debug ioctl on sbpcd driver + // Allow setting up serial ports + // Allow sending raw qic-117 commands + // Allow enabling/disabling tagged queuing on SCSI controllers and sending + // arbitrary SCSI commands + // Allow setting encryption key on loopback filesystem + // Allow setting zone reclaim policy + CAP_SYS_ADMIN = Cap(21) + + // Allow use of reboot() + CAP_SYS_BOOT = Cap(22) + + // Allow raising 
priority and setting priority on other (different + // UID) processes + // Allow use of FIFO and round-robin (realtime) scheduling on own + // processes and setting the scheduling algorithm used by another + // process. + // Allow setting cpu affinity on other processes + CAP_SYS_NICE = Cap(23) + + // Override resource limits. Set resource limits. + // Override quota limits. + // Override reserved space on ext2 filesystem + // Modify data journaling mode on ext3 filesystem (uses journaling + // resources) + // NOTE: ext2 honors fsuid when checking for resource overrides, so + // you can override using fsuid too + // Override size restrictions on IPC message queues + // Allow more than 64hz interrupts from the real-time clock + // Override max number of consoles on console allocation + // Override max number of keymaps + CAP_SYS_RESOURCE = Cap(24) + + // Allow manipulation of system clock + // Allow irix_stime on mips + // Allow setting the real-time clock + CAP_SYS_TIME = Cap(25) + + // Allow configuration of tty devices + // Allow vhangup() of tty + CAP_SYS_TTY_CONFIG = Cap(26) + + // Allow the privileged aspects of mknod() + CAP_MKNOD = Cap(27) + + // Allow taking of leases on files + CAP_LEASE = Cap(28) + + CAP_AUDIT_WRITE = Cap(29) + CAP_AUDIT_CONTROL = Cap(30) + CAP_SETFCAP = Cap(31) + + // Override MAC access. + // The base kernel enforces no MAC policy. + // An LSM may enforce a MAC policy, and if it does and it chooses + // to implement capability based overrides of that policy, this is + // the capability it should use to do so. + CAP_MAC_OVERRIDE = Cap(32) + + // Allow MAC configuration or state changes. + // The base kernel requires no MAC configuration. + // An LSM may enforce a MAC policy, and if it does and it chooses + // to implement capability based checks on modifications to that + // policy or the data required to maintain it, this is the + // capability it should use to do so. 
+ CAP_MAC_ADMIN = Cap(33) + + // Allow configuring the kernel's syslog (printk behaviour) + CAP_SYSLOG = Cap(34) + + // Allow triggering something that will wake the system + CAP_WAKE_ALARM = Cap(35) + + // Allow preventing system suspends + CAP_BLOCK_SUSPEND = Cap(36) + + // Allow reading audit messages from the kernel + CAP_AUDIT_READ = Cap(37) +) + +func CapFromString(value string) Cap { + switch strings.ToUpper(value) { + case "CAP_CHOWN": + return CAP_CHOWN // 0 + case "CAP_DAC_OVERRIDE": + return CAP_DAC_OVERRIDE // 1 + case "CAP_DAC_READ_SEARCH": + return CAP_DAC_READ_SEARCH // 2 + case "CAP_FOWNER": + return CAP_FOWNER // 3 + case "CAP_FSETID": + return CAP_FSETID // 4 + case "CAP_KILL": + return CAP_KILL // 5 + case "CAP_SETGID": + return CAP_SETGID // 6 + case "CAP_SETUID": + return CAP_SETUID // 7 + case "CAP_SETPCAP": + return CAP_SETPCAP // 8 + case "CAP_LINUX_IMMUTABLE": + return CAP_LINUX_IMMUTABLE // 9 + case "CAP_NET_BIND_SERVICE": + return CAP_NET_BIND_SERVICE // 10 + case "CAP_NET_BROADCAST": + return CAP_NET_BROADCAST // 11 + case "CAP_NET_ADMIN": + return CAP_NET_ADMIN // 12 + case "CAP_NET_RAW": + return CAP_NET_RAW // 13 + case "CAP_IPC_LOCK": + return CAP_IPC_LOCK // 14 + case "CAP_IPC_OWNER": + return CAP_IPC_OWNER // 15 + case "CAP_SYS_MODULE": + return CAP_SYS_MODULE // 16 + case "CAP_SYS_RAWIO": + return CAP_SYS_RAWIO // 17 + case "CAP_SYS_CHROOT": + return CAP_SYS_CHROOT // 18 + case "CAP_SYS_PTRACE": + return CAP_SYS_PTRACE // 19 + case "CAP_SYS_PACCT": + return CAP_SYS_PACCT // 20 + case "CAP_SYS_ADMIN": + return CAP_SYS_ADMIN // 21 + case "CAP_SYS_BOOT": + return CAP_SYS_BOOT // 22 + case "CAP_SYS_NICE": + return CAP_SYS_NICE // 23 + case "CAP_SYS_RESOURCE": + return CAP_SYS_RESOURCE // 24 + case "CAP_SYS_TIME": + return CAP_SYS_TIME // 25 + case "CAP_SYS_TTY_CONFIG": + return CAP_SYS_TTY_CONFIG // 26 + case "CAP_MKNOD": + return CAP_MKNOD // 27 + case "CAP_LEASE": + return CAP_LEASE // 28 + case "CAP_AUDIT_WRITE": + return 
CAP_AUDIT_WRITE // 29 + case "CAP_AUDIT_CONTROL": + return CAP_AUDIT_CONTROL // 30 + case "CAP_SETFCAP": + return CAP_SETFCAP // 31 + case "CAP_MAC_OVERRIDE": + return CAP_MAC_OVERRIDE // 32 + case "CAP_MAC_ADMIN": + return CAP_MAC_ADMIN // 33 + case "CAP_SYSLOG": + return CAP_SYSLOG // 34 + case "CAP_WAKE_ALARM": + return CAP_WAKE_ALARM // 35 + case "CAP_BLOCK_SUSPEND": + return CAP_BLOCK_SUSPEND // 36 + case "CAP_AUDIT_READ": + return CAP_AUDIT_READ // 37 + default: + return -1 + } +} diff --git a/pkg/commands/options/build.go b/pkg/commands/options/build.go index a16c1033ad..6006bbffaf 100644 --- a/pkg/commands/options/build.go +++ b/pkg/commands/options/build.go @@ -55,6 +55,7 @@ type BuildOptions struct { SBOMDir string Platforms []string Labels []string + POSIXCapabilities []string // UserAgent enables overriding the default value of the `User-Agent` HTTP // request header used when retrieving the base image. UserAgent string @@ -84,6 +85,8 @@ func AddBuildOptions(cmd *cobra.Command, bo *BuildOptions) { "Which platform to use when pulling a multi-platform base. Format: all | [/[/]][,platform]*") cmd.Flags().StringSliceVar(&bo.Labels, "image-label", []string{}, "Which labels (key=value) to add to the image.") + cmd.Flags().StringSliceVar(&bo.POSIXCapabilities, "posix-capabilities", []string{}, + "Which POSIX capabilities to set on the binary. Eg. 
CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER") bo.Trimpath = true } diff --git a/pkg/commands/resolver.go b/pkg/commands/resolver.go index b9dc43414d..d644633d31 100644 --- a/pkg/commands/resolver.go +++ b/pkg/commands/resolver.go @@ -126,6 +126,18 @@ func gobuildOptions(bo *options.BuildOptions) ([]build.Option, error) { opts = append(opts, build.WithSBOMDir(bo.SBOMDir)) } + if bo.POSIXCapabilities != nil { + caps := make([]build.Cap, len(bo.POSIXCapabilities)) + for i, c := range bo.POSIXCapabilities { + v := build.CapFromString(c) + if v < 0 { + return nil, fmt.Errorf("invalid POSIX capability: %q", c) + } + caps[i] = v + } + opts = append(opts, build.WithPOSIXCapabilities(caps)) + } + return opts, nil }