Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[testing] #4327

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/moby/sys/mountinfo v0.7.1
github.com/moby/sys/user v0.1.0
github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers/runtime-spec v1.2.0
github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95
github.com/opencontainers/selinux v1.11.0
github.com/seccomp/libseccomp-golang v0.10.0
github.com/sirupsen/logrus v1.9.3
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ github.com/moby/sys/user v0.1.0 h1:WmZ93f5Ux6het5iituh9x2zAG7NFY9Aqi49jjE1PaQg=
github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU=
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 h1:Ghl8Z3l+yPQUDSxAp7Kg7fJLRNNXjOsR6ooDcca7PjU=
github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand Down
5 changes: 5 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ type Config struct {

// IOPriority is the container's I/O priority.
IOPriority *IOPriority `json:"io_priority,omitempty"`

// ExecCPUAffinity is CPU affinity for a non-init process to be run in the container.
ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"`
}

// Scheduler is based on the Linux sched_setattr(2) syscall.
Expand Down Expand Up @@ -300,6 +303,8 @@ type (
Hooks map[HookName]HookList
)

type CPUAffinity = specs.CPUAffinity

const (
// Prestart commands are executed after the container namespaces are created,
// but before the user supplied command is executed from init.
Expand Down
24 changes: 24 additions & 0 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,9 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
}

func setupScheduler(config *configs.Config) error {
if config.Scheduler == nil {
return nil
}
attr, err := configs.ToSchedAttr(config.Scheduler)
if err != nil {
return err
Expand All @@ -689,6 +692,27 @@ func setupScheduler(config *configs.Config) error {
return nil
}

// setIOPriority applies the given I/O priority using the ioprio_set(2)
// syscall. A nil ioprio means nothing was requested and is a no-op.
//
// It returns an error if ioprio.Class is not a key of
// configs.IOPrioClassMapping, or if the syscall fails.
func setIOPriority(ioprio *configs.IOPriority) error {
	// Value of IOPRIO_WHO_PROCESS in the kernel's uapi ioprio.h
	// (IOPRIO_WHO_PGRP is 2). Together with an id of 0 below, it selects
	// the caller itself as the target.
	const ioprioWhoProcess = 1

	if ioprio == nil {
		return nil
	}
	class, ok := configs.IOPrioClassMapping[ioprio.Class]
	if !ok {
		return fmt.Errorf("invalid io priority class: %s", ioprio.Class)
	}

	// Combine class and priority into a single value
	// https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17
	iop := (class << 13) | ioprio.Priority
	_, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, ioprioWhoProcess, 0, uintptr(iop))
	if errno != 0 {
		return fmt.Errorf("failed to set io priority: %w", errno)
	}
	return nil
}

func setupPersonality(config *configs.Config) error {
return system.SetLinuxPersonality(config.Personality.Domain)
}
Expand Down
115 changes: 89 additions & 26 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -122,19 +123,96 @@ func (p *setnsProcess) signal(sig os.Signal) error {
return unix.Kill(p.pid(), s)
}

func (p *setnsProcess) start() (retErr error) {
defer p.comm.closeParent()
func affToUnix(str string) (*unix.CPUSet, error) {
s := new(unix.CPUSet)
for _, r := range strings.Split(str, ",") {
// Allow extra spaces around.
r = strings.TrimSpace(r)
// Allow empty elements (extra commas).
if r == "" {
continue
}
if r0, r1, found := strings.Cut(r, "-"); found {
start, err := strconv.ParseUint(r0, 10, 32)
if err != nil {
return nil, err
}
end, err := strconv.ParseUint(r1, 10, 32)
if err != nil {
return nil, err
}
if start > end {
return nil, errors.New("invalid range: " + r)
}
for i := int(start); i <= int(end); i++ {
s.Set(i)
}
} else {
val, err := strconv.ParseUint(r, 10, 32)
if err != nil {
return nil, err
}
s.Set(int(val))
}
}

if p.process.IOPriority != nil {
if err := setIOPriority(p.process.IOPriority); err != nil {
return err
return s, nil
}

// startWithCPUAffinity starts p.cmd, first applying execCPUAffinity.initial
// (when configured) to the OS thread that performs the start.
//
// Without a configured initial affinity this is a plain p.cmd.Start().
// An error is returned if the affinity string cannot be parsed, if the
// affinity cannot be set, or if starting the command fails.
func (p *setnsProcess) startWithCPUAffinity() error {
aff := p.config.Config.ExecCPUAffinity
if aff == nil || aff.Initial == "" {
return p.cmd.Start()
}
cpus, err := affToUnix(aff.Initial)
if err != nil {
return fmt.Errorf("invalid execCPUAffinity.initial: %w", err)
}

// Unbuffered: the goroutine below sends exactly one value and this function
// receives exactly one, so the deferred close runs only after that send
// has completed.
errCh := make(chan error)
defer close(errCh)

// Use a goroutine to dedicate an OS thread.
go func() {
// Don't call runtime.UnlockOSThread to terminate the OS thread
// when goroutine exits.
// (Per the runtime documentation, a goroutine that exits while still
// locked causes its thread to be terminated; presumably this keeps the
// modified affinity from being reused by other goroutines.)
runtime.LockOSThread()

// Command inherits the CPU affinity.
// The affinity is set on this thread (Gettid), and the command is started
// from the same locked thread right after.
if err := unix.SchedSetaffinity(unix.Gettid(), cpus); err != nil {
errCh <- fmt.Errorf("setting initial CPU affinity: %w", err)
return
}

errCh <- p.cmd.Start()
}()

// Block until the goroutine reports the outcome of the start attempt.
return <-errCh
}

// setFinalCPUAffinity applies execCPUAffinity.final, when configured, to the
// process identified by p.pid(). It is a no-op when no final affinity is set.
func (p *setnsProcess) setFinalCPUAffinity() error {
	var final string
	if a := p.config.Config.ExecCPUAffinity; a != nil {
		final = a.Final
	}
	if final == "" {
		// Nothing requested.
		return nil
	}

	set, err := affToUnix(final)
	if err != nil {
		return fmt.Errorf("invalid execCPUAffinity.final: %w", err)
	}

	err = unix.SchedSetaffinity(p.pid(), set)
	if err != nil {
		return fmt.Errorf("setting final CPU affinity: %w", err)
	}
	return nil
}

func (p *setnsProcess) start() (retErr error) {
defer p.comm.closeParent()

// get the "before" value of oom kill count
// Get the "before" value of oom kill count.
oom, _ := p.manager.OOMKillCount()
err := p.cmd.Start()
// close the child-side of the pipes (controlled by child)
err := p.startWithCPUAffinity()
// Close the child-side of the pipes (controlled by child).
p.comm.closeChild()
if err != nil {
return fmt.Errorf("error starting setns process: %w", err)
Expand Down Expand Up @@ -202,6 +280,9 @@ func (p *setnsProcess) start() (retErr error) {
}
}
}
if err := p.setFinalCPUAffinity(); err != nil {
return err
}

if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil {
return fmt.Errorf("error writing config to pipe: %w", err)
Expand Down Expand Up @@ -988,21 +1069,3 @@ func initWaiter(r io.Reader) chan error {

return ch
}

// setIOPriority applies the given I/O priority using the ioprio_set(2)
// syscall. A nil ioprio means nothing was requested and is a no-op.
//
// It returns an error if ioprio.Class is not a key of
// configs.IOPrioClassMapping, or if the syscall fails.
func setIOPriority(ioprio *configs.IOPriority) error {
	// Value of IOPRIO_WHO_PROCESS in the kernel's uapi ioprio.h
	// (IOPRIO_WHO_PGRP is 2). Together with an id of 0 below, it selects
	// the caller itself as the target.
	const ioprioWhoProcess = 1

	// Guard against a nil pointer so callers may pass the config field
	// through without checking it first.
	if ioprio == nil {
		return nil
	}

	class, ok := configs.IOPrioClassMapping[ioprio.Class]
	if !ok {
		return fmt.Errorf("invalid io priority class: %s", ioprio.Class)
	}

	// Combine class and priority into a single value
	// https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17
	iop := (class << 13) | ioprio.Priority
	_, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, ioprioWhoProcess, 0, uintptr(iop))
	if errno != 0 {
		return fmt.Errorf("failed to set io priority: %w", errno)
	}
	return nil
}
9 changes: 5 additions & 4 deletions libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,13 @@ func (l *linuxSetnsInit) Init() error {
unix.Umask(int(*l.config.Config.Umask))
}

if l.config.Config.Scheduler != nil {
if err := setupScheduler(l.config.Config); err != nil {
return err
}
if err := setupScheduler(l.config.Config); err != nil {
return err
}

if err := setIOPriority(l.config.Config.IOPriority); err != nil {
return err
}
// Tell our parent that we're ready to exec. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
5 changes: 5 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
ioPriority := *spec.Process.IOPriority
config.IOPriority = &ioPriority
}
if spec.Process.ExecCPUAffinity != nil {
a := *spec.Process.ExecCPUAffinity
config.ExecCPUAffinity = &a
}

}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
13 changes: 5 additions & 8 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,12 @@ func (l *linuxStandardInit) Init() error {
}
}

if l.config.Config.Scheduler != nil {
if err := setupScheduler(l.config.Config); err != nil {
return err
}
if err := setupScheduler(l.config.Config); err != nil {
return err
}
if l.config.Config.IOPriority != nil {
if err := setIOPriority(l.config.Config.IOPriority); err != nil {
return err
}

if err := setIOPriority(l.config.Config.IOPriority); err != nil {
return err
}

// Tell our parent that we're ready to exec. This must be done before the
Expand Down
77 changes: 77 additions & 0 deletions tests/integration/cpu_affinity.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env bats
# Exec CPU affinity tests. For more details, see:
# - https://github.com/opencontainers/runtime-spec/pull/1253

load helpers

# Per-test setup: declares the "smp" and "cgroups_cpuset" requirements via the
# requires helper, then calls setup_busybox (both come from the loaded helpers).
function setup() {
requires smp cgroups_cpuset
setup_busybox
}

# Per-test cleanup: delegates to teardown_bundle from the loaded helpers.
function teardown() {
teardown_bundle
}

# Prints the contents of /sys/devices/system/cpu/online, i.e. the kernel's
# list of online CPUs (a CPU list such as "0-3").
function all_cpus() {
cat /sys/devices/system/cpu/online
}

# Prints the first CPU number from the online CPU list, i.e. everything
# before the first "-" or "," in the output of all_cpus.
function first_cpu() {
	local online
	online="$(all_cpus)"
	# Strip the longest suffix that starts with a range or list separator.
	printf '%s\n' "${online%%[-,]*}"
}

# With no execCPUAffinity in the config, both the container init process and
# an exec'ed process are expected to report the affinity that was set on the
# shell running this test.
@test "runc exec [CPU affinity inherited from runc]" {
requires root # For taskset.

first="$(first_cpu)"

# Container's process CPU affinity is inherited from that of runc.
# Pin the current shell ($$) to $first so that runc, started from it,
# runs with that affinity.
taskset -p -c "$first" $$

runc run -d --console-socket "$CONSOLE_SOCKET" ct1
[ "$status" -eq 0 ]

# Check init.
runc exec ct1 grep "Cpus_allowed_list:" /proc/1/status
[ "$status" -eq 0 ]
[[ "${lines[0]}" == "Cpus_allowed_list: $first" ]]

# Check exec.
runc exec ct1 grep "Cpus_allowed_list:" /proc/self/status
[ "$status" -eq 0 ]
[[ "${lines[0]}" == "Cpus_allowed_list: $first" ]]
}

# With only execCPUAffinity.initial configured, an exec'ed process is expected
# to report exactly that CPU as its allowed list.
@test "runc exec [CPU affinity, only initial is set]" {
requires root # NOTE(review): was "For taskset", but taskset is not called in this test; confirm root is still needed.

first="$(first_cpu)"

# Request the first online CPU as the initial affinity for exec'ed processes.
update_config ".process.execCPUAffinity.initial = \"$first\""

runc run -d --console-socket "$CONSOLE_SOCKET" ct1
[ "$status" -eq 0 ]

# The exec'ed grep reads its own status, so this checks the exec process.
runc exec ct1 grep "Cpus_allowed_list:" /proc/self/status
[ "$status" -eq 0 ]
[[ "${lines[0]}" == "Cpus_allowed_list: $first" ]]
}

# With both execCPUAffinity.initial and execCPUAffinity.final configured, an
# exec'ed process is expected to end up with the final affinity.
@test "runc exec [CPU affinity, initial and final are set]" {
requires root # For taskset.

first="$(first_cpu)"
# Assumes CPU number first+1 exists and is online.
second=$((first+1)) # Hacky; might not work in all environments.

update_config " .process.execCPUAffinity.initial = \"$first\"
| .process.execCPUAffinity.final = \"$second\""

# Pin the current shell ($$) to $first before starting the container, so the
# expected result ($second) differs from the affinity runc itself runs with.
taskset -p -c "$first" $$
runc run -d --console-socket "$CONSOLE_SOCKET" ct1
[ "$status" -eq 0 ]

# The exec'ed grep reads its own status; it must show the final CPU.
runc exec ct1 grep "Cpus_allowed_list:" /proc/self/status
[ "$status" -eq 0 ]
[[ "${lines[0]}" == "Cpus_allowed_list: $second" ]]
}
12 changes: 2 additions & 10 deletions utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,15 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
Label: p.SelinuxLabel,
NoNewPrivileges: &p.NoNewPrivileges,
AppArmorProfile: p.ApparmorProfile,
Scheduler: p.Scheduler,
IOPriority: p.IOPriority,
}

if p.ConsoleSize != nil {
lp.ConsoleWidth = uint16(p.ConsoleSize.Width)
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}

if p.Scheduler != nil {
s := *p.Scheduler
lp.Scheduler = &s
}

if p.IOPriority != nil {
ioPriority := *p.IOPriority
lp.IOPriority = &ioPriority
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading