Skip to content

Commit

Permalink
Pass the NVIDIA_DRIVER_CAPABILITIES env var to nvidia-container-cli.
Browse files Browse the repository at this point in the history
runsc attempts to emulate nvidia-container-runtime-hook, but it was always
passing "--compute --utility" as the driver capability flags to the
`nvidia-container-cli configure` command.

Fix runsc to emulate nvidia-container-runtime-hook correctly by parsing
NVIDIA_DRIVER_CAPABILITIES and converting that comma-separated list to flags.

This is in preparation for adding support for non-compute GPU workloads in
nvproxy :)

Updates #9452
Updates #10856

PiperOrigin-RevId: 671644915
  • Loading branch information
ayushr2 authored and gvisor-bot committed Sep 6, 2024
1 parent 40bde6c commit 46624d6
Showing 1 changed file with 23 additions and 3 deletions.
26 changes: 23 additions & 3 deletions runsc/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -1936,6 +1936,11 @@ func nvproxyLoadKernelModules() {
}
}

// nvproxyDriverCapToFlag returns the command-line flag for the driver
// capability named cap, formed by prefixing cap with "--" (for example,
// "compute" yields "--compute").
//
// Compare nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/capabilities.go:capabilityToCLI().
//
// NOTE(review): cap is used verbatim and is not checked against any set of
// known capability names here; confirm that callers pass trusted values.
func nvproxyDriverCapToFlag(cap string) string {
	const flagPrefix = "--"
	return flagPrefix + cap
}

// nvproxySetupAfterGoferUserns runs `nvidia-container-cli configure`.
// This sets up the container filesystem with bind mounts that allow it to
// use NVIDIA devices.
Expand Down Expand Up @@ -2011,12 +2016,27 @@ func nvproxySetupAfterGoferUserns(spec *specs.Spec, conf *config.Config, goferCm
"configure",
fmt.Sprintf("--ldconfig=@%s", ldconfigPath),
"--no-cgroups", // runsc doesn't configure device cgroups yet
"--utility",
"--compute",
fmt.Sprintf("--pid=%d", goferCmd.Process.Pid),
fmt.Sprintf("--device=%s", devices),
spec.Root.Path,
}
// Initialize driverCaps with default driver capabilities. See
// nvidia-container-toolkit/internal/config/image/capabilities.go:DefaultDriverCapabilities.
driverCapsFlags := []string{
nvproxyDriverCapToFlag("utility"),
nvproxyDriverCapToFlag("compute"),
}
// Override with NVIDIA_DRIVER_CAPABILITIES env var. See
// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/main.go:doPrestart().
if driverCapsEnv, ok := specutils.EnvVar(spec.Process.Env, "NVIDIA_DRIVER_CAPABILITIES"); ok {
clear(driverCapsFlags)
for _, cap := range strings.Split(driverCapsEnv, ",") {
if len(cap) == 0 {
break
}
driverCapsFlags = append(driverCapsFlags, nvproxyDriverCapToFlag(cap))
}
}
argv = append(append(argv, driverCapsFlags...), spec.Root.Path)
log.Debugf("Executing %q", argv)
var stdout, stderr strings.Builder
cmd := exec.Cmd{
Expand Down

0 comments on commit 46624d6

Please sign in to comment.