diff --git a/runsc/config/config.go b/runsc/config/config.go
index 6ec62982d0..ca36247ebd 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -325,6 +325,10 @@ type Config struct {
 	// the latest supported NVIDIA driver ABI.
 	NVProxyDriverVersion string `flag:"nvproxy-driver-version"`
 
+	// NVProxyAllowedDriverCapabilities is a comma-separated list of driver
+	// capabilities that are allowed to be requested by the container.
+	NVProxyAllowedDriverCapabilities string `flag:"nvproxy-allowed-driver-capabilities"`
+
 	// TPUProxy enables support for TPUs.
 	TPUProxy bool `flag:"tpuproxy"`
 
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index 0307de2261..0418476d9a 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -134,6 +134,7 @@ func RegisterFlags(flagSet *flag.FlagSet) {
 	flagSet.Bool("nvproxy", false, "EXPERIMENTAL: enable support for Nvidia GPUs")
 	flagSet.Bool("nvproxy-docker", false, "DEPRECATED: use nvidia-container-runtime or `docker run --gpus` directly. Or manually add nvidia-container-runtime-hook as a prestart hook and set up NVIDIA_VISIBLE_DEVICES container environment variable.")
 	flagSet.String("nvproxy-driver-version", "", "NVIDIA driver ABI version to use. If empty, autodetect installed driver version. The special value 'latest' may also be used to use the latest ABI.")
+	flagSet.String("nvproxy-allowed-driver-capabilities", "utility,compute", "Comma-separated list of NVIDIA driver capabilities that are allowed to be requested by the container.")
 	flagSet.Bool("tpuproxy", false, "EXPERIMENTAL: enable support for TPU device passthrough.")
 
 	// Test flags, not to be used outside tests, ever.
diff --git a/runsc/container/container.go b/runsc/container/container.go
index fb0467d04f..89426919ef 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -1936,6 +1936,11 @@ func nvproxyLoadKernelModules() {
 	}
 }
 
+// See nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/capabilities.go:capabilityToCLI().
+func nvproxyDriverCapToFlag(cap specutils.NvidiaDriverCap) string {
+	return "--" + string(cap)
+}
+
 // nvproxySetupAfterGoferUserns runs `nvidia-container-cli configure`.
 // This sets up the container filesystem with bind mounts that allow it to
 // use NVIDIA devices.
@@ -2011,12 +2016,20 @@ func nvproxySetupAfterGoferUserns(spec *specs.Spec, conf *config.Config, goferCmd
 		"configure",
 		fmt.Sprintf("--ldconfig=@%s", ldconfigPath),
 		"--no-cgroups", // runsc doesn't configure device cgroups yet
-		"--utility",
-		"--compute",
 		fmt.Sprintf("--pid=%d", goferCmd.Process.Pid),
 		fmt.Sprintf("--device=%s", devices),
-		spec.Root.Path,
 	}
+	// Pass driver capabilities specified via NVIDIA_DRIVER_CAPABILITIES as flags. See
+	// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/main.go:doPrestart().
+	driverCaps, err := specutils.NvproxyDriverCapsFromEnv(spec, conf)
+	if err != nil {
+		return fmt.Errorf("failed to get driver capabilities: %w", err)
+	}
+	for cap := range driverCaps {
+		argv = append(argv, nvproxyDriverCapToFlag(cap))
+	}
+	// Add rootfs path as the final argument.
+	argv = append(argv, spec.Root.Path)
 	log.Debugf("Executing %q", argv)
 	var stdout, stderr strings.Builder
 	cmd := exec.Cmd{
diff --git a/runsc/specutils/nvidia.go b/runsc/specutils/nvidia.go
index 6eda896ea7..c6450ded6d 100644
--- a/runsc/specutils/nvidia.go
+++ b/runsc/specutils/nvidia.go
@@ -23,10 +23,67 @@ import (
 	"gvisor.dev/gvisor/runsc/config"
 )
 
-const nvdEnvVar = "NVIDIA_VISIBLE_DEVICES"
+const (
+	nvidiaVisibleDevsEnv = "NVIDIA_VISIBLE_DEVICES"
+	nvidiaDriverCapsEnv  = "NVIDIA_DRIVER_CAPABILITIES"
+	cudaVersionEnv       = "CUDA_VERSION"
+	requireCudaEnv       = "NVIDIA_REQUIRE_CUDA"
+	// AnnotationNVProxy enables nvproxy.
+	AnnotationNVProxy = "dev.gvisor.internal.nvproxy"
+)
+
+// NvidiaDriverCap is a GPU driver capability (like compute, graphics, etc.).
+type NvidiaDriverCap string
+
+const (
+	computeCap NvidiaDriverCap = "compute"
+	utilityCap NvidiaDriverCap = "utility"
+	// allCap is a special value that means all supported driver capabilities.
+	allCap NvidiaDriverCap = "all"
+)
+
+// NvidiaDriverCaps is a set of GPU driver capabilities.
+type NvidiaDriverCaps map[NvidiaDriverCap]struct{}
+
+// See nvidia-container-toolkit/internal/config/image/capabilities.go:DefaultDriverCapabilities.
+var nvproxyDefaultDriverCaps = NvidiaDriverCaps{
+	computeCap: struct{}{},
+	utilityCap: struct{}{},
+}
 
-// AnnotationNVProxy enables nvproxy.
-const AnnotationNVProxy = "dev.gvisor.internal.nvproxy"
+func nvidiaDriverCapsFromString(caps string) NvidiaDriverCaps {
+	res := make(NvidiaDriverCaps)
+	for _, cap := range strings.Split(caps, ",") {
+		trimmed := strings.TrimSpace(cap)
+		if len(trimmed) == 0 {
+			continue
+		}
+		res[NvidiaDriverCap(trimmed)] = struct{}{}
+	}
+	return res
+}
+
+func (c NvidiaDriverCaps) hasAll() bool {
+	_, ok := c[allCap]
+	return ok
+}
+
+// Intersect returns the intersection of two sets of driver capabilities.
+func (c NvidiaDriverCaps) Intersect(c2 NvidiaDriverCaps) NvidiaDriverCaps {
+	if c2.hasAll() {
+		return c
+	}
+	if c.hasAll() {
+		return c2
+	}
+	res := make(NvidiaDriverCaps)
+	for cap := range c2 {
+		if _, ok := c[cap]; ok {
+			res[cap] = struct{}{}
+		}
+	}
+	return res
+}
 
 // NVProxyEnabled checks both the nvproxy annotation and conf.NVProxy to see if nvproxy is enabled.
 func NVProxyEnabled(spec *specs.Spec, conf *config.Config) bool {
@@ -78,7 +135,7 @@ func gpuFunctionalityRequestedViaHook(spec *specs.Spec, conf *config.Config) bool {
 	if spec.Process == nil {
 		return false
 	}
-	nvd, _ := EnvVar(spec.Process.Env, nvdEnvVar)
+	nvd, _ := EnvVar(spec.Process.Env, nvidiaVisibleDevsEnv)
 	// A value of "none" means "no GPU device, but still access to driver
 	// functionality", so it is not a value we check for here.
 	return nvd != "" && nvd != "void"
@@ -105,7 +162,7 @@ func isNvidiaHookPresent(spec *specs.Spec, conf *config.Config) bool {
 //
 // Precondition: conf.NVProxyDocker && GPUFunctionalityRequested(spec, conf).
 func ParseNvidiaVisibleDevices(spec *specs.Spec) (string, error) {
-	nvd, _ := EnvVar(spec.Process.Env, nvdEnvVar)
+	nvd, _ := EnvVar(spec.Process.Env, nvidiaVisibleDevsEnv)
 	if nvd == "none" {
 		return "", nil
 	}
@@ -130,3 +187,34 @@ func ParseNvidiaVisibleDevices(spec *specs.Spec) (string, error) {
 	}
 	return nvd, nil
 }
+
+// NvproxyDriverCapsFromEnv returns the driver capabilities requested by the
+// application via the NVIDIA_DRIVER_CAPABILITIES env var. See
+// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/container_config.go:getDriverCapabilities().
+func NvproxyDriverCapsFromEnv(spec *specs.Spec, conf *config.Config) (NvidiaDriverCaps, error) {
+	allowedDriverCaps := nvidiaDriverCapsFromString(conf.NVProxyAllowedDriverCapabilities)
+	driverCapsEnvStr, ok := EnvVar(spec.Process.Env, nvidiaDriverCapsEnv)
+	if !ok {
+		if IsLegacyCudaImage(spec) {
+			return allowedDriverCaps, nil
+		}
+		return nvproxyDefaultDriverCaps, nil
+	}
+	if len(driverCapsEnvStr) == 0 {
+		return nvproxyDefaultDriverCaps, nil
+	}
+	envDriverCaps := nvidiaDriverCapsFromString(driverCapsEnvStr)
+	driverCaps := allowedDriverCaps.Intersect(envDriverCaps)
+	if !envDriverCaps.hasAll() && len(driverCaps) != len(envDriverCaps) {
+		return nil, fmt.Errorf("disallowed driver capabilities requested: '%v' (allowed '%v'), update --nvproxy-allowed-driver-capabilities to allow them", envDriverCaps, driverCaps)
+	}
+	return driverCaps, nil
+}
+
+// IsLegacyCudaImage returns true if spec represents a legacy CUDA image.
+// See nvidia-container-toolkit/internal/config/image/cuda_image.go:IsLegacy().
+func IsLegacyCudaImage(spec *specs.Spec) bool {
+	cudaVersion, _ := EnvVar(spec.Process.Env, cudaVersionEnv)
+	requireCuda, _ := EnvVar(spec.Process.Env, requireCudaEnv)
+	return len(cudaVersion) > 0 && len(requireCuda) == 0
+}
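
For illustration, here is a minimal standalone sketch of the capability-set semantics introduced above: the container's NVIDIA_DRIVER_CAPABILITIES request is intersected with the --nvproxy-allowed-driver-capabilities allowlist, with "all" acting as an identity element on either side. The type and function names below (driverCap, capsFromString, intersect) are hypothetical stand-ins, re-declared here because the real helpers are unexported inside runsc/specutils; the behavior mirrors nvidiaDriverCapsFromString and NvidiaDriverCaps.Intersect from the diff.

// capsketch.go: standalone sketch of the nvproxy capability-set logic.
// Not the real runsc/specutils API; names here are illustrative only.
package main

import (
	"fmt"
	"strings"
)

type driverCap string

type driverCaps map[driverCap]struct{}

// capsFromString mirrors nvidiaDriverCapsFromString: split on commas,
// trim whitespace, and drop empty entries.
func capsFromString(s string) driverCaps {
	res := make(driverCaps)
	for _, c := range strings.Split(s, ",") {
		if t := strings.TrimSpace(c); len(t) > 0 {
			res[driverCap(t)] = struct{}{}
		}
	}
	return res
}

func (c driverCaps) hasAll() bool {
	_, ok := c["all"]
	return ok
}

// intersect mirrors NvidiaDriverCaps.Intersect: "all" on either side
// returns the other set unchanged, so the allowlist acts as a ceiling.
func (c driverCaps) intersect(c2 driverCaps) driverCaps {
	if c2.hasAll() {
		return c
	}
	if c.hasAll() {
		return c2
	}
	res := make(driverCaps)
	for cap := range c2 {
		if _, ok := c[cap]; ok {
			res[cap] = struct{}{}
		}
	}
	return res
}

func main() {
	allowed := capsFromString("utility,compute") // the flag's default value

	// A request within the allowlist keeps its size after intersection,
	// which NvproxyDriverCapsFromEnv treats as success.
	req := capsFromString("compute")
	fmt.Println(len(allowed.intersect(req)) == len(req)) // true

	// Requesting "graphics" shrinks the intersection; the real code
	// reports this as a "disallowed driver capabilities" error.
	req = capsFromString("compute,graphics")
	fmt.Println(len(allowed.intersect(req)) == len(req)) // false

	// "all" resolves to everything the allowlist permits.
	req = capsFromString("all")
	fmt.Println(allowed.intersect(req)) // map[compute:{} utility:{}]
}

Note that treating "all" as an identity rather than expanding it to a fixed list means the administrator's allowlist, not the container image, bounds what gets passed to nvidia-container-cli; the special cases in NvproxyDriverCapsFromEnv (env var unset, empty, or a legacy CUDA image) layer on top of this same intersection.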