diff --git a/runsc/config/config.go b/runsc/config/config.go
index 6ec62982d0..7adefbd13f 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -325,6 +325,10 @@ type Config struct {
 	// the latest supported NVIDIA driver ABI.
 	NVProxyDriverVersion string `flag:"nvproxy-driver-version"`
 
+	// NVProxyAllowedDriverCapabilities is a comma-separated list of driver
+	// capabilities that are allowed to be requested by the container.
+	NVProxyAllowedDriverCapabilities string `flag:"nvproxy-allowed-driver-capabilities"`
+
 	// TPUProxy enables support for TPUs.
 	TPUProxy bool `flag:"tpuproxy"`
 
@@ -408,6 +412,16 @@ func (c *Config) validate() error {
 	if len(c.ProfilingMetrics) > 0 && len(c.ProfilingMetricsLog) == 0 {
 		return fmt.Errorf("profiling-metrics flag requires defining a profiling-metrics-log for output")
 	}
+	if c.NVProxyAllowedDriverCapabilities == "all" {
+		return fmt.Errorf("nvproxy-allowed-driver-capabilities cannot be set to 'all', please set it to the exact capabilities you want to allow")
+	}
+	for _, cap := range strings.Split(c.NVProxyAllowedDriverCapabilities, ",") {
+		switch cap {
+		case "compute", "compat32", "graphics", "utility", "video", "display", "ngx":
+		default:
+			return fmt.Errorf("nvproxy-allowed-driver-capabilities contains invalid capability %q", cap)
+		}
+	}
 	return nil
 }
 
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index 0307de2261..0418476d9a 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -134,6 +134,7 @@ func RegisterFlags(flagSet *flag.FlagSet) {
 	flagSet.Bool("nvproxy", false, "EXPERIMENTAL: enable support for Nvidia GPUs")
 	flagSet.Bool("nvproxy-docker", false, "DEPRECATED: use nvidia-container-runtime or `docker run --gpus` directly. Or manually add nvidia-container-runtime-hook as a prestart hook and set up NVIDIA_VISIBLE_DEVICES container environment variable.")
 	flagSet.String("nvproxy-driver-version", "", "NVIDIA driver ABI version to use. If empty, autodetect installed driver version. The special value 'latest' may also be used to use the latest ABI.")
+	flagSet.String("nvproxy-allowed-driver-capabilities", "utility,compute", "Comma-separated list of NVIDIA driver capabilities that are allowed to be requested by the container.")
 	flagSet.Bool("tpuproxy", false, "EXPERIMENTAL: enable support for TPU device passthrough.")
 
 	// Test flags, not to be used outside tests, ever.
diff --git a/runsc/container/container.go b/runsc/container/container.go
index fb0467d04f..52016c3d5b 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -2011,12 +2011,20 @@ func nvproxySetupAfterGoferUserns(spec *specs.Spec, conf *config.Config, goferCm
 		"configure",
 		fmt.Sprintf("--ldconfig=@%s", ldconfigPath),
 		"--no-cgroups", // runsc doesn't configure device cgroups yet
-		"--utility",
-		"--compute",
 		fmt.Sprintf("--pid=%d", goferCmd.Process.Pid),
 		fmt.Sprintf("--device=%s", devices),
-		spec.Root.Path,
 	}
+	// Pass driver capabilities specified via NVIDIA_DRIVER_CAPABILITIES as flags. See
+	// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/main.go:doPrestart().
+	driverCaps, err := specutils.NvproxyDriverCapsFromEnv(spec, conf)
+	if err != nil {
+		return fmt.Errorf("failed to get driver capabilities: %w", err)
+	}
+	for cap := range driverCaps {
+		argv = append(argv, cap.ToFlag())
+	}
+	// Add rootfs path as the final argument.
+	argv = append(argv, spec.Root.Path)
 	log.Debugf("Executing %q", argv)
 	var stdout, stderr strings.Builder
 	cmd := exec.Cmd{
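For context on the container.go change above: with the default allowlist and NVIDIA_DRIVER_CAPABILITIES=utility,compute, the capability loop appends one flag per entry via ToFlag(), so the tail of argv would look roughly like the sketch below. This is illustrative only: the ldconfig path, gofer PID, device list, and rootfs path are hypothetical, the earlier argv elements stay elided, and the relative order of the capability flags varies because driverCaps is a map.

	argv := []string{
		// ...binary and earlier arguments elided...
		"configure",
		"--ldconfig=@/sbin/ldconfig", // fmt.Sprintf("--ldconfig=@%s", ldconfigPath)
		"--no-cgroups",
		"--pid=1234",      // gofer process PID
		"--device=all",    // from NVIDIA_VISIBLE_DEVICES
		"--utility",       // cap.ToFlag() for each resolved capability;
		"--compute",       // map iteration makes this order nondeterministic
		"/path/to/rootfs", // spec.Root.Path, appended last
	}
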
diff --git a/runsc/specutils/nvidia.go b/runsc/specutils/nvidia.go
index 6eda896ea7..d2919439b9 100644
--- a/runsc/specutils/nvidia.go
+++ b/runsc/specutils/nvidia.go
@@ -23,10 +23,73 @@ import (
 	"gvisor.dev/gvisor/runsc/config"
 )
 
-const nvdEnvVar = "NVIDIA_VISIBLE_DEVICES"
+const (
+	nvidiaVisibleDevsEnv = "NVIDIA_VISIBLE_DEVICES"
+	nvidiaDriverCapsEnv  = "NVIDIA_DRIVER_CAPABILITIES"
+	cudaVersionEnv       = "CUDA_VERSION"
+	requireCudaEnv       = "NVIDIA_REQUIRE_CUDA"
+	// AnnotationNVProxy enables nvproxy.
+	AnnotationNVProxy = "dev.gvisor.internal.nvproxy"
+)
+
+// NvidiaDriverCap is a GPU driver capability (like compute, graphics, etc.).
+type NvidiaDriverCap string
+
+const (
+	computeCap NvidiaDriverCap = "compute"
+	utilityCap NvidiaDriverCap = "utility"
+	// allCap is a special value that means all supported driver capabilities.
+	allCap NvidiaDriverCap = "all"
+)
 
-// AnnotationNVProxy enables nvproxy.
-const AnnotationNVProxy = "dev.gvisor.internal.nvproxy"
+// ToFlag converts the driver capability to a flag for nvidia-container-cli.
+// See nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/capabilities.go:capabilityToCLI().
+func (c NvidiaDriverCap) ToFlag() string {
+	return "--" + string(c)
+}
+
+// NvidiaDriverCaps is a set of GPU driver capabilities.
+type NvidiaDriverCaps map[NvidiaDriverCap]struct{}
+
+// See nvidia-container-toolkit/internal/config/image/capabilities.go:DefaultDriverCapabilities.
+var nvproxyDefaultDriverCaps = NvidiaDriverCaps{
+	computeCap: struct{}{},
+	utilityCap: struct{}{},
+}
+
+func nvidiaDriverCapsFromString(caps string) NvidiaDriverCaps {
+	res := make(NvidiaDriverCaps)
+	for _, cap := range strings.Split(caps, ",") {
+		trimmed := strings.TrimSpace(cap)
+		if len(trimmed) == 0 {
+			continue
+		}
+		res[NvidiaDriverCap(trimmed)] = struct{}{}
+	}
+	return res
+}
+
+func (c NvidiaDriverCaps) hasAll() bool {
+	_, ok := c[allCap]
+	return ok
+}
+
+// Intersect returns the intersection of two sets of driver capabilities.
+func (c NvidiaDriverCaps) Intersect(c2 NvidiaDriverCaps) NvidiaDriverCaps {
+	if c2.hasAll() {
+		return c
+	}
+	if c.hasAll() {
+		return c2
+	}
+	res := make(NvidiaDriverCaps)
+	for cap := range c2 {
+		if _, ok := c[cap]; ok {
+			res[cap] = struct{}{}
+		}
+	}
+	return res
+}
 
 // NVProxyEnabled checks both the nvproxy annotation and conf.NVProxy to see if nvproxy is enabled.
 func NVProxyEnabled(spec *specs.Spec, conf *config.Config) bool {
@@ -78,7 +141,7 @@ func gpuFunctionalityRequestedViaHook(spec *specs.Spec, conf *config.Config) boo
 	if spec.Process == nil {
 		return false
 	}
-	nvd, _ := EnvVar(spec.Process.Env, nvdEnvVar)
+	nvd, _ := EnvVar(spec.Process.Env, nvidiaVisibleDevsEnv)
 	// A value of "none" means "no GPU device, but still access to driver
 	// functionality", so it is not a value we check for here.
 	return nvd != "" && nvd != "void"
@@ -105,7 +168,7 @@ func isNvidiaHookPresent(spec *specs.Spec, conf *config.Config) bool {
 //
 // Precondition: conf.NVProxyDocker && GPUFunctionalityRequested(spec, conf).
 func ParseNvidiaVisibleDevices(spec *specs.Spec) (string, error) {
-	nvd, _ := EnvVar(spec.Process.Env, nvdEnvVar)
+	nvd, _ := EnvVar(spec.Process.Env, nvidiaVisibleDevsEnv)
 	if nvd == "none" {
 		return "", nil
 	}
@@ -130,3 +193,34 @@ func ParseNvidiaVisibleDevices(spec *specs.Spec) (string, error) {
 	}
 	return nvd, nil
 }
+
+// NvproxyDriverCapsFromEnv returns the driver capabilities requested by the
+// application via the NVIDIA_DRIVER_CAPABILITIES env var. See
+// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/container_config.go:getDriverCapabilities().
+func NvproxyDriverCapsFromEnv(spec *specs.Spec, conf *config.Config) (NvidiaDriverCaps, error) {
+	allowedDriverCaps := nvidiaDriverCapsFromString(conf.NVProxyAllowedDriverCapabilities)
+	driverCapsEnvStr, ok := EnvVar(spec.Process.Env, nvidiaDriverCapsEnv)
+	if !ok {
+		if IsLegacyCudaImage(spec) {
+			return allowedDriverCaps, nil
+		}
+		return nvproxyDefaultDriverCaps, nil
+	}
+	if len(driverCapsEnvStr) == 0 {
+		return nvproxyDefaultDriverCaps, nil
+	}
+	envDriverCaps := nvidiaDriverCapsFromString(driverCapsEnvStr)
+	driverCaps := allowedDriverCaps.Intersect(envDriverCaps)
+	if !envDriverCaps.hasAll() && len(driverCaps) != len(envDriverCaps) {
+		return nil, fmt.Errorf("disallowed driver capabilities requested: '%v' (allowed '%v'), update --nvproxy-allowed-driver-capabilities to allow them", envDriverCaps, allowedDriverCaps)
+	}
+	return driverCaps, nil
+}
+
+// IsLegacyCudaImage returns true if spec represents a legacy CUDA image.
+// See nvidia-container-toolkit/internal/config/image/cuda_image.go:IsLegacy().
+func IsLegacyCudaImage(spec *specs.Spec) bool {
+	cudaVersion, _ := EnvVar(spec.Process.Env, cudaVersionEnv)
+	requireCuda, _ := EnvVar(spec.Process.Env, requireCudaEnv)
+	return len(cudaVersion) > 0 && len(requireCuda) == 0
+}
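To see the new specutils API end to end, here is a minimal test-style sketch. It is hypothetical (the test and its values are not part of the change, and it assumes EnvVar's usual "KEY=VALUE" lookup over spec.Process.Env): a request within the allowlist is returned as-is, while a request outside it fails with the disallowed-capabilities error.

	// Hypothetical test sketch, not part of the change.
	package specutils_test

	import (
		"testing"

		specs "github.com/opencontainers/runtime-spec/specs-go"
		"gvisor.dev/gvisor/runsc/config"
		"gvisor.dev/gvisor/runsc/specutils"
	)

	func TestNvproxyDriverCapsFromEnv(t *testing.T) {
		conf := &config.Config{NVProxyAllowedDriverCapabilities: "utility,compute"}
		spec := &specs.Spec{Process: &specs.Process{
			Env: []string{"NVIDIA_DRIVER_CAPABILITIES=compute"},
		}}

		// "compute" is within the allowlist, so it is returned unchanged.
		caps, err := specutils.NvproxyDriverCapsFromEnv(spec, conf)
		if err != nil {
			t.Fatalf("NvproxyDriverCapsFromEnv: %v", err)
		}
		if _, ok := caps[specutils.NvidiaDriverCap("compute")]; !ok || len(caps) != 1 {
			t.Errorf("got %v, want exactly {compute}", caps)
		}

		// "graphics" is outside the allowlist, so resolution fails.
		spec.Process.Env = []string{"NVIDIA_DRIVER_CAPABILITIES=compute,graphics"}
		if _, err := specutils.NvproxyDriverCapsFromEnv(spec, conf); err == nil {
			t.Error("expected error for disallowed capability 'graphics'")
		}
	}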