Add API for CDI --devices flag in Docker and Podman for mapping GPUs #3290

Open · wants to merge 7 commits into base: main

Changes from 3 commits
4 changes: 4 additions & 0 deletions pkg/apis/config/v1alpha4/types.go
@@ -118,6 +118,10 @@ type Node struct {
// binded to a host Port
ExtraPortMappings []PortMapping `yaml:"extraPortMappings,omitempty" json:"extraPortMappings,omitempty"`

// Devices allows access to GPUs through CDI using the --device flag, with CDI support added in Docker v25.
// https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#container-device-interface-cdi-support
Devices []string `yaml:"devices,omitempty" json:"devices,omitempty"`

// KubeadmConfigPatches are applied to the generated kubeadm config as
// merge patches. The `kind` field must match the target object, and
// if `apiVersion` is specified it will only be applied to matching objects.
5 changes: 5 additions & 0 deletions pkg/apis/config/v1alpha4/zz_generated.deepcopy.go

Some generated files are not rendered by default.

14 changes: 14 additions & 0 deletions pkg/cluster/internal/providers/docker/provision.go
@@ -255,6 +255,20 @@ func runArgsForNode(node *config.Node, clusterIPFamily config.ClusterIPFamily, n
args = append(args, "-e", "KUBECONFIG=/etc/kubernetes/admin.conf")
}

// Append CDI device args (used for GPU support)
if len(node.Devices) > 0 {
// CDI support for --device requires Docker >= v25; compare the major version numerically
ver := Version()
if major, convErr := strconv.Atoi(strings.Split(ver, ".")[0]); convErr != nil || major < 25 {
return nil, errors.Errorf("using devices api in kind requires Docker >= v25, but found %q", ver)
}

// Append args for each device
for _, device := range node.Devices {
args = append(args, "--device", strings.TrimSpace(device))
}
}

// finally, specify the image to run
return append(args, node.Image), nil
}
10 changes: 10 additions & 0 deletions pkg/cluster/internal/providers/docker/util.go
@@ -33,6 +33,16 @@ func IsAvailable() bool {
return strings.HasPrefix(lines[0], "Docker version")
}

// Version gets the version of docker available on the system
func Version() string {
cmd := exec.Command("docker", "version", "--format", "'{{.Server.Version}}'")
Review comment:

If we use docker info we can also extract information as to whether experimental is enabled.

Does changing this function to assertCDISupported() or something similar make sense here? We could then check the combination of server version and experimental to see whether it is supported.
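
A rough sketch of what such a helper could look like, assuming `docker info` exposes `.ServerVersion` and `.ExperimentalBuild` and that CDI stays gated behind the experimental daemon setting (the field names and gating are assumptions, not part of this PR):

func assertCDISupported() error {
	// Query the daemon once for both the server version and the experimental flag.
	cmd := exec.Command("docker", "info", "--format", "'{{.ServerVersion}} {{.ExperimentalBuild}}'")
	lines, err := exec.OutputLines(cmd)
	if err != nil || len(lines) != 1 {
		return errors.New("failed to query docker info")
	}
	fields := strings.Fields(strings.Trim(lines[0], "'"))
	if len(fields) != 2 {
		return errors.Errorf("unexpected docker info output: %q", lines[0])
	}
	// Compare the major version numerically rather than as a string.
	major, convErr := strconv.Atoi(strings.Split(fields[0], ".")[0])
	if convErr != nil || major < 25 {
		return errors.Errorf("using CDI devices in kind requires Docker >= v25, but found %q", fields[0])
	}
	if fields[1] != "true" {
		return errors.New("using CDI devices requires the Docker daemon's experimental features to be enabled")
	}
	return nil
}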

lines, err := exec.OutputLines(cmd)
if err != nil || len(lines) != 1 {
return ""
}
return strings.Trim(lines[0], "'")
}

// usernsRemap checks if userns-remap is enabled in dockerd
func usernsRemap() bool {
cmd := exec.Command("docker", "info", "--format", "'{{json .SecurityOptions}}'")
8 changes: 7 additions & 1 deletion pkg/cluster/internal/providers/podman/provision.go
@@ -212,6 +212,13 @@ func runArgsForNode(node *config.Node, clusterIPFamily config.ClusterIPFamily, n
args...,
)

// Append CDI device args (used for GPU support)
if len(node.Devices) > 0 {
for _, device := range node.Devices {
args = append(args, "--device", strings.TrimSpace(device))
Review comment:

Instead of having to remember to call strings.TrimSpace wherever the elements of node.CDIDevices are used, can we do that somewhere once?
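
One way to handle this in a single place would be to normalize the strings during config conversion instead of at each call site; a minimal sketch (not part of this diff, assuming convertv1alpha4Node keeps its current shape):

	// Normalize device strings once while converting the public config,
	// so providers can use node.Devices directly without re-trimming.
	for i := range in.Devices {
		out.Devices[i] = strings.TrimSpace(in.Devices[i])
	}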

}
}

// convert mounts and port mappings to container run args
args = append(args, generateMountBindings(node.ExtraMounts...)...)
mappingArgs, err := generatePortMappings(clusterIPFamily, node.ExtraPortMappings...)
@@ -302,7 +309,6 @@ type podmanNetworks []struct {
func getSubnets(networkName string) ([]string, error) {
cmd := exec.Command("podman", "network", "inspect", networkName)
out, err := exec.Output(cmd)

if err != nil {
return nil, errors.Wrap(err, "failed to get subnets")
}
5 changes: 5 additions & 0 deletions pkg/internal/apis/config/convert_v1alpha4.go
@@ -56,6 +56,7 @@ func convertv1alpha4Node(in *v1alpha4.Node, out *Node) {
out.ExtraMounts = make([]Mount, len(in.ExtraMounts))
out.ExtraPortMappings = make([]PortMapping, len(in.ExtraPortMappings))
out.KubeadmConfigPatchesJSON6902 = make([]PatchJSON6902, len(in.KubeadmConfigPatchesJSON6902))
out.Devices = make([]string, len(in.Devices))

for i := range in.ExtraMounts {
convertv1alpha4Mount(&in.ExtraMounts[i], &out.ExtraMounts[i])
@@ -68,6 +69,10 @@ func convertv1alpha4Node(in *v1alpha4.Node, out *Node) {
for i := range in.KubeadmConfigPatchesJSON6902 {
convertv1alpha4PatchJSON6902(&in.KubeadmConfigPatchesJSON6902[i], &out.KubeadmConfigPatchesJSON6902[i])
}

for i := range in.Devices {
out.Devices[i] = in.Devices[i]
}
}

func convertv1alpha4PatchJSON6902(in *v1alpha4.PatchJSON6902, out *PatchJSON6902) {
4 changes: 4 additions & 0 deletions pkg/internal/apis/config/types.go
@@ -98,6 +98,10 @@ type Node struct {
// binded to a host Port
ExtraPortMappings []PortMapping

// Devices allows access to GPUs through CDI using the --device flag, with CDI support added in Docker v25.
// https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#container-device-interface-cdi-support
Devices []string

// KubeadmConfigPatches are applied to the generated kubeadm config as
// strategic merge patches to `kustomize build` internally
// https://github.com/kubernetes/community/blob/a9cf5c8f3380bb52ebe57b1e2dbdec136d8dd484/contributors/devel/sig-api-machinery/strategic-merge-patch.md
15 changes: 15 additions & 0 deletions pkg/internal/apis/config/validate.go
@@ -114,6 +114,10 @@ func (n *Node) Validate() error {
errs = append(errs, errors.New("image is a required field"))
}

if err := validateDevices(n.Devices); err != nil {
errs = append(errs, errors.Wrapf(err, "invalid devices"))
}

// validate extra port forwards
for _, mapping := range n.ExtraPortMappings {
if err := validatePort(mapping.HostPort); err != nil {
@@ -192,6 +196,17 @@ func validatePortMappings(portMappings []PortMapping) error {
return nil
}

func validateDevices(devices []string) error {
for _, device := range devices {
device := strings.TrimSpace(device)
Review comment:

You should be able to use "github.com/container-orchestrated-devices/container-device-interface/pkg/parser" and call parser.IsQualifiedName(device) here. This is used in docker/cli#4084 and significantly reduces the dependencies pulled in.

It should also be fine to allow podman and / or docker to perform this validation though.
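
A sketch of what validateDevices could look like on top of that package, assuming the parser.ParseQualifiedName(device) signature (vendor, class, device name, error); this is not what the current commit does:

import "github.com/container-orchestrated-devices/container-device-interface/pkg/parser"

func validateDevices(devices []string) error {
	for _, device := range devices {
		// ParseQualifiedName reports why a string is malformed, which gives a
		// friendlier error than a plain boolean IsQualifiedName check.
		if _, _, _, err := parser.ParseQualifiedName(strings.TrimSpace(device)); err != nil {
			return errors.Wrapf(err, "invalid device %q", device)
		}
	}
	return nil
}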

Review comment (Author):

I'll give that a try.

Review comment:

Please update to what Evan suggests to reduce the dependencies pulled in.

Review comment (Author):

@BenTheElder What do you think?

Review comment (Author):

@klueska I am using parser.ParseQualifiedName(device) instead of parser.IsQualifiedName(device). The results are the same, except you get the additional error message about why your device string is invalid. I think this is preferable. ParseQualifiedName() is called in IsQualifiedName().

This doesn't change the dependencies at all. There are tests on github.com/container-orchestrated-devices/container-device-interface/pkg/parser that pull in the additional packages. Maybe we should put the tests in a different package? @elezar wdyt?

Review comment:

I can update the tests in that package to use a different import for comparison, or to be in the parser_test package if required. As a matter of interest, which package equivalent to require would be preferred?

Review comment (Author):

I don't personally have a preference - I haven't found any with fewer dependencies. I was thinking we could use the parser_test package to keep things very lightweight.

Review comment:

I have created cncf-tags/container-device-interface#149 with an update to the test package. This should prevent github.com/stretchr/testify/require from getting pulled in.

Review comment (Member):

Sorry, a few things came up back in the real world and here with #3277 etc., and the dependency discussion wound up forked in https://github.com/kubernetes-sigs/kind/pull/3290/files#r1304873501

I'm going to try to get #3335 in first; after this PR is rebased and `go mod tidy` is run, it should be clearer what we're actually talking about for the new dependencies.

See linked comment for stance on deps.

Review comment (Member):

Currently, for example, we have a lot of CRI-inspired types with in-tree, dependency-free code instead, to avoid creating dependency hell for our users. That may not be so reasonable here.
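
If the project prefers to stay dependency-free here, a rough in-tree approximation of a CDI qualified-name check could look like the following; the regular expression is an illustrative assumption and is looser than the actual CDI grammar:

var qualifiedDeviceRE = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]+/[a-zA-Z0-9][a-zA-Z0-9_.-]*=[a-zA-Z0-9_.:-]+$`)

func validateDevices(devices []string) error {
	for _, device := range devices {
		device := strings.TrimSpace(device)
		// Accepts strings shaped like "vendor.com/class=name", e.g. "nvidia.com/gpu=all".
		if !qualifiedDeviceRE.MatchString(device) {
			return errors.Errorf("invalid device string %q: expected a CDI qualified name like vendor.com/class=name", device)
		}
	}
	return nil
}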

// validate device string is not empty
if len(device) == 0 {
return errors.Errorf("invalid device string: '%v'. Empty Strings not allowed", device)
}
}
return nil
}

func validatePort(port int32) error {
// NOTE: -1 is a special value for auto-selecting the port in the container
// backend where possible as opposed to in kind itself.
29 changes: 24 additions & 5 deletions pkg/internal/apis/config/validate_test.go
@@ -18,9 +18,10 @@ package config

import (
"fmt"
"sigs.k8s.io/kind/pkg/internal/assert"
"testing"

"sigs.k8s.io/kind/pkg/internal/assert"

"sigs.k8s.io/kind/pkg/errors"
)

@@ -251,7 +252,7 @@ func TestClusterValidate(t *testing.T) {
}

for _, tc := range cases {
tc := tc //capture loop variable
tc := tc // capture loop variable
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
err := tc.Cluster.Validate()
@@ -343,6 +344,24 @@ func TestNodeValidate(t *testing.T) {
}(),
ExpectErrors: 1,
},
{
TestName: "Empty Devices",
Node: func() Node {
cfg := newDefaultedNode(ControlPlaneRole)
cfg.Devices = []string{" ", ""}
return cfg
}(),
ExpectErrors: 1,
},
{
TestName: "Valid Devices",
Node: func() Node {
cfg := newDefaultedNode(ControlPlaneRole)
cfg.Devices = []string{"vendor1.com/device=test", "nvidia.com/gpu=1", "nvidia.com/gpu=all", "vendor.com/foo=1", "foo.bar.baz/foo-bar123.B_az=all"}
return cfg
}(),
ExpectErrors: 0,
},
{
TestName: "Invalid HostPort",
Node: func() Node {
@@ -360,7 +379,7 @@
}

for _, tc := range cases {
tc := tc //capture loop variable
tc := tc // capture loop variable
t.Run(tc.TestName, func(t *testing.T) {
t.Parallel()
err := tc.Node.Validate()
@@ -414,7 +433,7 @@ func TestPortValidate(t *testing.T) {
}

for _, tc := range cases {
tc := tc //capture loop variable
tc := tc // capture loop variable
t.Run(tc.TestName, func(t *testing.T) {
t.Parallel()
err := validatePort(tc.Port)
@@ -537,7 +556,7 @@ func TestValidatePortMappings(t *testing.T) {
}

for _, tc := range cases {
tc := tc //capture loop variable
tc := tc // capture loop variable
t.Run(tc.testName, func(t *testing.T) {
t.Parallel()

5 changes: 5 additions & 0 deletions pkg/internal/apis/config/zz_generated.deepcopy.go

Some generated files are not rendered by default.

79 changes: 70 additions & 9 deletions site/content/docs/user/configuration.md
@@ -8,7 +8,7 @@ menu:
toc: true
description: |-
This guide covers how to configure KIND cluster creation.

We know this is currently a bit lacking and will expand it over time - PRs welcome!
---
## Getting Started
@@ -281,10 +281,71 @@ nodes:
image: kindest/node:v1.16.4@sha256:b91a2c2317a000f3a783489dfb755064177dbc3a0b2f4147d50f04825d016f55
{{< /codeFromInline >}}

[Reference](https://kind.sigs.k8s.io/docs/user/quick-start/#creating-a-cluster)
[Reference](https://kind.sigs.k8s.io/docs/user/quick-start/#creating-a-cluster)

**Note**: Kubernetes versions are expressed as x.y.z, where x is the major version, y is the minor version, and z is the patch version, following [Semantic Versioning](https://semver.org/) terminology. For more information, see [Kubernetes Release Versioning.](https://github.com/kubernetes/sig-release/blob/master/release-engineering/versioning.md#kubernetes-release-versioning)

### GPU Support

There are two ways to map GPUs into a kind cluster. The first is using the `devices` API and the second is using the `extraMounts` API.

#### Using the Devices API

As a prerequisite, you must have the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed on the host.

Using `devices` for GPU support requires Docker v25 or later. See the notes on Container Device Interface (CDI) support [here.](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#container-device-interface-cdi-support)

GPU devices can be mapped into kind node containers with the `devices` API:
Review comment:

Question: Is it Kind or KinD?

Review comment (Member):

KIND or kind. #3290 (comment)


All GPUs mapped to a single control-plane:

{{< codeFromInline lang="yaml" >}}
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
devices:
- "nvidia.com/gpu=all"
{{< /codeFromInline >}}

Specific GPUs mapped to specific worker nodes based on index:

{{< codeFromInline lang="yaml" >}}
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
devices:
- "nvidia.com/gpu=0"
Review comment:

Note that this will not give you access to just GPU 0, but rather all GPUs on the system.

This is because --privileged is passed to the docker call that creates the node, thus giving it access to all devices under /dev (including all GPU devices).

Individual GPU injection will only be supported if/when kind is able to start nodes without --privileged.

Review comment (Author):

Ok, this reduces the value of adding an API for CDI devices in my opinion.

Review comment:

To be clear, this is the same for all other mechanisms too. The --privileged flag will mean that any container will have access to all devices and the only value add is that the driver libraries are also injected.

Review comment:

Ok, this reduces the value of adding an API for CDI devices in my opinion.

This isn't true. The use of the device mounts uses our "legacy" stack which has a number of shortcomings that using CDI addresses. It also means that users can access devices from other vendors such as Intel or their own device / resource definitions.

Review comment (Member):

the only value add is that the driver libraries are also injected.

I don't suppose we can just trigger driver library injection directly in a simple way and reduce the API surface to "yes I'd like the GPU drivers"?

Review comment:

I spent today putting this together:
https://github.com/klueska/kind-with-gpus-examples

It should be a good starting point for using GPUs with kind independent of this PR

Review comment:

I've also pinged the docker maintainers that helped us push CDI through to see what their thoughts are here:
https://dockercommunity.slack.com/archives/C04MZQZJE94/p1712086101448089

Review comment:

Was pointed to this on that thread:
moby/moby#39702 (comment)

Review comment (Member):

If moby/moby#47663 becomes available we'll have to version detect to use it to avoid breaking all of the users on older docker installs, and I'll be pretty wary of the branching codepath behaving differently vs --privileged.

We have only once required a newer docker version (when we started to require cgroupns=private be available), which caused a lot of consternation even though that change was aimed at mitigating the runc misc issues that caused kind to be very flaky/broken ...

Review comment (Member):

It would've been nice when kind was being built, but now we have to consider that people undoubtedly have test environments that depend on being in a --privileged node :/

- role: worker
devices:
- "nvidia.com/gpu=1"
{{< /codeFromInline >}}

#### Using the Extra Mounts API

GPUs can also be mapped using the `extraMounts` API. This method passes the list of GPUs to inject as volume mounts rather than via the `NVIDIA_VISIBLE_DEVICES` environment variable.

Steps to enable this:

1. Add nvidia as your default runtime in /etc/docker/daemon.json
1. Restart docker (as necessary)
1. Set `accept-nvidia-visible-devices-as-volume-mounts = true` in `/etc/nvidia-container-runtime/config.toml`
1. Add the `extraMounts` to any kind nodes you want to have access to all GPUs in the system:

{{< codeFromInline lang="yaml" >}}
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
extraMounts:
- hostPath: /dev/null
containerPath: /var/run/nvidia-container-devices/all
{{< /codeFromInline >}}

Note: this method only supports adding `all` GPUs to a single node. If you want to add specific GPUs to specific nodes, you will need to use the `devices` API.
Review comment:

I don't know whether this is strictly true. Using /var/run/nvidia-container-devices/0 or /var/run/nvidia-container-devices/{{DEVICE_UUID}} should allow individual devices to be made available.

cc @klueska

Review comment:

As mentioned in #3257 (comment), we can't achieve any finer granularity than all (regardless of whether we do volume mounts or use CDI) because the node's container is started with --privileged. Even if you tell it to selectively inject just a single GPU, the node will be able to see all of them because --privileged pulls in all device nodes under /dev.

Review comment (Author):

Doing some testing today and this does allow access to all GPUs. I'll update the documentation. It is fairly convenient with Kind, however, as you don't need to inject drivers or install them on the node images.

Review comment:

This limitation of all is true even in the CDI case.

Review comment (Author, @lukeogg, Jul 24, 2023):

Ok, this reduces the value of adding an API for CDI devices in my opinion.

Same here.


### Extra Mounts

Extra mounts can be used to pass through storage on the host to a kind node
@@ -300,10 +361,10 @@ For more information see the [Docker file sharing guide.](https://docs.docker.co

### Extra Port Mappings

Extra port mappings can be used to port forward to the kind nodes. This is a
cross-platform option to get traffic into your kind cluster.
Extra port mappings can be used to port forward to the kind nodes. This is a
cross-platform option to get traffic into your kind cluster.

If you are running Docker without the Docker Desktop Application on Linux, you can simply send traffic to the node IPs from the host without extra port mappings.
If you are running Docker without the Docker Desktop Application on Linux, you can simply send traffic to the node IPs from the host without extra port mappings.
With the installation of the Docker Desktop Application, whether it is on macOS, Windows or Linux, you'll want to use these.

You may also want to see the [Ingress Guide].
@@ -401,11 +462,11 @@ nodes:

### Kubeadm Config Patches

KIND uses [`kubeadm`](/docs/design/principles/#leverage-existing-tooling)
KIND uses [`kubeadm`](/docs/design/principles/#leverage-existing-tooling)
to configure cluster nodes.

Formally KIND runs `kubeadm init` on the first control-plane node, we can customize the flags by using the kubeadm
[InitConfiguration](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-init/#config-file)
[InitConfiguration](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-init/#config-file)
([spec](https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3#InitConfiguration))

{{< codeFromInline lang="yaml" >}}
@@ -436,9 +497,9 @@ nodes:
enable-admission-plugins: NodeRestriction,MutatingAdmissionWebhook,ValidatingAdmissionWebhook
{{< /codeFromInline >}}

On every additional node configured in the KIND cluster,
On every additional node configured in the KIND cluster,
worker or control-plane (in HA mode),
KIND runs `kubeadm join` which can be configured using the
KIND runs `kubeadm join` which can be configured using the
Review comment:

Kind vs KinD vs KIND?

Review comment (Member, @BenTheElder, Aug 24, 2023):

The docs consistently use "KIND" or "kind" but not "KinD". We're not using the acronym anymore as it may be podman or in the future something else. Generally just kind like the CLI, but sometimes "KIND" or kind (code formatted) is less ambiguously the tool vs the english word.

[JoinConfiguration](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-join/#config-file)
([spec](https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3#JoinConfiguration))
