From 273af3c56e7a2482afa2ac7d589a7f439431f4f9 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Fri, 30 Aug 2024 13:38:51 +0100 Subject: [PATCH] Update PodSpecOverride API Signed-off-by: Andrey Velichkevich --- .../2170-kubeflow-training-v2/README.md | 29 +++++++++---------- .../kubeflow.org/v2alpha1/trainjob_types.go | 20 ++++++++----- .../v2alpha1/zz_generated.deepcopy.go | 22 +++++++------- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/docs/proposals/2170-kubeflow-training-v2/README.md b/docs/proposals/2170-kubeflow-training-v2/README.md index 09b5fd6733..a397f2b558 100644 --- a/docs/proposals/2170-kubeflow-training-v2/README.md +++ b/docs/proposals/2170-kubeflow-training-v2/README.md @@ -301,20 +301,19 @@ type TrainJobSpec struct { Annotations map[string]string `json:"annotations,omitempty"` // Custom overrides for the training runtime. - PodSpecOverrides []PodSpecOverrides `json:"podSpecOverrides,omitempty"` + PodSpecOverrides []PodSpecOverride `json:"podSpecOverrides,omitempty"` // Whether the controller should suspend the running TrainJob. // Defaults to false. Suspend *bool `json:"suspend,omitempty"` // ManagedBy is used to indicate the controller or entity that manages a TrainJob. - // The value must be either an empty, 'kubeflow.org/trainjob-controller' or - // 'kueue.x-k8s.io/multikueue'. - // The built-in TrainJob controller reconciles TrainJob which don't have this - // field at all or the field value is the reserved string - // 'kubeflow.org/trainjob-controller', but delegates reconciling TrainJobs - // with a 'kueue.x-k8s.io/multikueue' to the Kueue. - // The field is immutable. + // The value must be either an empty, `kubeflow.org/trainjob-controller` or + // `kueue.x-k8s.io/multikueue`. 
The built-in TrainJob controller reconciles TrainJobs which + // don't have this field at all or the field value is the reserved string + // `kubeflow.org/trainjob-controller`, but delegates reconciling TrainJobs + // with a `kueue.x-k8s.io/multikueue` value to Kueue. The field is immutable. + // Defaults to `kubeflow.org/trainjob-controller`. ManagedBy *string `json:"managedBy,omitempty"` } @@ -820,9 +819,9 @@ spec: claimName: model-exporter ``` -### The PodSpecOverrides APIs +### The PodSpecOverride APIs -The `PodSpecOverrides` represents overrides for the `TrainingRuntime` when `TrainJob` is created. +The `PodSpecOverride` represents overrides for the `TrainingRuntime` when `TrainJob` is created. These parameters can include the user's identity or PVC. Usually, these parameters should not be configured by the user and should be attached during the @@ -831,15 +830,15 @@ orchestration (e.g. using Kubernetes admission webhooks or custom clients). In the future, we can add more parameters if we find use-cases when it is required. ```golang -type PodSpecOverrides struct { +type PodSpecOverride struct { // Names of the training job replicas in the training runtime template to apply the overrides. TargetReplicatedJobs []string `json:"targetReplicatedJobs"` // Overrides for the containers in the desired job templates. - Containers []ContainerOverrides `json:"containers,omitempty"` + Containers []ContainerOverride `json:"containers,omitempty"` // Overrides for the init container in the desired job templates. - InitContainers []ContainerOverrides `json:"initContainers,omitempty"` + InitContainers []ContainerOverride `json:"initContainers,omitempty"` // Overrides for the Pod volume configuration. Volumes []corev1.Volume `json:"volumes,omitempty"` @@ -854,9 +853,9 @@ type PodSpecOverrides struct { Tolerations []corev1.Toleration `json:"tolerations,omitempty"` } -// ContainerOverrides represents parameters that can be overridden using PodSpecOverrides. 
+// ContainerOverride represents parameters that can be overridden using PodSpecOverride. // Parameters from the Trainer, DatasetConfig, and ModelConfig will take precedence. -type ContainerOverrides struct { +type ContainerOverride struct { // Name for the container. TrainingRuntime must have this container. Name string `json:"name"` diff --git a/pkg/apis/kubeflow.org/v2alpha1/trainjob_types.go b/pkg/apis/kubeflow.org/v2alpha1/trainjob_types.go index 99189df4f7..749274c70a 100644 --- a/pkg/apis/kubeflow.org/v2alpha1/trainjob_types.go +++ b/pkg/apis/kubeflow.org/v2alpha1/trainjob_types.go @@ -77,13 +77,19 @@ type TrainJobSpec struct { Annotations map[string]string `json:"annotations,omitempty"` // Custom overrides for the training runtime. - PodSpecOverrides []PodSpecOverrides `json:"podSpecOverrides,omitempty"` + PodSpecOverrides []PodSpecOverride `json:"podSpecOverrides,omitempty"` // Whether the controller should suspend the running TrainJob. // Defaults to false. Suspend *bool `json:"suspend,omitempty"` - // ManagedBy field indicates the controller that manages a TrainJob. + // ManagedBy is used to indicate the controller or entity that manages a TrainJob. + // The value must be either empty, `kubeflow.org/trainjob-controller` or + // `kueue.x-k8s.io/multikueue`. The built-in TrainJob controller reconciles TrainJobs which + // don't have this field at all or the field value is the reserved string + // `kubeflow.org/trainjob-controller`, but delegates reconciling TrainJobs + // with a `kueue.x-k8s.io/multikueue` value to Kueue. The field is immutable. + // Defaults to `kubeflow.org/trainjob-controller`. ManagedBy *string `json:"managedBy,omitempty"` } @@ -187,16 +193,16 @@ type OutputModel struct { SecretRef *corev1.SecretReference `json:"secretRef,omitempty"` } -// PodSpecOverrides represents the custom overrides that will be applied for the TrainJob's resources. 
-type PodSpecOverrides struct { +// PodSpecOverride represents the custom overrides that will be applied for the TrainJob's resources. +type PodSpecOverride struct { // Names of the training job replicas in the training runtime template to apply the overrides. TargetReplicatedJobs []string `json:"targetReplicatedJobs"` // Overrides for the containers in the desired job templates. - Containers []ContainerOverrides `json:"containers,omitempty"` + Containers []ContainerOverride `json:"containers,omitempty"` // Overrides for the init container in the desired job templates. - InitContainers []ContainerOverrides `json:"initContainers,omitempty"` + InitContainers []ContainerOverride `json:"initContainers,omitempty"` // Overrides for the Pod volume configuration. Volumes []corev1.Volume `json:"volumes,omitempty"` @@ -213,7 +219,7 @@ type PodSpecOverrides struct { -// ContainerOverrides represents parameters that can be overridden using PodSpecOverrides. +// ContainerOverride represents parameters that can be overridden using PodSpecOverride. // Parameters from the Trainer, DatasetConfig, and ModelConfig will take precedence. -type ContainerOverrides struct { +type ContainerOverride struct { // Name for the container. TrainingRuntime must have this container. Name string `json:"name"` diff --git a/pkg/apis/kubeflow.org/v2alpha1/zz_generated.deepcopy.go b/pkg/apis/kubeflow.org/v2alpha1/zz_generated.deepcopy.go index db62e89800..ece81c5755 100644 --- a/pkg/apis/kubeflow.org/v2alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/kubeflow.org/v2alpha1/zz_generated.deepcopy.go @@ -85,7 +85,7 @@ func (in *ClusterTrainingRuntimeList) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *ContainerOverrides) DeepCopyInto(out *ContainerOverrides) { +func (in *ContainerOverride) DeepCopyInto(out *ContainerOverride) { *out = *in if in.Command != nil { in, out := &in.Command, &out.Command @@ -120,12 +120,12 @@ func (in *ContainerOverrides) DeepCopyInto(out *ContainerOverrides) { } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerOverrides. -func (in *ContainerOverrides) DeepCopy() *ContainerOverrides { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerOverride. +func (in *ContainerOverride) DeepCopy() *ContainerOverride { if in == nil { return nil } - out := new(ContainerOverrides) + out := new(ContainerOverride) in.DeepCopyInto(out) return out } @@ -406,7 +406,7 @@ func (in *PodGroupPolicySource) DeepCopy() *PodGroupPolicySource { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PodSpecOverrides) DeepCopyInto(out *PodSpecOverrides) { +func (in *PodSpecOverride) DeepCopyInto(out *PodSpecOverride) { *out = *in if in.TargetReplicatedJobs != nil { in, out := &in.TargetReplicatedJobs, &out.TargetReplicatedJobs @@ -415,14 +415,14 @@ func (in *PodSpecOverrides) DeepCopyInto(out *PodSpecOverrides) { } if in.Containers != nil { in, out := &in.Containers, &out.Containers - *out = make([]ContainerOverrides, len(*in)) + *out = make([]ContainerOverride, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } if in.InitContainers != nil { in, out := &in.InitContainers, &out.InitContainers - *out = make([]ContainerOverrides, len(*in)) + *out = make([]ContainerOverride, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } @@ -450,12 +450,12 @@ func (in *PodSpecOverrides) DeepCopyInto(out *PodSpecOverrides) { } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodSpecOverrides. 
-func (in *PodSpecOverrides) DeepCopy() *PodSpecOverrides { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodSpecOverride. +func (in *PodSpecOverride) DeepCopy() *PodSpecOverride { if in == nil { return nil } - out := new(PodSpecOverrides) + out := new(PodSpecOverride) in.DeepCopyInto(out) return out } @@ -616,7 +616,7 @@ func (in *TrainJobSpec) DeepCopyInto(out *TrainJobSpec) { } if in.PodSpecOverrides != nil { in, out := &in.PodSpecOverrides, &out.PodSpecOverrides - *out = make([]PodSpecOverrides, len(*in)) + *out = make([]PodSpecOverride, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) }