Skip to content

Commit

Permalink
Adding cel validations on trainjob crd
Browse files Browse the repository at this point in the history
Signed-off-by: Akshay Chitneni <[email protected]>
  • Loading branch information
Akshay Chitneni committed Oct 18, 2024
1 parent 6965c1a commit b580661
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 3 deletions.
11 changes: 10 additions & 1 deletion manifests/v2/base/crds/kubeflow.org_trainjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ spec:
They will be merged with the TrainingRuntime values.
type: object
managedBy:
default: kubeflow.org/trainjob-controller
description: |-
ManagedBy is used to indicate the controller or entity that manages a TrainJob.
The value must be either an empty, `kubeflow.org/trainjob-controller` or
Expand All @@ -206,6 +207,12 @@ spec:
with a 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable.
Defaults to `kubeflow.org/trainjob-controller`
type: string
x-kubernetes-validations:
- message: ManagedBy must be kubeflow.org/trainjob-controller or kueue.x-k8s.io/multikueue
if set
rule: self in ['kubeflow.org/trainjob-controller', 'kueue.x-k8s.io/multikueue']
- message: ManagedBy value is immutable
rule: self == oldSelf
modelConfig:
description: Configuration of the pre-trained and trained model.
properties:
Expand Down Expand Up @@ -2736,14 +2743,15 @@ spec:
description: Reference to the training runtime.
properties:
apiGroup:
default: kubeflow.org
description: |-
APIGroup of the runtime being referenced.
Defaults to `kubeflow.org`.
type: string
kind:
default: ClusterTrainingRuntime
description: |-
Kind of the runtime being referenced.
It must be one of TrainingRuntime or ClusterTrainingRuntime.
Defaults to ClusterTrainingRuntime.
type: string
name:
Expand All @@ -2756,6 +2764,7 @@ spec:
- name
type: object
suspend:
default: false
description: |-
Whether the controller should suspend the running TrainJob.
Defaults to false.
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/kubeflow.org/v2alpha1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion pkg/apis/kubeflow.org/v2alpha1/trainjob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ import (
jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2"
)

const (
TrainJobKind string = "TrainJob"
)

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +kubebuilder:object:root=true
Expand Down Expand Up @@ -87,6 +91,7 @@ type TrainJobSpec struct {

// Whether the controller should suspend the running TrainJob.
// Defaults to false.
// +kubebuilder:default=false
Suspend *bool `json:"suspend,omitempty"`

// ManagedBy is used to indicate the controller or entity that manages a TrainJob.
Expand All @@ -96,6 +101,9 @@ type TrainJobSpec struct {
// `kubeflow.org/trainjob-controller`, but delegates reconciling TrainJobs
// with a 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable.
// Defaults to `kubeflow.org/trainjob-controller`
// +kubebuilder:default="kubeflow.org/trainjob-controller"
// +kubebuilder:validation:XValidation:rule="self in ['kubeflow.org/trainjob-controller', 'kueue.x-k8s.io/multikueue']", message="ManagedBy must be kubeflow.org/trainjob-controller or kueue.x-k8s.io/multikueue if set"
// +kubebuilder:validation:XValidation:rule="self == oldSelf", message="ManagedBy value is immutable"
ManagedBy *string `json:"managedBy,omitempty"`
}

Expand All @@ -108,11 +116,12 @@ type RuntimeRef struct {

// APIGroup of the runtime being referenced.
// Defaults to `kubeflow.org`.
// +kubebuilder:default="kubeflow.org"
APIGroup *string `json:"apiGroup,omitempty"`

// Kind of the runtime being referenced.
// It must be one of TrainingRuntime or ClusterTrainingRuntime.
// Defaults to ClusterTrainingRuntime.
// +kubebuilder:default="ClusterTrainingRuntime"
Kind *string `json:"kind,omitempty"`
}

Expand Down
90 changes: 90 additions & 0 deletions test/integration/controller.v2/trainjob_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"

kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
Expand Down Expand Up @@ -71,4 +72,93 @@ var _ = ginkgo.Describe("TrainJob controller", ginkgo.Ordered, func() {
gomega.Expect(k8sClient.Create(ctx, trainJob)).Should(gomega.Succeed())
})
})

ginkgo.When("TrainJob CR Validation", func() {
ginkgo.AfterEach(func() {
gomega.Expect(k8sClient.DeleteAllOf(ctx, &kubeflowv2.TrainJob{}, client.InNamespace(ns.Name))).Should(
gomega.Succeed())
})

ginkgo.It("Should succeed in creating TrainJob", func() {

managedBy := "kubeflow.org/trainjob-controller"

trainingRuntimeRef := kubeflowv2.RuntimeRef{
Name: "InvalidRuntimeRef",
APIGroup: ptr.To(kubeflowv2.GroupVersion.Group),
Kind: ptr.To(kubeflowv2.TrainingRuntimeKind),
}
jobSpec := kubeflowv2.TrainJobSpec{
RuntimeRef: trainingRuntimeRef,
ManagedBy: &managedBy,
}
trainJob := &kubeflowv2.TrainJob{
TypeMeta: metav1.TypeMeta{
APIVersion: kubeflowv2.SchemeGroupVersion.String(),
Kind: kubeflowv2.TrainJobKind,
},
ObjectMeta: metav1.ObjectMeta{
GenerateName: "valid-trainjob-",
Namespace: ns.Name,
},
Spec: jobSpec,
}

err := k8sClient.Create(ctx, trainJob)
gomega.Expect(err).Should(gomega.Succeed())
})

ginkgo.It("Should fail in creating TrainJob with invalid spec.managedBy", func() {
managedBy := "invalidManagedBy"
jobSpec := kubeflowv2.TrainJobSpec{
ManagedBy: &managedBy,
}
trainJob := &kubeflowv2.TrainJob{
TypeMeta: metav1.TypeMeta{
APIVersion: kubeflowv2.SchemeGroupVersion.String(),
Kind: kubeflowv2.TrainJobKind,
},
ObjectMeta: metav1.ObjectMeta{
Name: "invalid-trainjob",
Namespace: ns.Name,
},
Spec: jobSpec,
}
gomega.Expect(k8sClient.Create(ctx, trainJob)).To(gomega.MatchError(
gomega.ContainSubstring("spec.managedBy: Invalid value")))
})

ginkgo.It("Should fail in updating spec.managedBy", func() {

managedBy := "kubeflow.org/trainjob-controller"

trainingRuntimeRef := kubeflowv2.RuntimeRef{
Name: "InvalidRuntimeRef",
APIGroup: ptr.To(kubeflowv2.GroupVersion.Group),
Kind: ptr.To(kubeflowv2.TrainingRuntimeKind),
}
jobSpec := kubeflowv2.TrainJobSpec{
RuntimeRef: trainingRuntimeRef,
ManagedBy: &managedBy,
}
trainJob := &kubeflowv2.TrainJob{
TypeMeta: metav1.TypeMeta{
APIVersion: kubeflowv2.SchemeGroupVersion.String(),
Kind: kubeflowv2.TrainJobKind,
},
ObjectMeta: metav1.ObjectMeta{
Name: "job-with-failed-update",
Namespace: ns.Name,
},
Spec: jobSpec,
}

gomega.Expect(k8sClient.Create(ctx, trainJob)).Should(gomega.Succeed())
updatedManagedBy := "kueue.x-k8s.io/multikueue"
jobSpec.ManagedBy = &updatedManagedBy
trainJob.Spec = jobSpec
gomega.Expect(k8sClient.Update(ctx, trainJob)).To(gomega.MatchError(
gomega.ContainSubstring("ManagedBy value is immutable")))
})
})
})

0 comments on commit b580661

Please sign in to comment.