Skip to content

Commit

Permalink
Release-1.8: Cherry-pick of #2243 (#2244)
Browse files Browse the repository at this point in the history
* Do not clean up a job if the job is suspended.

Signed-off-by: Michal Szadkowski <[email protected]>
Signed-off-by: Yuki Iwai <[email protected]>
Co-authored-by: Michał Szadkowski <[email protected]>
  • Loading branch information
tenzen-y and mszadkow authored Aug 30, 2024
1 parent a822688 commit cb83d14
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pkg/controller.v1/common/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ func (jc *JobController) CleanupJob(runPolicy *apiv1.RunPolicy, jobStatus apiv1.
currentTime := time.Now()
metaObject, _ := job.(metav1.Object)
ttl := runPolicy.TTLSecondsAfterFinished
if ttl == nil {
if ttl == nil || trainutil.IsJobSuspended(runPolicy) {
return nil
}
duration := time.Second * time.Duration(*ttl)
Expand Down
24 changes: 24 additions & 0 deletions pkg/controller.v1/tensorflow/job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,30 @@ var _ = Describe("Test for controller.v1/common", func() {
wantTFJobIsRemoved: false,
wantErr: false,
}),
Entry("No error with completionTime is nil if suspended", &cleanUpCases{
tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, nil),
runPolicy: &kubeflowv1.RunPolicy{
TTLSecondsAfterFinished: nil,
Suspend: ptr.To(true),
},
jobStatus: kubeflowv1.JobStatus{
CompletionTime: nil,
},
wantTFJobIsRemoved: false,
wantErr: false,
}),
Entry("No error with TTL is set and completionTime is nil, if suspended", &cleanUpCases{
tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](10)),
runPolicy: &kubeflowv1.RunPolicy{
TTLSecondsAfterFinished: ptr.To[int32](10),
Suspend: ptr.To(true),
},
jobStatus: kubeflowv1.JobStatus{
CompletionTime: nil,
},
wantTFJobIsRemoved: false,
wantErr: false,
}),
Entry("Error is occurred since completionTime is nil", &cleanUpCases{
tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](10)),
runPolicy: &kubeflowv1.RunPolicy{
Expand Down

0 comments on commit cb83d14

Please sign in to comment.