Skip to content

Commit

Permalink
[YUNIKORN-2884] Task fail with post allocated but the pod will keep p…
Browse files Browse the repository at this point in the history
…ending
  • Loading branch information
zhuqi-lucas committed Sep 27, 2024
1 parent c959d23 commit 89171d1
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
11 changes: 5 additions & 6 deletions pkg/cache/scheduler_callback.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,11 @@ func (callback *AsyncRMCallback) UpdateAllocation(response *si.AllocationRespons
// update cache
callback.context.ForgetPod(release.GetAllocationKey())

// TerminationType 0 mean STOPPED_BY_RM
if release.TerminationType != si.TerminationType_STOPPED_BY_RM {
// send release app allocation to application states machine
ev := NewReleaseAppAllocationEvent(release.ApplicationID, release.TerminationType, release.AllocationKey)
dispatcher.Dispatch(ev)
}
// TerminationType 0 means STOPPED_BY_RM, but the release is also needed when a task has failed:
// the release event must be sent to the application in case the task failed while the pod is still pending.
// Send the release app allocation event to the application state machine.
ev := NewReleaseAppAllocationEvent(release.ApplicationID, release.TerminationType, release.AllocationKey)
dispatcher.Dispatch(ev)
}

return nil
Expand Down
4 changes: 3 additions & 1 deletion pkg/cache/scheduler_callback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,9 @@ func TestUpdateAllocation_AllocationReleased_StoppedByRM(t *testing.T) {
assert.NilError(t, err, "error updating allocation")
assert.Assert(t, !context.schedulerCache.IsAssumedPod(taskUID1))
err = utils.WaitForCondition(deleteCalled.Load, 10*time.Millisecond, 500*time.Millisecond)
assert.Error(t, err, "timeout waiting for condition") // pod is not expected to be deleted
// The pod should be deleted, because TerminationType_STOPPED_BY_RM is also reported when a task fails.
// If we don't delete the pod, it will be stuck in the pending state.
assert.NilError(t, err, "pod has not been deleted")
}

func TestUpdateApplication_Accepted(t *testing.T) {
Expand Down

0 comments on commit 89171d1

Please sign in to comment.