Skip to content

Commit

Permalink
self-adaption backoff timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
sollhui committed Mar 21, 2024
1 parent 4779d14 commit c8a598b
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ public enum InternalErrorCode {
MANUAL_STOP_ERR(101),
TOO_MANY_FAILURE_ROWS_ERR(102),
CREATE_TASKS_ERR(103),
TASKS_ABORT_ERR(104);
TASKS_ABORT_ERR(104),
CANNOT_RESUME_ERR(105),
TIMEOUT_TOO_MUCH(106);

private long errCode;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,18 @@ public void processTimeoutTasks() {
// and after renew, the previous task is removed from routineLoadTaskInfoList,
// so task can no longer be committed successfully.
// the already committed task will not be handled here.
int timeoutBackOffCount = routineLoadTaskInfo.getTimeoutBackOffCount();
if (timeoutBackOffCount > RoutineLoadTaskInfo.MAX_TIMEOUT_BACK_OFF_COUNT) {
try {
updateState(JobState.PAUSED, new ErrorReason(InternalErrorCode.TIMEOUT_TOO_MUCH,
"task " + routineLoadTaskInfo.getId() + " timeout too much"), false);
} catch (UserException e) {
LOG.warn("update job state to pause failed", e);
}
return;
}
routineLoadTaskInfo.setTimeoutBackOffCount(timeoutBackOffCount + 1);
routineLoadTaskInfo.setTimeoutMs((routineLoadTaskInfo.getTimeoutMs() << 1));
RoutineLoadTaskInfo newTask = unprotectRenewTask(routineLoadTaskInfo);
Env.getCurrentEnv().getRoutineLoadTaskScheduler().addTaskInQueue(newTask);
}
Expand Down Expand Up @@ -1212,6 +1224,7 @@ private void executeTaskOnTxnStatusChanged(RoutineLoadTaskInfo routineLoadTaskIn
} else if (checkCommitInfo(rlTaskTxnCommitAttachment, txnState, txnStatusChangeReason)) {
// step2: update job progress
updateProgress(rlTaskTxnCommitAttachment);
routineLoadTaskInfo.selfAdaptTimeout(rlTaskTxnCommitAttachment);
}

if (rlTaskTxnCommitAttachment != null && !Strings.isNullOrEmpty(rlTaskTxnCommitAttachment.getErrorLogUrl())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ public abstract class RoutineLoadTaskInfo {

protected boolean isMultiTable = false;

protected static final int MAX_TIMEOUT_BACK_OFF_COUNT = 3;
protected int timeoutBackOffCount = 0;

// this status will be set when corresponding transaction's status is changed.
// so that user or other logic can know the status of the corresponding txn.
protected TransactionStatus txnStatus = TransactionStatus.UNKNOWN;
Expand Down Expand Up @@ -136,6 +139,10 @@ public void setLastScheduledTime(long lastScheduledTime) {
this.lastScheduledTime = lastScheduledTime;
}

public void setTimeoutMs(long timeoutMs) {
this.timeoutMs = timeoutMs;
}

public long getTimeoutMs() {
return timeoutMs;
}
Expand All @@ -148,6 +155,14 @@ public TransactionStatus getTxnStatus() {
return txnStatus;
}

public void setTimeoutBackOffCount(int timeoutBackOffCount) {
this.timeoutBackOffCount = timeoutBackOffCount;
}

public int getTimeoutBackOffCount() {
return timeoutBackOffCount;
}

public boolean isTimeout() {
if (txnStatus == TransactionStatus.COMMITTED || txnStatus == TransactionStatus.VISIBLE) {
// the corresponding txn is already finished, this task can not be treated as timeout.
Expand All @@ -162,6 +177,21 @@ public boolean isTimeout() {
return false;
}

public void selfAdaptTimeout(RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment) {
long taskExecutionTime = rlTaskTxnCommitAttachment.getTaskExecutionTimeMs();
long timeoutMs = this.timeoutMs;

while (this.timeoutBackOffCount > 0) {
timeoutMs = timeoutMs >> 1;
if (timeoutMs <= taskExecutionTime) {
this.timeoutMs = timeoutMs << 1;
return;
}
this.timeoutBackOffCount--;
}
this.timeoutMs = timeoutMs;
}

abstract TRoutineLoadTask createRoutineLoadTask() throws UserException;

// begin the txn of this task
Expand Down

0 comments on commit c8a598b

Please sign in to comment.