Skip to content

Commit

Permalink
[improve](routine load) delay schedule EOF tasks to avoid too many sm…
Browse files Browse the repository at this point in the history
…all transactions (#39975)

We encountered a scenario where a large number of small transactions
were generated, which degraded query performance:
Kafka delivers data in very small batches at very short intervals,
so tasks are scheduled frequently and finish very quickly,
resulting in a large number of small transactions.

To solve this problem, we delay the scheduling of tasks that perceive
EOF. This does not delay data consumption, because perceiving EOF
indicates that the consumption speed is greater than the production
speed.
  • Loading branch information
sollhui committed Sep 7, 2024
1 parent 2b568b3 commit b665ddf
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public KafkaTaskInfo(KafkaTaskInfo kafkaTaskInfo, Map<Integer, Long> partitionId
kafkaTaskInfo.getTimeoutMs(), kafkaTaskInfo.getTimeoutBackOffCount(),
kafkaTaskInfo.getBeId(), isMultiTable);
this.partitionIdToOffset = partitionIdToOffset;
this.isEof = kafkaTaskInfo.getIsEof();
}

public List<Integer> getPartitions() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1247,7 +1247,7 @@ private void executeTaskOnTxnStatusChanged(RoutineLoadTaskInfo routineLoadTaskIn
} else if (checkCommitInfo(rlTaskTxnCommitAttachment, txnState, txnStatusChangeReason)) {
// step2: update job progress
updateProgress(rlTaskTxnCommitAttachment);
routineLoadTaskInfo.selfAdaptTimeout(rlTaskTxnCommitAttachment);
routineLoadTaskInfo.handleTaskByTxnCommitAttachment(rlTaskTxnCommitAttachment);
}

if (rlTaskTxnCommitAttachment != null && !Strings.isNullOrEmpty(rlTaskTxnCommitAttachment.getErrorLogUrl())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ public abstract class RoutineLoadTaskInfo {
protected static final int MAX_TIMEOUT_BACK_OFF_COUNT = 3;
protected int timeoutBackOffCount = 0;

// Set by judgeEof() when a committed task finished below its job's max batch rows and
// max batch bytes and ran for less than this task's timeout; exposed through getIsEof().
protected boolean isEof = false;

// this status will be set when corresponding transaction's status is changed.
// so that user or other logic can know the status of the corresponding txn.
protected TransactionStatus txnStatus = TransactionStatus.UNKNOWN;
Expand Down Expand Up @@ -167,6 +169,10 @@ public int getTimeoutBackOffCount() {
return timeoutBackOffCount;
}

/**
 * Returns the current value of the EOF flag of this task.
 *
 * @return {@code true} if the flag has been set, {@code false} otherwise
 */
public boolean getIsEof() {
    return this.isEof;
}

public boolean isTimeout() {
if (txnStatus == TransactionStatus.COMMITTED || txnStatus == TransactionStatus.VISIBLE) {
// the corresponding txn is already finished, this task can not be treated as timeout.
Expand All @@ -181,7 +187,12 @@ public boolean isTimeout() {
return false;
}

public void selfAdaptTimeout(RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment) {
/**
 * Updates this task from the commit attachment of its transaction.
 *
 * <p>Runs the timeout self-adaptation first and the EOF judgement second. The order is
 * kept deliberately: the EOF judgement compares against {@code timeoutMs}, which the
 * timeout self-adaptation assigns.
 *
 * @param rlTaskTxnCommitAttachment commit attachment of the task's transaction
 */
public void handleTaskByTxnCommitAttachment(RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment) {
    this.selfAdaptTimeout(rlTaskTxnCommitAttachment);
    this.judgeEof(rlTaskTxnCommitAttachment);
}

private void selfAdaptTimeout(RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment) {
long taskExecutionTime = rlTaskTxnCommitAttachment.getTaskExecutionTimeMs();
long timeoutMs = this.timeoutMs;

Expand All @@ -196,6 +207,15 @@ public void selfAdaptTimeout(RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment
this.timeoutMs = timeoutMs;
}

/**
 * Decides from the commit attachment whether this task ended at EOF and records the
 * verdict in {@code isEof}.
 *
 * <p>The task counts as EOF when it stopped below every limit: fewer rows than the job's
 * max batch rows, fewer bytes than the job's max batch size, and an execution time
 * shorter than this task's timeout.
 *
 * <p>The flag is overwritten on every call rather than only ever raised. The
 * {@code KafkaTaskInfo} copy constructor carries the flag over to the renewed task, so a
 * flag that could never be cleared would stay {@code true} for all successor tasks even
 * after they start filling their batches again.
 *
 * @param rlTaskTxnCommitAttachment commit attachment of the task's transaction
 */
private void judgeEof(RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment) {
    RoutineLoadJob routineLoadJob = routineLoadManager.getJob(jobId);
    if (routineLoadJob == null) {
        // NOTE(review): assumes getJob() returns null for an unknown job id — confirm.
        // Without the job's limits EOF cannot be judged, so do not mark the task as EOF.
        this.isEof = false;
        return;
    }
    this.isEof = rlTaskTxnCommitAttachment.getTotalRows() < routineLoadJob.getMaxBatchRows()
            && rlTaskTxnCommitAttachment.getReceivedBytes() < routineLoadJob.getMaxBatchSizeBytes()
            && rlTaskTxnCommitAttachment.getTaskExecutionTimeMs() < this.timeoutMs;
}

abstract TRoutineLoadTask createRoutineLoadTask() throws UserException;

// begin the txn of this task
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,15 @@ private void process() throws UserException, InterruptedException {
try {
// This step will be blocked when queue is empty
RoutineLoadTaskInfo routineLoadTaskInfo = needScheduleTasksQueue.take();
if (System.currentTimeMillis() - routineLoadTaskInfo.getLastScheduledTime()
< routineLoadTaskInfo.getTimeoutMs()) {
// try to delay scheduling this task for 'timeout', to void too many failure
needScheduleTasksQueue.addLast(routineLoadTaskInfo);
return;
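// try to delay scheduling tasks that are perceived as EOF until MaxBatchInterval has elapsed,
// to avoid too many small transactions
// NOTE(review): the elapsed time below is in milliseconds, but it is compared against
// getMaxBatchIntervalS(), which by its name looks like seconds — confirm the units.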
if (routineLoadTaskInfo.getIsEof()) {
RoutineLoadJob routineLoadJob = routineLoadManager.getJob(routineLoadTaskInfo.getJobId());
if (System.currentTimeMillis() - routineLoadTaskInfo.getLastScheduledTime()
< routineLoadJob.getMaxBatchIntervalS()) {
needScheduleTasksQueue.addLast(routineLoadTaskInfo);
return;
}
}
scheduleOneTask(routineLoadTaskInfo);
} catch (Exception e) {
Expand All @@ -114,6 +118,7 @@ private void process() throws UserException, InterruptedException {
}

private void scheduleOneTask(RoutineLoadTaskInfo routineLoadTaskInfo) throws Exception {
routineLoadTaskInfo.setLastScheduledTime(System.currentTimeMillis());
if (LOG.isDebugEnabled()) {
LOG.debug("schedule routine load task info {} for job {}",
routineLoadTaskInfo.id, routineLoadTaskInfo.getJobId());
Expand Down

0 comments on commit b665ddf

Please sign in to comment.