diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java index a675257fea9bb73..a5ff3710a1368b8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java @@ -245,9 +245,18 @@ public synchronized long write(short op, Writable writable) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("opCode = {}, journal size = {}", op, theData.getSize()); } + // Write the key value pair to bdb. boolean writeSucceed = false; - for (int i = 0; i < RETRY_TIME; i++) { + // ATTN: If all the followers exit except master, master should continue provide + // query service, so do not exit if the write operation is OP_TIMESTAMP. + // + // Because BDBJE will replicate the committed txns to FOLLOWERs after the connection + // resumed, directly reseting the next journal id and returning will cause subsequent + // txn written to the same journal ID not to be replayed by the FOLLOWERS. So for + // OP_TIMESTAMP operation, try to write until it succeeds here. + int retry_times = op == OperationType.OP_TIMESTAMP ? Integer.MAX_VALUE : RETRY_TIME; + for (int i = 0; i < retry_times; i++) { try { // Parameter null means auto commit if (currentJournalDB.put(null, theKey, theData) == OperationStatus.SUCCESS) { @@ -289,17 +298,6 @@ public synchronized long write(short op, Writable writable) throws IOException { } if (!writeSucceed) { - if (op == OperationType.OP_TIMESTAMP) { - /* - * Do not exit if the write operation is OP_TIMESTAMP. - * If all the followers exit except master, master should continue provide query - * service. - * To prevent master exit, we should exempt OP_TIMESTAMP write - */ - nextJournalId.set(id); - LOG.warn("master can not achieve quorum. write timestamp fail. but will not exit."); - return -1; - } String msg = "write bdb failed. will exit. journalId: " + id + ", bdb database Name: " + currentJournalDB.getDatabaseName(); LOG.error(msg);