Skip to content

Commit

Permalink
[enhance](blacklist) seperate blacklist conf from heartbeat (#28638)
Browse files Browse the repository at this point in the history
There is a circuit breaker lasting for 2 minutes in grpc, then if a be is down and up again, send fragments to the be fails lasting for 2 minutes.
  • Loading branch information
dataroaring authored Dec 20, 2023
1 parent 18ad856 commit a443a39
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 10 deletions.
14 changes: 14 additions & 0 deletions fe/fe-common/src/main/java/org/apache/doris/common/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -1782,6 +1782,20 @@ public class Config extends ConfigBase {
@ConfField(mutable = true, masterOnly = true)
public static long max_backend_heartbeat_failure_tolerance_count = 1;

/**
* Heartbeat interval in seconds.
* Default is 10, which means every 10 seconds, the master will send a heartbeat to all backends.
*/
@ConfField(mutable = false, masterOnly = false)
public static int heartbeat_interval_second = 10;

/**
* After a backend is marked as unavailable, it will be added to blacklist.
* Default is 120.
*/
@ConfField(mutable = true, masterOnly = false)
public static int blacklist_duration_second = 120;

@ConfField(mutable = true, masterOnly = false, description = {
"禁止创建odbc, mysql, broker类型的外表", "Disallow the creation of odbc, mysql, broker type external tables"})
public static boolean enable_odbc_mysql_broker_table = false;
Expand Down
2 changes: 1 addition & 1 deletion fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
Original file line number Diff line number Diff line change
Expand Up @@ -2424,7 +2424,7 @@ protected void runAfterCatalogReady() {
}

public void createFeDiskUpdater() {
feDiskUpdater = new Daemon("feDiskUpdater", FeConstants.heartbeat_interval_second * 1000L) {
feDiskUpdater = new Daemon("feDiskUpdater", Config.heartbeat_interval_second * 1000L) {
@Override
protected void runOneCycle() {
ExecuteEnv.getInstance().refreshAndGetDiskInfo(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

public class ClientPool {
static GenericKeyedObjectPoolConfig heartbeatConfig = new GenericKeyedObjectPoolConfig();
static int heartbeatTimeoutMs = FeConstants.heartbeat_interval_second * 1000;
static int heartbeatTimeoutMs = Config.heartbeat_interval_second * 1000;

static GenericKeyedObjectPoolConfig backendConfig = new GenericKeyedObjectPoolConfig();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ public class FeConstants {
public static int shortkey_max_column_count = 3;
public static int shortkey_maxsize_bytes = 36;

public static int heartbeat_interval_second = 5;
public static int checkpoint_interval_second = 60; // 1 minutes

// dpp version
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import org.apache.doris.catalog.Env;
import org.apache.doris.common.Config;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.common.Reference;
import org.apache.doris.common.UserException;
Expand Down Expand Up @@ -176,7 +175,7 @@ public static void addToBlacklist(Long backendID, String reason) {
return;
}

blacklistBackends.put(backendID, Pair.of(FeConstants.heartbeat_interval_second + 1, reason));
blacklistBackends.put(backendID, Pair.of(Config.blacklist_duration_second + 1, reason));
LOG.warn("add backend {} to black list. reason: {}", backendID, reason);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public class HeartbeatMgr extends MasterDaemon {
private static volatile AtomicReference<TMasterInfo> masterInfo = new AtomicReference<>();

public HeartbeatMgr(SystemInfoService nodeMgr, boolean needRegisterMetric) {
super("heartbeat mgr", FeConstants.heartbeat_interval_second * 1000);
super("heartbeat mgr", Config.heartbeat_interval_second * 1000);
this.nodeMgr = nodeMgr;
this.executor = ThreadPoolManager.newDaemonFixedThreadPool(Config.heartbeat_mgr_threads_num,
Config.heartbeat_mgr_blocking_queue_size, "heartbeat-mgr-pool", needRegisterMetric);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.doris.qe;

import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Config;
import org.apache.doris.common.Reference;
import org.apache.doris.common.UserException;
import org.apache.doris.system.Backend;
Expand Down Expand Up @@ -47,7 +47,7 @@ public class SimpleSchedulerTest {
@BeforeClass
public static void setUp() {
SimpleScheduler.init();
FeConstants.heartbeat_interval_second = 2;
Config.heartbeat_interval_second = 2;
be1 = new Backend(1000L, "192.168.100.0", 9050);
be2 = new Backend(1001L, "192.168.100.1", 9050);
be3 = new Backend(1002L, "192.168.100.2", 9050);
Expand Down Expand Up @@ -139,7 +139,7 @@ public void run() {
t3.join();

Assert.assertFalse(SimpleScheduler.isAvailable(be1));
Thread.sleep((FeConstants.heartbeat_interval_second + 5) * 1000);
Thread.sleep((Config.heartbeat_interval_second + 5) * 1000);
Assert.assertTrue(SimpleScheduler.isAvailable(be1));
}

Expand Down Expand Up @@ -194,7 +194,7 @@ public void testGetHostAbnormal() throws UserException, InterruptedException {
System.out.println(e.getMessage());
}

Thread.sleep((FeConstants.heartbeat_interval_second + 5) * 1000);
Thread.sleep((Config.heartbeat_interval_second + 5) * 1000);
Assert.assertNotNull(SimpleScheduler.getHost(locations.get(0).backend_id, locations, backends, ref));
}
}

0 comments on commit a443a39

Please sign in to comment.