cherry pick from #36795
This commit is contained in:
@ -1867,10 +1867,13 @@ public class TabletScheduler extends MasterDaemon {
|
||||
* If task is timeout, remove the tablet.
|
||||
*/
|
||||
public void handleRunningTablets() {
|
||||
Set<Long> aliveBeIds = Sets.newHashSet(Env.getCurrentSystemInfo().getAllBackendIds(true));
|
||||
// 1. remove the tablet ctx if timeout
|
||||
List<TabletSchedCtx> cancelTablets = Lists.newArrayList();
|
||||
synchronized (this) {
|
||||
for (TabletSchedCtx tabletCtx : runningTablets.values()) {
|
||||
long srcBeId = tabletCtx.getSrcBackendId();
|
||||
long destBeId = tabletCtx.getDestBackendId();
|
||||
if (Config.disable_tablet_scheduler) {
|
||||
tabletCtx.setErrMsg("tablet scheduler is disabled");
|
||||
cancelTablets.add(tabletCtx);
|
||||
@ -1881,6 +1884,12 @@ public class TabletScheduler extends MasterDaemon {
|
||||
tabletCtx.setErrMsg("timeout");
|
||||
cancelTablets.add(tabletCtx);
|
||||
stat.counterCloneTaskTimeout.incrementAndGet();
|
||||
} else if (destBeId > 0 && !aliveBeIds.contains(destBeId)) {
|
||||
tabletCtx.setErrMsg("dest be " + destBeId + " is dead");
|
||||
cancelTablets.add(tabletCtx);
|
||||
} else if (srcBeId > 0 && !aliveBeIds.contains(srcBeId)) {
|
||||
tabletCtx.setErrMsg("src be " + srcBeId + " is dead");
|
||||
cancelTablets.add(tabletCtx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -134,12 +134,18 @@ public class DebugPointUtil {
|
||||
addDebugPoint(name, new DebugPoint());
|
||||
}
|
||||
|
||||
// NOTE(review): this header has no body of its own and is immediately followed by a second
// method header; it looks like the pre-change side of the diff — confirm against the repository.
public static <E> void addDebugPointWithValue(String name, E value) {
|
||||
/**
 * Creates a new {@code DebugPoint}, attaches {@code params} to it and registers it
 * under {@code name} via {@code addDebugPoint}.
 *
 * @param name   name the debug point is registered under
 * @param params parameter map assigned to the debug point
 */
public static void addDebugPointWithParams(String name, Map<String, String> params) {
|
||||
DebugPoint debugPoint = new DebugPoint();
|
||||
// NOTE(review): `value` is not a parameter of addDebugPointWithParams; presumably this is a
// removed diff line that belonged to the older addDebugPointWithValue body — confirm.
debugPoint.params.put("value", String.format("%s", value));
|
||||
// The caller's map reference is stored directly; no copy is made here.
debugPoint.params = params;
|
||||
addDebugPoint(name, debugPoint);
|
||||
}
|
||||
|
||||
/**
 * Registers a debug point under {@code name} whose parameter map contains a single
 * entry: key {@code "value"} mapped to {@code value} rendered with
 * {@code String.format("%s", value)}.
 *
 * <p>Builds the one-entry map and delegates to {@code addDebugPointWithParams}.
 *
 * @param name  name the debug point is registered under
 * @param value value to expose through the {@code "value"} parameter, stored as a string
 * @param <E>   type of the value; only its string rendering is kept
 */
public static <E> void addDebugPointWithValue(String name, E value) {
|
||||
Map<String, String> params = Maps.newHashMap();
|
||||
params.put("value", String.format("%s", value));
|
||||
addDebugPointWithParams(name, params);
|
||||
}
|
||||
|
||||
public static void removeDebugPoint(String name) {
|
||||
DebugPoint debugPoint = debugPoints.remove(name);
|
||||
LOG.info("remove debug point: name={}, exists={}", name, debugPoint != null);
|
||||
|
||||
@ -24,6 +24,7 @@ import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.FeConstants;
|
||||
import org.apache.doris.common.ThreadPoolManager;
|
||||
import org.apache.doris.common.Version;
|
||||
import org.apache.doris.common.util.DebugPointUtil;
|
||||
import org.apache.doris.common.util.MasterDaemon;
|
||||
import org.apache.doris.persist.HbPackage;
|
||||
import org.apache.doris.resource.Tag;
|
||||
@ -56,6 +57,7 @@ import com.google.common.collect.Maps;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.Callable;
|
||||
@ -253,6 +255,14 @@ public class HeartbeatMgr extends MasterDaemon {
|
||||
result.setBackendInfo(backendInfo);
|
||||
}
|
||||
|
||||
String debugDeadBeIds = DebugPointUtil.getDebugParamOrDefault(
|
||||
"HeartbeatMgr.BackendHeartbeatHandler", "deadBeIds", "");
|
||||
if (!Strings.isNullOrEmpty(debugDeadBeIds)
|
||||
&& Arrays.stream(debugDeadBeIds.split(",")).anyMatch(id -> Long.parseLong(id) == backendId)) {
|
||||
result.getStatus().setStatusCode(TStatusCode.INTERNAL_ERROR);
|
||||
result.getStatus().addToErrorMsgs("debug point HeartbeatMgr.deadBeIds set dead be");
|
||||
}
|
||||
|
||||
ok = true;
|
||||
if (result.getStatus().getStatusCode() == TStatusCode.OK) {
|
||||
TBackendInfo tBackendInfo = result.getBackendInfo();
|
||||
|
||||
Reference in New Issue
Block a user