[improvement](tablet clone) furthur repair replicas should be check even if they are versions catchup (#25551)

This commit is contained in:
yujun
2023-10-26 18:14:40 +08:00
committed by GitHub
parent c3527672a5
commit 2679fa4ea7
7 changed files with 201 additions and 41 deletions

View File

@ -21,6 +21,7 @@ import org.apache.doris.catalog.TabletInvertedIndex;
import org.apache.doris.clone.BackendLoadStatistic.Classification;
import org.apache.doris.clone.BackendLoadStatistic.LoadScore;
import org.apache.doris.common.Config;
import org.apache.doris.common.util.DebugPointUtil;
import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.resource.Tag;
import org.apache.doris.system.Backend;
@ -186,35 +187,58 @@ public class LoadStatisticForTag {
int lowCounter = 0;
int midCounter = 0;
int highCounter = 0;
long debugHighBeId = DebugPointUtil.getDebugParamOrDefault("FE.HIGH_LOAD_BE_ID", -1L);
if (debugHighBeId > 0) {
final long targetBeId = debugHighBeId; // debugHighBeId can not put in lambda cause it's updated later
if (!beLoadStatistics.stream().anyMatch(it -> it.getBeId() == targetBeId)) {
debugHighBeId = -1L;
}
}
for (BackendLoadStatistic beStat : beLoadStatistics) {
if (!beStat.hasMedium(medium)) {
continue;
}
if (Config.be_rebalancer_fuzzy_test) {
Classification clazz = Classification.MID;
if (debugHighBeId > 0) {
if (beStat.getBeId() == debugHighBeId) {
clazz = Classification.HIGH;
} else {
clazz = Classification.LOW;
}
} else if (Config.be_rebalancer_fuzzy_test) {
if (beStat.getLoadScore(medium) > avgLoadScore) {
beStat.setClazz(medium, Classification.HIGH);
highCounter++;
clazz = Classification.HIGH;
} else if (beStat.getLoadScore(medium) < avgLoadScore) {
beStat.setClazz(medium, Classification.LOW);
lowCounter++;
clazz = Classification.LOW;
}
} else {
if (Math.abs(beStat.getLoadScore(medium) - avgLoadScore) / avgLoadScore
> Config.balance_load_score_threshold) {
if (beStat.getLoadScore(medium) > avgLoadScore) {
beStat.setClazz(medium, Classification.HIGH);
highCounter++;
clazz = Classification.HIGH;
} else if (beStat.getLoadScore(medium) < avgLoadScore) {
beStat.setClazz(medium, Classification.LOW);
lowCounter++;
clazz = Classification.LOW;
}
} else {
beStat.setClazz(medium, Classification.MID);
midCounter++;
}
}
beStat.setClazz(medium, clazz);
switch (clazz) {
case HIGH:
highCounter++;
break;
case LOW:
lowCounter++;
break;
case MID:
midCounter++;
break;
default:
break;
}
}
LOG.debug("classify backend by load. medium: {} avg load score: {}. low/mid/high: {}/{}/{}",
@ -265,6 +289,11 @@ public class LoadStatisticForTag {
return false;
}
long debugHighBeId = DebugPointUtil.getDebugParamOrDefault("FE.HIGH_LOAD_BE_ID", -1L);
if (srcBeStat.getBeId() == debugHighBeId) {
return true;
}
currentSrcBeScore = srcBeStat.getLoadScore(medium);
currentDestBeScore = destBeStat.getLoadScore(medium);

View File

@ -666,6 +666,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
List<Replica> decommissionCand = Lists.newArrayList();
List<Replica> colocateCand = Lists.newArrayList();
List<Replica> notColocateCand = Lists.newArrayList();
List<Replica> furtherRepairs = Lists.newArrayList();
for (Replica replica : tablet.getReplicas()) {
if (replica.isBad()) {
LOG.debug("replica {} is bad, skip. tablet: {}",
@ -688,6 +689,11 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
if (replica.getLastFailedVersion() <= 0
&& replica.getVersion() >= visibleVersion) {
if (tabletStatus == TabletStatus.NEED_FURTHER_REPAIR && replica.needFurtherRepair()) {
furtherRepairs.add(replica);
}
// skip healthy replica
LOG.debug("replica {} version {} is healthy, visible version {}, replica state {}, skip. tablet: {}",
replica.getId(), replica.getVersion(), visibleVersion, replica.getState(), tabletId);
@ -709,8 +715,24 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
} else {
candidates = decommissionCand;
}
if (candidates.isEmpty()) {
throw new SchedException(Status.UNRECOVERABLE, "unable to choose dest replica");
if (furtherRepairs.isEmpty()) {
throw new SchedException(Status.UNRECOVERABLE, "unable to choose dest replica");
}
boolean allCatchup = true;
for (Replica replica : furtherRepairs) {
if (checkFurthurRepairFinish(replica, visibleVersion)) {
replica.setNeedFurtherRepair(false);
replica.setFurtherRepairWatermarkTxnTd(-1);
} else {
allCatchup = false;
}
}
throw new SchedException(Status.FINISHED,
allCatchup ? "further repair all catchup" : "further repair waiting catchup");
}
Replica chosenReplica = null;
@ -782,6 +804,32 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
setDest(chosenReplica.getBackendId(), chosenReplica.getPathHash());
}
private boolean checkFurthurRepairFinish(Replica replica, long version) {
if (replica.getVersion() < version || replica.getLastFailedVersion() > 0) {
return false;
}
long furtherRepairWatermarkTxnTd = replica.getFurtherRepairWatermarkTxnTd();
if (furtherRepairWatermarkTxnTd < 0) {
return true;
}
try {
if (Env.getCurrentGlobalTransactionMgr().isPreviousTransactionsFinished(
furtherRepairWatermarkTxnTd, dbId, tblId, partitionId)) {
LOG.info("replica {} of tablet {} has catchup with further repair watermark id {}",
replica, tabletId, furtherRepairWatermarkTxnTd);
return true;
} else {
return false;
}
} catch (Exception e) {
LOG.warn("replica {} of tablet {} check catchup with further repair watermark id {} failed",
replica, tabletId, furtherRepairWatermarkTxnTd, e);
return true;
}
}
public void releaseResource(TabletScheduler tabletScheduler) {
releaseResource(tabletScheduler, false);
}
@ -1104,25 +1152,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
// change from prepare to committed or visible, this replica will be fall behind and be removed
// in REDUNDANT detection.
//
boolean isCatchup = false;
if (replica.getVersion() >= partition.getVisibleVersion() && replica.getLastFailedVersion() < 0) {
long furtherRepairWatermarkTxnTd = replica.getFurtherRepairWatermarkTxnTd();
if (furtherRepairWatermarkTxnTd < 0) {
isCatchup = true;
} else {
try {
if (Env.getCurrentGlobalTransactionMgr().isPreviousTransactionsFinished(
furtherRepairWatermarkTxnTd, dbId, tblId, partitionId)) {
isCatchup = true;
LOG.info("new replica {} of tablet {} has catchup with further repair watermark id {}",
replica, tabletId, furtherRepairWatermarkTxnTd);
}
} catch (Exception e) {
isCatchup = true;
}
}
}
boolean isCatchup = checkFurthurRepairFinish(replica, partition.getVisibleVersion());
replica.incrFurtherRepairCount();
if (isCatchup || replica.getLeftFurtherRepairCount() <= 0) {
replica.setNeedFurtherRepair(false);

View File

@ -46,6 +46,7 @@ import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.common.util.DebugPointUtil;
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.persist.ReplicaPersistInfo;
import org.apache.doris.resource.Tag;
@ -983,6 +984,7 @@ public class TabletScheduler extends MasterDaemon {
boolean force, LoadStatisticForTag statistic) throws SchedException {
Replica chosenReplica = null;
double maxScore = 0;
long debugHighBeId = DebugPointUtil.getDebugParamOrDefault("FE.HIGH_LOAD_BE_ID", -1L);
for (Replica replica : replicas) {
BackendLoadStatistic beStatistic = statistic.getBackendLoadStatistic(replica.getBackendId());
if (beStatistic == null) {
@ -1007,6 +1009,11 @@ public class TabletScheduler extends MasterDaemon {
maxScore = loadScore;
chosenReplica = replica;
}
if (debugHighBeId > 0 && replica.getBackendId() == debugHighBeId) {
chosenReplica = replica;
break;
}
}
if (chosenReplica != null) {
@ -1535,6 +1542,16 @@ public class TabletScheduler extends MasterDaemon {
statusPair.second = tabletCtx.getPriority();
}
}
if (statusPair.first == TabletStatus.NEED_FURTHER_REPAIR) {
// replica is just waiting for finishing txns before furtherRepairWatermarkTxnTd,
// no need to add it immediately
Replica replica = tablet.getReplicaByBackendId(tabletCtx.getDestBackendId());
if (replica != null && replica.getVersion() >= partition.getVisibleVersion()
&& replica.getLastFailedVersion() < 0) {
return;
}
}
} finally {
tbl.readUnlock();
}

View File

@ -30,6 +30,9 @@ import java.util.concurrent.atomic.AtomicInteger;
/**
* Use for manage debug points.
*
* usage example can see DebugPointUtilTest.java
*
**/
public class DebugPointUtil {
private static final Logger LOG = LogManager.getLogger(DebugPointUtil.class);
@ -109,11 +112,34 @@ public class DebugPointUtil {
return debugPoint;
}
// if not enable debug point or its params not contains `key`, then return `defaultValue`
// url: /api/debug_point/add/name?k1=v1&k2=v2&...
public static <E> E getDebugParamOrDefault(String debugPointName, String key, E defaultValue) {
DebugPoint debugPoint = getDebugPoint(debugPointName);
return debugPoint != null ? debugPoint.param(key, defaultValue) : defaultValue;
}
// url: /api/debug_point/add/name?value=v
public static <E> E getDebugParamOrDefault(String debugPointName, E defaultValue) {
return getDebugParamOrDefault(debugPointName, "value", defaultValue);
}
public static void addDebugPoint(String name, DebugPoint debugPoint) {
debugPoints.put(name, debugPoint);
LOG.info("add debug point: name={}, params={}", name, debugPoint.params);
}
public static void addDebugPoint(String name) {
addDebugPoint(name, new DebugPoint());
}
public static <E> void addDebugPointWithValue(String name, E value) {
DebugPoint debugPoint = new DebugPoint();
debugPoint.params.put("value", String.format("%s", value));
addDebugPoint(name, debugPoint);
}
public static void removeDebugPoint(String name) {
DebugPoint debugPoint = debugPoints.remove(name);
LOG.info("remove debug point: name={}, exists={}", name, debugPoint != null);