[feature](replica) Add drop replica safely on backend (#30303)

This commit is contained in:
yujun
2024-01-25 09:32:23 +08:00
committed by yiguolei
parent 49f879f8fd
commit 3d22f9cfc8
14 changed files with 172 additions and 22 deletions

View File

@ -33,7 +33,7 @@ import java.util.Map;
* Required:
* "tablet_id" = "10010",
* "backend_id" = "10001"
* "status" = "bad"/"ok"
* "status" = "drop"/"bad"/"ok"
*/
public class AdminSetReplicaStatusStmt extends DdlStmt {
@ -81,7 +81,7 @@ public class AdminSetReplicaStatusStmt extends DdlStmt {
}
} else if (key.equalsIgnoreCase(STATUS)) {
status = ReplicaStatus.valueOf(val.toUpperCase());
if (status != ReplicaStatus.BAD && status != ReplicaStatus.OK) {
if (status != ReplicaStatus.BAD && status != ReplicaStatus.OK && status != ReplicaStatus.DROP) {
throw new AnalysisException("Do not support setting replica status as " + val);
}
} else {

View File

@ -40,7 +40,7 @@ public class ShowReplicaStatusStmt extends ShowStmt {
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
.add("TabletId").add("ReplicaId").add("BackendId").add("Version").add("LastFailedVersion")
.add("LastSuccessVersion").add("CommittedVersion").add("SchemaHash").add("VersionNum")
.add("IsBad").add("State").add("Status")
.add("IsBad").add("IsUserDrop").add("State").add("Status")
.build();
private TableRef tblRef;

View File

@ -5570,6 +5570,7 @@ public class Env {
throw new MetaNotFoundException("replica does not exist on backend, beId=" + backendId);
}
if (status == ReplicaStatus.BAD || status == ReplicaStatus.OK) {
replica.setUserDrop(false);
if (replica.setBad(status == ReplicaStatus.BAD)) {
if (!isReplay) {
SetReplicaStatusOperationLog log = new SetReplicaStatusOperationLog(backendId, tabletId,
@ -5579,6 +5580,10 @@ public class Env {
LOG.info("set replica {} of tablet {} on backend {} as {}. is replay: {}", replica.getId(),
tabletId, backendId, status, isReplay);
}
} else if (status == ReplicaStatus.DROP) {
replica.setUserDrop(true);
LOG.info("set replica {} of tablet {} on backend {} as {}.", replica.getId(),
tabletId, backendId, status);
}
} finally {
table.writeUnlock();

View File

@ -93,6 +93,8 @@ public class MetadataViewer {
} else if (replica.getSchemaHash() != -1 && replica.getSchemaHash() != schemaHash) {
status = ReplicaStatus.SCHEMA_ERROR;
} else if (replica.isUserDrop()) {
status = ReplicaStatus.DROP;
}
if (filterReplica(status, statusFilter, op)) {
@ -109,6 +111,7 @@ public class MetadataViewer {
row.add(String.valueOf(replica.getSchemaHash()));
row.add(String.valueOf(replica.getVersionCount()));
row.add(String.valueOf(replica.isBad()));
row.add(String.valueOf(replica.isUserDrop()));
row.add(replica.getState().name());
row.add(status.name());
result.add(row);
@ -131,6 +134,7 @@ public class MetadataViewer {
row.add("-1");
row.add(FeConstants.null_string);
row.add(FeConstants.null_string);
row.add(FeConstants.null_string);
row.add(ReplicaStatus.MISSING.name());
result.add(row);
}

View File

@ -70,7 +70,8 @@ public class Replica implements Writable {
VERSION_ERROR, // missing version
MISSING, // replica does not exist
SCHEMA_ERROR, // replica's schema hash does not equal to index's schema hash
BAD // replica is broken.
BAD, // replica is broken.
DROP, // user force drop replica on this backend
}
@SerializedName(value = "id")
@ -159,6 +160,8 @@ public class Replica implements Writable {
private long preWatermarkTxnId = -1;
private long postWatermarkTxnId = -1;
private long userDropTime = -1;
public Replica() {
}
@ -760,9 +763,33 @@ public class Replica implements Writable {
return postWatermarkTxnId;
}
/**
 * Sets or clears the user-drop mark on this replica.
 *
 * @param isDrop {@code true} stamps the replica with the current wall-clock time
 *               (milliseconds) as the moment the drop was requested; {@code false}
 *               resets the stamp to {@code -1}, i.e. "not marked".
 */
public void setUserDrop(boolean isDrop) {
    userDropTime = isDrop ? System.currentTimeMillis() : -1;
}
/**
 * Tells whether this replica counts as alive.
 *
 * @return {@code false} when the replica state is {@code CLONE} or {@code DECOMMISSION},
 *         or when the replica is flagged bad; {@code true} otherwise.
 */
public boolean isAlive() {
    if (getState() == ReplicaState.CLONE) {
        return false;
    }
    if (getState() == ReplicaState.DECOMMISSION) {
        return false;
    }
    return !isBad();
}
/**
 * Tells whether a user-drop mark is currently in effect for this replica.
 *
 * <p>The mark is only honoured while less than
 * {@code Config.manual_drop_replica_valid_second} seconds have elapsed since it was set.
 * Note the side effect: once the mark is found to be expired, the stored timestamp is
 * reset to {@code -1} before returning.
 *
 * @return {@code true} if the replica is marked for drop and the mark has not expired.
 */
public boolean isUserDrop() {
    // A non-positive timestamp means the replica was never marked (or was already cleared).
    if (userDropTime <= 0) {
        return false;
    }

    long elapsedMs = System.currentTimeMillis() - userDropTime;
    // The config value is multiplied by 1000 so it can be compared against milliseconds.
    long validMs = Config.manual_drop_replica_valid_second * 1000L;
    if (elapsedMs < validMs) {
        return true;
    }

    // Mark expired: forget it so later calls take the fast path above.
    userDropTime = -1;
    return false;
}
/**
 * Tells whether this replica may take part in scheduling.
 *
 * @return {@code true} only when the system info service reports this replica's backend
 *         as schedule-available and the replica carries no active user-drop mark.
 */
public boolean isScheduleAvailable() {
    // Backend check comes first; the user-drop check is skipped when it fails.
    if (!Env.getCurrentSystemInfo().checkBackendScheduleAvailable(backendId)) {
        return false;
    }
    return !isUserDrop();
}
}

View File

@ -498,7 +498,7 @@ public class Tablet extends MetaObject implements Writable {
aliveAndVersionComplete++;
}
if (backend.isScheduleAvailable()) {
if (replica.isScheduleAvailable()) {
if (replica.needFurtherRepair() && (needFurtherRepairReplica == null || !versionCompleted)) {
needFurtherRepairReplica = replica;
}

View File

@ -690,10 +690,14 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
continue;
}
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null || !be.isScheduleAvailable()) {
LOG.debug("replica's backend {} does not exist or is not scheduler available, skip. tablet: {}",
replica.getBackendId(), tabletId);
if (!replica.isScheduleAvailable()) {
    // Replica.isScheduleAvailable() is false either because the backend is not
    // schedule-available or because the replica carries a user-drop mark.
    // Log the cause that actually applies (the two messages were previously swapped).
    if (Env.getCurrentSystemInfo().checkBackendScheduleAvailable(replica.getBackendId())) {
        // Backend is fine, so the only remaining cause is the user-drop mark.
        LOG.debug("user drop replica {}, skip. tablet: {}",
                replica, tabletId);
    } else {
        LOG.debug("replica's backend {} does not exist or is not scheduler available, skip. tablet: {}",
                replica.getBackendId(), tabletId);
    }
    continue;
}

View File

@ -734,7 +734,7 @@ public class TabletScheduler extends MasterDaemon {
Map<Tag, Short> currentAllocMap = Maps.newHashMap();
for (Replica replica : replicas) {
Backend be = infoService.getBackend(replica.getBackendId());
if (be != null && be.isScheduleAvailable() && replica.isAlive() && !replica.tooSlow()
if (replica.isScheduleAvailable() && replica.isAlive() && !replica.tooSlow()
&& be.isMixNode()) {
Short num = currentAllocMap.getOrDefault(be.getLocationTag(), (short) 0);
currentAllocMap.put(be.getLocationTag(), (short) (num + 1));
@ -888,8 +888,9 @@ public class TabletScheduler extends MasterDaemon {
// this case should be handled in deleteBackendDropped()
continue;
}
if (!be.isScheduleAvailable()) {
deleteReplicaInternal(tabletCtx, replica, "backend unavailable", force);
if (!replica.isScheduleAvailable()) {
    // The replica is unavailable either because its backend is not schedule-available
    // or because the user marked it for drop. If the backend itself is available, the
    // user-drop mark must be the cause (the ternary was previously inverted).
    String reason = be.isScheduleAvailable() ? "user drop replica" : "backend unavailable";
    deleteReplicaInternal(tabletCtx, replica, reason, force);
    return true;
}
}

View File

@ -42,6 +42,7 @@ public class ReplicasProcNode implements ProcNodeInterface {
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>().add("ReplicaId")
.add("BackendId").add("Version").add("LstSuccessVersion").add("LstFailedVersion").add("LstFailedTime")
.add("SchemaHash").add("LocalDataSize").add("RemoteDataSize").add("RowCount").add("State").add("IsBad")
.add("IsUserDrop")
.add("VersionCount").add("PathHash").add("MetaUrl").add("CompactionStatus").add("CooldownReplicaId")
.add("CooldownMetaId").add("QueryHits").build();
@ -103,6 +104,7 @@ public class ReplicasProcNode implements ProcNodeInterface {
String.valueOf(replica.getRowCount()),
String.valueOf(replica.getState()),
String.valueOf(replica.isBad()),
String.valueOf(replica.isUserDrop()),
String.valueOf(replica.getVersionCount()),
String.valueOf(replica.getPathHash()),
metaUrl,

View File

@ -149,9 +149,10 @@ public class Diagnoser {
+ replica.getLastFailedVersion());
}
// status
if (!replica.isAlive()) {
if (!replica.isAlive() || replica.isUserDrop()) {
statusErr.append("Replica on backend " + replica.getBackendId() + "'s state is " + replica.getState()
+ ", and is bad: " + (replica.isBad() ? "Yes" : "No"));
+ ", and is bad: " + (replica.isBad() ? "Yes" : "No")
+ ", and is going to drop: " + (replica.isUserDrop() ? "Yes" : "No"));
}
if (replica.getVersionCount() > Config.min_version_count_indicate_replica_compaction_too_slow) {
compactionErr.append("Replica on backend " + replica.getBackendId() + "'s version count is too high: "