[feature](replica) Add drop replica safely on backend (#30303)
This commit is contained in:
@ -33,7 +33,7 @@ import java.util.Map;
|
||||
* Required:
|
||||
* "tablet_id" = "10010",
|
||||
* "backend_id" = "10001"
|
||||
* "status" = "bad"/"ok"
|
||||
* "status" = "drop"/"bad"/"ok"
|
||||
*/
|
||||
public class AdminSetReplicaStatusStmt extends DdlStmt {
|
||||
|
||||
@ -81,7 +81,7 @@ public class AdminSetReplicaStatusStmt extends DdlStmt {
|
||||
}
|
||||
} else if (key.equalsIgnoreCase(STATUS)) {
|
||||
status = ReplicaStatus.valueOf(val.toUpperCase());
|
||||
if (status != ReplicaStatus.BAD && status != ReplicaStatus.OK) {
|
||||
if (status != ReplicaStatus.BAD && status != ReplicaStatus.OK && status != ReplicaStatus.DROP) {
|
||||
throw new AnalysisException("Do not support setting replica status as " + val);
|
||||
}
|
||||
} else {
|
||||
|
||||
@ -40,7 +40,7 @@ public class ShowReplicaStatusStmt extends ShowStmt {
|
||||
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
|
||||
.add("TabletId").add("ReplicaId").add("BackendId").add("Version").add("LastFailedVersion")
|
||||
.add("LastSuccessVersion").add("CommittedVersion").add("SchemaHash").add("VersionNum")
|
||||
.add("IsBad").add("State").add("Status")
|
||||
.add("IsBad").add("IsUserDrop").add("State").add("Status")
|
||||
.build();
|
||||
|
||||
private TableRef tblRef;
|
||||
|
||||
@ -5570,6 +5570,7 @@ public class Env {
|
||||
throw new MetaNotFoundException("replica does not exist on backend, beId=" + backendId);
|
||||
}
|
||||
if (status == ReplicaStatus.BAD || status == ReplicaStatus.OK) {
|
||||
replica.setUserDrop(false);
|
||||
if (replica.setBad(status == ReplicaStatus.BAD)) {
|
||||
if (!isReplay) {
|
||||
SetReplicaStatusOperationLog log = new SetReplicaStatusOperationLog(backendId, tabletId,
|
||||
@ -5579,6 +5580,10 @@ public class Env {
|
||||
LOG.info("set replica {} of tablet {} on backend {} as {}. is replay: {}", replica.getId(),
|
||||
tabletId, backendId, status, isReplay);
|
||||
}
|
||||
} else if (status == ReplicaStatus.DROP) {
|
||||
replica.setUserDrop(true);
|
||||
LOG.info("set replica {} of tablet {} on backend {} as {}.", replica.getId(),
|
||||
tabletId, backendId, status);
|
||||
}
|
||||
} finally {
|
||||
table.writeUnlock();
|
||||
|
||||
@ -93,6 +93,8 @@ public class MetadataViewer {
|
||||
|
||||
} else if (replica.getSchemaHash() != -1 && replica.getSchemaHash() != schemaHash) {
|
||||
status = ReplicaStatus.SCHEMA_ERROR;
|
||||
} else if (replica.isUserDrop()) {
|
||||
status = ReplicaStatus.DROP;
|
||||
}
|
||||
|
||||
if (filterReplica(status, statusFilter, op)) {
|
||||
@ -109,6 +111,7 @@ public class MetadataViewer {
|
||||
row.add(String.valueOf(replica.getSchemaHash()));
|
||||
row.add(String.valueOf(replica.getVersionCount()));
|
||||
row.add(String.valueOf(replica.isBad()));
|
||||
row.add(String.valueOf(replica.isUserDrop()));
|
||||
row.add(replica.getState().name());
|
||||
row.add(status.name());
|
||||
result.add(row);
|
||||
@ -131,6 +134,7 @@ public class MetadataViewer {
|
||||
row.add("-1");
|
||||
row.add(FeConstants.null_string);
|
||||
row.add(FeConstants.null_string);
|
||||
row.add(FeConstants.null_string);
|
||||
row.add(ReplicaStatus.MISSING.name());
|
||||
result.add(row);
|
||||
}
|
||||
|
||||
@ -70,7 +70,8 @@ public class Replica implements Writable {
|
||||
VERSION_ERROR, // missing version
|
||||
MISSING, // replica does not exist
|
||||
SCHEMA_ERROR, // replica's schema hash does not equal to index's schema hash
|
||||
BAD // replica is broken.
|
||||
BAD, // replica is broken.
|
||||
DROP, // user force drop replica on this backend
|
||||
}
|
||||
|
||||
@SerializedName(value = "id")
|
||||
@ -159,6 +160,8 @@ public class Replica implements Writable {
|
||||
private long preWatermarkTxnId = -1;
|
||||
private long postWatermarkTxnId = -1;
|
||||
|
||||
private long userDropTime = -1;
|
||||
|
||||
public Replica() {
|
||||
}
|
||||
|
||||
@ -760,9 +763,33 @@ public class Replica implements Writable {
|
||||
return postWatermarkTxnId;
|
||||
}
|
||||
|
||||
public void setUserDrop(boolean isDrop) {
|
||||
if (isDrop) {
|
||||
userDropTime = System.currentTimeMillis();
|
||||
} else {
|
||||
userDropTime = -1;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isAlive() {
|
||||
return getState() != ReplicaState.CLONE
|
||||
&& getState() != ReplicaState.DECOMMISSION
|
||||
&& !isBad();
|
||||
}
|
||||
|
||||
public boolean isUserDrop() {
|
||||
if (userDropTime > 0) {
|
||||
if (System.currentTimeMillis() - userDropTime < Config.manual_drop_replica_valid_second * 1000L) {
|
||||
return true;
|
||||
}
|
||||
userDropTime = -1;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isScheduleAvailable() {
|
||||
return Env.getCurrentSystemInfo().checkBackendScheduleAvailable(backendId)
|
||||
&& !isUserDrop();
|
||||
}
|
||||
}
|
||||
|
||||
@ -498,7 +498,7 @@ public class Tablet extends MetaObject implements Writable {
|
||||
aliveAndVersionComplete++;
|
||||
}
|
||||
|
||||
if (backend.isScheduleAvailable()) {
|
||||
if (replica.isScheduleAvailable()) {
|
||||
if (replica.needFurtherRepair() && (needFurtherRepairReplica == null || !versionCompleted)) {
|
||||
needFurtherRepairReplica = replica;
|
||||
}
|
||||
|
||||
@ -690,10 +690,14 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
continue;
|
||||
}
|
||||
|
||||
Backend be = infoService.getBackend(replica.getBackendId());
|
||||
if (be == null || !be.isScheduleAvailable()) {
|
||||
LOG.debug("replica's backend {} does not exist or is not scheduler available, skip. tablet: {}",
|
||||
replica.getBackendId(), tabletId);
|
||||
if (!replica.isScheduleAvailable()) {
    // A replica is not schedule-available either because its backend is not
    // schedule-available or because the user asked to drop it. If the backend
    // check passes, the only remaining cause is the user drop, so log that;
    // otherwise the backend itself is the reason. (The two messages were
    // previously attached to the opposite branches.)
    if (Env.getCurrentSystemInfo().checkBackendScheduleAvailable(replica.getBackendId())) {
        LOG.debug("user drop replica {}, skip. tablet: {}",
                replica, tabletId);
    } else {
        LOG.debug("replica's backend {} does not exist or is not scheduler available, skip. tablet: {}",
                replica.getBackendId(), tabletId);
    }
    continue;
}
|
||||
|
||||
|
||||
@ -734,7 +734,7 @@ public class TabletScheduler extends MasterDaemon {
|
||||
Map<Tag, Short> currentAllocMap = Maps.newHashMap();
|
||||
for (Replica replica : replicas) {
|
||||
Backend be = infoService.getBackend(replica.getBackendId());
|
||||
if (be != null && be.isScheduleAvailable() && replica.isAlive() && !replica.tooSlow()
|
||||
if (replica.isScheduleAvailable() && replica.isAlive() && !replica.tooSlow()
|
||||
&& be.isMixNode()) {
|
||||
Short num = currentAllocMap.getOrDefault(be.getLocationTag(), (short) 0);
|
||||
currentAllocMap.put(be.getLocationTag(), (short) (num + 1));
|
||||
@ -888,8 +888,9 @@ public class TabletScheduler extends MasterDaemon {
|
||||
// this case should be handled in deleteBackendDropped()
|
||||
continue;
|
||||
}
|
||||
if (!be.isScheduleAvailable()) {
|
||||
deleteReplicaInternal(tabletCtx, replica, "backend unavailable", force);
|
||||
if (!replica.isScheduleAvailable()) {
    // The replica is unavailable for scheduling. If its backend is still
    // schedule-available, the cause must be a user drop request; otherwise the
    // backend is the cause. (The two reason strings were previously swapped.)
    String reason = be.isScheduleAvailable() ? "user drop replica" : "backend unavailable";
    deleteReplicaInternal(tabletCtx, replica, reason, force);
    return true;
}
|
||||
}
|
||||
|
||||
@ -42,6 +42,7 @@ public class ReplicasProcNode implements ProcNodeInterface {
|
||||
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>().add("ReplicaId")
|
||||
.add("BackendId").add("Version").add("LstSuccessVersion").add("LstFailedVersion").add("LstFailedTime")
|
||||
.add("SchemaHash").add("LocalDataSize").add("RemoteDataSize").add("RowCount").add("State").add("IsBad")
|
||||
.add("IsUserDrop")
|
||||
.add("VersionCount").add("PathHash").add("MetaUrl").add("CompactionStatus").add("CooldownReplicaId")
|
||||
.add("CooldownMetaId").add("QueryHits").build();
|
||||
|
||||
@ -103,6 +104,7 @@ public class ReplicasProcNode implements ProcNodeInterface {
|
||||
String.valueOf(replica.getRowCount()),
|
||||
String.valueOf(replica.getState()),
|
||||
String.valueOf(replica.isBad()),
|
||||
String.valueOf(replica.isUserDrop()),
|
||||
String.valueOf(replica.getVersionCount()),
|
||||
String.valueOf(replica.getPathHash()),
|
||||
metaUrl,
|
||||
|
||||
@ -149,9 +149,10 @@ public class Diagnoser {
|
||||
+ replica.getLastFailedVersion());
|
||||
}
|
||||
// status
|
||||
if (!replica.isAlive()) {
|
||||
if (!replica.isAlive() || replica.isUserDrop()) {
|
||||
statusErr.append("Replica on backend " + replica.getBackendId() + "'s state is " + replica.getState()
|
||||
+ ", and is bad: " + (replica.isBad() ? "Yes" : "No"));
|
||||
+ ", and is bad: " + (replica.isBad() ? "Yes" : "No")
|
||||
+ ", and is going to drop: " + (replica.isUserDrop() ? "Yes" : "No"));
|
||||
}
|
||||
if (replica.getVersionCount() > Config.min_version_count_indicate_replica_compaction_too_slow) {
|
||||
compactionErr.append("Replica on backend " + replica.getBackendId() + "'s version count is too high: "
|
||||
|
||||
Reference in New Issue
Block a user