[fix](heartbeat) need to set backend status based on edit log (#21410)

For non-master FE, must set Backend's status based on the content of edit log.
There is a bug: if the FE config `max_backend_heartbeat_failure_tolerance_count` is set larger than one,
the non-master FE will not mark the Backend as dead until it has received enough heartbeat edit logs,
which is wrong.
This causes the Backend to be dead on the Master FE but still alive on the non-master FE.
This commit is contained in:
Mingyu Chen
2023-07-04 17:12:53 +08:00
committed by GitHub
parent 9adbca685a
commit c2b483529c
2 changed files with 4 additions and 3 deletions

View File

@ -586,7 +586,7 @@ public class Backend implements Writable {
* handle Backend's heartbeat response.
* return true if any port changed, or alive state is changed.
*/
public boolean handleHbResponse(BackendHbResponse hbResponse) {
public boolean handleHbResponse(BackendHbResponse hbResponse, boolean isReplay) {
boolean isChanged = false;
if (hbResponse.getStatus() == HbStatus.OK) {
if (!this.version.equals(hbResponse.getVersion())) {
@ -632,7 +632,8 @@ public class Backend implements Writable {
this.heartbeatFailureCounter = 0;
} else {
// Only set backend to dead if the heartbeat failure counter exceed threshold.
if (++this.heartbeatFailureCounter >= Config.max_backend_heartbeat_failure_tolerance_count) {
// And if it is a replay process, must set backend to dead.
if (isReplay || ++this.heartbeatFailureCounter >= Config.max_backend_heartbeat_failure_tolerance_count) {
if (isAlive.compareAndSet(true, false)) {
isChanged = true;
LOG.warn("{} is dead,", this.toString());

View File

@ -166,7 +166,7 @@ public class HeartbeatMgr extends MasterDaemon {
BackendHbResponse hbResponse = (BackendHbResponse) response;
Backend be = nodeMgr.getBackend(hbResponse.getBeId());
if (be != null) {
boolean isChanged = be.handleHbResponse(hbResponse);
boolean isChanged = be.handleHbResponse(hbResponse, isReplay);
if (hbResponse.getStatus() != HbStatus.OK) {
// invalid all connections cached in ClientPool
ClientPool.backendPool.clearPool(new TNetworkAddress(be.getHost(), be.getBePort()));