[improvement](fe) Add reason log when Env is not ready (#28286)

This commit is contained in:
Lei Zhang
2023-12-15 12:22:06 +08:00
committed by GitHub
parent 6f3fb81965
commit e6b135c76a
5 changed files with 58 additions and 50 deletions

View File

@ -997,9 +997,10 @@ public class Env {
}
Thread.sleep(100);
if (counter++ % 20 == 0) {
LOG.info("wait catalog to be ready. FE type: {}. is ready: {}, counter: {}", feType, isReady.get(),
counter);
if (counter++ % 100 == 0) {
String reason = editLog == null ? "editlog is null" : editLog.getNotReadyReason();
LOG.info("wait catalog to be ready. feType:{} isReady:{}, counter:{} reason: {}",
feType, isReady.get(), counter, reason);
}
}
}
@ -2460,8 +2461,8 @@ public class Env {
if (currentTimeMs - synchronizedTimeMs > Config.meta_delay_toleration_second * 1000) {
// we still need this log to observe this situation
// but service may be continued when there is no log being replayed.
LOG.warn("meta out of date. current time: {}, synchronized time: {}, has log: {}, fe type: {}",
currentTimeMs, synchronizedTimeMs, hasLog, feType);
LOG.warn("meta out of date. current time: {}, sync time: {}, delta: {} ms, hasLog: {}, feType: {}",
currentTimeMs, synchronizedTimeMs, (currentTimeMs - synchronizedTimeMs), hasLog, feType);
if (hasLog || feType == FrontendNodeType.UNKNOWN) {
// 1. if we read log from BDB, which means master is still alive.
// So we need to set meta out of date.
@ -2471,6 +2472,13 @@ public class Env {
metaReplayState.setOutOfDate(currentTimeMs, synchronizedTimeMs);
canRead.set(false);
isReady.set(false);
if (editLog != null) {
String reason = editLog.getNotReadyReason();
if (!Strings.isNullOrEmpty(reason)) {
LOG.warn("Not ready reason:{}", reason);
}
}
}
// sleep 5s to avoid numerous 'meta out of date' log
@ -5891,10 +5899,10 @@ public class Env {
sb.append(frontend.toString()).append("\n");
}
long diskUsagePercent = editLog.getEnvDiskUsagePercent();
sb.append("Disk usage: ")
.append(diskUsagePercent != -1 ? String.valueOf(diskUsagePercent) : "<unknown>")
.append("%\n");
// Append the not-ready reason, if one is known, to the diagnostic message being built.
// Guard against a missing edit log the same way the other call sites of getNotReadyReason() do.
String reason = editLog == null ? "editlog is null" : editLog.getNotReadyReason();
if (!Strings.isNullOrEmpty(reason)) {
    // The reason is free text, not a percentage, so end the line with a plain newline
    // (the previous "%\n" suffix was left over from the removed "Disk usage" line).
    sb.append("Reason: ").append(reason).append("\n");
}
if (haProtocol instanceof BDBHA) {
try {
@ -5915,7 +5923,7 @@ public class Env {
}
} catch (Exception e) {
// pass
LOG.warn("checkReadyOrThrow:", e);
}
}

View File

@ -40,6 +40,7 @@ import com.sleepycat.je.rep.NetworkRestore;
import com.sleepycat.je.rep.NetworkRestoreConfig;
import com.sleepycat.je.rep.NoConsistencyRequiredPolicy;
import com.sleepycat.je.rep.NodeType;
import com.sleepycat.je.rep.RepInternal;
import com.sleepycat.je.rep.ReplicatedEnvironment;
import com.sleepycat.je.rep.ReplicationConfig;
import com.sleepycat.je.rep.RollbackException;
@ -51,10 +52,7 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.file.FileStore;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -76,7 +74,6 @@ public class BDBEnvironment {
"INFO", "CONFIG", "FINE", "FINER", "FINEST", "ALL");
public static final String PALO_JOURNAL_GROUP = "PALO_JOURNAL_GROUP";
private File envHome;
private ReplicatedEnvironment replicatedEnvironment;
private EnvironmentConfig environmentConfig;
private ReplicationConfig replicationConfig;
@ -98,7 +95,6 @@ public class BDBEnvironment {
// The setup() method opens the environment and database
public void setup(File envHome, String selfNodeName, String selfNodeHostPort,
String helperHostPort) {
this.envHome = envHome;
// Almost never used, just in case the master can not restart
if (metadataFailureRecovery) {
if (!isElectable) {
@ -443,25 +439,6 @@ public class BDBEnvironment {
}
}
/**
 * Reports how full the file store that holds the BDB environment home is.
 *
 * @return the used share of the file store as a percentage (integer arithmetic), or -1 when the
 *         environment home is not set, the file store reports a non-positive total size, or an
 *         I/O error occurs while querying it
 */
public long getEnvDiskUsagePercent() {
    long usagePercent = -1;
    if (envHome != null) {
        try {
            FileStore store = Files.getFileStore(envHome.toPath());
            long total = store.getTotalSpace();
            long usable = store.getUsableSpace();
            if (total > 0) {
                usagePercent = 100 - (usable * 100) / total;
            }
        } catch (IOException ignored) {
            // Best effort only: an I/O failure is reported to the caller as -1.
            usagePercent = -1;
        }
    }
    return usagePercent;
}
private static SyncPolicy getSyncPolicy(String policy) {
if (policy.equalsIgnoreCase("SYNC")) {
return Durability.SyncPolicy.SYNC;
@ -484,4 +461,23 @@ public class BDBEnvironment {
return Durability.ReplicaAckPolicy.SIMPLE_MAJORITY;
}
/**
 * Describes why the replicated BDB environment is not ready, for use in log and error messages.
 *
 * <p>Checks, in order: whether the environment object exists, whether it reports an
 * invalidating exception, and whether its internal implementation reports a disk limit
 * violation. Any exception raised while querying is logged and treated as "no known reason".
 *
 * @return a non-null description of the first problem found, or an empty string when none is found
 */
public String getNotReadyReason() {
    if (replicatedEnvironment == null) {
        LOG.warn("replicatedEnvironment is null");
        return "replicatedEnvironment is null";
    }
    try {
        // Query each value exactly once so the null check and the returned value cannot disagree
        // if the environment state changes between two calls.
        Throwable invalidating = replicatedEnvironment.getInvalidatingException();
        if (invalidating != null) {
            // getMessage() may legitimately be null; fall back to toString() so the
            // caller still learns which exception invalidated the environment.
            String message = invalidating.getMessage();
            return message != null ? message : invalidating.toString();
        }
        String diskLimitViolation =
                RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation();
        if (diskLimitViolation != null) {
            return diskLimitViolation;
        }
    } catch (Exception e) {
        // Diagnostics must never fail the caller; log and fall through to "no reason".
        LOG.warn("getNotReadyReason exception:", e);
    }
    return "";
}
}

View File

@ -566,13 +566,6 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
return this.bdbEnvironment;
}
/**
 * Returns the disk usage percentage reported by the underlying BDB environment.
 *
 * @return the value from {@code bdbEnvironment.getEnvDiskUsagePercent()}, or -1 when no BDB
 *         environment is attached to this journal
 */
public long getEnvDiskUsagePercent() {
    return bdbEnvironment == null ? -1 : bdbEnvironment.getEnvDiskUsagePercent();
}
public String getBDBStats() {
if (bdbEnvironment == null) {
return "";
@ -585,4 +578,12 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
return repEnv.getRepStats(StatsConfig.DEFAULT).toString();
}
/**
 * Describes why this journal's BDB environment is not ready.
 *
 * @return "bdbEnvironment is null" when no BDB environment is attached to this journal,
 *         otherwise whatever {@code bdbEnvironment.getNotReadyReason()} reports
 */
public String getNotReadyReason() {
    if (bdbEnvironment == null) {
        // Name the object that is actually null here, so this case can be told apart from
        // the environment's own "replicatedEnvironment is null" report.
        LOG.warn("bdbEnvironment is null");
        return "bdbEnvironment is null";
    }
    return bdbEnvironment.getNotReadyReason();
}
}

View File

@ -151,13 +151,6 @@ public class EditLog {
return journal == null ? 0 : 1;
}
/**
 * Returns the disk usage percentage of the journal's storage when the journal is BDBJE-backed.
 *
 * @return the value reported by the {@code BDBJEJournal}, or -1 for any other journal
 *         (including a null journal)
 */
public long getEnvDiskUsagePercent() {
    if (!(journal instanceof BDBJEJournal)) {
        return -1;
    }
    BDBJEJournal bdbJournal = (BDBJEJournal) journal;
    return bdbJournal.getEnvDiskUsagePercent();
}
/**
* Load journal.
**/
@ -1955,4 +1948,14 @@ public class EditLog {
/**
 * Records an alter-MTMV operation by passing it to {@code logEdit} with the
 * {@code OperationType.OP_ALTER_MTMV} operation code.
 *
 * @param log the alter-MTMV payload handed to {@code logEdit}
 */
public void logAlterMTMV(AlterMTMV log) {
logEdit(OperationType.OP_ALTER_MTMV, log);
}
/**
 * Describes why the journal behind this edit log is not ready.
 *
 * @return the reason reported by the journal when it is a {@code BDBJEJournal},
 *         "journal is null" when no journal is set, and an empty string for any other journal
 */
public String getNotReadyReason() {
    if (journal instanceof BDBJEJournal) {
        return ((BDBJEJournal) journal).getNotReadyReason();
    }
    // instanceof is false for null, so the missing-journal case is handled here.
    return journal == null ? "journal is null" : "";
}
}