[improvement](fe) Add reason log when Env is not ready (#28286)
This commit is contained in:
@ -997,9 +997,10 @@ public class Env {
|
||||
}
|
||||
|
||||
Thread.sleep(100);
|
||||
if (counter++ % 20 == 0) {
|
||||
LOG.info("wait catalog to be ready. FE type: {}. is ready: {}, counter: {}", feType, isReady.get(),
|
||||
counter);
|
||||
if (counter++ % 100 == 0) {
|
||||
String reason = editLog == null ? "editlog is null" : editLog.getNotReadyReason();
|
||||
LOG.info("wait catalog to be ready. feType:{} isReady:{}, counter:{} reason: {}",
|
||||
feType, isReady.get(), counter, reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2460,8 +2461,8 @@ public class Env {
|
||||
if (currentTimeMs - synchronizedTimeMs > Config.meta_delay_toleration_second * 1000) {
|
||||
// we still need this log to observe this situation
|
||||
// but service may be continued when there is no log being replayed.
|
||||
LOG.warn("meta out of date. current time: {}, synchronized time: {}, has log: {}, fe type: {}",
|
||||
currentTimeMs, synchronizedTimeMs, hasLog, feType);
|
||||
LOG.warn("meta out of date. current time: {}, sync time: {}, delta: {} ms, hasLog: {}, feType: {}",
|
||||
currentTimeMs, synchronizedTimeMs, (currentTimeMs - synchronizedTimeMs), hasLog, feType);
|
||||
if (hasLog || feType == FrontendNodeType.UNKNOWN) {
|
||||
// 1. if we read log from BDB, which means master is still alive.
|
||||
// So we need to set meta out of date.
|
||||
@ -2471,6 +2472,13 @@ public class Env {
|
||||
metaReplayState.setOutOfDate(currentTimeMs, synchronizedTimeMs);
|
||||
canRead.set(false);
|
||||
isReady.set(false);
|
||||
|
||||
if (editLog != null) {
|
||||
String reason = editLog.getNotReadyReason();
|
||||
if (!Strings.isNullOrEmpty(reason)) {
|
||||
LOG.warn("Not ready reason:{}", reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sleep 5s to avoid numerous 'meta out of date' log
|
||||
@ -5891,10 +5899,10 @@ public class Env {
|
||||
sb.append(frontend.toString()).append("\n");
|
||||
}
|
||||
|
||||
long diskUsagePercent = editLog.getEnvDiskUsagePercent();
|
||||
sb.append("Disk usage: ")
|
||||
.append(diskUsagePercent != -1 ? String.valueOf(diskUsagePercent) : "<unknown>")
|
||||
.append("%\n");
|
||||
// Append the reason the edit log reports for not being ready, when one is known.
// editLog is null-checked here the same way the other call sites in this class do it.
String reason = editLog == null ? "editlog is null" : editLog.getNotReadyReason();
if (!Strings.isNullOrEmpty(reason)) {
    // The reason is free text, not a percentage, so the line ends with a plain newline
    // (the previous "%\n" suffix was left over from the disk-usage output it replaced).
    sb.append("Reason: ").append(reason).append("\n");
}
|
||||
|
||||
if (haProtocol instanceof BDBHA) {
|
||||
try {
|
||||
@ -5915,7 +5923,7 @@ public class Env {
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
// pass
|
||||
LOG.warn("checkReadyOrThrow:", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -40,6 +40,7 @@ import com.sleepycat.je.rep.NetworkRestore;
|
||||
import com.sleepycat.je.rep.NetworkRestoreConfig;
|
||||
import com.sleepycat.je.rep.NoConsistencyRequiredPolicy;
|
||||
import com.sleepycat.je.rep.NodeType;
|
||||
import com.sleepycat.je.rep.RepInternal;
|
||||
import com.sleepycat.je.rep.ReplicatedEnvironment;
|
||||
import com.sleepycat.je.rep.ReplicationConfig;
|
||||
import com.sleepycat.je.rep.RollbackException;
|
||||
@ -51,10 +52,7 @@ import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.nio.file.FileStore;
|
||||
import java.nio.file.Files;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
@ -76,7 +74,6 @@ public class BDBEnvironment {
|
||||
"INFO", "CONFIG", "FINE", "FINER", "FINEST", "ALL");
|
||||
public static final String PALO_JOURNAL_GROUP = "PALO_JOURNAL_GROUP";
|
||||
|
||||
private File envHome;
|
||||
private ReplicatedEnvironment replicatedEnvironment;
|
||||
private EnvironmentConfig environmentConfig;
|
||||
private ReplicationConfig replicationConfig;
|
||||
@ -98,7 +95,6 @@ public class BDBEnvironment {
|
||||
// The setup() method opens the environment and database
|
||||
public void setup(File envHome, String selfNodeName, String selfNodeHostPort,
|
||||
String helperHostPort) {
|
||||
this.envHome = envHome;
|
||||
// Almost never used, just in case the master can not restart
|
||||
if (metadataFailureRecovery) {
|
||||
if (!isElectable) {
|
||||
@ -443,25 +439,6 @@ public class BDBEnvironment {
|
||||
}
|
||||
}
|
||||
|
||||
// Get the disk usage of BDB Environment in percent. -1 is returned if any error occuried.
|
||||
public long getEnvDiskUsagePercent() {
|
||||
if (envHome == null) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
try {
|
||||
FileStore fileStore = Files.getFileStore(envHome.toPath());
|
||||
long totalSpace = fileStore.getTotalSpace();
|
||||
long usableSpace = fileStore.getUsableSpace();
|
||||
if (totalSpace <= 0) {
|
||||
return -1;
|
||||
}
|
||||
return 100 - (usableSpace * 100) / totalSpace;
|
||||
} catch (IOException e) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
private static SyncPolicy getSyncPolicy(String policy) {
|
||||
if (policy.equalsIgnoreCase("SYNC")) {
|
||||
return Durability.SyncPolicy.SYNC;
|
||||
@ -484,4 +461,23 @@ public class BDBEnvironment {
|
||||
return Durability.ReplicaAckPolicy.SIMPLE_MAJORITY;
|
||||
}
|
||||
|
||||
public String getNotReadyReason() {
|
||||
if (replicatedEnvironment == null) {
|
||||
LOG.warn("replicatedEnvironment is null");
|
||||
return "replicatedEnvironment is null";
|
||||
}
|
||||
try {
|
||||
if (replicatedEnvironment.getInvalidatingException() != null) {
|
||||
return replicatedEnvironment.getInvalidatingException().getMessage();
|
||||
}
|
||||
|
||||
if (RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation() != null) {
|
||||
return RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.warn("getNotReadyReason exception:", e);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -566,13 +566,6 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
|
||||
return this.bdbEnvironment;
|
||||
}
|
||||
|
||||
public long getEnvDiskUsagePercent() {
|
||||
if (bdbEnvironment == null) {
|
||||
return -1;
|
||||
}
|
||||
return bdbEnvironment.getEnvDiskUsagePercent();
|
||||
}
|
||||
|
||||
public String getBDBStats() {
|
||||
if (bdbEnvironment == null) {
|
||||
return "";
|
||||
@ -585,4 +578,12 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
|
||||
|
||||
return repEnv.getRepStats(StatsConfig.DEFAULT).toString();
|
||||
}
|
||||
|
||||
public String getNotReadyReason() {
|
||||
if (bdbEnvironment == null) {
|
||||
LOG.warn("replicatedEnvironment is null");
|
||||
return "replicatedEnvironment is null";
|
||||
}
|
||||
return bdbEnvironment.getNotReadyReason();
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,13 +151,6 @@ public class EditLog {
|
||||
return journal == null ? 0 : 1;
|
||||
}
|
||||
|
||||
public long getEnvDiskUsagePercent() {
|
||||
if (journal instanceof BDBJEJournal) {
|
||||
return ((BDBJEJournal) journal).getEnvDiskUsagePercent();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load journal.
|
||||
**/
|
||||
@ -1955,4 +1948,14 @@ public class EditLog {
|
||||
/**
 * Passes the given entry to {@code logEdit} under the {@code OP_ALTER_MTMV} operation type.
 *
 * @param log the alter-MTMV entry handed to {@code logEdit}
 */
public void logAlterMTMV(AlterMTMV log) {
|
||||
logEdit(OperationType.OP_ALTER_MTMV, log);
|
||||
}
|
||||
|
||||
public String getNotReadyReason() {
|
||||
if (journal == null) {
|
||||
return "journal is null";
|
||||
}
|
||||
if (journal instanceof BDBJEJournal) {
|
||||
return ((BDBJEJournal) journal).getNotReadyReason();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user