diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 516f89ec37..c7b6c611a5 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -244,12 +244,12 @@ public class Config extends ConfigBase { "The desired upper limit on the number of bytes of reserved space to retain " + "in a replicated JE Environment. " + "This parameter is ignored in a non-replicated JE Environment."}) - public static int bdbje_reserved_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G + public static long bdbje_reserved_disk_bytes = 1L * 1024 * 1024 * 1024; // 1G @ConfField(description = {"BDBJE 所需的空闲磁盘空间大小。如果空闲磁盘空间小于这个值,则BDBJE将无法写入。", "Amount of free disk space required by BDBJE. " + "If the free disk space is less than this value, BDBJE will not be able to write."}) - public static int bdbje_free_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G + public static long bdbje_free_disk_bytes = 1L * 1024 * 1024 * 1024; // 1G @ConfField(masterOnly = true, description = {"心跳线程池的线程数", "Num of thread to handle heartbeat events"}) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 3065e92b71..a698b8cdcf 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -997,9 +997,10 @@ public class Env { } Thread.sleep(100); - if (counter++ % 20 == 0) { - LOG.info("wait catalog to be ready. FE type: {}. is ready: {}, counter: {}", feType, isReady.get(), - counter); + if (counter++ % 100 == 0) { + String reason = editLog == null ? "editlog is null" : editLog.getNotReadyReason(); + LOG.info("wait catalog to be ready. 
feType:{} isReady:{}, counter:{} reason: {}", + feType, isReady.get(), counter, reason); } } } @@ -2460,8 +2461,8 @@ public class Env { if (currentTimeMs - synchronizedTimeMs > Config.meta_delay_toleration_second * 1000) { // we still need this log to observe this situation // but service may be continued when there is no log being replayed. - LOG.warn("meta out of date. current time: {}, synchronized time: {}, has log: {}, fe type: {}", - currentTimeMs, synchronizedTimeMs, hasLog, feType); + LOG.warn("meta out of date. current time: {}, sync time: {}, delta: {} ms, hasLog: {}, feType: {}", + currentTimeMs, synchronizedTimeMs, (currentTimeMs - synchronizedTimeMs), hasLog, feType); if (hasLog || feType == FrontendNodeType.UNKNOWN) { // 1. if we read log from BDB, which means master is still alive. // So we need to set meta out of date. @@ -2471,6 +2472,13 @@ public class Env { metaReplayState.setOutOfDate(currentTimeMs, synchronizedTimeMs); canRead.set(false); isReady.set(false); + + if (editLog != null) { + String reason = editLog.getNotReadyReason(); + if (!Strings.isNullOrEmpty(reason)) { + LOG.warn("Not ready reason:{}", reason); + } + } } // sleep 5s to avoid numerous 'meta out of date' log @@ -5891,10 +5899,10 @@ public class Env { sb.append(frontend.toString()).append("\n"); } - long diskUsagePercent = editLog.getEnvDiskUsagePercent(); - sb.append("Disk usage: ") - .append(diskUsagePercent != -1 ? 
String.valueOf(diskUsagePercent) : "") - .append("%\n"); + String reason = editLog.getNotReadyReason(); + if (!Strings.isNullOrEmpty(reason)) { + sb.append("Reason: ").append(reason).append("\n"); + } if (haProtocol instanceof BDBHA) { try { @@ -5915,7 +5923,7 @@ public class Env { } } catch (Exception e) { - // pass + LOG.warn("checkReadyOrThrow:", e); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java index 27206443c0..1f604f96a4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java @@ -40,6 +40,7 @@ import com.sleepycat.je.rep.NetworkRestore; import com.sleepycat.je.rep.NetworkRestoreConfig; import com.sleepycat.je.rep.NoConsistencyRequiredPolicy; import com.sleepycat.je.rep.NodeType; +import com.sleepycat.je.rep.RepInternal; import com.sleepycat.je.rep.ReplicatedEnvironment; import com.sleepycat.je.rep.ReplicationConfig; import com.sleepycat.je.rep.RollbackException; @@ -51,10 +52,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.File; -import java.io.IOException; import java.net.InetSocketAddress; -import java.nio.file.FileStore; -import java.nio.file.Files; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -76,7 +74,6 @@ public class BDBEnvironment { "INFO", "CONFIG", "FINE", "FINER", "FINEST", "ALL"); public static final String PALO_JOURNAL_GROUP = "PALO_JOURNAL_GROUP"; - private File envHome; private ReplicatedEnvironment replicatedEnvironment; private EnvironmentConfig environmentConfig; private ReplicationConfig replicationConfig; @@ -98,7 +95,6 @@ public class BDBEnvironment { // The setup() method opens the environment and database public void setup(File envHome, String selfNodeName, String selfNodeHostPort, String helperHostPort) { 
- this.envHome = envHome; // Almost never used, just in case the master can not restart if (metadataFailureRecovery) { if (!isElectable) { @@ -443,25 +439,6 @@ public class BDBEnvironment { } } - // Get the disk usage of BDB Environment in percent. -1 is returned if any error occuried. - public long getEnvDiskUsagePercent() { - if (envHome == null) { - return -1; - } - - try { - FileStore fileStore = Files.getFileStore(envHome.toPath()); - long totalSpace = fileStore.getTotalSpace(); - long usableSpace = fileStore.getUsableSpace(); - if (totalSpace <= 0) { - return -1; - } - return 100 - (usableSpace * 100) / totalSpace; - } catch (IOException e) { - return -1; - } - } - private static SyncPolicy getSyncPolicy(String policy) { if (policy.equalsIgnoreCase("SYNC")) { return Durability.SyncPolicy.SYNC; @@ -484,4 +461,23 @@ public class BDBEnvironment { return Durability.ReplicaAckPolicy.SIMPLE_MAJORITY; } + public String getNotReadyReason() { + if (replicatedEnvironment == null) { + LOG.warn("replicatedEnvironment is null"); + return "replicatedEnvironment is null"; + } + try { + if (replicatedEnvironment.getInvalidatingException() != null) { + return replicatedEnvironment.getInvalidatingException().getMessage(); + } + + if (RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation() != null) { + return RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation(); + } + } catch (Exception e) { + LOG.warn("getNotReadyReason exception:", e); + } + return ""; + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java index ebdbadae19..134b609549 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java @@ -566,13 +566,6 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B return 
this.bdbEnvironment; } - public long getEnvDiskUsagePercent() { - if (bdbEnvironment == null) { - return -1; - } - return bdbEnvironment.getEnvDiskUsagePercent(); - } - public String getBDBStats() { if (bdbEnvironment == null) { return ""; @@ -585,4 +578,12 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B return repEnv.getRepStats(StatsConfig.DEFAULT).toString(); } + + public String getNotReadyReason() { + if (bdbEnvironment == null) { + LOG.warn("bdbEnvironment is null"); + return "bdbEnvironment is null"; + } + return bdbEnvironment.getNotReadyReason(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index e2c5630ce7..be7dc75f38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -151,13 +151,6 @@ public class EditLog { return journal == null ? 0 : 1; } - public long getEnvDiskUsagePercent() { - if (journal instanceof BDBJEJournal) { - return ((BDBJEJournal) journal).getEnvDiskUsagePercent(); - } - return -1; - } - /** * Load journal. **/ @@ -1955,4 +1948,14 @@ public class EditLog { public void logAlterMTMV(AlterMTMV log) { logEdit(OperationType.OP_ALTER_MTMV, log); } + + public String getNotReadyReason() { + if (journal == null) { + return "journal is null"; + } + if (journal instanceof BDBJEJournal) { + return ((BDBJEJournal) journal).getNotReadyReason(); + } + return ""; + } }