[Enhancement](metric) add current edit log metric (#15657)
This commit is contained in:
@ -79,9 +79,11 @@ curl http://be_host:webserver_port/metrics?type=json
|
||||
|`doris_fe_counter_hit_sql_block_rule`|| Num| 被 SQL BLOCK RULE 拦截的查询数量 | | |
|
||||
|`doris_fe_edit_log_clean`| {type="failed"} | Num| 清理历史元数据日志失败的次数 | 不应失败,如失败,需人工介入 | P0|
|
||||
|| {type="success"} | Num| 清理历史元数据日志成功的次数 | |
|
||||
|`doris_fe_edit_log`| {type="bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
|
||||
|`doris_fe_edit_log`| {type="accumulated_bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
|
||||
|| {type="current_bytes"} |字节 | 当前元数据日志的大小 | 用于监控 editlog 大小。如果大小超限,需人工介入 | P0 |
|
||||
|| {type="read"} |Num| 元数据日志读取次数的计数 | 通过斜率观察元数据读取频率是否正常 |P0 |
|
||||
|| {type="write"} |Num | 元数据日志写入次数的计数 |通过斜率观察元数据写入频率是否正常 |P0 |
|
||||
|| {type="current"} |Num | 元数据日志当前数量 |用于监控 editlog 数量。如果数量超限,需人工介入 |P0 |
|
||||
|`doris_fe_editlog_write_latency_ms`| | 毫秒| 元数据日志写入延迟的百分位统计。如 {quantile="0.75"} 表示 75 分位的写入延迟 | |
|
||||
|`doris_fe_image_clean`|{type="failed"} | Num | 清理历史元数据镜像文件失败的次数 | 不应失败,如失败,需人工介入 | P0|
|
||||
||{type="success"} | Num | 清理历史元数据镜像文件成功的次数 | |
|
||||
|
||||
@ -49,6 +49,9 @@ public interface Journal {
|
||||
// Write a journal and sync to disk
|
||||
public void write(short op, Writable writable) throws IOException;
|
||||
|
||||
// Get current journal number
|
||||
public long getJournalNum();
|
||||
|
||||
// Delete journals whose max id is less than deleteToJournalId
|
||||
public void deleteJournals(long deleteJournalToId);
|
||||
|
||||
|
||||
@ -135,6 +135,7 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
|
||||
DatabaseEntry theData = new DatabaseEntry(buffer.getData());
|
||||
if (MetricRepo.isInit) {
|
||||
MetricRepo.COUNTER_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
|
||||
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
|
||||
}
|
||||
LOG.debug("opCode = {}, journal size = {}", op, theData.getSize());
|
||||
// Write the key value pair to bdb.
|
||||
@ -367,6 +368,11 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
|
||||
helperNode.first + ":" + helperNode.second, Env.getServingEnv().isElectable());
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getJournalNum() {
|
||||
return currentJournalDB.count();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteJournals(long deleteToJournalId) {
|
||||
List<Long> dbNames = getDatabaseNames();
|
||||
|
||||
@ -108,6 +108,11 @@ public class LocalJournal implements Journal {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getJournalNum() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (outputStream == null) {
|
||||
|
||||
@ -265,6 +265,8 @@ public class Checkpoint extends MasterDaemon {
|
||||
editLog.deleteJournals(deleteVersion + 1);
|
||||
if (MetricRepo.isInit) {
|
||||
MetricRepo.COUNTER_EDIT_LOG_CLEAN_SUCCESS.increase(1L);
|
||||
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.reset();
|
||||
MetricRepo.COUNTER_EDIT_LOG_CURRENT.update(editLog.getEditLogNum());
|
||||
}
|
||||
LOG.info("journals <= {} are deleted. image version {}, other nodes min version {}",
|
||||
deleteVersion, checkPointVersion, minOtherNodesJournalId);
|
||||
|
||||
@ -38,4 +38,13 @@ public class LongCounterMetric extends CounterMetric<Long> {
|
||||
public Long getValue() {
|
||||
return value.longValue();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
value.reset();
|
||||
}
|
||||
|
||||
public void update(Long delta) {
|
||||
value.reset();
|
||||
value.add(delta);
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,7 +86,9 @@ public final class MetricRepo {
|
||||
|
||||
public static LongCounterMetric COUNTER_EDIT_LOG_WRITE;
|
||||
public static LongCounterMetric COUNTER_EDIT_LOG_READ;
|
||||
public static LongCounterMetric COUNTER_EDIT_LOG_CURRENT;
|
||||
public static LongCounterMetric COUNTER_EDIT_LOG_SIZE_BYTES;
|
||||
public static LongCounterMetric COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES;
|
||||
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_SUCCESS;
|
||||
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_FAILED;
|
||||
public static Histogram HISTO_EDIT_LOG_WRITE_LATENCY;
|
||||
@ -345,9 +347,18 @@ public final class MetricRepo {
|
||||
"counter of edit log read from bdbje");
|
||||
COUNTER_EDIT_LOG_READ.addLabel(new MetricLabel("type", "read"));
|
||||
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_READ);
|
||||
COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES, "size of edit log");
|
||||
COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "bytes"));
|
||||
COUNTER_EDIT_LOG_CURRENT = new LongCounterMetric("edit_log", MetricUnit.OPERATIONS,
|
||||
"counter of current edit log in bdbje");
|
||||
COUNTER_EDIT_LOG_CURRENT.addLabel(new MetricLabel("type", "current"));
|
||||
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_CURRENT);
|
||||
COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
|
||||
"size of accumulated edit log");
|
||||
COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "accumulated_bytes"));
|
||||
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_SIZE_BYTES);
|
||||
COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
|
||||
"size of current edit log");
|
||||
COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "current_bytes"));
|
||||
DORIS_METRIC_REGISTER.addMetrics(COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES);
|
||||
HISTO_EDIT_LOG_WRITE_LATENCY = METRIC_REGISTER.histogram(
|
||||
MetricRegistry.name("editlog", "write", "latency", "ms"));
|
||||
|
||||
|
||||
@ -1044,6 +1044,7 @@ public class EditLog {
|
||||
totalTimeTransactions += (end - start);
|
||||
if (MetricRepo.isInit) {
|
||||
MetricRepo.HISTO_EDIT_LOG_WRITE_LATENCY.update((end - start));
|
||||
MetricRepo.COUNTER_EDIT_LOG_CURRENT.increase(1L);
|
||||
}
|
||||
|
||||
if (LOG.isDebugEnabled()) {
|
||||
@ -1066,10 +1067,17 @@ public class EditLog {
|
||||
/**
|
||||
* Return the size of the current EditLog
|
||||
*/
|
||||
synchronized long getEditLogSize() throws IOException {
|
||||
public synchronized long getEditLogSize() throws IOException {
|
||||
return editStream.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of the current EditLog
|
||||
*/
|
||||
public synchronized long getEditLogNum() throws IOException {
|
||||
return journal.getJournalNum();
|
||||
}
|
||||
|
||||
public synchronized long getTxId() {
|
||||
return txId;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user