[Enhancement](metric) add current edit log metric (#15657)

This commit is contained in:
yongjinhou
2023-01-10 18:46:57 +08:00
committed by GitHub
parent 503b6ee4da
commit a67cea2d27
8 changed files with 50 additions and 4 deletions

View File

@ -79,9 +79,11 @@ curl http://be_host:webserver_port/metrics?type=json
|`doris_fe_counter_hit_sql_block_rule`|| Num| 被 SQL BLOCK RULE 拦截的查询数量 | | |
|`doris_fe_edit_log_clean`| {type="failed"} | Num| 清理历史元数据日志失败的次数 | 不应失败,如失败,需人工介入 | P0|
|| {type="success"} | Num| 清理历史元数据日志成功的次数 | |
|`doris_fe_edit_log`| {type="bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
|`doris_fe_edit_log`| {type="accumulated_bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
|| {type="current_bytes"} |字节 | 元数据日志当前大小 | 用于监控 editlog 大小。如果大小超限,需人工介入 | P0 |
|| {type="read"} |Num| 元数据日志读取次数的计数 | 通过斜率观察元数据读取频率是否正常 |P0 |
|| {type="write"} |Num | 元数据日志写入次数的计数 |通过斜率观察元数据写入频率是否正常 |P0 |
|| {type="current"} |Num | 元数据日志当前数量 | 用于监控 editlog 数量。如果数量超限,需人工介入 |P0 |
|`doris_fe_editlog_write_latency_ms`| | 毫秒| 元数据日志写入延迟的百分位统计。如 {quantile="0.75"} 表示 75 分位的写入延迟 | |
|`doris_fe_image_clean`|{type="failed"} | Num | 清理历史元数据镜像文件失败的次数 | 不应失败,如失败,需人工介入 | P0|
||{type="success"} | Num | 清理历史元数据镜像文件成功的次数 | |

View File

@ -49,6 +49,9 @@ public interface Journal {
// Write one journal entry (operation code `op` plus its `writable` payload) and sync to disk
public void write(short op, Writable writable) throws IOException;
// Get the current number of journals (a count, not a journal id)
public long getJournalNum();
// Delete journals whose max id is less than deleteJournalToId
public void deleteJournals(long deleteJournalToId);

View File

@ -135,6 +135,7 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
DatabaseEntry theData = new DatabaseEntry(buffer.getData());
if (MetricRepo.isInit) {
MetricRepo.COUNTER_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
}
LOG.debug("opCode = {}, journal size = {}", op, theData.getSize());
// Write the key value pair to bdb.
@ -367,6 +368,11 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
helperNode.first + ":" + helperNode.second, Env.getServingEnv().isElectable());
}
/**
 * Reports how many entries {@code currentJournalDB} says it holds.
 *
 * <p>NOTE(review): presumably requires the journal to be opened first, since
 * {@code currentJournalDB} is dereferenced without a null check -- confirm against callers.
 *
 * @return the value of {@code currentJournalDB.count()}
 */
@Override
public long getJournalNum() {
    final long journalCount = currentJournalDB.count();
    return journalCount;
}
@Override
public void deleteJournals(long deleteToJournalId) {
List<Long> dbNames = getDatabaseNames();

View File

@ -108,6 +108,11 @@ public class LocalJournal implements Journal {
return 0;
}
/**
 * Journal count for the local journal.
 *
 * @return always {@code 0} in this implementation
 */
@Override
public long getJournalNum() {
    return 0L;
}
@Override
public void close() {
if (outputStream == null) {

View File

@ -265,6 +265,8 @@ public class Checkpoint extends MasterDaemon {
editLog.deleteJournals(deleteVersion + 1);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_EDIT_LOG_CLEAN_SUCCESS.increase(1L);
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.reset();
MetricRepo.COUNTER_EDIT_LOG_CURRENT.update(editLog.getEditLogNum());
}
LOG.info("journals <= {} are deleted. image version {}, other nodes min version {}",
deleteVersion, checkPointVersion, minOtherNodesJournalId);

View File

@ -38,4 +38,13 @@ public class LongCounterMetric extends CounterMetric<Long> {
public Long getValue() {
return value.longValue();
}
/**
 * Resets the underlying {@code value} accumulator by delegating to its {@code reset()}.
 */
public void reset() {
    this.value.reset();
}
/**
 * Replaces the counter's value with {@code delta}.
 *
 * <p>Despite the parameter name, {@code delta} is treated as the new absolute value rather
 * than an increment: the accumulator is reset and then {@code delta} is added to it.
 *
 * <p>NOTE(review): the reset and the add are two separate calls, so a concurrent reader can
 * presumably observe the intermediate reset state -- confirm whether any caller cares.
 *
 * @param delta the value the counter should report afterwards; must not be {@code null}
 * @throws NullPointerException if {@code delta} is {@code null}; the counter is left untouched
 */
public void update(Long delta) {
    // Unbox before touching the accumulator so a null argument fails without wiping the value.
    final long newValue = delta;
    value.reset();
    value.add(newValue);
}
}

View File

@ -86,7 +86,9 @@ public final class MetricRepo {
public static LongCounterMetric COUNTER_EDIT_LOG_WRITE;
public static LongCounterMetric COUNTER_EDIT_LOG_READ;
public static LongCounterMetric COUNTER_EDIT_LOG_CURRENT;
public static LongCounterMetric COUNTER_EDIT_LOG_SIZE_BYTES;
public static LongCounterMetric COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES;
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_SUCCESS;
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_FAILED;
public static Histogram HISTO_EDIT_LOG_WRITE_LATENCY;
@ -345,9 +347,18 @@ public final class MetricRepo {
"counter of edit log read from bdbje");
COUNTER_EDIT_LOG_READ.addLabel(new MetricLabel("type", "read"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_READ);
COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES, "size of edit log");
COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "bytes"));
COUNTER_EDIT_LOG_CURRENT = new LongCounterMetric("edit_log", MetricUnit.OPERATIONS,
"counter of current edit log in bdbje");
COUNTER_EDIT_LOG_CURRENT.addLabel(new MetricLabel("type", "current"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_CURRENT);
COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
"size of accumulated edit log");
COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "accumulated_bytes"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_SIZE_BYTES);
COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
"size of current edit log");
COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "current_bytes"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES);
HISTO_EDIT_LOG_WRITE_LATENCY = METRIC_REGISTER.histogram(
MetricRegistry.name("editlog", "write", "latency", "ms"));

View File

@ -1044,6 +1044,7 @@ public class EditLog {
totalTimeTransactions += (end - start);
if (MetricRepo.isInit) {
MetricRepo.HISTO_EDIT_LOG_WRITE_LATENCY.update((end - start));
MetricRepo.COUNTER_EDIT_LOG_CURRENT.increase(1L);
}
if (LOG.isDebugEnabled()) {
@ -1066,10 +1067,17 @@ public class EditLog {
/**
* Return the size of the current EditLog
*/
synchronized long getEditLogSize() throws IOException {
public synchronized long getEditLogSize() throws IOException {
return editStream.length();
}
/**
 * Returns the journal count reported by the underlying {@code journal}.
 *
 * @return the result of {@code journal.getJournalNum()}
 * @throws IOException declared for callers; the visible body only delegates to the journal
 */
public synchronized long getEditLogNum() throws IOException {
    final long journalCount = journal.getJournalNum();
    return journalCount;
}
/**
 * Returns the current value of {@code txId}, read while holding this object's monitor.
 *
 * @return the current {@code txId}
 */
public synchronized long getTxId() {
    return this.txId;
}