From c39d35df4c7ed8f75a5bbbbef8b8f46400bfe9e6 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Thu, 12 Dec 2019 17:46:59 +0800 Subject: [PATCH] Add tablet compaction score metrics (#2427) [Metric] Add tablet compaction score metrics Backend: Add metric "tablet_max_compaction_score" to monitor the current max compaction score of tablets on this Backend. This metric will be updated each time the compaction thread picking tablets to compact. Frontend: Add metric "tablet_max_compaction_score" for each Backend. These metrics will be updated when backends report tablet. And also add a calculated metric "max_tablet_compaction_core" to monitor the max compaction core of tablets on all Backends. --- be/src/agent/task_worker_pool.cpp | 3 ++ be/src/olap/tablet.cpp | 2 +- be/src/olap/tablet_manager.cpp | 5 +++ be/src/util/doris_metrics.cpp | 6 +++ be/src/util/doris_metrics.h | 6 +++ .../apache/doris/master/ReportHandler.java | 4 ++ .../doris/metric/DorisMetricRegistry.java | 5 +++ .../apache/doris/metric/MetricCalculator.java | 11 +++++ .../org/apache/doris/metric/MetricRepo.java | 42 +++++++++++++++---- .../doris/metric/SimpleCoreMetricVisitor.java | 3 ++ .../java/org/apache/doris/system/Backend.java | 13 +++++- .../doris/system/SystemInfoService.java | 4 +- .../org/apache/doris/catalog/BackendTest.java | 2 +- gensrc/thrift/MasterService.thrift | 3 ++ 14 files changed, 97 insertions(+), 12 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 5fec93fa30..6c7d810123 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1287,6 +1287,9 @@ void* TaskWorkerPool::_report_tablet_worker_thread_callback(void* arg_this) { return (void*)0; #endif } + int64_t max_compaction_score = std::max(DorisMetrics::tablet_cumulative_max_compaction_score.value(), + DorisMetrics::tablet_base_max_compaction_score.value()); + request.__set_tablet_max_compaction_score(max_compaction_score); TMasterResult result; status = worker_pool_this->_master_client->report(request, &result); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 0ae9171534..324e038267 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -65,7 +65,7 @@ Tablet::Tablet(TabletMetaSharedPtr tablet_meta, DataDir* data_dir) _schema(tablet_meta->tablet_schema()), _data_dir(data_dir), _is_bad(false), - _last_compaction_failure_time(UnixMillis()) { + _last_compaction_failure_time(0) { _tablet_path.append(_data_dir->path()); _tablet_path.append(DATA_PREFIX); _tablet_path.append("/"); diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index aed6215ec1..45e232d398 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -759,6 +759,11 @@ TabletSharedPtr TabletManager::find_best_tablet_to_compaction( LOG(INFO) << "find best tablet to do compaction." << " type: " << (compaction_type == CompactionType::CUMULATIVE_COMPACTION ? "cumulative" : "base") << ", tablet id: " << best_tablet->tablet_id() << ", score: " << highest_score; + if (compaction_type == CompactionType::CUMULATIVE_COMPACTION) { + DorisMetrics::tablet_cumulative_max_compaction_score.set_value(highest_score); + } else { + DorisMetrics::tablet_base_max_compaction_score.set_value(highest_score); + } } return best_tablet; } diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp index a4c08ea52d..6ba60252d5 100644 --- a/be/src/util/doris_metrics.cpp +++ b/be/src/util/doris_metrics.cpp @@ -115,6 +115,9 @@ IntGaugeMetricsMap DorisMetrics::disks_avail_capacity; IntGaugeMetricsMap DorisMetrics::disks_data_used_capacity; IntGaugeMetricsMap DorisMetrics::disks_state; +IntGauge DorisMetrics::tablet_cumulative_max_compaction_score; +IntGauge DorisMetrics::tablet_base_max_compaction_score; + IntGauge DorisMetrics::push_request_write_bytes_per_second; IntGauge DorisMetrics::query_scan_bytes_per_second; IntGauge DorisMetrics::max_disk_io_util_percent; @@ -266,6 +269,9 @@ void DorisMetrics::initialize( REGISTER_DORIS_METRIC(process_fd_num_limit_soft); REGISTER_DORIS_METRIC(process_fd_num_limit_hard); + REGISTER_DORIS_METRIC(tablet_cumulative_max_compaction_score); + REGISTER_DORIS_METRIC(tablet_base_max_compaction_score); + // disk usage for (auto& path : paths) { IntGauge* gauge = disks_total_capacity.set_key(path); diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h index 222f42380e..7259bf9ea5 100644 --- a/be/src/util/doris_metrics.h +++ b/be/src/util/doris_metrics.h @@ -139,6 +139,12 @@ public: static IntGaugeMetricsMap disks_data_used_capacity; static IntGaugeMetricsMap disks_state; + // the max compaction score of all tablets. + // Record base and cumulative scores separately, because + // we need to get the larger of the two. + static IntGauge tablet_cumulative_max_compaction_score; + static IntGauge tablet_base_max_compaction_score; + // The following metrics will be calculated // by metric calculator static IntGauge push_request_write_bytes_per_second; diff --git a/fe/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/src/main/java/org/apache/doris/master/ReportHandler.java index f3a5a21958..fd272950ab 100644 --- a/fe/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/src/main/java/org/apache/doris/master/ReportHandler.java @@ -157,6 +157,10 @@ public class ReportHandler extends Daemon { forceRecovery = request.isForce_recovery(); } + if (request.isSetTablet_max_compaction_score()) { + backend.setTabletMaxCompactionScore(request.getTablet_max_compaction_score()); + } + ReportTask reportTask = new ReportTask(beId, tasks, disks, tablets, reportVersion, forceRecovery); try { putToQueue(reportTask); diff --git a/fe/src/main/java/org/apache/doris/metric/DorisMetricRegistry.java b/fe/src/main/java/org/apache/doris/metric/DorisMetricRegistry.java index 461b5116a4..686784908e 100644 --- a/fe/src/main/java/org/apache/doris/metric/DorisMetricRegistry.java +++ b/fe/src/main/java/org/apache/doris/metric/DorisMetricRegistry.java @@ -38,6 +38,11 @@ public class DorisMetricRegistry { return Lists.newArrayList(paloMetrics); } + // the metrics by metric name + public synchronized List getPaloMetricsByName(String name) { + return paloMetrics.stream().filter(m -> m.getName().equals(name)).collect(Collectors.toList()); + } + public synchronized void removeMetrics(String name) { paloMetrics = paloMetrics.stream().filter(m -> !(m.getName().equals(name))).collect(Collectors.toList()); } diff --git a/fe/src/main/java/org/apache/doris/metric/MetricCalculator.java b/fe/src/main/java/org/apache/doris/metric/MetricCalculator.java index 99528ba7e1..05aa38e79f 100644 --- a/fe/src/main/java/org/apache/doris/metric/MetricCalculator.java +++ b/fe/src/main/java/org/apache/doris/metric/MetricCalculator.java @@ -17,6 +17,7 @@ package org.apache.doris.metric; +import java.util.List; import java.util.TimerTask; /* @@ -65,5 +66,15 @@ public class MetricCalculator extends TimerTask { lastQueryErrCounter = currentErrCounter; lastTs = currentTs; + + // max tabet compaction score of all backends + long maxCompactionScore = 0; + List compactionScoreMetrics = MetricRepo.getMetricsByName(MetricRepo.TABLET_MAX_COMPACTION_SCORE); + for (Metric metric : compactionScoreMetrics) { + if (((GaugeMetric) metric).getValue() > maxCompactionScore) { + maxCompactionScore = ((GaugeMetric) metric).getValue(); + } + } + MetricRepo.GAUGE_MAX_TABLET_COMPACTION_SCORE.setValue(maxCompactionScore); } } diff --git a/fe/src/main/java/org/apache/doris/metric/MetricRepo.java b/fe/src/main/java/org/apache/doris/metric/MetricRepo.java index 68d1cf8446..a15e93aaeb 100644 --- a/fe/src/main/java/org/apache/doris/metric/MetricRepo.java +++ b/fe/src/main/java/org/apache/doris/metric/MetricRepo.java @@ -38,6 +38,7 @@ import com.codahale.metrics.MetricRegistry; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.Timer; @@ -51,6 +52,9 @@ public final class MetricRepo { public static AtomicBoolean isInit = new AtomicBoolean(false); + public static final String TABLET_NUM = "tablet_num"; + public static final String TABLET_MAX_COMPACTION_SCORE = "tablet_max_compaction_score"; + public static LongCounterMetric COUNTER_REQUEST_ALL; public static LongCounterMetric COUNTER_QUERY_ALL; public static LongCounterMetric COUNTER_QUERY_ERR; @@ -76,6 +80,7 @@ public final class MetricRepo { public static GaugeMetricImpl GAUGE_QUERY_PER_SECOND; public static GaugeMetricImpl GAUGE_REQUEST_PER_SECOND; public static GaugeMetricImpl GAUGE_QUERY_ERR_RATE; + public static GaugeMetricImpl GAUGE_MAX_TABLET_COMPACTION_SCORE; private static Timer metricTimer = new Timer(); private static MetricCalculator metricCalculator = new MetricCalculator(); @@ -135,7 +140,7 @@ public final class MetricRepo { } // capacity - generateTabletNumMetrics(); + generateBackendsTabletMetrics(); // connections GaugeMetric conections = (GaugeMetric) new GaugeMetric( @@ -182,9 +187,13 @@ public final class MetricRepo { GAUGE_REQUEST_PER_SECOND = new GaugeMetricImpl<>("rps", "request per second"); GAUGE_REQUEST_PER_SECOND.setValue(0.0); PALO_METRIC_REGISTER.addPaloMetrics(GAUGE_REQUEST_PER_SECOND); - GAUGE_QUERY_ERR_RATE = new GaugeMetricImpl<>("query_err_rate", "query_error_rate"); + GAUGE_QUERY_ERR_RATE = new GaugeMetricImpl<>("query_err_rate", "query error rate"); PALO_METRIC_REGISTER.addPaloMetrics(GAUGE_QUERY_ERR_RATE); GAUGE_QUERY_ERR_RATE.setValue(0.0); + GAUGE_MAX_TABLET_COMPACTION_SCORE = new GaugeMetricImpl<>("max_tablet_compaction_score", + "max tablet compaction score of all backends"); + PALO_METRIC_REGISTER.addPaloMetrics(GAUGE_MAX_TABLET_COMPACTION_SCORE); + GAUGE_MAX_TABLET_COMPACTION_SCORE.setValue(0L); // 2. counter COUNTER_REQUEST_ALL = new LongCounterMetric("request_total", "total request"); @@ -238,14 +247,14 @@ public final class MetricRepo { } } + // to generate the metrics related to tablets of each backends // this metric is reentrant, so that we can add or remove metric along with the backend add or remove // at runtime. - public static void generateTabletNumMetrics() { - final String TABLET_NUM = "tablet_num"; - // remove all previous 'tablet_num' metric + public static void generateBackendsTabletMetrics() { + // remove all previous 'tablet' metric PALO_METRIC_REGISTER.removeMetrics(TABLET_NUM); + PALO_METRIC_REGISTER.removeMetrics(TABLET_MAX_COMPACTION_SCORE); - LOG.info("begin to generate capacity metrics"); SystemInfoService infoService = Catalog.getCurrentSystemInfo(); TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); @@ -266,9 +275,24 @@ public final class MetricRepo { return (long) invertedIndex.getTabletNumByBackendId(beId); } }; - tabletNum.addLabel(new MetricLabel("backend", be.getHost() + ":" + be.getHeartbeatPort())); PALO_METRIC_REGISTER.addPaloMetrics(tabletNum); + + // max compaction score of tablets on each backends + GaugeMetric tabletMaxCompactionScore = (GaugeMetric) new GaugeMetric( + TABLET_MAX_COMPACTION_SCORE, + "tablet max compaction score") { + @Override + public Long getValue() { + if (!Catalog.getInstance().isMaster()) { + return 0L; + } + return be.getTabletMaxCompactionScore(); + } + }; + tabletMaxCompactionScore.addLabel(new MetricLabel("backend", be.getHost() + ":" + be.getHeartbeatPort())); + PALO_METRIC_REGISTER.addPaloMetrics(tabletMaxCompactionScore); + } // end for backends } @@ -299,6 +323,10 @@ public final class MetricRepo { return sb.toString(); } + public static synchronized List getMetricsByName(String name) { + return PALO_METRIC_REGISTER.getPaloMetricsByName(name); + } + public static void addMetric(Metric metric) { init(); PALO_METRIC_REGISTER.addPaloMetrics(metric); diff --git a/fe/src/main/java/org/apache/doris/metric/SimpleCoreMetricVisitor.java b/fe/src/main/java/org/apache/doris/metric/SimpleCoreMetricVisitor.java index 2cd0e3fd1c..cf832ad597 100644 --- a/fe/src/main/java/org/apache/doris/metric/SimpleCoreMetricVisitor.java +++ b/fe/src/main/java/org/apache/doris/metric/SimpleCoreMetricVisitor.java @@ -54,6 +54,8 @@ public class SimpleCoreMetricVisitor extends MetricVisitor { public static final String REQUEST_PER_SECOND = "rps"; public static final String QUERY_ERR_RATE = "query_err_rate"; + public static final String MAX_TABLET_COMPACTION_SCORE = "max_tablet_compaction_score"; + private static final Map CORE_METRICS = Maps.newHashMap(); static { CORE_METRICS.put(MAX_JOURMAL_ID, TYPE_LONG); @@ -62,6 +64,7 @@ public class SimpleCoreMetricVisitor extends MetricVisitor { CORE_METRICS.put(QUERY_PER_SECOND, TYPE_DOUBLE); CORE_METRICS.put(REQUEST_PER_SECOND, TYPE_DOUBLE); CORE_METRICS.put(QUERY_ERR_RATE, TYPE_DOUBLE); + CORE_METRICS.put(MAX_TABLET_COMPACTION_SCORE, TYPE_LONG); } public SimpleCoreMetricVisitor(String prefix) { diff --git a/fe/src/main/java/org/apache/doris/system/Backend.java b/fe/src/main/java/org/apache/doris/system/Backend.java index 861bc19869..79e81c077c 100644 --- a/fe/src/main/java/org/apache/doris/system/Backend.java +++ b/fe/src/main/java/org/apache/doris/system/Backend.java @@ -88,7 +88,10 @@ public class Backend implements Writable { // after init it, this variable is set to true. private boolean initPathInfo = false; - long lastMissingHeartbeatTime = -1; + private long lastMissingHeartbeatTime = -1; + // the max tablet compaction score of this backend. + // this field is set by tablet report, and just for metric monitor, no need to persist. + private AtomicLong tabletMaxCompactionScore = new AtomicLong(0); public Backend() { this.host = ""; @@ -615,5 +618,13 @@ public class Backend implements Writable { return isChanged; } + + public void setTabletMaxCompactionScore(long compactionScore) { + tabletMaxCompactionScore.set(compactionScore); + } + + public long getTabletMaxCompactionScore() { + return tabletMaxCompactionScore.get(); + } } diff --git a/fe/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/src/main/java/org/apache/doris/system/SystemInfoService.java index e1292be56e..8e4eeb5063 100644 --- a/fe/src/main/java/org/apache/doris/system/SystemInfoService.java +++ b/fe/src/main/java/org/apache/doris/system/SystemInfoService.java @@ -172,7 +172,7 @@ public class SystemInfoService { LOG.info("finished to add {} ", newBackend); // backends is changed, regenerated tablet number metrics - MetricRepo.generateTabletNumMetrics(); + MetricRepo.generateBackendsTabletMetrics(); } public void dropBackends(List> hostPortPairs) throws DdlException { @@ -230,7 +230,7 @@ public class SystemInfoService { LOG.info("finished to drop {}", droppedBackend); // backends is changed, regenerated tablet number metrics - MetricRepo.generateTabletNumMetrics(); + MetricRepo.generateBackendsTabletMetrics(); } // only for test diff --git a/fe/src/test/java/org/apache/doris/catalog/BackendTest.java b/fe/src/test/java/org/apache/doris/catalog/BackendTest.java index 48f323dc06..e3606bdacd 100644 --- a/fe/src/test/java/org/apache/doris/catalog/BackendTest.java +++ b/fe/src/test/java/org/apache/doris/catalog/BackendTest.java @@ -75,7 +75,7 @@ public class BackendTest { backend.updateOnce(bePort, httpPort, beRpcPort); PowerMock.mockStatic(MetricRepo.class); - MetricRepo.generateTabletNumMetrics(); + MetricRepo.generateBackendsTabletMetrics(); EasyMock.expectLastCall().anyTimes(); PowerMock.replay(MetricRepo.class); } diff --git a/gensrc/thrift/MasterService.thrift b/gensrc/thrift/MasterService.thrift index 4489740d24..353362e413 100644 --- a/gensrc/thrift/MasterService.thrift +++ b/gensrc/thrift/MasterService.thrift @@ -80,6 +80,9 @@ struct TReportRequest { 5: optional map disks // string root_path 6: optional bool force_recovery 7: optional list tablet_list + // the max compaction score of all tablets on a backend, + // this field should be set along with tablet report + 8: optional i64 tablet_max_compaction_score } struct TMasterResult {