Files
doris/be/test/util/metrics_test.cpp
plat1ko db07e51cd3 [refactor](status) Refactor status handling in agent task (#11940)
Refactor TaggableLogger
Refactor status handling in agent task:
Unify log format in TaskWorkerPool
Pass Status to the top caller, and replace some OLAPInternalError with more detailed error message Status
Premature return with the opposite condition to reduce indention
2022-08-29 12:06:01 +08:00

487 lines
18 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "util/metrics.h"
#include <gtest/gtest.h>
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>
#include "common/config.h"
#include "testutil/test_util.h"
#include "util/stopwatch.hpp"
namespace doris {
class MetricsTest : public testing::Test {
public:
MetricsTest() {}
virtual ~MetricsTest() {}
};
// Shared checks for the integer-valued counter types: a default-constructed
// counter reads 0, and after one increment(100) both value() and to_string()
// report 100.
template <typename CounterType>
void expect_integer_counter_reaches_100() {
    CounterType subject;
    EXPECT_EQ(0, subject.value());
    subject.increment(100);
    EXPECT_EQ(100, subject.value());
    EXPECT_STREQ("100", subject.to_string().c_str());
}

// Basic single-threaded behaviour of every counter flavour: initial value,
// increment, and string rendering.
TEST_F(MetricsTest, Counter) {
    // Integer flavours all follow the same sequence.
    expect_integer_counter_reaches_100<IntCounter>();
    expect_integer_counter_reaches_100<IntAtomicCounter>();
    expect_integer_counter_reaches_100<UIntCounter>();

    // The floating-point flavour is expected to render with six decimals.
    {
        DoubleCounter fp_counter;
        EXPECT_EQ(0, fp_counter.value());
        fp_counter.increment(1.23);
        EXPECT_EQ(1.23, fp_counter.value());
        EXPECT_STREQ("1.230000", fp_counter.to_string().c_str());
    }
}
// Thread body used by the multi-threaded counter benchmarks.
//
// Increments `*counter` by one `loop` times and adds the time spent inside the
// increment loop (as reported by MonotonicStopWatch::elapsed_time()) to
// `*used_time`.
//
// @param loop       number of increments this worker performs
// @param counter    counter shared by all workers; must outlive the thread
// @param used_time  accumulator shared by all workers; must outlive the thread
template <typename T>
void mt_updater(int32_t loop, T* counter, std::atomic<uint64_t>* used_time) {
    // Wait one second before measuring. Presumably this gives the spawning
    // thread time to start every worker so that the increment loops overlap.
    // The standard-library sleep is used instead of POSIX sleep() so the code
    // does not depend on <unistd.h> being pulled in transitively.
    std::this_thread::sleep_for(std::chrono::seconds(1));

    MonotonicStopWatch watch;
    watch.start();
    for (int32_t i = 0; i < loop; ++i) {
        counter->increment(1);
    }
    const uint64_t elapsed = watch.elapsed_time();
    used_time->fetch_add(elapsed);
}
// Rough timing comparison between a plain int64_t, IntAtomicCounter and
// IntCounter, first on a single thread and then with several threads hammering
// one shared counter. Timings are only logged; the assertions check that no
// increment was lost.
TEST_F(MetricsTest, CounterPerf) {
    static const int kLoopCount = LOOP_LESS_OR_MORE(10, 100000000);
    static const int kThreadLoopCount = LOOP_LESS_OR_MORE(1000, 1000000);
    static const int kThreadCount = 8;
    // Total number of increments performed by all workers of one multi-thread run.
    const int total_threaded_increments = kThreadCount * kThreadLoopCount;

    // Baseline: plain int64_t
    {
        int64_t plain_sum = 0;
        MonotonicStopWatch timer;
        timer.start();
        for (int iter = 0; iter < kLoopCount; ++iter) {
            plain_sum += 1;
        }
        const uint64_t spent = timer.elapsed_time();
        EXPECT_EQ(kLoopCount, plain_sum);
        LOG(INFO) << "int64_t: elapsed: " << spent << "ns, ns/iter:" << spent / kLoopCount;
    }

    // Single thread: IntAtomicCounter
    {
        IntAtomicCounter atomic_counter;
        MonotonicStopWatch timer;
        timer.start();
        for (int iter = 0; iter < kLoopCount; ++iter) {
            atomic_counter.increment(1);
        }
        const uint64_t spent = timer.elapsed_time();
        EXPECT_EQ(kLoopCount, atomic_counter.value());
        LOG(INFO) << "IntAtomicCounter: elapsed: " << spent
                  << "ns, ns/iter:" << spent / kLoopCount;
    }

    // Single thread: IntCounter
    {
        IntCounter int_counter;
        MonotonicStopWatch timer;
        timer.start();
        for (int iter = 0; iter < kLoopCount; ++iter) {
            int_counter.increment(1);
        }
        const uint64_t spent = timer.elapsed_time();
        EXPECT_EQ(kLoopCount, int_counter.value());
        LOG(INFO) << "IntCounter: elapsed: " << spent << "ns, ns/iter:" << spent / kLoopCount;
    }

    // Several threads sharing one IntCounter
    {
        IntCounter shared_counter;
        std::vector<std::thread> workers;
        std::atomic<uint64_t> summed_time(0);
        for (int t = 0; t < kThreadCount; ++t) {
            workers.emplace_back(&mt_updater<IntCounter>, kThreadLoopCount, &shared_counter,
                                 &summed_time);
        }
        for (auto& worker : workers) {
            worker.join();
        }
        LOG(INFO) << "IntCounter multi-thread elapsed: " << summed_time.load()
                  << "ns, ns/iter:" << summed_time.load() / total_threaded_increments;
        EXPECT_EQ(total_threaded_increments, shared_counter.value());
    }

    // Several threads sharing one IntAtomicCounter
    {
        IntAtomicCounter shared_counter;
        std::vector<std::thread> workers;
        std::atomic<uint64_t> summed_time(0);
        for (int t = 0; t < kThreadCount; ++t) {
            workers.emplace_back(&mt_updater<IntAtomicCounter>, kThreadLoopCount,
                                 &shared_counter, &summed_time);
        }
        for (auto& worker : workers) {
            worker.join();
        }
        LOG(INFO) << "IntAtomicCounter multi-thread elapsed: " << summed_time.load()
                  << "ns, ns/iter:" << summed_time.load() / total_threaded_increments;
        EXPECT_EQ(total_threaded_increments, shared_counter.value());
    }
}
// Shared checks for the integer-valued gauge types: a default-constructed
// gauge reads 0, and after set_value(100) both value() and to_string() report
// 100.
template <typename GaugeType>
void expect_integer_gauge_holds_100() {
    GaugeType subject;
    EXPECT_EQ(0, subject.value());
    subject.set_value(100);
    EXPECT_EQ(100, subject.value());
    EXPECT_STREQ("100", subject.to_string().c_str());
}

// Basic behaviour of every gauge flavour: initial value, set_value, and string
// rendering.
TEST_F(MetricsTest, Gauge) {
    // IntGauge and UIntGauge follow the same sequence.
    expect_integer_gauge_holds_100<IntGauge>();
    expect_integer_gauge_holds_100<UIntGauge>();

    // DoubleGauge is expected to render with six decimals.
    {
        DoubleGauge fp_gauge;
        EXPECT_EQ(0.0, fp_gauge.value());
        fp_gauge.set_value(1.23);
        EXPECT_EQ(1.23, fp_gauge.value());
        EXPECT_STREQ("1.230000", fp_gauge.to_string().c_str());
    }
}
// Checks how MetricPrototype derives its names: simple_name() on its own, and
// combine_name() with an empty and a non-empty registry prefix.
TEST_F(MetricsTest, MetricPrototype) {
    // Prototype built without the fifth (group name) argument: the metric's
    // own name is expected everywhere.
    {
        MetricPrototype requests_proto(MetricType::COUNTER, MetricUnit::PERCENT,
                                       "fragment_requests_total",
                                       "Total fragment requests received.");
        EXPECT_EQ("fragment_requests_total", requests_proto.simple_name());
        EXPECT_EQ("fragment_requests_total", requests_proto.combine_name(""));
        EXPECT_EQ("doris_be_fragment_requests_total", requests_proto.combine_name("doris_be"));
    }

    // Prototype built with group name "cpu": the group name is expected in
    // place of the metric's own name.
    {
        MetricPrototype grouped_proto(MetricType::COUNTER, MetricUnit::PERCENT, "cpu_idle",
                                      "CPU's idle time percent", "cpu");
        EXPECT_EQ("cpu", grouped_proto.simple_name());
        EXPECT_EQ("cpu", grouped_proto.combine_name(""));
        EXPECT_EQ("doris_be_cpu", grouped_proto.combine_name("doris_be"));
    }
}
// Walks a metric through its life cycle on a MetricEntity: not found before
// registration, visible and updatable after registration, and not found again
// after deregistration.
TEST_F(MetricsTest, MetricEntityWithMetric) {
    MetricEntity entity(MetricEntityType::kServer, "test_entity", {});
    MetricPrototype cpu_idle_type(MetricType::COUNTER, MetricUnit::PERCENT, "cpu_idle");

    // Before register
    Metric* metric = entity.get_metric("cpu_idle");
    EXPECT_EQ(nullptr, metric);

    // Register
    auto* cpu_idle = static_cast<IntCounter*>(entity.register_metric<IntCounter>(&cpu_idle_type));
    // Fatal assertion: the pointer is dereferenced on the next line.
    ASSERT_NE(nullptr, cpu_idle);
    cpu_idle->increment(12);

    metric = entity.get_metric("cpu_idle");
    // Fatal assertion: a null result must fail the test here instead of
    // crashing the whole test binary in metric->to_string() below.
    ASSERT_NE(nullptr, metric);
    EXPECT_EQ("12", metric->to_string());

    cpu_idle->increment(8);
    EXPECT_EQ("20", metric->to_string());

    // Deregister
    entity.deregister_metric(&cpu_idle_type);

    // After deregister
    metric = entity.get_metric("cpu_idle");
    EXPECT_EQ(nullptr, metric);
}
// Checks that a hook registered on a MetricEntity runs on every
// trigger_hook_unlocked(true) call and stops running once it is deregistered.
TEST_F(MetricsTest, MetricEntityWithHook) {
    MetricEntity entity(MetricEntityType::kServer, "test_entity", {});
    MetricPrototype cpu_idle_type(MetricType::COUNTER, MetricUnit::PERCENT, "cpu_idle");

    // Register
    auto* cpu_idle = static_cast<IntCounter*>(entity.register_metric<IntCounter>(&cpu_idle_type));
    // Fatal assertion: the hook below dereferences this pointer.
    ASSERT_NE(nullptr, cpu_idle);
    entity.register_hook("test_hook", [cpu_idle]() { cpu_idle->increment(6); });

    // Before hook
    Metric* metric = entity.get_metric("cpu_idle");
    // Fatal assertion: a null result must fail the test here instead of
    // crashing the whole test binary in metric->to_string() below.
    ASSERT_NE(nullptr, metric);
    EXPECT_EQ("0", metric->to_string());

    // Hook: each trigger is expected to add 6
    entity.trigger_hook_unlocked(true);
    EXPECT_EQ("6", metric->to_string());
    entity.trigger_hook_unlocked(true);
    EXPECT_EQ("12", metric->to_string());

    // Deregister hook
    entity.deregister_hook("test_hook");

    // Hook but no effect
    entity.trigger_hook_unlocked(true);
    EXPECT_EQ("12", metric->to_string());
}
// Checks entity registration bookkeeping in MetricRegistry: registering the
// same name twice is expected to hand back the same entity, and the entity is
// expected to disappear only after it has been deregistered twice.
TEST_F(MetricsTest, MetricRegistryRegister) {
    MetricRegistry registry("test_registry");

    // An empty registry renders as empty output in every format.
    EXPECT_EQ("", registry.to_prometheus());
    EXPECT_EQ("[]", registry.to_json());
    EXPECT_EQ("", registry.to_core_string());

    // First registration creates the entity.
    auto first_handle = registry.register_entity("test_entity");
    EXPECT_NE(nullptr, first_handle);

    // Second registration under the same name returns the same object.
    auto second_handle = registry.register_entity("test_entity");
    EXPECT_NE(nullptr, second_handle);
    EXPECT_EQ(first_handle.get(), second_handle.get());

    // One deregistration is not enough: the entity can still be looked up and
    // is still the same object.
    registry.deregister_entity(first_handle);
    second_handle = registry.get_entity("test_entity");
    EXPECT_NE(nullptr, second_handle);
    EXPECT_EQ(first_handle.get(), second_handle.get());

    // The second deregistration removes it; the registry is empty again.
    registry.deregister_entity(second_handle);
    second_handle = registry.get_entity("test_entity");
    EXPECT_EQ(nullptr, second_handle);
    EXPECT_EQ("", registry.to_prometheus());
}
// Pins the exact text produced by MetricRegistry::to_prometheus(), to_json()
// and to_core_string() for several combinations of metric group name, metric
// labels and entity labels. Each case registers its own entity and
// deregisters it at the end, so the cases do not see each other's metrics.
TEST_F(MetricsTest, MetricRegistryOutput) {
    MetricRegistry registry("test_registry");

    // Nothing registered: every rendering is empty.
    {
        EXPECT_EQ("", registry.to_prometheus());
        EXPECT_EQ("[]", registry.to_json());
        EXPECT_EQ("", registry.to_core_string());
    }

    // One plain metric, no group name and no labels. The trailing `true` is
    // presumably the core-metric flag: this is the only case whose metric is
    // expected in to_core_string().
    {
        auto plain_entity = registry.register_entity("test_entity");
        MetricPrototype idle_proto(MetricType::GAUGE, MetricUnit::PERCENT, "cpu_idle", "", "", {},
                                   true);
        auto* idle = static_cast<IntCounter*>(plain_entity->register_metric<IntCounter>(&idle_proto));
        idle->increment(8);
        EXPECT_EQ(R"(# TYPE test_registry_cpu_idle gauge
test_registry_cpu_idle 8
)",
                  registry.to_prometheus());
        EXPECT_EQ(R"([{"tags":{"metric":"cpu_idle"},"unit":"percent","value":8}])",
                  registry.to_json());
        EXPECT_EQ("test_registry_cpu_idle LONG 8\n", registry.to_core_string());
        registry.deregister_entity(plain_entity);
    }

    // One metric with group name "cpu" and a metric-level label.
    {
        auto plain_entity = registry.register_entity("test_entity");
        MetricPrototype idle_proto(MetricType::GAUGE, MetricUnit::PERCENT, "cpu_idle", "", "cpu",
                                   {{"mode", "idle"}}, false);
        auto* idle = static_cast<IntCounter*>(plain_entity->register_metric<IntCounter>(&idle_proto));
        idle->increment(18);
        EXPECT_EQ(R"(# TYPE test_registry_cpu gauge
test_registry_cpu{mode="idle"} 18
)",
                  registry.to_prometheus());
        EXPECT_EQ(R"([{"tags":{"metric":"cpu","mode":"idle"},"unit":"percent","value":18}])",
                  registry.to_json());
        EXPECT_EQ("", registry.to_core_string());
        registry.deregister_entity(plain_entity);
    }

    // One plain metric on an entity that carries a label of its own.
    {
        auto labeled_entity = registry.register_entity("test_entity", {{"name", "label_test"}});
        MetricPrototype idle_proto(MetricType::GAUGE, MetricUnit::PERCENT, "cpu_idle");
        auto* idle =
                static_cast<IntCounter*>(labeled_entity->register_metric<IntCounter>(&idle_proto));
        idle->increment(28);
        EXPECT_EQ(R"(# TYPE test_registry_cpu_idle gauge
test_registry_cpu_idle{name="label_test"} 28
)",
                  registry.to_prometheus());
        EXPECT_EQ(
                R"([{"tags":{"metric":"cpu_idle","name":"label_test"},"unit":"percent","value":28}])",
                registry.to_json());
        EXPECT_EQ("", registry.to_core_string());
        registry.deregister_entity(labeled_entity);
    }

    // Group name and metric label combined with an entity label.
    {
        auto labeled_entity = registry.register_entity("test_entity", {{"name", "label_test"}});
        MetricPrototype idle_proto(MetricType::GAUGE, MetricUnit::PERCENT, "cpu_idle", "", "cpu",
                                   {{"mode", "idle"}});
        auto* idle =
                static_cast<IntCounter*>(labeled_entity->register_metric<IntCounter>(&idle_proto));
        idle->increment(38);
        EXPECT_EQ(R"(# TYPE test_registry_cpu gauge
test_registry_cpu{name="label_test",mode="idle"} 38
)",
                  registry.to_prometheus());
        EXPECT_EQ(
                R"([{"tags":{"metric":"cpu","mode":"idle","name":"label_test"},"unit":"percent","value":38}])",
                registry.to_json());
        EXPECT_EQ("", registry.to_core_string());
        registry.deregister_entity(labeled_entity);
    }

    // Two metrics sharing group name "cpu" on one entity: a single TYPE line
    // followed by one sample per metric is expected.
    {
        auto plain_entity = registry.register_entity("test_entity");
        MetricPrototype idle_proto(MetricType::GAUGE, MetricUnit::PERCENT, "cpu_idle", "", "cpu",
                                   {{"mode", "idle"}});
        auto* idle = static_cast<IntCounter*>(plain_entity->register_metric<IntCounter>(&idle_proto));
        idle->increment(48);
        MetricPrototype guest_proto(MetricType::GAUGE, MetricUnit::PERCENT, "cpu_guest", "", "cpu",
                                    {{"mode", "guest"}});
        auto* guest = static_cast<IntGauge*>(plain_entity->register_metric<IntGauge>(&guest_proto));
        guest->increment(58);
        EXPECT_EQ(R"(# TYPE test_registry_cpu gauge
test_registry_cpu{mode="idle"} 48
test_registry_cpu{mode="guest"} 58
)",
                  registry.to_prometheus());
        EXPECT_EQ(
                R"([{"tags":{"metric":"cpu","mode":"guest"},"unit":"percent","value":58},{"tags":{"metric":"cpu","mode":"idle"},"unit":"percent","value":48}])",
                registry.to_json());
        EXPECT_EQ("", registry.to_core_string());
        registry.deregister_entity(plain_entity);
    }
}
// Pins the exact Prometheus and JSON text produced for a HistogramMetric that
// has been fed the samples 1..100, first without labels and then with both an
// entity label and a metric label.
TEST_F(MetricsTest, HistogramRegistryOutput) {
    MetricRegistry registry("test_registry");

    // Histogram without any labels.
    {
        auto plain_entity = registry.register_entity("test_entity");
        MetricPrototype duration_proto(MetricType::HISTOGRAM, MetricUnit::MILLISECONDS,
                                       "task_duration");
        auto* histogram = static_cast<HistogramMetric*>(
                plain_entity->register_metric<HistogramMetric>(&duration_proto));
        for (int sample = 1; sample <= 100; ++sample) {
            histogram->add(sample);
        }
        EXPECT_EQ(R"(# TYPE test_registry_task_duration histogram
test_registry_task_duration{quantile="0.50"} 50
test_registry_task_duration{quantile="0.75"} 75
test_registry_task_duration{quantile="0.90"} 95.8333
test_registry_task_duration{quantile="0.95"} 100
test_registry_task_duration{quantile="0.99"} 100
test_registry_task_duration_sum 5050
test_registry_task_duration_count 100
test_registry_task_duration_max 100
test_registry_task_duration_min 1
test_registry_task_duration_average 50.5
test_registry_task_duration_median 50
test_registry_task_duration_standard_deviation 28.8661
)",
                  registry.to_prometheus());
        EXPECT_EQ(
                R"*([{"tags":{"metric":"task_duration"},"unit":"milliseconds",)*"
                R"*("value":{"total_count":100,"min":1,"average":50.5,"median":50.0,)*"
                R"*("percentile_50":50.0,"percentile_75":75.0,"percentile_90":95.83333333333334,"percentile_95":100.0,"percentile_99":100.0,)*"
                R"*("standard_deviation":28.86607004772212,"max":100,"total_sum":5050}}])*",
                registry.to_json());
        registry.deregister_entity(plain_entity);
    }

    // Histogram with labels: "instance" comes from the entity, "type" from the
    // metric prototype. Both are expected on every emitted sample line.
    {
        auto labeled_entity = registry.register_entity("test_entity", {{"instance", "test"}});
        MetricPrototype duration_proto(MetricType::HISTOGRAM, MetricUnit::MILLISECONDS,
                                       "task_duration", "", "", {{"type", "create_tablet"}});
        auto* histogram = static_cast<HistogramMetric*>(
                labeled_entity->register_metric<HistogramMetric>(&duration_proto));
        for (int sample = 1; sample <= 100; ++sample) {
            histogram->add(sample);
        }
        EXPECT_EQ(R"(# TYPE test_registry_task_duration histogram
test_registry_task_duration{instance="test",type="create_tablet",quantile="0.50"} 50
test_registry_task_duration{instance="test",type="create_tablet",quantile="0.75"} 75
test_registry_task_duration{instance="test",type="create_tablet",quantile="0.90"} 95.8333
test_registry_task_duration{instance="test",type="create_tablet",quantile="0.95"} 100
test_registry_task_duration{instance="test",type="create_tablet",quantile="0.99"} 100
test_registry_task_duration_sum{instance="test",type="create_tablet"} 5050
test_registry_task_duration_count{instance="test",type="create_tablet"} 100
test_registry_task_duration_max{instance="test",type="create_tablet"} 100
test_registry_task_duration_min{instance="test",type="create_tablet"} 1
test_registry_task_duration_average{instance="test",type="create_tablet"} 50.5
test_registry_task_duration_median{instance="test",type="create_tablet"} 50
test_registry_task_duration_standard_deviation{instance="test",type="create_tablet"} 28.8661
)",
                  registry.to_prometheus());
        EXPECT_EQ(
                R"*([{"tags":{"metric":"task_duration","type":"create_tablet","instance":"test"},"unit":"milliseconds",)*"
                R"*("value":{"total_count":100,"min":1,"average":50.5,"median":50.0,)*"
                R"*("percentile_50":50.0,"percentile_75":75.0,"percentile_90":95.83333333333334,"percentile_95":100.0,"percentile_99":100.0,)*"
                R"*("standard_deviation":28.86607004772212,"max":100,"total_sum":5050}}])*",
                registry.to_json());
        registry.deregister_entity(labeled_entity);
    }
}
} // namespace doris