[fix](metric) fix prometheus metric format error (#22045)
We should define each metric name (its `# HELP` and `# TYPE` lines) only once, as in the following:
# HELP doris_fe_query_latency_ms
# TYPE doris_fe_query_latency_ms summary
doris_fe_query_latency_ms{quantile="0.75"} 1.0
doris_fe_query_latency_ms{quantile="0.95"} 2.0
doris_fe_query_latency_ms{quantile="0.98"} 100.0
doris_fe_query_latency_ms{quantile="0.99"} 100.0
doris_fe_query_latency_ms{quantile="0.999"} 100.0
doris_fe_query_latency_ms{quantile="0.75",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.95",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.98",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.99",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.999",user="default_cluster:test1"} 1.0
This commit is contained in:
@ -72,7 +72,7 @@ public final class MetricRepo {
|
||||
public static LongCounterMetric COUNTER_QUERY_TABLE;
|
||||
public static LongCounterMetric COUNTER_QUERY_OLAP_TABLE;
|
||||
public static Histogram HISTO_QUERY_LATENCY;
|
||||
public static AutoMappedMetric<Histogram> DB_HISTO_QUERY_LATENCY;
|
||||
public static AutoMappedMetric<Histogram> USER_HISTO_QUERY_LATENCY;
|
||||
public static AutoMappedMetric<GaugeMetricImpl<Long>> USER_GAUGE_QUERY_INSTANCE_NUM;
|
||||
public static AutoMappedMetric<LongCounterMetric> USER_COUNTER_QUERY_INSTANCE_BEGIN;
|
||||
public static AutoMappedMetric<LongCounterMetric> BE_COUNTER_QUERY_RPC_ALL;
|
||||
@ -287,8 +287,8 @@ public final class MetricRepo {
|
||||
DORIS_METRIC_REGISTER.addMetrics(COUNTER_QUERY_OLAP_TABLE);
|
||||
HISTO_QUERY_LATENCY = METRIC_REGISTER.histogram(
|
||||
MetricRegistry.name("query", "latency", "ms"));
|
||||
DB_HISTO_QUERY_LATENCY = new AutoMappedMetric<>(name -> {
|
||||
String metricName = MetricRegistry.name("query", "latency", "ms", "db=" + name);
|
||||
USER_HISTO_QUERY_LATENCY = new AutoMappedMetric<>(name -> {
|
||||
String metricName = MetricRegistry.name("query", "latency", "ms", "user=" + name);
|
||||
return METRIC_REGISTER.histogram(metricName);
|
||||
});
|
||||
USER_COUNTER_QUERY_INSTANCE_BEGIN = addLabeledMetrics("user", () ->
|
||||
|
||||
@ -191,8 +191,11 @@ public class PrometheusMetricVisitor extends MetricVisitor {
|
||||
}
|
||||
final String fullName = prefix + String.join("_", names);
|
||||
final String fullTag = String.join(",", tags);
|
||||
sb.append(HELP).append(fullName).append(" ").append("\n");
|
||||
sb.append(TYPE).append(fullName).append(" ").append("summary\n");
|
||||
// we should define metric name only once
|
||||
if (tags.isEmpty()) {
|
||||
sb.append(HELP).append(fullName).append(" ").append("\n");
|
||||
sb.append(TYPE).append(fullName).append(" ").append("summary\n");
|
||||
}
|
||||
String delimiter = tags.isEmpty() ? "" : ",";
|
||||
Snapshot snapshot = histogram.getSnapshot();
|
||||
sb.append(fullName).append("{quantile=\"0.75\"").append(delimiter).append(fullTag).append("} ")
|
||||
@ -205,11 +208,6 @@ public class PrometheusMetricVisitor extends MetricVisitor {
|
||||
.append(snapshot.get99thPercentile()).append("\n");
|
||||
sb.append(fullName).append("{quantile=\"0.999\"").append(delimiter).append(fullTag).append("} ")
|
||||
.append(snapshot.get999thPercentile()).append("\n");
|
||||
sb.append(fullName).append("_sum {").append(fullTag).append("} ")
|
||||
.append(histogram.getCount() * snapshot.getMean()).append("\n");
|
||||
sb.append(fullName).append("_count {").append(fullTag).append("} ")
|
||||
.append(histogram.getCount()).append("\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -304,7 +304,8 @@ public class ConnectProcessor {
|
||||
|| ctx.getState().getStateType() == MysqlStateType.EOF) {
|
||||
// ok query
|
||||
MetricRepo.HISTO_QUERY_LATENCY.update(elapseMs);
|
||||
MetricRepo.DB_HISTO_QUERY_LATENCY.getOrAdd(ctx.getDatabase()).update(elapseMs);
|
||||
MetricRepo.USER_HISTO_QUERY_LATENCY.getOrAdd(ctx.getQualifiedUser()).update(elapseMs);
|
||||
|
||||
if (elapseMs > Config.qe_slow_log_ms) {
|
||||
String sqlDigest = DigestUtils.md5Hex(((Queriable) parsedStmt).toDigest());
|
||||
ctx.getAuditEventBuilder().setSqlDigest(sqlDigest);
|
||||
|
||||
@ -58,8 +58,8 @@ public class MetricsTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDBMetrics() {
|
||||
MetricRepo.DB_HISTO_QUERY_LATENCY.getOrAdd("test_db").update(10L);
|
||||
public void testUserMetrics() {
|
||||
MetricRepo.USER_HISTO_QUERY_LATENCY.getOrAdd("test_user").update(10L);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
MetricVisitor visitor = new PrometheusMetricVisitor();
|
||||
SortedMap<String, Histogram> histograms = MetricRepo.METRIC_REGISTER.getHistograms();
|
||||
@ -69,7 +69,7 @@ public class MetricsTest {
|
||||
String metricResult = sb.toString();
|
||||
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_latency_ms summary"));
|
||||
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\"} 0.0"));
|
||||
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\",db=\"test_db\"} 10.0"));
|
||||
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\",user=\"test_user\"} 10.0"));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user