[fix](metric) fix prometheus metric format error (#22045)

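The metric name should be declared (by its `# HELP` and `# TYPE` lines) only once, with every labeled series listed under that single declaration, as in the following: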

# HELP doris_fe_query_latency_ms 
# TYPE doris_fe_query_latency_ms summary
doris_fe_query_latency_ms{quantile="0.75"} 1.0
doris_fe_query_latency_ms{quantile="0.95"} 2.0
doris_fe_query_latency_ms{quantile="0.98"} 100.0
doris_fe_query_latency_ms{quantile="0.99"} 100.0
doris_fe_query_latency_ms{quantile="0.999"} 100.0
doris_fe_query_latency_ms{quantile="0.75",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.95",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.98",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.99",user="default_cluster:test1"} 1.0
doris_fe_query_latency_ms{quantile="0.999",user="default_cluster:test1"} 1.0
This commit is contained in:
caiconghui
2023-07-22 22:38:29 +08:00
committed by GitHub
parent 0755fd16d8
commit 8cb532230a
4 changed files with 13 additions and 14 deletions

View File

@ -72,7 +72,7 @@ public final class MetricRepo {
public static LongCounterMetric COUNTER_QUERY_TABLE;
public static LongCounterMetric COUNTER_QUERY_OLAP_TABLE;
public static Histogram HISTO_QUERY_LATENCY;
public static AutoMappedMetric<Histogram> DB_HISTO_QUERY_LATENCY;
public static AutoMappedMetric<Histogram> USER_HISTO_QUERY_LATENCY;
public static AutoMappedMetric<GaugeMetricImpl<Long>> USER_GAUGE_QUERY_INSTANCE_NUM;
public static AutoMappedMetric<LongCounterMetric> USER_COUNTER_QUERY_INSTANCE_BEGIN;
public static AutoMappedMetric<LongCounterMetric> BE_COUNTER_QUERY_RPC_ALL;
@ -287,8 +287,8 @@ public final class MetricRepo {
DORIS_METRIC_REGISTER.addMetrics(COUNTER_QUERY_OLAP_TABLE);
HISTO_QUERY_LATENCY = METRIC_REGISTER.histogram(
MetricRegistry.name("query", "latency", "ms"));
DB_HISTO_QUERY_LATENCY = new AutoMappedMetric<>(name -> {
String metricName = MetricRegistry.name("query", "latency", "ms", "db=" + name);
USER_HISTO_QUERY_LATENCY = new AutoMappedMetric<>(name -> {
String metricName = MetricRegistry.name("query", "latency", "ms", "user=" + name);
return METRIC_REGISTER.histogram(metricName);
});
USER_COUNTER_QUERY_INSTANCE_BEGIN = addLabeledMetrics("user", () ->

View File

@ -191,8 +191,11 @@ public class PrometheusMetricVisitor extends MetricVisitor {
}
final String fullName = prefix + String.join("_", names);
final String fullTag = String.join(",", tags);
sb.append(HELP).append(fullName).append(" ").append("\n");
sb.append(TYPE).append(fullName).append(" ").append("summary\n");
// we should define metric name only once
if (tags.isEmpty()) {
sb.append(HELP).append(fullName).append(" ").append("\n");
sb.append(TYPE).append(fullName).append(" ").append("summary\n");
}
String delimiter = tags.isEmpty() ? "" : ",";
Snapshot snapshot = histogram.getSnapshot();
sb.append(fullName).append("{quantile=\"0.75\"").append(delimiter).append(fullTag).append("} ")
@ -205,11 +208,6 @@ public class PrometheusMetricVisitor extends MetricVisitor {
.append(snapshot.get99thPercentile()).append("\n");
sb.append(fullName).append("{quantile=\"0.999\"").append(delimiter).append(fullTag).append("} ")
.append(snapshot.get999thPercentile()).append("\n");
sb.append(fullName).append("_sum {").append(fullTag).append("} ")
.append(histogram.getCount() * snapshot.getMean()).append("\n");
sb.append(fullName).append("_count {").append(fullTag).append("} ")
.append(histogram.getCount()).append("\n");
return;
}
@Override

View File

@ -304,7 +304,8 @@ public class ConnectProcessor {
|| ctx.getState().getStateType() == MysqlStateType.EOF) {
// ok query
MetricRepo.HISTO_QUERY_LATENCY.update(elapseMs);
MetricRepo.DB_HISTO_QUERY_LATENCY.getOrAdd(ctx.getDatabase()).update(elapseMs);
MetricRepo.USER_HISTO_QUERY_LATENCY.getOrAdd(ctx.getQualifiedUser()).update(elapseMs);
if (elapseMs > Config.qe_slow_log_ms) {
String sqlDigest = DigestUtils.md5Hex(((Queriable) parsedStmt).toDigest());
ctx.getAuditEventBuilder().setSqlDigest(sqlDigest);

View File

@ -58,8 +58,8 @@ public class MetricsTest {
}
@Test
public void testDBMetrics() {
MetricRepo.DB_HISTO_QUERY_LATENCY.getOrAdd("test_db").update(10L);
public void testUserMetrics() {
MetricRepo.USER_HISTO_QUERY_LATENCY.getOrAdd("test_user").update(10L);
StringBuilder sb = new StringBuilder();
MetricVisitor visitor = new PrometheusMetricVisitor();
SortedMap<String, Histogram> histograms = MetricRepo.METRIC_REGISTER.getHistograms();
@ -69,7 +69,7 @@ public class MetricsTest {
String metricResult = sb.toString();
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_latency_ms summary"));
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\"} 0.0"));
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\",db=\"test_db\"} 10.0"));
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\",user=\"test_user\"} 10.0"));
}
}