[feature](metric) Support collecting query counter and error query counter metrics at the user level (#22125)

1. Support collecting query counter and error query counter metrics at the user level.
2. Restore the sum and count for histogram metrics, which were mistakenly deleted in PR #22045.
This commit is contained in:
caiconghui
2023-07-25 11:16:38 +08:00
committed by GitHub
parent ba2eb4d788
commit 2e20ff8cab
7 changed files with 44 additions and 11 deletions

View File

@ -19,16 +19,17 @@ package org.apache.doris.metric;
import org.apache.doris.catalog.Env;
import com.google.common.collect.Lists;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;
import java.util.stream.Collectors;
public class DorisMetricRegistry {
private Collection<Metric> metrics = new PriorityQueue<>(Comparator.comparing(Metric::getName));
private Collection<Metric> systemMetrics = new PriorityQueue<>(Comparator.comparing(Metric::getName));
private Collection<Metric> metrics = Lists.newArrayList();
private Collection<Metric> systemMetrics = Lists.newArrayList();
public DorisMetricRegistry() {
@ -49,6 +50,10 @@ public class DorisMetricRegistry {
}
}
/**
 * Reports how many metrics are currently held by this registry, counting
 * the regular and the system collections together.
 *
 * @return the size of {@code metrics} plus the size of {@code systemMetrics}
 */
public synchronized int getAllMetricSize() {
    final int regularCount = metrics.size();
    final int systemCount = systemMetrics.size();
    return regularCount + systemCount;
}
/**
 * Returns the regular (non-system) metrics ordered by metric name.
 *
 * <p>The result is a freshly collected list; the registry's own
 * {@code metrics} collection is only read, never reordered.
 *
 * @return a new list containing every entry of {@code metrics}, sorted by
 *         {@link Metric#getName()}
 */
public synchronized List<Metric> getMetrics() {
    final Comparator<Metric> byName = Comparator.comparing(Metric::getName);
    return metrics
            .stream()
            .sorted(byName)
            .collect(Collectors.toList());
}

View File

@ -71,7 +71,6 @@ public class JsonMetricVisitor extends MetricVisitor {
} else {
sb.append("\n]");
}
return;
}
@Override

View File

@ -71,6 +71,8 @@ public final class MetricRepo {
public static LongCounterMetric COUNTER_QUERY_ERR;
public static LongCounterMetric COUNTER_QUERY_TABLE;
public static LongCounterMetric COUNTER_QUERY_OLAP_TABLE;
public static AutoMappedMetric<LongCounterMetric> USER_COUNTER_QUERY_ALL;
public static AutoMappedMetric<LongCounterMetric> USER_COUNTER_QUERY_ERR;
public static Histogram HISTO_QUERY_LATENCY;
public static AutoMappedMetric<Histogram> USER_HISTO_QUERY_LATENCY;
public static AutoMappedMetric<GaugeMetricImpl<Long>> USER_GAUGE_QUERY_INSTANCE_NUM;
@ -285,6 +287,20 @@ public final class MetricRepo {
COUNTER_QUERY_OLAP_TABLE = new LongCounterMetric("query_olap_table", MetricUnit.REQUESTS,
"total query from olap table");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_QUERY_OLAP_TABLE);
USER_COUNTER_QUERY_ALL = new AutoMappedMetric<>(name -> {
LongCounterMetric userCountQueryAll = new LongCounterMetric("query_total", MetricUnit.REQUESTS,
"total query for single user");
userCountQueryAll.addLabel(new MetricLabel("user", name));
DORIS_METRIC_REGISTER.addMetrics(userCountQueryAll);
return userCountQueryAll;
});
USER_COUNTER_QUERY_ERR = new AutoMappedMetric<>(name -> {
LongCounterMetric userCountQueryErr = new LongCounterMetric("query_err", MetricUnit.REQUESTS,
"total error query for single user");
userCountQueryErr.addLabel(new MetricLabel("user", name));
DORIS_METRIC_REGISTER.addMetrics(userCountQueryErr);
return userCountQueryErr;
});
HISTO_QUERY_LATENCY = METRIC_REGISTER.histogram(
MetricRegistry.name("query", "latency", "ms"));
USER_HISTO_QUERY_LATENCY = new AutoMappedMetric<>(name -> {
@ -633,8 +649,7 @@ public final class MetricRepo {
JvmStats jvmStats = jvmService.stats();
visitor.visitJvm(sb, jvmStats);
visitor.setMetricNumber(
DORIS_METRIC_REGISTER.getMetrics().size() + DORIS_METRIC_REGISTER.getSystemMetrics().size());
visitor.setMetricNumber(DORIS_METRIC_REGISTER.getAllMetricSize());
// doris metrics
for (Metric metric : DORIS_METRIC_REGISTER.getMetrics()) {
visitor.visit(sb, MetricVisitor.FE_PREFIX, metric);

View File

@ -173,7 +173,6 @@ public class PrometheusMetricVisitor extends MetricVisitor {
// value
sb.append(" ").append(metric.getValue().toString()).append("\n");
return;
}
@Override
@ -192,9 +191,10 @@ public class PrometheusMetricVisitor extends MetricVisitor {
final String fullName = prefix + String.join("_", names);
final String fullTag = String.join(",", tags);
// we should define metric name only once
if (tags.isEmpty()) {
if (!metricNames.contains(fullName)) {
sb.append(HELP).append(fullName).append(" ").append("\n");
sb.append(TYPE).append(fullName).append(" ").append("summary\n");
metricNames.add(fullName);
}
String delimiter = tags.isEmpty() ? "" : ",";
Snapshot snapshot = histogram.getSnapshot();
@ -208,6 +208,10 @@ public class PrometheusMetricVisitor extends MetricVisitor {
.append(snapshot.get99thPercentile()).append("\n");
sb.append(fullName).append("{quantile=\"0.999\"").append(delimiter).append(fullTag).append("} ")
.append(snapshot.get999thPercentile()).append("\n");
sb.append(fullName).append("_sum {").append(fullTag).append("} ")
.append(histogram.getCount() * snapshot.getMean()).append("\n");
sb.append(fullName).append("_count {").append(fullTag).append("} ")
.append(histogram.getCount()).append("\n");
}
@Override

View File

@ -117,7 +117,6 @@ public class SimpleCoreMetricVisitor extends MetricVisitor {
.join(prefix + metric.getName(), CORE_METRICS.get(metric.getName()), metric.getValue().toString()))
.append("\n");
}
return;
}
@Override
@ -132,7 +131,6 @@ public class SimpleCoreMetricVisitor extends MetricVisitor {
String.format("%.0f", snapshot.get95thPercentile()))).append("\n");
sb.append(Joiner.on(" ").join(prefix + name + "_99", CORE_METRICS.get(name),
String.format("%.0f", snapshot.get99thPercentile()))).append("\n");
return;
}
@Override

View File

@ -296,10 +296,12 @@ public class ConnectProcessor {
if (ctx.getState().isQuery()) {
MetricRepo.COUNTER_QUERY_ALL.increase(1L);
MetricRepo.USER_COUNTER_QUERY_ALL.getOrAdd(ctx.getQualifiedUser()).increase(1L);
if (ctx.getState().getStateType() == MysqlStateType.ERR
&& ctx.getState().getErrType() != QueryState.ErrType.ANALYSIS_ERR) {
// err query
MetricRepo.COUNTER_QUERY_ERR.increase(1L);
MetricRepo.USER_COUNTER_QUERY_ERR.getOrAdd(ctx.getQualifiedUser()).increase(1L);
} else if (ctx.getState().getStateType() == MysqlStateType.OK
|| ctx.getState().getStateType() == MysqlStateType.EOF) {
// ok query

View File

@ -58,15 +58,25 @@ public class MetricsTest {
}
@Test
public void testUserMetrics() {
public void testUserQueryMetrics() {
MetricRepo.USER_COUNTER_QUERY_ALL.getOrAdd("test_user").increase(1L);
MetricRepo.USER_COUNTER_QUERY_ERR.getOrAdd("test_user").increase(1L);
MetricRepo.USER_HISTO_QUERY_LATENCY.getOrAdd("test_user").update(10L);
StringBuilder sb = new StringBuilder();
MetricVisitor visitor = new PrometheusMetricVisitor();
List<Metric> metrics = MetricRepo.DORIS_METRIC_REGISTER.getMetrics();
for (Metric metric : metrics) {
visitor.visit(sb, MetricVisitor.FE_PREFIX, metric);
}
SortedMap<String, Histogram> histograms = MetricRepo.METRIC_REGISTER.getHistograms();
for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
visitor.visitHistogram(sb, MetricVisitor.FE_PREFIX, entry.getKey(), entry.getValue());
}
String metricResult = sb.toString();
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_total counter"));
Assert.assertTrue(metricResult.contains("doris_fe_query_total{user=\"test_user\"} 1"));
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_err counter"));
Assert.assertTrue(metricResult.contains("doris_fe_query_err{user=\"test_user\"} 1"));
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_latency_ms summary"));
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\"} 0.0"));
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\",user=\"test_user\"} 10.0"));