[feature](metric) Support collecting query counter and error query counter metrics at the user level (#22125)
1. Support collecting query counter and error query counter metrics at the user level. 2. Restore the sum and count output for histogram metrics, which was mistakenly deleted in PR #22045.
This commit is contained in:
@ -19,16 +19,17 @@ package org.apache.doris.metric;
|
||||
|
||||
import org.apache.doris.catalog.Env;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class DorisMetricRegistry {
|
||||
|
||||
private Collection<Metric> metrics = new PriorityQueue<>(Comparator.comparing(Metric::getName));
|
||||
private Collection<Metric> systemMetrics = new PriorityQueue<>(Comparator.comparing(Metric::getName));
|
||||
private Collection<Metric> metrics = Lists.newArrayList();
|
||||
private Collection<Metric> systemMetrics = Lists.newArrayList();
|
||||
|
||||
public DorisMetricRegistry() {
|
||||
|
||||
@ -49,6 +50,10 @@ public class DorisMetricRegistry {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the combined number of entries held in {@code metrics} and
 * {@code systemMetrics}. Both sizes are read under this registry's monitor.
 *
 * @return {@code metrics.size() + systemMetrics.size()}
 */
public synchronized int getAllMetricSize() {
|
||||
return metrics.size() + systemMetrics.size();
|
||||
}
|
||||
|
||||
/**
 * Returns the entries of {@code metrics} as a newly collected list, sorted by
 * {@link Metric#getName()}. The backing collection itself is not reordered.
 *
 * @return a name-sorted list of the registered (non-system) metrics
 */
public synchronized List<Metric> getMetrics() {
|
||||
// Sorting happens on a stream, so the result is a separate list from the backing collection.
return metrics.stream().sorted(Comparator.comparing(Metric::getName)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@ -71,7 +71,6 @@ public class JsonMetricVisitor extends MetricVisitor {
|
||||
} else {
|
||||
sb.append("\n]");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -71,6 +71,8 @@ public final class MetricRepo {
|
||||
public static LongCounterMetric COUNTER_QUERY_ERR;
|
||||
public static LongCounterMetric COUNTER_QUERY_TABLE;
|
||||
public static LongCounterMetric COUNTER_QUERY_OLAP_TABLE;
|
||||
public static AutoMappedMetric<LongCounterMetric> USER_COUNTER_QUERY_ALL;
|
||||
public static AutoMappedMetric<LongCounterMetric> USER_COUNTER_QUERY_ERR;
|
||||
public static Histogram HISTO_QUERY_LATENCY;
|
||||
public static AutoMappedMetric<Histogram> USER_HISTO_QUERY_LATENCY;
|
||||
public static AutoMappedMetric<GaugeMetricImpl<Long>> USER_GAUGE_QUERY_INSTANCE_NUM;
|
||||
@ -285,6 +287,20 @@ public final class MetricRepo {
|
||||
COUNTER_QUERY_OLAP_TABLE = new LongCounterMetric("query_olap_table", MetricUnit.REQUESTS,
|
||||
"total query from olap table");
|
||||
DORIS_METRIC_REGISTER.addMetrics(COUNTER_QUERY_OLAP_TABLE);
|
||||
USER_COUNTER_QUERY_ALL = new AutoMappedMetric<>(name -> {
|
||||
LongCounterMetric userCountQueryAll = new LongCounterMetric("query_total", MetricUnit.REQUESTS,
|
||||
"total query for single user");
|
||||
userCountQueryAll.addLabel(new MetricLabel("user", name));
|
||||
DORIS_METRIC_REGISTER.addMetrics(userCountQueryAll);
|
||||
return userCountQueryAll;
|
||||
});
|
||||
USER_COUNTER_QUERY_ERR = new AutoMappedMetric<>(name -> {
|
||||
LongCounterMetric userCountQueryErr = new LongCounterMetric("query_err", MetricUnit.REQUESTS,
|
||||
"total error query for single user");
|
||||
userCountQueryErr.addLabel(new MetricLabel("user", name));
|
||||
DORIS_METRIC_REGISTER.addMetrics(userCountQueryErr);
|
||||
return userCountQueryErr;
|
||||
});
|
||||
HISTO_QUERY_LATENCY = METRIC_REGISTER.histogram(
|
||||
MetricRegistry.name("query", "latency", "ms"));
|
||||
USER_HISTO_QUERY_LATENCY = new AutoMappedMetric<>(name -> {
|
||||
@ -633,8 +649,7 @@ public final class MetricRepo {
|
||||
JvmStats jvmStats = jvmService.stats();
|
||||
visitor.visitJvm(sb, jvmStats);
|
||||
|
||||
visitor.setMetricNumber(
|
||||
DORIS_METRIC_REGISTER.getMetrics().size() + DORIS_METRIC_REGISTER.getSystemMetrics().size());
|
||||
visitor.setMetricNumber(DORIS_METRIC_REGISTER.getAllMetricSize());
|
||||
// doris metrics
|
||||
for (Metric metric : DORIS_METRIC_REGISTER.getMetrics()) {
|
||||
visitor.visit(sb, MetricVisitor.FE_PREFIX, metric);
|
||||
|
||||
@ -173,7 +173,6 @@ public class PrometheusMetricVisitor extends MetricVisitor {
|
||||
|
||||
// value
|
||||
sb.append(" ").append(metric.getValue().toString()).append("\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -192,9 +191,10 @@ public class PrometheusMetricVisitor extends MetricVisitor {
|
||||
final String fullName = prefix + String.join("_", names);
|
||||
final String fullTag = String.join(",", tags);
|
||||
// we should define metric name only once
|
||||
if (tags.isEmpty()) {
|
||||
if (!metricNames.contains(fullName)) {
|
||||
sb.append(HELP).append(fullName).append(" ").append("\n");
|
||||
sb.append(TYPE).append(fullName).append(" ").append("summary\n");
|
||||
metricNames.add(fullName);
|
||||
}
|
||||
String delimiter = tags.isEmpty() ? "" : ",";
|
||||
Snapshot snapshot = histogram.getSnapshot();
|
||||
@ -208,6 +208,10 @@ public class PrometheusMetricVisitor extends MetricVisitor {
|
||||
.append(snapshot.get99thPercentile()).append("\n");
|
||||
sb.append(fullName).append("{quantile=\"0.999\"").append(delimiter).append(fullTag).append("} ")
|
||||
.append(snapshot.get999thPercentile()).append("\n");
|
||||
sb.append(fullName).append("_sum {").append(fullTag).append("} ")
|
||||
.append(histogram.getCount() * snapshot.getMean()).append("\n");
|
||||
sb.append(fullName).append("_count {").append(fullTag).append("} ")
|
||||
.append(histogram.getCount()).append("\n");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -117,7 +117,6 @@ public class SimpleCoreMetricVisitor extends MetricVisitor {
|
||||
.join(prefix + metric.getName(), CORE_METRICS.get(metric.getName()), metric.getValue().toString()))
|
||||
.append("\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -132,7 +131,6 @@ public class SimpleCoreMetricVisitor extends MetricVisitor {
|
||||
String.format("%.0f", snapshot.get95thPercentile()))).append("\n");
|
||||
sb.append(Joiner.on(" ").join(prefix + name + "_99", CORE_METRICS.get(name),
|
||||
String.format("%.0f", snapshot.get99thPercentile()))).append("\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -296,10 +296,12 @@ public class ConnectProcessor {
|
||||
|
||||
if (ctx.getState().isQuery()) {
|
||||
MetricRepo.COUNTER_QUERY_ALL.increase(1L);
|
||||
MetricRepo.USER_COUNTER_QUERY_ALL.getOrAdd(ctx.getQualifiedUser()).increase(1L);
|
||||
if (ctx.getState().getStateType() == MysqlStateType.ERR
|
||||
&& ctx.getState().getErrType() != QueryState.ErrType.ANALYSIS_ERR) {
|
||||
// err query
|
||||
MetricRepo.COUNTER_QUERY_ERR.increase(1L);
|
||||
MetricRepo.USER_COUNTER_QUERY_ERR.getOrAdd(ctx.getQualifiedUser()).increase(1L);
|
||||
} else if (ctx.getState().getStateType() == MysqlStateType.OK
|
||||
|| ctx.getState().getStateType() == MysqlStateType.EOF) {
|
||||
// ok query
|
||||
|
||||
@ -58,15 +58,25 @@ public class MetricsTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUserMetrics() {
|
||||
public void testUserQueryMetrics() {
|
||||
MetricRepo.USER_COUNTER_QUERY_ALL.getOrAdd("test_user").increase(1L);
|
||||
MetricRepo.USER_COUNTER_QUERY_ERR.getOrAdd("test_user").increase(1L);
|
||||
MetricRepo.USER_HISTO_QUERY_LATENCY.getOrAdd("test_user").update(10L);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
MetricVisitor visitor = new PrometheusMetricVisitor();
|
||||
List<Metric> metrics = MetricRepo.DORIS_METRIC_REGISTER.getMetrics();
|
||||
for (Metric metric : metrics) {
|
||||
visitor.visit(sb, MetricVisitor.FE_PREFIX, metric);
|
||||
}
|
||||
SortedMap<String, Histogram> histograms = MetricRepo.METRIC_REGISTER.getHistograms();
|
||||
for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
|
||||
visitor.visitHistogram(sb, MetricVisitor.FE_PREFIX, entry.getKey(), entry.getValue());
|
||||
}
|
||||
String metricResult = sb.toString();
|
||||
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_total counter"));
|
||||
Assert.assertTrue(metricResult.contains("doris_fe_query_total{user=\"test_user\"} 1"));
|
||||
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_err counter"));
|
||||
Assert.assertTrue(metricResult.contains("doris_fe_query_err{user=\"test_user\"} 1"));
|
||||
Assert.assertTrue(metricResult.contains("# TYPE doris_fe_query_latency_ms summary"));
|
||||
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\"} 0.0"));
|
||||
Assert.assertTrue(metricResult.contains("doris_fe_query_latency_ms{quantile=\"0.999\",user=\"test_user\"} 10.0"));
|
||||
|
||||
Reference in New Issue
Block a user