[Refactor](opentelemetry) Remove opentelemetry (#26605)

This commit is contained in:
zhiqiang
2023-11-09 04:05:34 -06:00
committed by GitHub
parent eca747413d
commit a5565f68b2
184 changed files with 90 additions and 1948 deletions

View File

@ -25,7 +25,6 @@ import org.apache.doris.common.LdapConfig;
import org.apache.doris.common.Log4jConfig;
import org.apache.doris.common.ThreadPoolManager;
import org.apache.doris.common.Version;
import org.apache.doris.common.telemetry.Telemetry;
import org.apache.doris.common.util.JdkUtils;
import org.apache.doris.common.util.NetUtils;
import org.apache.doris.httpv2.HttpServer;
@ -168,8 +167,6 @@ public class DorisFE {
Env.getCurrentEnv().initialize(args);
Env.getCurrentEnv().waitForReady();
Telemetry.initOpenTelemetry();
// init and start:
// 1. HttpServer for HTTP Server
// 2. FeServer for Thrift Server

View File

@ -1,48 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.telemetry;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Scope;
/**
* encapsulated {@link Span} and {@link Scope}.
*/
public class ScopedSpan {
private Span span;
private Scope scope;
public ScopedSpan() {
span = Telemetry.getNoopSpan();
this.scope = span.makeCurrent();
}
public ScopedSpan(Span span) {
this.span = span;
this.scope = span.makeCurrent();
}
public Span getSpan() {
return span;
}
public void endSpan() {
scope.close();
span.end();
}
}

View File

@ -1,109 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.telemetry;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.Config;
import io.opentelemetry.api.OpenTelemetry;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.api.trace.propagation.W3CTraceContextPropagator;
import io.opentelemetry.context.propagation.ContextPropagators;
import io.opentelemetry.exporter.otlp.http.trace.OtlpHttpSpanExporter;
import io.opentelemetry.exporter.zipkin.ZipkinSpanExporter;
import io.opentelemetry.sdk.OpenTelemetrySdk;
import io.opentelemetry.sdk.resources.Resource;
import io.opentelemetry.sdk.trace.SdkTracerProvider;
import io.opentelemetry.sdk.trace.export.BatchSpanProcessor;
import io.opentelemetry.sdk.trace.export.SpanExporter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.concurrent.TimeUnit;
/**
 * Manages the process-wide OpenTelemetry SDK instance.
 * Holds a no-op {@link OpenTelemetry} until {@link #initOpenTelemetry()} is
 * called (and keeps the no-op when tracing is disabled in fe.conf), so callers
 * never observe a null OpenTelemetry.
 */
public class Telemetry {
    private static final Logger LOG = LogManager.getLogger(Telemetry.class);

    // Defaults to the no-op implementation; replaced by the real SDK only when
    // tracing is enabled and initOpenTelemetry() succeeds.
    private static OpenTelemetry openTelemetry = OpenTelemetry.noop();

    /** Supported trace exporters; matched case-insensitively against Config.trace_exporter. */
    public enum DorisTraceExporter {
        zipkin, collector
    }

    /**
     * Initialize {@link OpenTelemetry} with {@link SdkTracerProvider}, {@link BatchSpanProcessor},
     * {@link ZipkinSpanExporter} and {@link W3CTraceContextPropagator}.
     *
     * @throws Exception if Config.trace_exporter names an unknown exporter value
     */
    public static void initOpenTelemetry() throws Exception {
        // Tracing is opt-in via fe.conf; leave the no-op SDK in place otherwise.
        if (!Config.enable_tracing) {
            return;
        }
        String traceExportUrl = Config.trace_export_url;
        SpanExporter spanExporter;
        if (DorisTraceExporter.collector.name().equalsIgnoreCase(Config.trace_exporter)) {
            spanExporter = oltpExporter(traceExportUrl);
        } else if (DorisTraceExporter.zipkin.name().equalsIgnoreCase(Config.trace_exporter)) {
            spanExporter = zipkinExporter(traceExportUrl);
        } else {
            throw new Exception("unknown value " + Config.trace_exporter + " of trace_exporter in fe.conf");
        }
        // Tag every span with a service.name identifying this FE node.
        String serviceName = "FRONTEND:" + Env.getCurrentEnv().getSelfNode().getHost();
        Resource serviceNameResource = Resource.create(
                Attributes.of(AttributeKey.stringKey("service.name"), serviceName));
        // Send a batch of spans if ScheduleDelay time or MaxExportBatchSize is reached
        BatchSpanProcessor spanProcessor =
                BatchSpanProcessor.builder(spanExporter).setScheduleDelay(100, TimeUnit.MILLISECONDS)
                        .setMaxExportBatchSize(1000).build();
        SdkTracerProvider tracerProvider = SdkTracerProvider.builder().addSpanProcessor(spanProcessor)
                .setResource(Resource.getDefault().merge(serviceNameResource)).build();
        openTelemetry = OpenTelemetrySdk.builder().setTracerProvider(tracerProvider)
                .setPropagators(ContextPropagators.create(W3CTraceContextPropagator.getInstance())).build();
        // add a shutdown hook to shut down the SDK
        Runtime.getRuntime().addShutdownHook(new Thread(tracerProvider::shutdown));
    }

    /** Builds a Zipkin span exporter posting to the given HTTP endpoint. */
    private static SpanExporter zipkinExporter(String httpUrl) {
        return ZipkinSpanExporter.builder().setEndpoint(httpUrl).build();
    }

    /** Builds an OTLP-over-HTTP span exporter posting to the given endpoint. */
    private static SpanExporter oltpExporter(String httpUrl) {
        return OtlpHttpSpanExporter.builder().setEndpoint(httpUrl).build();
    }

    /** Returns the process-wide OpenTelemetry; no-op until successfully initialized. */
    public static OpenTelemetry getOpenTelemetry() {
        return openTelemetry;
    }

    /** Returns a tracer from the no-op OpenTelemetry; its spans record nothing. */
    public static Tracer getNoopTracer() {
        return OpenTelemetry.noop().getTracer("noop");
    }

    /** Returns a started no-op span, usable as a safe placeholder span. */
    public static Span getNoopSpan() {
        return getNoopTracer().spanBuilder("noopSpan").startSpan();
    }
}

View File

@ -144,8 +144,6 @@ public class ExportTaskExecutor implements TransientTaskExecutor {
exportJob.updateExportJobState(ExportJobState.CANCELLED, taskId, null,
ExportFailMsg.CancelType.RUN_FAIL, e.getMessage());
throw new JobException(e);
} finally {
stmtExecutor.addProfileToSpan();
}
}
if (isCanceled.get()) {

View File

@ -61,9 +61,6 @@ import org.apache.doris.qe.ResultSetMetaData;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.Scope;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -183,19 +180,8 @@ public class NereidsPlanner extends Planner {
try (Lock lock = new Lock(plan, cascadesContext)) {
// resolve column, table and function
Span queryAnalysisSpan =
statementContext.getConnectContext().getTracer()
.spanBuilder("query analysis").setParent(Context.current()).startSpan();
try (Scope scope = queryAnalysisSpan.makeCurrent()) {
// analyze this query
analyze();
} catch (Exception e) {
queryAnalysisSpan.recordException(e);
throw e;
} finally {
queryAnalysisSpan.end();
}
// analyze this query
analyze();
// minidump of input must be serialized first, this process ensure minidump string not null
try {
MinidumpUtils.serializeInputsToDumpFile(plan, cascadesContext.getTables());

View File

@ -228,11 +228,6 @@ public class AuditEvent {
return this;
}
public AuditEventBuilder setTraceId(String traceId) {
auditEvent.traceId = traceId;
return this;
}
public AuditEventBuilder setFuzzyVariables(String variables) {
auditEvent.fuzzyVariables = variables;
return this;

View File

@ -29,9 +29,6 @@ import org.apache.doris.plugin.AuditEvent.EventType;
import org.apache.doris.qe.QueryState.MysqlStateType;
import org.apache.doris.service.FrontendOptions;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanContext;
import io.opentelemetry.context.Context;
import org.apache.commons.codec.digest.DigestUtils;
public class AuditLogHelper {
@ -42,7 +39,6 @@ public class AuditLogHelper {
// slow query
long endTime = System.currentTimeMillis();
long elapseMs = endTime - ctx.getStartTime();
SpanContext spanContext = Span.fromContext(Context.current()).getSpanContext();
ctx.getAuditEventBuilder().setEventType(EventType.AFTER_QUERY)
.setDb(ClusterNamespace.getNameFromFullName(ctx.getDatabase()))
@ -58,7 +54,6 @@ public class AuditLogHelper {
.setReturnRows(ctx.getReturnRows())
.setStmtId(ctx.getStmtId())
.setQueryId(ctx.queryId() == null ? "NaN" : DebugUtil.printId(ctx.queryId()))
.setTraceId(spanContext.isValid() ? spanContext.getTraceId() : "")
.setWorkloadGroup(ctx.getWorkloadGroupName())
.setFuzzyVariables(!printFuzzyVariables ? "" : ctx.getSessionVariable().printFuzzyVariables());

View File

@ -25,7 +25,6 @@ import org.apache.doris.catalog.FunctionRegistry;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.cluster.ClusterNamespace;
import org.apache.doris.common.Config;
import org.apache.doris.common.telemetry.Telemetry;
import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.CatalogIf;
@ -55,7 +54,6 @@ import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import io.opentelemetry.api.trace.Tracer;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONObject;
@ -149,8 +147,6 @@ public class ConnectContext {
// Cache thread info for this connection.
protected volatile ThreadInfo threadInfo;
protected volatile Tracer tracer = Telemetry.getNoopTracer();
// Catalog: put catalog here is convenient for unit test,
// because catalog is singleton, hard to mock
protected Env env;
@ -703,10 +699,6 @@ public class ConnectContext {
}
}
public void setTraceId(String traceId) {
this.traceId = traceId;
}
public String traceId() {
return traceId;
}
@ -739,14 +731,6 @@ public class ConnectContext {
this.minidump = minidump;
}
public Tracer getTracer() {
return tracer;
}
public void initTracer(String name) {
this.tracer = Telemetry.getOpenTelemetry().getTracer(name);
}
public StatementContext getStatementContext() {
return statementContext;
}

View File

@ -34,7 +34,6 @@ import org.apache.doris.common.DdlException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.NotImplementedException;
import org.apache.doris.common.UserException;
import org.apache.doris.common.telemetry.Telemetry;
import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.common.util.SqlParserUtils;
import org.apache.doris.common.util.SqlUtils;
@ -59,11 +58,6 @@ import org.apache.doris.thrift.TUniqueId;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.Scope;
import io.opentelemetry.context.propagation.TextMapGetter;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -72,9 +66,7 @@ import java.io.IOException;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
@ -87,21 +79,6 @@ public abstract class ConnectProcessor {
}
private static final Logger LOG = LogManager.getLogger(ConnectProcessor.class);
protected static final TextMapGetter<Map<String, String>> getter =
new TextMapGetter<Map<String, String>>() {
@Override
public Iterable<String> keys(Map<String, String> carrier) {
return carrier.keySet();
}
@Override
public String get(Map<String, String> carrier, String key) {
if (carrier.containsKey(key)) {
return carrier.get(key);
}
return "";
}
};
protected final ConnectContext ctx;
protected StmtExecutor executor = null;
protected ConnectType connectType;
@ -292,8 +269,6 @@ public abstract class ConnectProcessor {
executor.getQueryStatisticsForAuditLog());
// execute failed, skip remaining stmts
break;
} finally {
executor.addProfileToSpan();
}
}
@ -524,21 +499,6 @@ public abstract class ConnectProcessor {
}
}
Map<String, String> traceCarrier = new HashMap<>();
if (request.isSetTraceCarrier()) {
traceCarrier = request.getTraceCarrier();
}
Context extractedContext = Telemetry.getOpenTelemetry().getPropagators().getTextMapPropagator()
.extract(Context.current(), traceCarrier, getter);
// What we want is for the Traceid to remain unchanged during propagation.
// ctx.initTracer() will be called only if the Context is valid,
// so that the Traceid generated by SDKTracer is the same as the follower. Otherwise,
// if the Context is invalid and ctx.initTracer() is called,
// SDKTracer will generate a different Traceid.
if (Span.fromContext(extractedContext).getSpanContext().isValid()) {
ctx.initTracer("master trace");
}
ctx.setThreadLocalInfo();
StmtExecutor executor = null;
try {
@ -560,6 +520,7 @@ public abstract class ConnectProcessor {
}
}
}
TUniqueId queryId; // This query id will be set in ctx
if (request.isSetQueryId()) {
queryId = request.getQueryId();
@ -567,17 +528,8 @@ public abstract class ConnectProcessor {
UUID uuid = UUID.randomUUID();
queryId = new TUniqueId(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits());
}
Span masterQuerySpan =
ctx.getTracer().spanBuilder("master execute").setParent(extractedContext)
.setSpanKind(SpanKind.SERVER).startSpan();
try (Scope scope = masterQuerySpan.makeCurrent()) {
executor.execute(queryId);
} catch (Exception e) {
masterQuerySpan.recordException(e);
throw e;
} finally {
masterQuerySpan.end();
}
executor.execute(queryId);
} catch (IOException e) {
// Client failed.
LOG.warn("Process one query failed because IOException: ", e);

View File

@ -28,8 +28,6 @@ import org.apache.doris.common.Reference;
import org.apache.doris.common.Status;
import org.apache.doris.common.UserException;
import org.apache.doris.common.profile.ExecutionProfile;
import org.apache.doris.common.telemetry.ScopedSpan;
import org.apache.doris.common.telemetry.Telemetry;
import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.common.util.ListUtil;
import org.apache.doris.common.util.RuntimeProfile;
@ -120,10 +118,6 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.Scope;
import org.apache.commons.lang3.tuple.ImmutableTriple;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.logging.log4j.LogManager;
@ -780,14 +774,8 @@ public class Coordinator implements CoordInterface {
// 4. send and wait fragments rpc
List<Triple<BackendExecStates, BackendServiceProxy, Future<InternalService.PExecPlanFragmentResult>>>
futures = Lists.newArrayList();
Context parentSpanContext = Context.current();
for (BackendExecStates states : beToExecStates.values()) {
Span span = Telemetry.getNoopSpan();
if (ConnectContext.get() != null) {
span = ConnectContext.get().getTracer().spanBuilder("execRemoteFragmentsAsync")
.setParent(parentSpanContext).setSpanKind(SpanKind.CLIENT).startSpan();
}
states.scopedSpan = new ScopedSpan(span);
states.unsetFields();
BackendServiceProxy proxy = BackendServiceProxy.getInstance();
futures.add(ImmutableTriple.of(states, proxy, states.execRemoteFragmentsAsync(proxy)));
@ -798,12 +786,6 @@ public class Coordinator implements CoordInterface {
// 5. send and wait execution start rpc
futures.clear();
for (BackendExecStates states : beToExecStates.values()) {
Span span = Telemetry.getNoopSpan();
if (ConnectContext.get() != null) {
span = ConnectContext.get().getTracer().spanBuilder("execPlanFragmentStartAsync")
.setParent(parentSpanContext).setSpanKind(SpanKind.CLIENT).startSpan();
}
states.scopedSpan = new ScopedSpan(span);
BackendServiceProxy proxy = BackendServiceProxy.getInstance();
futures.add(ImmutableTriple.of(states, proxy, states.execPlanFragmentStartAsync(proxy)));
}
@ -922,14 +904,8 @@ public class Coordinator implements CoordInterface {
// 4. send and wait fragments rpc
List<Triple<PipelineExecContexts, BackendServiceProxy, Future<InternalService.PExecPlanFragmentResult>>>
futures = Lists.newArrayList();
Context parentSpanContext = Context.current();
for (PipelineExecContexts ctxs : beToPipelineExecCtxs.values()) {
Span span = Telemetry.getNoopSpan();
if (ConnectContext.get() != null) {
span = ConnectContext.get().getTracer().spanBuilder("execRemoteFragmentsAsync")
.setParent(parentSpanContext).setSpanKind(SpanKind.CLIENT).startSpan();
}
for (PipelineExecContexts ctxs : beToPipelineExecCtxs.values()) {
if (LOG.isDebugEnabled()) {
String infos = "";
for (PipelineExecContext pec : ctxs.ctxs) {
@ -939,7 +915,6 @@ public class Coordinator implements CoordInterface {
DebugUtil.printId(queryId), infos, ctxs.beId, ctxs.brpcAddr.toString());
}
ctxs.scopedSpan = new ScopedSpan(span);
ctxs.unsetFields();
BackendServiceProxy proxy = BackendServiceProxy.getInstance();
futures.add(ImmutableTriple.of(ctxs, proxy, ctxs.execRemoteFragmentsAsync(proxy)));
@ -950,12 +925,6 @@ public class Coordinator implements CoordInterface {
// 5. send and wait execution start rpc
futures.clear();
for (PipelineExecContexts ctxs : beToPipelineExecCtxs.values()) {
Span span = Telemetry.getNoopSpan();
if (ConnectContext.get() != null) {
span = ConnectContext.get().getTracer().spanBuilder("execPlanFragmentStartAsync")
.setParent(parentSpanContext).setSpanKind(SpanKind.CLIENT).startSpan();
}
ctxs.scopedSpan = new ScopedSpan(span);
BackendServiceProxy proxy = BackendServiceProxy.getInstance();
futures.add(ImmutableTriple.of(ctxs, proxy, ctxs.execPlanFragmentStartAsync(proxy)));
}
@ -981,7 +950,7 @@ public class Coordinator implements CoordInterface {
TStatusCode code;
String errMsg = null;
Exception exception = null;
Span span = triple.getLeft().scopedSpan.getSpan();
try {
PExecPlanFragmentResult result = triple.getRight().get(timeoutMs, TimeUnit.MILLISECONDS);
code = TStatusCode.findByValue(result.getStatus().getStatusCode());
@ -1005,32 +974,25 @@ public class Coordinator implements CoordInterface {
code = TStatusCode.TIMEOUT;
}
try {
if (code != TStatusCode.OK) {
if (exception != null && errMsg == null) {
errMsg = operation + " failed. " + exception.getMessage();
}
queryStatus.setStatus(errMsg);
cancelInternal(Types.PPlanFragmentCancelReason.INTERNAL_ERROR);
switch (code) {
case TIMEOUT:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
case THRIFT_RPC_ERROR:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
SimpleScheduler.addToBlacklist(triple.getLeft().beId, errMsg);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
default:
throw new UserException(errMsg, exception);
}
if (code != TStatusCode.OK) {
if (exception != null && errMsg == null) {
errMsg = operation + " failed. " + exception.getMessage();
}
queryStatus.setStatus(errMsg);
cancelInternal(Types.PPlanFragmentCancelReason.INTERNAL_ERROR);
switch (code) {
case TIMEOUT:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
case THRIFT_RPC_ERROR:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
SimpleScheduler.addToBlacklist(triple.getLeft().beId, errMsg);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
default:
throw new UserException(errMsg, exception);
}
} catch (Exception e) {
span.recordException(e);
throw e;
} finally {
triple.getLeft().scopedSpan.endSpan();
}
}
}
@ -1048,7 +1010,7 @@ public class Coordinator implements CoordInterface {
TStatusCode code;
String errMsg = null;
Exception exception = null;
Span span = triple.getLeft().scopedSpan.getSpan();
try {
PExecPlanFragmentResult result = triple.getRight().get(timeoutMs, TimeUnit.MILLISECONDS);
code = TStatusCode.findByValue(result.getStatus().getStatusCode());
@ -1072,32 +1034,25 @@ public class Coordinator implements CoordInterface {
code = TStatusCode.TIMEOUT;
}
try {
if (code != TStatusCode.OK) {
if (exception != null && errMsg == null) {
errMsg = operation + " failed. " + exception.getMessage();
}
queryStatus.setStatus(errMsg);
cancelInternal(Types.PPlanFragmentCancelReason.INTERNAL_ERROR);
switch (code) {
case TIMEOUT:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
case THRIFT_RPC_ERROR:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
SimpleScheduler.addToBlacklist(triple.getLeft().beId, errMsg);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
default:
throw new UserException(errMsg, exception);
}
if (code != TStatusCode.OK) {
if (exception != null && errMsg == null) {
errMsg = operation + " failed. " + exception.getMessage();
}
queryStatus.setStatus(errMsg);
cancelInternal(Types.PPlanFragmentCancelReason.INTERNAL_ERROR);
switch (code) {
case TIMEOUT:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
case THRIFT_RPC_ERROR:
MetricRepo.BE_COUNTER_QUERY_RPC_FAILED.getOrAdd(triple.getLeft().brpcAddr.hostname)
.increase(1L);
SimpleScheduler.addToBlacklist(triple.getLeft().beId, errMsg);
throw new RpcException(triple.getLeft().brpcAddr.hostname, errMsg, exception);
default:
throw new UserException(errMsg, exception);
}
} catch (Exception e) {
span.recordException(e);
throw e;
} finally {
triple.getLeft().scopedSpan.endSpan();
}
}
}
@ -2490,7 +2445,7 @@ public class Coordinator implements CoordInterface {
// (UpdateStatus() initiates cancellation, if it hasn't already been initiated)
if (!(returnedAllResults && status.isCancelled()) && !status.ok()) {
LOG.warn("one instance report fail, query_id={} fragment_id={} instance_id={}, be={},"
+ " error message: {}",
+ " error message: {}",
DebugUtil.printId(queryId), params.getFragmentId(),
DebugUtil.printId(params.getFragmentInstanceId()),
params.getBackendId(), status.getErrorMsg());
@ -2516,11 +2471,11 @@ public class Coordinator implements CoordInterface {
updateErrorTabletInfos(params.getErrorTabletInfos());
}
LOG.debug("Query {} instance {} is marked done",
DebugUtil.printId(queryId), DebugUtil.printId(params.getFragmentInstanceId()));
DebugUtil.printId(queryId), DebugUtil.printId(params.getFragmentInstanceId()));
executionProfile.markOneInstanceDone(params.getFragmentInstanceId());
} else {
LOG.debug("Query {} instance {} is not marked done",
DebugUtil.printId(queryId), DebugUtil.printId(params.getFragmentInstanceId()));
DebugUtil.printId(queryId), DebugUtil.printId(params.getFragmentInstanceId()));
}
} else {
if (params.backend_num >= backendExecStates.size()) {
@ -2998,20 +2953,13 @@ public class Coordinator implements CoordInterface {
return false;
}
Span span = ConnectContext.get() != null
? ConnectContext.get().getTracer().spanBuilder("cancelPlanFragmentAsync")
.setParent(Context.current()).setSpanKind(SpanKind.CLIENT).startSpan()
: Telemetry.getNoopSpan();
try (Scope scope = span.makeCurrent()) {
try {
BackendServiceProxy.getInstance().cancelPlanFragmentAsync(brpcAddress,
fragmentInstanceId(), cancelReason);
} catch (RpcException e) {
span.recordException(e);
LOG.warn("cancel plan fragment get a exception, address={}:{}", brpcAddress.getHostname(),
brpcAddress.getPort());
SimpleScheduler.addToBlacklist(addressToBackendID.get(brpcAddress), e.getMessage());
} finally {
span.end();
}
this.hasCanceled = true;
@ -3194,20 +3142,13 @@ public class Coordinator implements CoordInterface {
this.hasCanceled = true;
try {
Span span = ConnectContext.get() != null
? ConnectContext.get().getTracer().spanBuilder("cancelPlanFragmentAsync")
.setParent(Context.current()).setSpanKind(SpanKind.CLIENT).startSpan()
: Telemetry.getNoopSpan();
try (Scope scope = span.makeCurrent()) {
try {
BackendServiceProxy.getInstance().cancelPlanFragmentAsync(brpcAddress,
localParam.fragment_instance_id, cancelReason);
} catch (RpcException e) {
span.recordException(e);
LOG.warn("cancel plan fragment get a exception, address={}:{}", brpcAddress.getHostname(),
brpcAddress.getPort());
SimpleScheduler.addToBlacklist(addressToBackendID.get(brpcAddress), e.getMessage());
} finally {
span.end();
}
} catch (Exception e) {
LOG.warn("catch a exception", e);
@ -3258,7 +3199,6 @@ public class Coordinator implements CoordInterface {
TNetworkAddress brpcAddr;
List<BackendExecState> states = Lists.newArrayList();
boolean twoPhaseExecution = false;
ScopedSpan scopedSpan = new ScopedSpan();
long beProcessEpoch = 0;
public BackendExecStates(long beId, TNetworkAddress brpcAddr, boolean twoPhaseExecution, long beProcessEpoch) {
@ -3357,7 +3297,6 @@ public class Coordinator implements CoordInterface {
TNetworkAddress brpcAddr;
List<PipelineExecContext> ctxs = Lists.newArrayList();
boolean twoPhaseExecution = false;
ScopedSpan scopedSpan = new ScopedSpan();
int instanceNumber;
public PipelineExecContexts(long beId, TNetworkAddress brpcAddr, boolean twoPhaseExecution,

View File

@ -21,7 +21,6 @@ import org.apache.doris.analysis.RedirectStatus;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.ClientPool;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.telemetry.Telemetry;
import org.apache.doris.thrift.FrontendService;
import org.apache.doris.thrift.TMasterOpRequest;
import org.apache.doris.thrift.TMasterOpResult;
@ -29,16 +28,12 @@ import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TUniqueId;
import com.google.common.collect.ImmutableMap;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.Scope;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.thrift.TException;
import org.apache.thrift.transport.TTransportException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
public class MasterOpExecutor {
@ -77,17 +72,7 @@ public class MasterOpExecutor {
}
public void execute() throws Exception {
Span forwardSpan =
ctx.getTracer().spanBuilder("forward").setParent(Context.current())
.startSpan();
try (Scope ignored = forwardSpan.makeCurrent()) {
result = forward(buildStmtForwardParams());
} catch (Exception e) {
forwardSpan.recordException(e);
throw e;
} finally {
forwardSpan.end();
}
result = forward(buildStmtForwardParams());
waitOnReplaying();
}
@ -179,14 +164,6 @@ public class MasterOpExecutor {
// session variables
params.setSessionVariables(ctx.getSessionVariable().getForwardVariables());
// create a trace carrier
Map<String, String> traceCarrier = new HashMap<>();
// Inject the request with the current context
Telemetry.getOpenTelemetry().getPropagators().getTextMapPropagator()
.inject(Context.current(), traceCarrier, (carrier, key, value) -> carrier.put(key, value));
// carrier send tracing to master
params.setTraceCarrier(traceCarrier);
if (null != ctx.queryId()) {
params.setQueryId(ctx.queryId());
}

View File

@ -27,8 +27,6 @@ import org.apache.doris.mysql.MysqlChannel;
import org.apache.doris.mysql.MysqlCommand;
import org.apache.doris.mysql.MysqlProto;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Scope;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -186,16 +184,7 @@ public class MysqlConnectProcessor extends ConnectProcessor {
case COM_QUERY:
case COM_STMT_PREPARE:
// Process COM_QUERY statement,
ctx.initTracer("trace");
Span rootSpan = ctx.getTracer().spanBuilder("handleQuery").setNoParent().startSpan();
try (Scope scope = rootSpan.makeCurrent()) {
handleQuery(command);
} catch (Exception e) {
rootSpan.recordException(e);
throw e;
} finally {
rootSpan.end();
}
handleQuery(command);
break;
case COM_STMT_EXECUTE:
handleExecute();

View File

@ -178,9 +178,6 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.Scope;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.thrift.TException;
@ -338,16 +335,6 @@ public class StmtExecutor {
return builder.build();
}
public void addProfileToSpan() {
Span span = Span.fromContext(Context.current());
if (!span.isRecording()) {
return;
}
for (Map.Entry<String, String> entry : getSummaryInfo(true).entrySet()) {
span.setAttribute(entry.getKey(), entry.getValue());
}
}
public Planner planner() {
return planner;
}
@ -448,11 +435,11 @@ public class StmtExecutor {
public void execute(TUniqueId queryId) throws Exception {
SessionVariable sessionVariable = context.getSessionVariable();
Span executeSpan = context.getTracer().spanBuilder("execute").setParent(Context.current()).startSpan();
if (context.getConnectType() == ConnectType.ARROW_FLIGHT_SQL) {
context.setReturnResultFromLocal(true);
}
try (Scope scope = executeSpan.makeCurrent()) {
try {
if (parsedStmt instanceof LogicalPlanAdapter
|| (parsedStmt == null && sessionVariable.isEnableNereidsPlanner())) {
try {
@ -479,7 +466,6 @@ public class StmtExecutor {
executeByLegacy(queryId);
}
} finally {
executeSpan.end();
// revert Session Value
try {
VariableMgr.revertSessionValue(sessionVariable);
@ -712,17 +698,9 @@ public class StmtExecutor {
analyzeVariablesInStmt();
if (!context.isTxnModel()) {
Span queryAnalysisSpan =
context.getTracer().spanBuilder("query analysis").setParent(Context.current()).startSpan();
try (Scope ignored = queryAnalysisSpan.makeCurrent()) {
// analyze this query
analyze(context.getSessionVariable().toThrift());
} catch (Exception e) {
queryAnalysisSpan.recordException(e);
throw e;
} finally {
queryAnalysisSpan.end();
}
// analyze this query
analyze(context.getSessionVariable().toThrift());
if (isForwardToMaster()) {
if (context.getCommand() == MysqlCommand.COM_STMT_PREPARE) {
throw new UserException("Forward master command is not supported for prepare statement");
@ -1474,16 +1452,9 @@ public class StmtExecutor {
profile.addExecutionProfile(coord.getExecutionProfile());
coordBase = coord;
}
Span queryScheduleSpan =
context.getTracer().spanBuilder("query schedule").setParent(Context.current()).startSpan();
try (Scope scope = queryScheduleSpan.makeCurrent()) {
coordBase.exec();
} catch (Exception e) {
queryScheduleSpan.recordException(e);
throw e;
} finally {
queryScheduleSpan.end();
}
coordBase.exec();
profile.getSummaryProfile().setQueryScheduleFinishTime();
updateProfile(false);
if (coordBase.getInstanceTotalNum() > 1 && LOG.isDebugEnabled()) {
@ -1513,8 +1484,7 @@ public class StmtExecutor {
return;
}
Span fetchResultSpan = context.getTracer().spanBuilder("fetch result").setParent(Context.current()).startSpan();
try (Scope scope = fetchResultSpan.makeCurrent()) {
try {
while (true) {
// register the fetch result time.
profile.getSummaryProfile().setTempStartTime();
@ -1587,10 +1557,8 @@ public class StmtExecutor {
// details see issue https://github.com/apache/doris/issues/16203
LOG.warn("cancel fragment query_id:{} cause {}", DebugUtil.printId(context.queryId()), e.getMessage());
coordBase.cancel(Types.PPlanFragmentCancelReason.INTERNAL_ERROR);
fetchResultSpan.recordException(e);
throw e;
} finally {
fetchResultSpan.end();
if (coordBase.getInstanceTotalNum() > 1 && LOG.isDebugEnabled()) {
try {
LOG.debug("Finish to execute fragment. user: {}, db: {}, sql: {}, fragment instance num: {}",
@ -2712,19 +2680,13 @@ public class StmtExecutor {
throw new RuntimeException("Failed to execute internal SQL. " + Util.getRootCauseMessage(e), e);
}
Span queryScheduleSpan = context.getTracer()
.spanBuilder("internal SQL schedule").setParent(Context.current()).startSpan();
try (Scope scope = queryScheduleSpan.makeCurrent()) {
try {
coord.exec();
} catch (Exception e) {
queryScheduleSpan.recordException(e);
throw new InternalQueryExecutionException(e.getMessage() + Util.getRootCauseMessage(e), e);
} finally {
queryScheduleSpan.end();
}
Span fetchResultSpan = context.getTracer().spanBuilder("fetch internal SQL result")
.setParent(Context.current()).startSpan();
try (Scope scope = fetchResultSpan.makeCurrent()) {
try {
while (true) {
batch = coord.getNext();
if (batch == null || batch.isEos()) {
@ -2734,10 +2696,7 @@ public class StmtExecutor {
}
}
} catch (Exception e) {
fetchResultSpan.recordException(e);
throw new RuntimeException("Failed to fetch internal SQL result. " + Util.getRootCauseMessage(e), e);
} finally {
fetchResultSpan.end();
}
} finally {
AuditLogHelper.logAuditLog(context, originStmt.toString(), parsedStmt, getQueryStatisticsForAuditLog(),

View File

@ -63,10 +63,6 @@ public class VariableVarCallbacks {
if (innerParts.length != 2) {
continue;
}
if (innerParts[0].equals("trace_id")) {
ConnectContext.get().setTraceId(innerParts[1]);
break;
}
}
}
}

View File

@ -18,22 +18,13 @@
package org.apache.doris.rpc;
import org.apache.doris.common.Config;
import org.apache.doris.common.telemetry.Telemetry;
import org.apache.doris.proto.InternalService;
import org.apache.doris.proto.PBackendServiceGrpc;
import org.apache.doris.thrift.TNetworkAddress;
import io.grpc.CallOptions;
import io.grpc.Channel;
import io.grpc.ClientCall;
import io.grpc.ClientInterceptor;
import io.grpc.ConnectivityState;
import io.grpc.ForwardingClientCall;
import io.grpc.ManagedChannel;
import io.grpc.Metadata;
import io.grpc.MethodDescriptor;
import io.grpc.netty.NettyChannelBuilder;
import io.opentelemetry.context.Context;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -58,7 +49,7 @@ public class BackendServiceClient {
.flowControlWindow(Config.grpc_max_message_size_bytes)
.keepAliveWithoutCalls(true)
.maxInboundMessageSize(Config.grpc_max_message_size_bytes).enableRetry().maxRetryAttempts(MAX_RETRY_NUM)
.intercept(new OpenTelemetryClientInterceptor()).usePlaintext().build();
.usePlaintext().build();
stub = PBackendServiceGrpc.newFutureStub(channel);
blockingStub = PBackendServiceGrpc.newBlockingStub(channel);
// execPlanTimeout should be greater than future.get timeout, otherwise future will throw ExecutionException
@ -69,8 +60,8 @@ public class BackendServiceClient {
public boolean isNormalState() {
ConnectivityState state = channel.getState(false);
return state == ConnectivityState.CONNECTING
|| state == ConnectivityState.IDLE
|| state == ConnectivityState.READY;
|| state == ConnectivityState.IDLE
|| state == ConnectivityState.READY;
}
public Future<InternalService.PExecPlanFragmentResult> execPlanFragmentAsync(
@ -195,25 +186,4 @@ public class BackendServiceClient {
LOG.warn("shut down backend service client: {}", address);
}
/**
* OpenTelemetry span interceptor.
*/
public static class OpenTelemetryClientInterceptor implements ClientInterceptor {
@Override
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(MethodDescriptor<ReqT, RespT> methodDescriptor,
CallOptions callOptions, Channel channel) {
return new ForwardingClientCall.SimpleForwardingClientCall<ReqT, RespT>(
channel.newCall(methodDescriptor, callOptions)) {
@Override
public void start(Listener<RespT> responseListener, Metadata headers) {
// Inject the request with the current context
Telemetry.getOpenTelemetry().getPropagators().getTextMapPropagator()
.inject(Context.current(), headers, (carrier, key, value) -> carrier.put(
Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER), value));
super.start(responseListener, headers);
}
};
}
}
}

View File

@ -34,8 +34,6 @@ import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TStatusCode;
import org.apache.doris.thrift.TUniqueId;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Scope;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.VectorSchemaRoot;
@ -86,16 +84,7 @@ public class FlightSqlConnectProcessor extends ConnectProcessor implements AutoC
prepare(command);
ctx.setRunningQuery(query);
ctx.initTracer("trace");
Span rootSpan = ctx.getTracer().spanBuilder("handleQuery").setNoParent().startSpan();
try (Scope scope = rootSpan.makeCurrent()) {
handleQuery(command, query);
} catch (Exception e) {
rootSpan.recordException(e);
throw e;
} finally {
rootSpan.end();
}
handleQuery(command, query);
}
// TODO