[feature](tracing) Support query tracing to improve doris observability by introducing OpenTelemetry. (#10533)

Query trace collection is implemented in both the frontend (FE) and the backend (BE), and the resulting spans are exported to Zipkin.
DSIP: https://cwiki.apache.org/confluence/display/DORIS/DSIP-012%3A+Introduce+opentelemetry
This commit is contained in:
luozenglin
2022-07-09 15:50:40 +08:00
committed by GitHub
parent 1112dba525
commit d5ea677282
61 changed files with 1119 additions and 110 deletions

View File

@ -836,6 +836,7 @@ Status HashJoinNode::close(RuntimeState* state) {
return Status::OK();
}
START_AND_SCOPE_SPAN(state->get_tracer(), span, "HashJoinNode::close");
VExpr::close(_build_expr_ctxs, state);
VExpr::close(_probe_expr_ctxs, state);
if (_vother_join_conjunct_ptr) {
@ -852,6 +853,7 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eo
}
Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eos) {
INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span, "HashJoinNode::get_next");
SCOPED_TIMER(_runtime_profile->total_time_counter());
SCOPED_TIMER(_probe_timer);
@ -872,7 +874,8 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo
do {
SCOPED_TIMER(_probe_next_timer);
RETURN_IF_ERROR(child(0)->get_next(state, &_probe_block, &_probe_eos));
RETURN_IF_ERROR_AND_CHECK_SPAN(child(0)->get_next(state, &_probe_block, &_probe_eos),
child(0)->get_next_span(), _probe_eos);
} while (_probe_block.rows() == 0 && !_probe_eos);
probe_rows = _probe_block.rows();
@ -983,6 +986,7 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo
}
Status HashJoinNode::open(RuntimeState* state) {
START_AND_SCOPE_SPAN(state->get_tracer(), span, "HashJoinNode::open");
SCOPED_TIMER(_runtime_profile->total_time_counter());
SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker());
RETURN_IF_ERROR(ExecNode::open(state));
@ -995,8 +999,11 @@ Status HashJoinNode::open(RuntimeState* state) {
}
std::promise<Status> thread_status;
std::thread(bind(&HashJoinNode::_hash_table_build_thread, this, state, &thread_status))
.detach();
std::thread([this, state, thread_status_p = &thread_status,
parent_span = opentelemetry::trace::Tracer::GetCurrentSpan()] {
OpentelemetryScope scope {parent_span};
this->_hash_table_build_thread(state, thread_status_p);
}).detach();
// Open the probe-side child so that it may perform any initialisation in parallel.
// Don't exit even if we see an error, we still need to wait for the build thread
@ -1010,6 +1017,7 @@ Status HashJoinNode::open(RuntimeState* state) {
}
// Body of the build-side worker: HashJoinNode::open launches this on a detached
// std::thread and hands it a pointer to the promise it later waits on.
//
// state:  runtime state of the executing fragment; supplies the tracer and is
//         forwarded to _hash_table_build().
// status: promise that receives the Status returned by _hash_table_build().
void HashJoinNode::_hash_table_build_thread(RuntimeState* state, std::promise<Status>* status) {
    // Open a span named after this function on the state's tracer.
    // NOTE(review): presumably the span stays active until this scope ends — confirm
    // against the START_AND_SCOPE_SPAN macro definition.
    START_AND_SCOPE_SPAN(state->get_tracer(), span, "HashJoinNode::_hash_table_build_thread");
    // NOTE(review): presumably binds this worker thread to the task and this node's
    // memory tracker for the rest of the scope — confirm against the macro definition.
    SCOPED_ATTACH_TASK_THREAD(state, mem_tracker());

    // Run the build, then publish its outcome through the promise.
    Status build_result = _hash_table_build(state);
    status->set_value(std::move(build_result));
}
@ -1032,7 +1040,8 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
block.clear_column_data();
RETURN_IF_CANCELLED(state);
RETURN_IF_ERROR(child(1)->get_next(state, &block, &eos));
RETURN_IF_ERROR_AND_CHECK_SPAN(child(1)->get_next(state, &block, &eos),
child(1)->get_next_span(), eos);
_hash_table_mem_tracker->consume(block.allocated_bytes());
_mem_used += block.allocated_bytes();