[chore](error status) print error stack when rpc error (#14473)

Currently, BE will print fail to get master client from cache. host=xxxxx, port=9228, code=THRIFT_RPC_ERROR but we did not know which step generate this error. So that I refactor error status in be and add error stack for RPC_ERROR.

W1122 10:19:21.130796 30405 utils.cpp:89] fail to get master client from cache. host=xxxx, port=9228, code=RPC error(error -1): Couldn't open transport for xxxx:9228 (open() timed out)/n @ 0x559af8f774ea doris::Status::ConstructErrorStatus()
@ 0x559af9aacbee _ZN5doris16ThriftClientImpl4openEv.cold
@ 0x559af97f563a doris::ClientCacheHelper::_create_client()
@ 0x559af97f78cd doris::ClientCacheHelper::get_client()
@ 0x559af934f38b doris::MasterServerClient::report()
@ 0x559af932e7a7 doris::TaskWorkerPool::_handle_report()
@ 0x559af932f07c doris::TaskWorkerPool::_report_task_worker_thread_callback()
@ 0x559af9b223c5 doris::ThreadPool::dispatch_thread()
@ 0x559af9b187af doris::Thread::supervise_thread()
@ 0x7f661bd8bea5 start_thread
@ 0x7f661c09eb0d __clone

Co-authored-by: yiguolei <yiguolei@gmail.com>
This commit is contained in:
yiguolei
2022-11-22 14:29:28 +08:00
committed by GitHub
parent 6e3716e0ea
commit f72c63e4bb
3 changed files with 41 additions and 12 deletions

View File

@ -89,7 +89,7 @@ Status MasterServerClient::report(const TReportRequest& request, TMasterResult*
LOG(WARNING) << "fail to get master client from cache. "
<< "host=" << _master_info.network_address.hostname
<< ", port=" << _master_info.network_address.port
<< ", code=" << client_status.code();
<< ", code=" << client_status;
return Status::InternalError("Fail to get master client from cache");
}

View File

@ -63,7 +63,15 @@ Status::Status(const PStatus& s) {
}
}
Status Status::ConstructErrorStatus(int16_t precise_code) {
// A wrapper for ErrorCode
// Precise code is for ErrorCode's enum value
// All Status Error is treated as Internal Error
Status Status::OLAPInternalError(int16_t precise_code, std::string_view msg) {
return ConstructErrorStatus(TStatusCode::INTERNAL_ERROR, precise_code, msg);
}
Status Status::ConstructErrorStatus(TStatusCode::type tcode, int16_t precise_code,
std::string_view msg) {
// This will print all error status's stack, it maybe too many, but it is just used for debug
#ifdef PRINT_ALL_ERR_STATUS_STACKTRACE
LOG(WARNING) << "Error occurred, error code = " << precise_code << ", with message: " << msg
@ -72,9 +80,17 @@ Status Status::ConstructErrorStatus(int16_t precise_code) {
if (error_states[abs(precise_code)].stacktrace) {
// Add stacktrace as part of message, could use LOG(WARN) << "" << status will print both
// the error message and the stacktrace
return Status(TStatusCode::INTERNAL_ERROR, get_stack_trace(), precise_code);
if (msg.empty()) {
return Status(tcode, get_stack_trace(), precise_code);
} else {
return Status(tcode, std::string(msg) + "/n" + get_stack_trace(), precise_code);
}
} else {
return Status(TStatusCode::INTERNAL_ERROR, std::string_view(), precise_code);
if (msg.empty()) {
return Status(tcode, std::string_view(), precise_code);
} else {
return Status(tcode, msg, precise_code);
}
}
}

View File

@ -264,14 +264,14 @@ public:
Status(const PStatus& pstatus);
// Not allow user create status using constructors, could only use util methods
private:
Status(TStatusCode::type code, std::string_view msg, int16_t precise_code = 1)
: _code(code), _precise_code(precise_code), _err_msg(msg) {}
Status(TStatusCode::type code, std::string&& msg, int16_t precise_code = 1)
: _code(code), _precise_code(precise_code), _err_msg(std::move(msg)) {}
static Status OK() { return Status(); }
template <typename... Args>
static Status ErrorFmt(TStatusCode::type code, std::string_view fmt, Args&&... args) {
// In some cases, fmt contains '{}' but there are no args.
@ -282,6 +282,20 @@ public:
}
}
template <typename... Args>
static Status ErrorFmtWithStackTrace(TStatusCode::type code, std::string_view fmt,
Args&&... args) {
// In some cases, fmt contains '{}' but there are no args.
if constexpr (sizeof...(args) == 0) {
return ConstructErrorStatus(code, -1, fmt);
} else {
return ConstructErrorStatus(code, -1, fmt::format(fmt, std::forward<Args>(args)...));
}
}
public:
static Status OK() { return Status(); }
template <typename... Args>
static Status PublishTimeout(std::string_view fmt, Args&&... args) {
return ErrorFmt(TStatusCode::PUBLISH_TIMEOUT, fmt, std::forward<Args>(args)...);
@ -355,7 +369,8 @@ public:
template <typename... Args>
static Status RpcError(std::string_view fmt, Args&&... args) {
return ErrorFmt(TStatusCode::THRIFT_RPC_ERROR, fmt, std::forward<Args>(args)...);
return ErrorFmtWithStackTrace(TStatusCode::THRIFT_RPC_ERROR, fmt,
std::forward<Args>(args)...);
}
template <typename... Args>
@ -391,17 +406,15 @@ public:
// A wrapper for ErrorCode
// Precise code is for ErrorCode's enum value
// All Status Error is treated as Internal Error
static Status OLAPInternalError(int16_t precise_code) {
return ConstructErrorStatus(precise_code);
}
static Status OLAPInternalError(int16_t precise_code, std::string_view msg = "");
static Status ConstructErrorStatus(int16_t precise_code);
static Status ConstructErrorStatus(TStatusCode::type tcode, int16_t precise_code,
std::string_view msg);
bool ok() const { return _code == TStatusCode::OK; }
bool is_cancelled() const { return code() == TStatusCode::CANCELLED; }
bool is_mem_limit_exceeded() const { return code() == TStatusCode::MEM_LIMIT_EXCEEDED; }
bool is_rpc_error() const { return code() == TStatusCode::THRIFT_RPC_ERROR; }
bool is_end_of_file() const { return code() == TStatusCode::END_OF_FILE; }
bool is_not_found() const { return code() == TStatusCode::NOT_FOUND; }
bool is_already_exist() const { return code() == TStatusCode::ALREADY_EXIST; }