Files
oceanbase/deps/oblib/src/lib/signal/ob_signal_handlers.cpp
2024-02-04 09:12:28 +00:00

292 lines
9.6 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX COMMON
#define _GNU_SOURCE 1
#include "lib/signal/ob_signal_handlers.h"
#include <sys/prctl.h>
#include <dirent.h>
#include <unistd.h>
#include <fstream>
#include <sys/wait.h>
#include "lib/profile/ob_trace_id.h"
#include "lib/utility/utility.h"
#include "lib/signal/ob_libunwind.h"
#include "lib/signal/ob_signal_struct.h"
#include "lib/signal/ob_signal_utils.h"
#include "lib/signal/ob_signal_worker.h"
#include "lib/utility/ob_hang_fatal_error.h"
#include "common/ob_common_utility.h"
namespace oceanbase
{
namespace common
{
// Minimum gap that faststack() enforces between two stack captures. The value
// is 30 minutes written out as microseconds; it is compared against deltas of
// ObTimeUtility::fast_current_time(). Not const, so presumably tunable from
// elsewhere -- verify against other translation units.
int64_t FASTSTACK_MIN_INTERVAL = 30 * 60 * 1000 * 1000; // 30min
// Signals for which install_ob_signal_handler() registers the common handler.
static const int SIG_SET[] = {SIGABRT, SIGBUS, SIGFPE, SIGSEGV, SIGURG};
// Optional on-disk helper scripts (relative to the current working directory).
// minicoredump()/faststack() run these through `sh` when they are readable and
// executable, and fall back to the inline scripts below when they are not readable.
static constexpr char MINICORE_SHELL_PATH[] = "tools/minicore.sh";
static constexpr char FASTSTACK_SHELL_PATH[] = "tools/callstack.sh";
// Inline fallback for minicoredump(): if bin/minicore.py exists, run it against
// the pid stored in run/observer.pid and write core.<pid>.mini; afterwards, when
// more than 5 core.*.mini files exist, delete the oldest one.
static constexpr char MINICORE_SCRIPT[] = "if [ -e bin/minicore.py ]; then\n"
" python bin/minicore.py `cat $(pwd)/run/observer.pid` -c -o core.`cat $(pwd)/run/observer.pid`.mini\n"
"fi\n"
"[ $(ls -1 core.*.mini 2>/dev/null | wc -l) -gt 5 ] && ls -1 core.*.mini -t | tail -n 1 | xargs rm -f";
// Inline fallback for faststack(): if an `obstack` command is available, dump
// the stacks of the pid stored in run/observer.pid into
// stack.<pid>.<YYYYmmddHHMMSS>; afterwards, when more than 100 stack.* files
// exist, delete the oldest one.
static constexpr char FASTSTACK_SCRIPT[] = "if [ -x \"$(command -v obstack)\" ]; then\n"
" obstack `cat $(pwd)/run/observer.pid` > stack.`cat $(pwd)/run/observer.pid`.`date +%Y%m%d%H%M%S`\n"
"fi\n"
"[ $(ls -1 stack.* 2>/dev/null | wc -l) -gt 100 ] && ls -1 stack.* -t | tail -n 1 | xargs rm -f";
static inline void handler(int sig, siginfo_t *s, void *p)
{
if (get_signal_handler() != nullptr) {
get_signal_handler()(sig, s, p);
}
}
/**
 * Registers `handler` for every signal listed in SIG_SET.
 *
 * The action is installed with SA_SIGINFO | SA_RESTART | SA_NODEFER |
 * SA_ONSTACK and an empty signal mask.
 *
 * @return OB_SUCCESS when every registration succeeded; OB_INIT_FAIL as soon
 *         as one sigaction() call fails (remaining signals are not attempted).
 */
int install_ob_signal_handler()
{
  int ret = OB_SUCCESS;
  struct sigaction action;
  sigemptyset(&action.sa_mask);
  action.sa_sigaction = handler;
  action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER | SA_ONSTACK;
  for (const int signo : SIG_SET) {
    if (-1 == sigaction(signo, &action, nullptr)) {
      ret = OB_INIT_FAIL;
      break;  // stop at the first failure
    }
  }
  return ret;
}
/**
 * Returns a mutable reference to the handler slot consulted by the signal
 * trampoline. The slot starts out pointing at ob_signal_handler.
 *
 * NOTE(review): the slot is declared through the RLOCAL macro, which is
 * presumably thread-local storage -- confirm against the macro definition.
 */
signal_handler_t &get_signal_handler()
{
  // Small holder whose constructor supplies the default handler.
  struct HandlerSlot {
    HandlerSlot() : fn_(ob_signal_handler) {}
    signal_handler_t fn_;
  };
  RLOCAL(HandlerSlot, tl_slot);
  return (&tl_slot)->fn_;
}
// When false, ob_signal_handler() restores the default disposition and
// re-raises the signal instead of running the custom crash/dispatch logic.
bool g_redirect_handler = false;
// Per-thread count of coredump_cb() entries; only the first entry on a thread
// performs the crash reporting work.
static __thread int g_coredump_num = 0;
// Shared tail of the crash log line written by coredump_cb(): timestamp,
// thread id/name, trace id, extra info and the backtrace string.
#define COMMON_FMT "timestamp=%ld, tid=%ld, tname=%s, trace_id="TRACE_ID_FORMAT_V2", extra_info=(%s), lbt=%s"
// Forward declaration; arguments are (signal, si_code, faulting address, ucontext).
void coredump_cb(int, int, void*, void*);
/**
 * Default per-thread signal handler.
 *
 * - Redirection disabled (g_redirect_handler == false): restore the default
 *   disposition for `sig` and raise it again.
 * - `sig` is MP_SIG: under the g_sig_handler_ctx_ lock, run the context's
 *   handler, but only when the request id carried in si_value matches the
 *   one recorded in the context.
 * - Any other signal: hand over to coredump_cb().
 *
 * @param sig      signal number
 * @param si       signal details (si_value, si_code, si_addr are read)
 * @param context  ucontext pointer, forwarded to coredump_cb()
 */
void ob_signal_handler(int sig, siginfo_t *si, void *context)
{
  if (!g_redirect_handler) {
    signal(sig, SIG_DFL);
    raise(sig);
    return;
  }
  if (MP_SIG != sig) {
    coredump_cb(sig, si->si_code, si->si_addr, context);
    return;
  }
  auto &ctx = g_sig_handler_ctx_;
  ctx.lock();
  DEFER(ctx.unlock());
  const int64_t req_id = (int64_t)si->si_value.sival_ptr;
  // A mismatching id means this delivery is not for the recorded request; skip it.
  if (ctx.req_id_ == req_id) {
    ctx.handler_->handle(ctx);
  }
}
/**
 * Alternative SIGSEGV-style handler: asks mprotect_page() to make the 8 bytes
 * at the faulting address readable and writable. If that succeeds, only an
 * error log line is emitted; if it fails, the regular crash path
 * coredump_cb() is taken.
 *
 * @param sig      signal number
 * @param si       signal details (si_addr, si_code are read)
 * @param context  ucontext pointer, forwarded to coredump_cb() on failure
 */
void hook_sigsegv_msg(int sig, siginfo_t *si, void *context)
{
  const bool released =
      (0 == mprotect_page(si->si_addr, 8, PROT_READ | PROT_WRITE, "release signal addr"));
  if (released) {
    // Name of the current thread, for the log line.
    char thread_name[16];
    prctl(PR_GET_NAME, thread_name);
    _OB_LOG_RET(ERROR, OB_ERROR, "CRASH ERROR!!! sig=%d, sig_code=%d, sig_addr=%p, tid=%ld, tname=%s",
                sig, si->si_code, si->si_addr, GETTID(), thread_name);
  } else {
    coredump_cb(sig, si->si_code, si->si_addr, context);
  }
}
/**
 * Closes every socket file descriptor owned by this process.
 *
 * Walks /proc/<pid>/fd/, skips ".", ".." and the standard descriptors 0/1/2,
 * resolves each remaining entry with readlink() and closes the descriptor
 * when the link target contains "socket". Failures are best-effort: an
 * unreadable directory is silently ignored and an unreadable link is only
 * logged.
 */
void close_socket_fd()
{
  char path[32];
  char name[32];
  char real_name[32];
  DIR *dir = nullptr;
  struct dirent *fd_file = nullptr;
  const int pid = getpid();
  lnprintf(path, sizeof(path), "/proc/%d/fd/", pid);
  if (nullptr != (dir = opendir(path))) {
    while (nullptr != (fd_file = readdir(dir))) {
      const char *entry = fd_file->d_name;
      if (0 == strcmp(entry, ".") || 0 == strcmp(entry, "..")
          || 0 == strcmp(entry, "0") || 0 == strcmp(entry, "1")
          || 0 == strcmp(entry, "2")) {
        continue;
      }
      lnprintf(name, sizeof(name), "/proc/%d/fd/%s", pid, entry);
      // readlink() does not append a NUL byte: keep one byte in reserve and
      // terminate at the returned length before treating the result as a string.
      const ssize_t link_len = readlink(name, real_name, sizeof(real_name) - 1);
      if (link_len < 0) {
        DLOG(INFO, "[CLOSEFD], err read link %s, errno = %d", name, errno);
      } else {
        real_name[link_len] = '\0';
        if (nullptr != strstr(real_name, "socket")) {
          close(atoi(entry));
        }
      }
    }
    DLOG(INFO, "[CLOSEFD], close socket fd finish");
    closedir(dir);
  }
}
void coredump_cb(volatile int sig, volatile int sig_code, void* volatile sig_addr, void *context)
{
int ret = OB_SUCCESS;
if (g_coredump_num++ < 1) {
pid_t pid;
close_socket_fd();
ret = minicoredump(sig, GETTID(), pid);
//send_request_and_wait(VERB_LEVEL_2,
// syscall(SYS_gettid)/*exclude_id*/);
// parent or fork failed
timespec time = {0, 0};
clock_gettime(CLOCK_REALTIME, &time);
int64_t ts = time.tv_sec * 1000000 + time.tv_nsec / 1000;
// thread_name
char tname[16];
prctl(PR_GET_NAME, tname);
// backtrace
char bt[512];
int64_t len = 0;
#ifdef __x86_64__
safe_backtrace(bt, sizeof(bt) - 1, &len);
#endif
bt[len++] = '\0';
// extra
const ObFatalErrExtraInfoGuard *extra_info = nullptr; // TODO: May deadlock, ObFatalErrExtraInfoGuard::get_thd_local_val_ptr();
uint64_t uval[4] = {0};
auto *trace_id = ObCurTraceId::get_trace_id();
if (trace_id != nullptr) {
trace_id->get_uval(uval);
}
char print_buf[1024];
const ucontext_t *con = (ucontext_t *)context;
#if defined(__x86_64__)
int64_t ip = con->uc_mcontext.gregs[REG_RIP];
int64_t bp = con->uc_mcontext.gregs[REG_RBP]; // stack base
#else
// TODO: ARM
int64_t ip = -1;
int64_t bp = -1;
#endif
char rlimit_core[32] = "unlimited";
if (UINT64_MAX != g_rlimit_core) {
lnprintf(rlimit_core, sizeof(rlimit_core), "%lu", g_rlimit_core);
}
char crash_info[128] = "CRASH ERROR!!!";
int64_t fatal_error_thread_id = get_fatal_error_thread_id();
if (-1 != fatal_error_thread_id) {
lnprintf(crash_info, sizeof(crash_info),
"Right to Die or Duty to Live's Thread Existed before CRASH ERROR!!!"
"ThreadId=%ld,", fatal_error_thread_id);
}
ssize_t print_len = lnprintf(print_buf, sizeof(print_buf),
"%s IP=%lx, RBP=%lx, sig=%d, sig_code=%d, sig_addr=%p, RLIMIT_CORE=%s, "COMMON_FMT", ",
crash_info, ip, bp, sig, sig_code, sig_addr, rlimit_core,
ts, GETTID(), tname, TRACE_ID_FORMAT_PARAM(uval),
(NULL == extra_info) ? NULL : to_cstring(*extra_info), bt);
const auto &si_guard = ObSqlInfoGuard::get_cur_guard();
char sql[] = "SQL=";
char end[] = "\n";
struct iovec iov[4];
memset(iov, 0, sizeof(iov));
iov[0].iov_base = print_buf;
iov[0].iov_len = print_len;
iov[1].iov_base = sql;
iov[1].iov_len = strlen(sql);
iov[2].iov_base = NULL != si_guard ? si_guard->sql_.ptr() : NULL;
iov[2].iov_len = NULL != si_guard ? si_guard->sql_.length() : 0;
iov[3].iov_base = end;
iov[3].iov_len = strlen(end);
writev(STDERR_FILENO, iov, sizeof(iov) / sizeof(iov[0]));
if (OB_SUCC(ret)) {
int status = 0;
waitpid(pid, &status, __WALL);
}
}
// Reset back to the default handler
signal(sig, SIG_DFL);
raise(sig);
}
/**
 * Spawns the mini-core helper, rate-limited to once per MIN_INTERVAL.
 *
 * When tools/minicore.sh is not readable, the inline MINICORE_SCRIPT is run
 * via `sh -c`; when it is readable and executable, the script itself is run
 * via `sh`. When it is readable but not executable, nothing is spawned.
 *
 * @param sig  unused
 * @param tid  unused
 * @param pid  [out] pid of the spawned helper as returned by clone; -1 when no
 *             helper was spawned (rate-limited, bin/minicore.py missing,
 *             script not executable, or clone failed)
 * @return OB_EAGAIN when called again within MIN_INTERVAL or when another
 *         caller won the timestamp update; OB_FILE_NOT_EXIST when
 *         bin/minicore.py is not readable; OB_SUCCESS otherwise (including
 *         the cases where nothing was spawned -- callers must check `pid`).
 */
int minicoredump(int sig, int64_t tid, pid_t& pid)
{
  static constexpr int64_t MIN_INTERVAL = 5 * 60 * 1000 * 1000; // 5min
  static int64_t last_ts = 0;
  // Give the out-parameter a defined value on every path; several branches
  // below return without spawning anything.
  pid = -1;
  int64_t now = ObTimeUtility::fast_current_time();
  int64_t last = ATOMIC_LOAD(&last_ts);
  int ret = OB_SUCCESS;
  UNUSED(sig);
  UNUSED(tid);
  if (now - last < MIN_INTERVAL) {
    ret = OB_EAGAIN;
  } else if (!ATOMIC_BCAS(&last_ts, last, now)) {
    // Another caller updated the timestamp first.
    ret = OB_EAGAIN;
  } else if (-1 == access("bin/minicore.py", R_OK)) {
    ret = OB_FILE_NOT_EXIST;
  } else if (-1 == access(MINICORE_SHELL_PATH, R_OK)) {
    // No helper script on disk: fall back to the built-in script.
    // CLONE_VFORK suspends the caller until the child execs or exits.
    if (0 == (pid = syscall(__NR_clone, CLONE_VFORK, nullptr, nullptr, nullptr, nullptr))) {
      IGNORE_RETURN execlp("sh", "sh", "-c", MINICORE_SCRIPT, nullptr);
      _exit(EXIT_FAILURE);  // only reached when exec failed
    }
  } else if (-1 != access(MINICORE_SHELL_PATH, X_OK)) {
    if (0 == (pid = syscall(__NR_clone, CLONE_VFORK, nullptr, nullptr, nullptr, nullptr))) {
      IGNORE_RETURN execlp("sh", "sh", MINICORE_SHELL_PATH, nullptr);
      _exit(EXIT_FAILURE);  // only reached when exec failed
    }
  }
  return ret;
}
/**
 * Captures a stack dump of the process through an external helper, at most
 * once per FASTSTACK_MIN_INTERVAL.
 *
 * When tools/callstack.sh is not readable, the inline FASTSTACK_SCRIPT is run
 * via `sh -c`; when it is readable and executable, the script itself is run
 * via `sh`; when it is readable but not executable, nothing is spawned. The
 * helper is created with CLONE_VFORK | CLONE_PARENT and is not waited for.
 *
 * @return OB_EAGAIN when called again within the interval or when another
 *         caller won the timestamp update; OB_SUCCESS otherwise. A warning
 *         log line with `now` and `ret` is emitted on every call.
 */
int faststack()
{
  static int64_t last_ts = 0;
  int ret = OB_SUCCESS;
  const int64_t now = ObTimeUtility::fast_current_time();
  const int64_t last = ATOMIC_LOAD(&last_ts);
  if (now - last < FASTSTACK_MIN_INTERVAL || !ATOMIC_BCAS(&last_ts, last, now)) {
    ret = OB_EAGAIN;
  } else {
    // Built-in script when no helper file is readable; otherwise the helper
    // file, provided it is executable.
    const bool use_builtin = (-1 == access(FASTSTACK_SHELL_PATH, R_OK));
    const bool can_spawn = use_builtin || (-1 != access(FASTSTACK_SHELL_PATH, X_OK));
    if (can_spawn
        && 0 == syscall(__NR_clone, CLONE_VFORK | CLONE_PARENT, nullptr, nullptr, nullptr, nullptr)) {
      // Child side.
      if (use_builtin) {
        IGNORE_RETURN execlp("sh", "sh", "-c", FASTSTACK_SCRIPT, nullptr);
      } else {
        IGNORE_RETURN execlp("sh", "sh", FASTSTACK_SHELL_PATH, nullptr);
      }
      _exit(EXIT_FAILURE);
    }
  }
  LOG_WARN("faststack", K(now), K(ret));
  return ret;
}
} // namespace common
} // namespace oceanbase