[FEAT MERGE]MiniCore

This commit is contained in:
nroskill
2023-06-05 02:12:43 +00:00
committed by ob-robot
parent 05822a3c70
commit d627936f7d
7 changed files with 145 additions and 81 deletions

View File

@ -70,7 +70,7 @@ int ObPLogFileStruct::reopen(const bool redirect_flag)
if (OB_UNLIKELY(strlen(filename_) <= 0)) { if (OB_UNLIKELY(strlen(filename_) <= 0)) {
LOG_STDERR("invalid argument log_file = %p\n", filename_); LOG_STDERR("invalid argument log_file = %p\n", filename_);
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
} else if (OB_UNLIKELY((tmp_fd = ::open(filename_, O_WRONLY | O_CREAT | O_APPEND , LOG_FILE_MODE)) < 0)) { } else if (OB_UNLIKELY((tmp_fd = ::open(filename_, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, LOG_FILE_MODE)) < 0)) {
LOG_STDERR("open file = %s errno = %d error = %m\n", filename_, errno); LOG_STDERR("open file = %s errno = %d error = %m\n", filename_, errno);
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
} else if (OB_UNLIKELY(0 != fstat(tmp_fd, &stat_))) { } else if (OB_UNLIKELY(0 != fstat(tmp_fd, &stat_))) {
@ -112,7 +112,7 @@ int ObPLogFileStruct::reopen_wf()
} else { } else {
char tmp_file_name[MAX_LOG_FILE_NAME_SIZE]; char tmp_file_name[MAX_LOG_FILE_NAME_SIZE];
(void)snprintf(tmp_file_name, sizeof(tmp_file_name), "%s.wf", filename_); (void)snprintf(tmp_file_name, sizeof(tmp_file_name), "%s.wf", filename_);
if (OB_UNLIKELY((tmp_fd = ::open(tmp_file_name, O_WRONLY | O_CREAT | O_APPEND , LOG_FILE_MODE)) < 0)) { if (OB_UNLIKELY((tmp_fd = ::open(tmp_file_name, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, LOG_FILE_MODE)) < 0)) {
LOG_STDERR("open file = %s errno = %d error = %m\n", tmp_file_name, errno); LOG_STDERR("open file = %s errno = %d error = %m\n", tmp_file_name, errno);
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
} else if (OB_UNLIKELY(0 != fstat(tmp_fd, &wf_stat_))) { } else if (OB_UNLIKELY(0 != fstat(tmp_fd, &wf_stat_))) {

View File

@ -742,7 +742,7 @@ void ObLogger::rotate_log(const char *filename,
tm.tm_hour, tm.tm_min, tm.tm_sec, static_cast<int>(t.tv_usec/1000)); tm.tm_hour, tm.tm_min, tm.tm_sec, static_cast<int>(t.tv_usec/1000));
ret = rename(filename, old_log_file); //If failed, TODO ret = rename(filename, old_log_file); //If failed, TODO
int tmp_fd = open(filename, O_WRONLY | O_CREAT | O_APPEND, ObPLogFileStruct::LOG_FILE_MODE); int tmp_fd = open(filename, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, ObPLogFileStruct::LOG_FILE_MODE);
if (tmp_fd > 0) { if (tmp_fd > 0) {
if (fd > STDERR_FILENO) { if (fd > STDERR_FILENO) {
(void)dup2(tmp_fd, fd); (void)dup2(tmp_fd, fd);
@ -786,7 +786,7 @@ void ObLogger::rotate_log(const char *filename,
} }
ret = rename(filename, old_log_file); //If failed, TODO ret = rename(filename, old_log_file); //If failed, TODO
int tmp_fd = open(filename, O_WRONLY | O_CREAT | O_APPEND, ObPLogFileStruct::LOG_FILE_MODE); int tmp_fd = open(filename, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, ObPLogFileStruct::LOG_FILE_MODE);
if (tmp_fd > 0) { if (tmp_fd > 0) {
if (redirect_flag) { if (redirect_flag) {
(void)dup2(tmp_fd, STDERR_FILENO); (void)dup2(tmp_fd, STDERR_FILENO);
@ -820,7 +820,7 @@ void ObLogger::rotate_log(const char *filename,
} }
} }
ret = rename(wf_filename, old_wf_log_file); //If failed, TODO ret = rename(wf_filename, old_wf_log_file); //If failed, TODO
tmp_fd = open(wf_filename, O_WRONLY | O_CREAT | O_APPEND, ObPLogFileStruct::LOG_FILE_MODE); tmp_fd = open(wf_filename, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, ObPLogFileStruct::LOG_FILE_MODE);
if (tmp_fd > 0) { if (tmp_fd > 0) {
if (wf_fd > STDERR_FILENO) { if (wf_fd > STDERR_FILENO) {
(void)dup2(tmp_fd, wf_fd); (void)dup2(tmp_fd, wf_fd);

View File

@ -42,6 +42,7 @@
#include "lib/oblog/ob_async_log_struct.h" #include "lib/oblog/ob_async_log_struct.h"
#include "lib/utility/ob_defer.h" #include "lib/utility/ob_defer.h"
#include "lib/oblog/ob_syslog_rate_limiter.h" #include "lib/oblog/ob_syslog_rate_limiter.h"
#include "lib/signal/ob_signal_handlers.h"
#define OB_LOG_MAX_PAR_MOD_SIZE 32 #define OB_LOG_MAX_PAR_MOD_SIZE 32
#define OB_LOG_MAX_SUB_MOD_SIZE 32 #define OB_LOG_MAX_SUB_MOD_SIZE 32
@ -1112,7 +1113,7 @@ inline void ObLogger::check_probe(
break; break;
} }
case ProbeAction::PROBE_STACK: { case ProbeAction::PROBE_STACK: {
IGNORE_RETURN raise(60); IGNORE_RETURN faststack();
break; break;
} }
default: { default: {

View File

@ -15,6 +15,9 @@
#include "lib/signal/ob_signal_handlers.h" #include "lib/signal/ob_signal_handlers.h"
#include <sys/prctl.h> #include <sys/prctl.h>
#include <dirent.h> #include <dirent.h>
#include <unistd.h>
#include <fstream>
#include <sys/wait.h>
#include "lib/profile/ob_trace_id.h" #include "lib/profile/ob_trace_id.h"
#include "lib/utility/utility.h" #include "lib/utility/utility.h"
#include "lib/signal/ob_libunwind.h" #include "lib/signal/ob_libunwind.h"
@ -29,6 +32,17 @@ namespace oceanbase
namespace common namespace common
{ {
static const int SIG_SET[] = {SIGABRT, SIGBUS, SIGFPE, SIGSEGV, SIGURG}; static const int SIG_SET[] = {SIGABRT, SIGBUS, SIGFPE, SIGSEGV, SIGURG};
// Relative paths of optional on-disk scripts. minicoredump()/faststack() probe them with access()
// and fall back to the built-in scripts below when the file is not readable.
static constexpr char MINICORE_SHELL_PATH[] = "tools/minicore.sh";
static constexpr char FASTSTACK_SHELL_PATH[] = "tools/callstack.sh";
// Built-in minicore script, passed to `sh -c`: if bin/minicore.py exists, run it with python on the
// pid read from run/observer.pid, writing core.<pid>.mini; then, when more than 5 core.*.mini files
// exist, delete the one that sorts first.
static constexpr char MINICORE_SCRIPT[] = "if [ -e bin/minicore.py ]; then\n"
" python bin/minicore.py `cat $(pwd)/run/observer.pid` -c -o core.`cat $(pwd)/run/observer.pid`.mini\n"
"fi\n"
"[ $(ls -1 core.*.mini 2>/dev/null | wc -l) -gt 5 ] && ls -1 core.*.mini | sort | head -n 1 | xargs rm -f";
// Built-in stack-dump script, passed to `sh -c`: if an executable `obstack` is found by `command -v`,
// run it on the pid read from run/observer.pid, redirecting output to stack.<pid>.<YYYYmmddHHMMSS>;
// then, when more than 100 stack.* files exist, delete the one that sorts first.
static constexpr char FASTSTACK_SCRIPT[] = "if [ -x \"$(command -v obstack)\" ]; then\n"
" obstack `cat $(pwd)/run/observer.pid` > stack.`cat $(pwd)/run/observer.pid`.`date +%Y%m%d%H%M%S`\n"
"fi\n"
"[ $(ls -1 stack.* 2>/dev/null | wc -l) -gt 100 ] && ls -1 stack.* | sort | head -n 1 | xargs rm -f";
static inline void handler(int sig, siginfo_t *s, void *p) static inline void handler(int sig, siginfo_t *s, void *p)
{ {
@ -138,15 +152,13 @@ void close_socket_fd()
void coredump_cb(int sig, siginfo_t *si, void *context) void coredump_cb(int sig, siginfo_t *si, void *context)
{ {
int ret = OB_SUCCESS;
if (g_coredump_num++ < 1) { if (g_coredump_num++ < 1) {
pid_t pid;
close_socket_fd(); close_socket_fd();
ret = minicoredump(sig, GETTID(), pid);
send_request_and_wait(VERB_LEVEL_2, send_request_and_wait(VERB_LEVEL_2,
syscall(SYS_gettid)/*exclude_id*/); syscall(SYS_gettid)/*exclude_id*/);
#define MINICORE 0
#if MINICORE
int pid = 0;
if ((pid = fork()) != 0) {
#endif
// parent or fork failed // parent or fork failed
timespec time = {0, 0}; timespec time = {0, 0};
clock_gettime(CLOCK_REALTIME, &time); clock_gettime(CLOCK_REALTIME, &time);
@ -208,23 +220,69 @@ void coredump_cb(int sig, siginfo_t *si, void *context)
iov[3].iov_base = end; iov[3].iov_base = end;
iov[3].iov_len = strlen(end); iov[3].iov_len = strlen(end);
writev(STDERR_FILENO, iov, sizeof(iov) / sizeof(iov[0])); writev(STDERR_FILENO, iov, sizeof(iov) / sizeof(iov[0]));
if (OB_SUCC(ret)) {
#if MINICORE int status = 0;
} else { waitpid(pid, &status, __WALL);
// child
prctl(PR_SET_NAME, "minicoredump");
int64_t total_size = 0;
if (lib::g_mem_cutter != nullptr) {
lib::g_mem_cutter->cut(total_size);
} }
DLOG(INFO, "[MINICORE], TOTAL FREED: %ld", total_size);
}
#endif
} }
// Reset back to the default handler // Reset back to the default handler
signal(sig, SIG_DFL); signal(sig, SIG_DFL);
raise(sig); raise(sig);
} }
/**
 * Start the minicore script in a child process, at most once every MIN_INTERVAL.
 *
 * The script is tools/minicore.sh when that file is readable and executable, otherwise the
 * built-in MINICORE_SCRIPT when the file is not readable at all.
 *
 * @param sig  unused
 * @param tid  unused
 * @param pid  [out] pid of the started child; always -1 when the return value is not OB_SUCCESS,
 *             so the caller can safely gate its waitpid() on the return code
 * @return OB_SUCCESS        a child was started and `pid` identifies it
 *         OB_EAGAIN         rate-limited, lost the race on the timestamp, or clone() failed
 *         OB_FILE_NOT_EXIST bin/minicore.py is not readable, or tools/minicore.sh exists but is
 *                           not executable (i.e. there is nothing runnable)
 */
int minicoredump(int sig, int64_t tid, pid_t& pid)
{
  static constexpr int64_t MIN_INTERVAL = 5 * 60 * 1000 * 1000; // 5min
  static int64_t last_ts = 0;
  int64_t now = ObTimeUtility::fast_current_time();
  int64_t last = ATOMIC_LOAD(&last_ts);
  int ret = OB_SUCCESS;
  UNUSED(sig);
  UNUSED(tid);
  // Never hand back an indeterminate pid: the caller may pass it straight to waitpid().
  pid = -1;
  if (now - last < MIN_INTERVAL) {
    ret = OB_EAGAIN;
  } else if (!ATOMIC_BCAS(&last_ts, last, now)) {
    // Another thread claimed this interval first.
    ret = OB_EAGAIN;
  } else if (-1 == access("bin/minicore.py", R_OK)) {
    ret = OB_FILE_NOT_EXIST;
  } else {
    const bool use_builtin_script = (-1 == access(MINICORE_SHELL_PATH, R_OK));
    if (!use_builtin_script && -1 == access(MINICORE_SHELL_PATH, X_OK)) {
      // The on-disk script is present but cannot be run; report it instead of claiming success
      // with no child to wait for.
      ret = OB_FILE_NOT_EXIST;
    } else {
      // Raw clone with CLONE_VFORK: the caller is suspended until the child execs or exits.
      pid = static_cast<pid_t>(syscall(__NR_clone, CLONE_VFORK, nullptr, nullptr, nullptr, nullptr));
      if (0 == pid) {
        // child
        if (use_builtin_script) {
          IGNORE_RETURN execlp("sh", "sh", "-c", MINICORE_SCRIPT, nullptr);
        } else {
          IGNORE_RETURN execlp("sh", "sh", MINICORE_SHELL_PATH, nullptr);
        }
        // Only reached when exec failed.
        _exit(EXIT_FAILURE);
      } else if (pid < 0) {
        // clone failed: there is no child, so keep the caller away from waitpid(-1, ...).
        pid = -1;
        ret = OB_EAGAIN;
      }
    }
  }
  return ret;
}
/**
 * Start the stack-dump script in a child process, at most once every MIN_INTERVAL.
 *
 * Runs tools/callstack.sh when it is readable and executable, the built-in FASTSTACK_SCRIPT when
 * the file is not readable, and nothing when the file is readable but not executable.
 * The child is created with CLONE_VFORK | CLONE_PARENT and is not waited for here.
 *
 * @return OB_EAGAIN when rate-limited or when another thread claimed the interval,
 *         OB_SUCCESS otherwise (the result of clone/exec is not reported).
 */
int faststack()
{
  static constexpr int64_t MIN_INTERVAL = 1 * 60 * 1000 * 1000; // 1min
  static int64_t last_ts = 0;
  int64_t cur_ts = ObTimeUtility::fast_current_time();
  int64_t prev_ts = ATOMIC_LOAD(&last_ts);

  // Rate limit: bail out if the interval has not elapsed or someone else took this slot.
  if (cur_ts - prev_ts < MIN_INTERVAL || !ATOMIC_BCAS(&last_ts, prev_ts, cur_ts)) {
    return OB_EAGAIN;
  }

  const bool has_user_script = (-1 != access(FASTSTACK_SHELL_PATH, R_OK));
  if (has_user_script && -1 == access(FASTSTACK_SHELL_PATH, X_OK)) {
    // Script file is there but not executable: nothing is launched.
    return OB_SUCCESS;
  }

  if (0 == syscall(__NR_clone, CLONE_VFORK | CLONE_PARENT, nullptr, nullptr, nullptr, nullptr)) {
    // child
    if (has_user_script) {
      IGNORE_RETURN execlp("sh", "sh", FASTSTACK_SHELL_PATH, nullptr);
    } else {
      IGNORE_RETURN execlp("sh", "sh", "-c", FASTSTACK_SCRIPT, nullptr);
    }
    // Only reached when exec failed.
    _exit(EXIT_FAILURE);
  }
  return OB_SUCCESS;
}
} // namespace common } // namespace common
} // namespace oceanbase } // namespace oceanbase

View File

@ -21,6 +21,8 @@ namespace oceanbase
{ {
namespace common namespace common
{ {
// Runs the minicore script (tools/minicore.sh or a built-in fallback) in a child process, at most
// once every 5 minutes. `sig` and `tid` are currently unused; `pid` is assigned the result of the
// clone call when a launch is attempted. Returns OB_EAGAIN when rate-limited and OB_FILE_NOT_EXIST
// when bin/minicore.py is not readable.
extern int minicoredump(int sig, int64_t tid, pid_t& pid);
// Runs the stack-dump script (tools/callstack.sh or a built-in fallback) in a child process, at most
// once every minute. Returns OB_EAGAIN when rate-limited, OB_SUCCESS otherwise.
extern int faststack();
} // namespace common } // namespace common
} // namespace oceanbase } // namespace oceanbase

View File

@ -1264,6 +1264,8 @@ int ObTenant::recv_request(ObRequest &req)
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
ObTenantStatEstGuard guard(id_); ObTenantStatEstGuard guard(id_);
EVENT_INC(REQUEST_ENQUEUE_COUNT); EVENT_INC(REQUEST_ENQUEUE_COUNT);
} else if (OB_SIZE_OVERFLOW == ret) {
IGNORE_RETURN faststack();
} }
return ret; return ret;

View File

@ -77,6 +77,7 @@ void ObTenantNodeBalancer::run1()
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
lib::set_thread_name("OmtNodeBalancer"); lib::set_thread_name("OmtNodeBalancer");
while (!has_set_stop()) { while (!has_set_stop()) {
TenantUnits units; TenantUnits units;
int64_t sys_unit_cnt = 0; int64_t sys_unit_cnt = 0;