fix hang in do_with_crash_restore

This commit is contained in:
nroskill
2023-06-14 02:17:48 +00:00
committed by ob-robot
parent 58193a69d4
commit 63d659c58c
4 changed files with 42 additions and 37 deletions

View File

@@ -281,6 +281,7 @@ int faststack()
_exit(EXIT_FAILURE); _exit(EXIT_FAILURE);
} }
} }
LOG_WARN("faststack", K(now), K(ret));
return ret; return ret;
} }

View File

@@ -397,8 +397,10 @@ void ObTrace::end()
#ifndef NDEBUG #ifndef NDEBUG
check_leak_span(); check_leak_span();
#endif #endif
if (trace_id_.is_inited()) {
reset(); reset();
} }
}
ObSpanCtx* ObTrace::begin_span(uint32_t span_type, uint8_t level, bool is_follow) ObSpanCtx* ObTrace::begin_span(uint32_t span_type, uint8_t level, bool is_follow)
{ {

View File

@@ -1869,6 +1869,7 @@ int dump_thread_info(lua_State *L)
"wait_event" "wait_event"
}; };
LuaVtableGenerator gen(L, columns); LuaVtableGenerator gen(L, columns);
pid_t pid = getpid();
StackMgr::Guard guard(g_stack_mgr); StackMgr::Guard guard(g_stack_mgr);
for(auto* header = *guard; OB_NOT_NULL(header) && !gen.is_end(); header = guard.next()) { for(auto* header = *guard; OB_NOT_NULL(header) && !gen.is_end(); header = guard.next()) {
auto* thread_base = (char*)(header->pth_); auto* thread_base = (char*)(header->pth_);
@@ -1904,10 +1905,11 @@ int dump_thread_info(lua_State *L)
if (OB_NOT_NULL(locks[idx]) && j < 256) { if (OB_NOT_NULL(locks[idx]) && j < 256) {
bool has_segv = false; bool has_segv = false;
uint32_t val = 0; uint32_t val = 0;
do_with_crash_restore([&] { struct iovec local_iov = {&val, sizeof(val)};
val = *locks[idx]; struct iovec remote_iov = {locks[idx], sizeof(val)};
}, has_segv); ssize_t n = process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0);
if (!has_segv && 0 != val) { if (n != sizeof(val)) {
} else if (0 != val) {
j += snprintf(addrs + j, 256 - j, "%p ", locks[idx]); j += snprintf(addrs + j, 256 - j, "%p ", locks[idx]);
} }
} }
@@ -1959,29 +1961,29 @@ int dump_thread_info(lua_State *L)
GET_OTHER_TSI_ADDR(rpc_dest_addr, &Thread::rpc_dest_addr_); GET_OTHER_TSI_ADDR(rpc_dest_addr, &Thread::rpc_dest_addr_);
constexpr int64_t BUF_LEN = 64; constexpr int64_t BUF_LEN = 64;
char wait_event[BUF_LEN]; char wait_event[BUF_LEN];
ObAddr addr;
struct iovec local_iov = {&addr, sizeof(ObAddr)};
struct iovec remote_iov = {thread_base + rpc_dest_addr_offset, sizeof(ObAddr)};
wait_event[0] = '\0'; wait_event[0] = '\0';
if (0 != join_addr) { if (0 != join_addr) {
IGNORE_RETURN snprintf(wait_event, BUF_LEN, "thread %u %ld", *(uint32_t*)(thread_base + tid_offset), tid_offset); IGNORE_RETURN snprintf(wait_event, BUF_LEN, "thread %u %ld", *(uint32_t*)(thread_base + tid_offset), tid_offset);
} else if (OB_NOT_NULL(wait_addr)) { } else if (OB_NOT_NULL(wait_addr)) {
bool has_segv = false;
uint32_t val = 0; uint32_t val = 0;
do_with_crash_restore([&] { struct iovec local_iov = {&val, sizeof(val)};
val = *wait_addr; struct iovec remote_iov = {wait_addr, sizeof(val)};
}, has_segv); ssize_t n = process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0);
if (has_segv) { if (n != sizeof(val)) {
} else if (0 != (val & (1<<30))) { } else if (0 != (val & (1<<30))) {
IGNORE_RETURN snprintf(wait_event, BUF_LEN, "wrlock on %u", val & 0x3fffffff); IGNORE_RETURN snprintf(wait_event, BUF_LEN, "wrlock on %u", val & 0x3fffffff);
} else { } else {
IGNORE_RETURN snprintf(wait_event, BUF_LEN, "%u rdlocks", val & 0x3fffffff); IGNORE_RETURN snprintf(wait_event, BUF_LEN, "%u rdlocks", val & 0x3fffffff);
} }
} else if (rpc_dest_addr.is_valid()) { } else if (sizeof(ObAddr) == process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0)
bool has_segv = false; && addr.is_valid()) {
do_with_crash_restore([&] { int ret = 0;
int ret = snprintf(wait_event, BUF_LEN, "rpc to "); if ((ret = snprintf(wait_event, BUF_LEN, "rpc to ")) > 0) {
if (ret > 0) { IGNORE_RETURN addr.to_string(wait_event + ret, BUF_LEN - ret);
IGNORE_RETURN rpc_dest_addr.to_string(wait_event + ret, BUF_LEN - ret);
} }
}, has_segv);
} else if (0 != (is_blocking & Thread::WAIT_IN_TENANT_QUEUE)) { } else if (0 != (is_blocking & Thread::WAIT_IN_TENANT_QUEUE)) {
IGNORE_RETURN snprintf(wait_event, BUF_LEN, "tenant worker request"); IGNORE_RETURN snprintf(wait_event, BUF_LEN, "tenant worker request");
} else if (0 != (is_blocking & Thread::WAIT_FOR_IO_EVENT)) { } else if (0 != (is_blocking & Thread::WAIT_FOR_IO_EVENT)) {

View File

@@ -11,7 +11,6 @@
*/ */
#include "ob_all_virtual_thread.h" #include "ob_all_virtual_thread.h"
#include "lib/signal/ob_signal_utils.h"
#include "lib/thread/protected_stack_allocator.h" #include "lib/thread/protected_stack_allocator.h"
#define GET_OTHER_TSI_ADDR(var_name, addr) \ #define GET_OTHER_TSI_ADDR(var_name, addr) \
@@ -53,6 +52,7 @@ int ObAllVirtualThread::inner_get_next_row(common::ObNewRow *&row)
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
if (!is_inited_) { if (!is_inited_) {
const int64_t col_count = output_column_ids_.count(); const int64_t col_count = output_column_ids_.count();
pid_t pid = getpid();
StackMgr::Guard guard(g_stack_mgr); StackMgr::Guard guard(g_stack_mgr);
for (auto* header = *guard; OB_NOT_NULL(header); header = guard.next()) { for (auto* header = *guard; OB_NOT_NULL(header); header = guard.next()) {
auto* thread_base = (char*)(header->pth_); auto* thread_base = (char*)(header->pth_);
@@ -124,29 +124,29 @@ int ObAllVirtualThread::inner_get_next_row(common::ObNewRow *&row)
} }
case WAIT_EVENT: { case WAIT_EVENT: {
GET_OTHER_TSI_ADDR(rpc_dest_addr, &Thread::rpc_dest_addr_); GET_OTHER_TSI_ADDR(rpc_dest_addr, &Thread::rpc_dest_addr_);
ObAddr addr;
struct iovec local_iov = {&addr, sizeof(ObAddr)};
struct iovec remote_iov = {thread_base + rpc_dest_addr_offset, sizeof(ObAddr)};
wait_event_[0] = '\0'; wait_event_[0] = '\0';
if (0 != join_addr) { if (0 != join_addr) {
IGNORE_RETURN snprintf(wait_event_, 64, "thread %u", *(uint32_t*)(thread_base + tid_offset)); IGNORE_RETURN snprintf(wait_event_, 64, "thread %u", *(uint32_t*)(thread_base + tid_offset));
} else if (OB_NOT_NULL(wait_addr)) { } else if (OB_NOT_NULL(wait_addr)) {
bool has_segv = false;
uint32_t val = 0; uint32_t val = 0;
do_with_crash_restore([&] { struct iovec local_iov = {&val, sizeof(val)};
val = *wait_addr; struct iovec remote_iov = {wait_addr, sizeof(val)};
}, has_segv); ssize_t n = process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0);
if (has_segv) { if (n != sizeof(val)) {
} else if (0 != (val & (1<<30))) { } else if (0 != (val & (1<<30))) {
IGNORE_RETURN snprintf(wait_event_, 64, "wrlock on %u", val & 0x3fffffff); IGNORE_RETURN snprintf(wait_event_, 64, "wrlock on %u", val & 0x3fffffff);
} else { } else {
IGNORE_RETURN snprintf(wait_event_, 64, "%u rdlocks", val & 0x3fffffff); IGNORE_RETURN snprintf(wait_event_, 64, "%u rdlocks", val & 0x3fffffff);
} }
} else if (rpc_dest_addr.is_valid()) { } else if (sizeof(ObAddr) == process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0)
bool has_segv = false; && addr.is_valid()) {
do_with_crash_restore([&] { int ret = 0;
int ret = snprintf(wait_event_, 64, "rpc to "); if ((ret = snprintf(wait_event_, 64, "rpc to ")) > 0) {
if (ret > 0) { IGNORE_RETURN addr.to_string(wait_event_ + ret, 64 - ret);
IGNORE_RETURN rpc_dest_addr.to_string(wait_event_ + ret, 64 - ret);
} }
}, has_segv);
} else if (0 != (is_blocking & Thread::WAIT_IN_TENANT_QUEUE)) { } else if (0 != (is_blocking & Thread::WAIT_IN_TENANT_QUEUE)) {
IGNORE_RETURN snprintf(wait_event_, 64, "tenant worker requests"); IGNORE_RETURN snprintf(wait_event_, 64, "tenant worker requests");
} else if (0 != (is_blocking & Thread::WAIT_FOR_IO_EVENT)) { } else if (0 != (is_blocking & Thread::WAIT_FOR_IO_EVENT)) {
@@ -181,12 +181,12 @@ int ObAllVirtualThread::inner_get_next_row(common::ObNewRow *&row)
for (int64_t i = 0, j = 0; i < cnt; ++i) { for (int64_t i = 0, j = 0; i < cnt; ++i) {
int64_t idx = (slot_cnt + i) % ARRAYSIZEOF(ObLatch::current_locks); int64_t idx = (slot_cnt + i) % ARRAYSIZEOF(ObLatch::current_locks);
if (OB_NOT_NULL(locks[idx]) && j < 256) { if (OB_NOT_NULL(locks[idx]) && j < 256) {
bool has_segv = false;
uint32_t val = 0; uint32_t val = 0;
do_with_crash_restore([&] { struct iovec local_iov = {&val, sizeof(val)};
val = *locks[idx]; struct iovec remote_iov = {locks[idx], sizeof(val)};
}, has_segv); ssize_t n = process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0);
if (!has_segv && 0 != val) { if (n != sizeof(val)) {
} else if (0 != val) {
j += snprintf(locks_addr_ + j, 256 - j, "%p ", locks[idx]); j += snprintf(locks_addr_ + j, 256 - j, "%p ", locks[idx]);
} }
} }