From fb090c7a83328c0099914a940c8742496d0aa81e Mon Sep 17 00:00:00 2001 From: "Racardo.Cui" Date: Fri, 28 May 2021 11:24:51 +0800 Subject: [PATCH 1/3] UCE bug fix --- src/gausskernel/cbb/bbox/gs_bbox.cpp | 10 ++++++++++ src/gausskernel/cbb/utils/gssignal/gs_signal.cpp | 9 ++------- src/gausskernel/process/postmaster/postmaster.cpp | 5 ++--- src/gausskernel/storage/buffer/buf_init.cpp | 3 ++- src/include/postmaster/postmaster.h | 1 + 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/gausskernel/cbb/bbox/gs_bbox.cpp b/src/gausskernel/cbb/bbox/gs_bbox.cpp index 340425358..ff8f68851 100644 --- a/src/gausskernel/cbb/bbox/gs_bbox.cpp +++ b/src/gausskernel/cbb/bbox/gs_bbox.cpp @@ -61,6 +61,11 @@ static void coredump_handler(int sig, siginfo_t *si, void *uc) { static volatile int64 first_tid = INVALID_TID; int64 cur_tid = (int64)pthread_self(); + if (sig == SIGBUS) { + g_instance.sigbus_cxt.sigbus_addr = si->si_addr; + g_instance.sigbus_cxt.sigbus_code = si->si_code; + SIGBUS_handler(sig); + } if (first_tid == INVALID_TID && __sync_bool_compare_and_swap(&first_tid, INVALID_TID, cur_tid)) { @@ -90,6 +95,11 @@ static void bbox_handler(int sig, siginfo_t *si, void *uc) { static volatile int64 first_tid = INVALID_TID; int64 cur_tid = (int64)pthread_self(); + if (sig == SIGBUS) { + g_instance.sigbus_cxt.sigbus_addr = si->si_addr; + g_instance.sigbus_cxt.sigbus_code = si->si_code; + SIGBUS_handler(sig); + } if (first_tid == INVALID_TID && __sync_bool_compare_and_swap(&first_tid, INVALID_TID, cur_tid)) { diff --git a/src/gausskernel/cbb/utils/gssignal/gs_signal.cpp b/src/gausskernel/cbb/utils/gssignal/gs_signal.cpp index 89962d08e..d69b72e2e 100644 --- a/src/gausskernel/cbb/utils/gssignal/gs_signal.cpp +++ b/src/gausskernel/cbb/utils/gssignal/gs_signal.cpp @@ -882,7 +882,6 @@ void* gs_signal_receiver_thread(void* args) sigaddset(&waitMask, SIGQUIT); sigaddset(&waitMask, SIGHUP); sigaddset(&waitMask, SIGUSR1); - sigaddset(&waitMask, SIGBUS); gs_signal_block_sigusr2(); @@ -929,12 +928,6 @@ static void gs_res_signal_handler(int signo, siginfo_t* siginfo, void* context) CurrentMemoryContext = oldContext; return; } - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS has si_addr, we only receive SIGBUS. Here si_addr is used to determine whether it - * is a SIGBUS signal */ - if (siginfo->si_addr) { - g_instance.sigbus_cxt.sigbus_addr = siginfo->si_addr; - g_instance.sigbus_cxt.sigbus_code = siginfo->si_code; - } /* Hornour the signal */ gs_signal_handle(); @@ -1095,6 +1088,7 @@ sigset_t gs_signal_unblock_sigusr2(void) (void)sigdelset(&intMask, SIGUSR2); (void)sigdelset(&intMask, SIGPROF); (void)sigdelset(&intMask, SIGSEGV); + (void)sigdelset(&intMask, SIGBUS); (void)sigdelset(&intMask, SIGFPE); (void)sigdelset(&intMask, SIGILL); (void)sigdelset(&intMask, SIGSYS); @@ -1117,6 +1111,7 @@ sigset_t gs_signal_block_sigusr2(void) (void)sigdelset(&intMask, SIGFPE); (void)sigdelset(&intMask, SIGILL); (void)sigdelset(&intMask, SIGSYS); + (void)sigdelset(&intMask, SIGBUS); pthread_sigmask(SIG_SETMASK, &intMask, &oldMask); diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 2f3a7bd90..c4b5499e8 100755 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -334,7 +334,7 @@ static Port* ConnCreateToRecvGssock(pollfd* ufds, int idx, int* nSockets); static Port* ConnCreate(int serverFd); static void reset_shared(int port); static void SIGHUP_handler(SIGNAL_ARGS); -static void SIGBUS_handler(SIGNAL_ARGS); +void SIGBUS_handler(SIGNAL_ARGS); static void pmdie(SIGNAL_ARGS); static void startup_alarm(SIGNAL_ARGS); static void SetWalsndsNodeState(ClusterNodeState requester, ClusterNodeState others); @@ -1927,7 +1927,6 @@ int PostmasterMain(int argc, char* argv[]) (void)gspqsignal(SIGINT, pmdie); /* send SIGTERM and shut down */ (void)gspqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */ (void)gspqsignal(SIGTERM, pmdie); /* wait for children and shut down */ - (void)gspqsignal(SIGBUS, SIGBUS_handler); /* send SIGBUS and die or panic */ pqsignal(SIGALRM, SIG_IGN); /* ignored */ pqsignal(SIGPIPE, SIG_IGN); /* ignored */ @@ -4257,7 +4256,7 @@ static void SIGHUP_handler(SIGNAL_ARGS) 4. If the page is not dirty, execute pmdie to exit normally and print warning message. If the page is dirty, print the PANIC log and exit */ -static void SIGBUS_handler(SIGNAL_ARGS) +void SIGBUS_handler(SIGNAL_ARGS) { uint64 buffer_size; int buf_id; diff --git a/src/gausskernel/storage/buffer/buf_init.cpp b/src/gausskernel/storage/buffer/buf_init.cpp index ec407ffe2..54f4ca97c 100644 --- a/src/gausskernel/storage/buffer/buf_init.cpp +++ b/src/gausskernel/storage/buffer/buf_init.cpp @@ -109,7 +109,8 @@ void InitBufferPool(void) if (BBOX_BLACKLIST_SHARE_BUFFER) { bbox_blacklist_add(SHARED_BUFFER, t_thrd.storage_cxt.BufferBlocks, buffer_size); } - + ereport(LOG, (errmsg("Buffer pool start virtual address = %llu\n", (unsigned long long)t_thrd.storage_cxt.BufferBlocks))); + ereport(LOG, (errmsg("Buffer pool end virtual address = %llu\n", (unsigned long long)t_thrd.storage_cxt.BufferBlocks + buffer_size))); /* * The array used to sort to-be-checkpointed buffer ids is located in * shared memory, to avoid having to allocate significant amounts of diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 8170931b2..39977f02e 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -244,4 +244,5 @@ extern uint64_t mc_timers_us(void); extern bool SetDBStateFileState(DbState state, bool optional); extern void GPCResetAll(); extern void initRandomState(TimestampTz start_time, TimestampTz stop_time); +extern void SIGBUS_handler(SIGNAL_ARGS); #endif /* _POSTMASTER_H */ From dccbad163a4bf9257449a53e68a6ac04b0acd099 Mon Sep 17 00:00:00 2001 From: "Racardo.Cui" Date: Sat, 29 May 2021 18:25:31 +0800 Subject: [PATCH 2/3] delete debug message --- src/gausskernel/storage/buffer/buf_init.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gausskernel/storage/buffer/buf_init.cpp b/src/gausskernel/storage/buffer/buf_init.cpp index 54f4ca97c..d383d8f8f 100644 --- a/src/gausskernel/storage/buffer/buf_init.cpp +++ b/src/gausskernel/storage/buffer/buf_init.cpp @@ -109,8 +109,7 @@ void InitBufferPool(void) if (BBOX_BLACKLIST_SHARE_BUFFER) { bbox_blacklist_add(SHARED_BUFFER, t_thrd.storage_cxt.BufferBlocks, buffer_size); } - ereport(LOG, (errmsg("Buffer pool start virtual address = %llu\n", (unsigned long long)t_thrd.storage_cxt.BufferBlocks))); - ereport(LOG, (errmsg("Buffer pool end virtual address = %llu\n", (unsigned long long)t_thrd.storage_cxt.BufferBlocks + buffer_size))); + /* * The array used to sort to-be-checkpointed buffer ids is located in * shared memory, to avoid having to allocate significant amounts of From bccc724800bbadc77581bd04e12c6b597a7980c7 Mon Sep 17 00:00:00 2001 From: "Racardo.Cui" Date: Mon, 31 May 2021 11:19:40 +0800 Subject: [PATCH 3/3] add SIGBUS in BlockSig sigmask --- src/common/backend/utils/errcodes.txt | 5 ++++ src/gausskernel/cbb/bbox/gs_bbox.cpp | 21 ++++++++-------- .../process/postmaster/postmaster.cpp | 24 ++++++++++++------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/common/backend/utils/errcodes.txt b/src/common/backend/utils/errcodes.txt index c03fa7083..bd2840a97 100644 --- a/src/common/backend/utils/errcodes.txt +++ b/src/common/backend/utils/errcodes.txt @@ -609,3 +609,8 @@ LL002 E ERRCODE_RELFILENODEMAP rel Section: Class TS - Timeseries Error TS000 E ERRCODE_TS_COMMON_ERROR timeseries_common_error TS001 E ERRCODE_TS_KEYTYPE_MISMATCH column_key_type_mismatch + +Section: Class UE - Uncorrected Error & Warning +UE000 E ERRCODE_UE_COMMON_ERROR uce_common_error +UE001 E ERRCODE_UE_DIRTY_PAGE uce_dirty_page_error +UE002 E ERRCODE_UE_CLEAN_PAGE uce_clean_page_error \ No newline at end of file diff --git a/src/gausskernel/cbb/bbox/gs_bbox.cpp b/src/gausskernel/cbb/bbox/gs_bbox.cpp index ff8f68851..e3e94074c 100644 --- a/src/gausskernel/cbb/bbox/gs_bbox.cpp +++ b/src/gausskernel/cbb/bbox/gs_bbox.cpp @@ -61,11 +61,6 @@ static void coredump_handler(int sig, siginfo_t *si, void *uc) { static volatile int64 first_tid = INVALID_TID; int64 cur_tid = (int64)pthread_self(); - if (sig == SIGBUS) { - g_instance.sigbus_cxt.sigbus_addr = si->si_addr; - g_instance.sigbus_cxt.sigbus_code = si->si_code; - SIGBUS_handler(sig); - } if (first_tid == INVALID_TID && __sync_bool_compare_and_swap(&first_tid, INVALID_TID, cur_tid)) { @@ -74,6 +69,11 @@ static void coredump_handler(int sig, siginfo_t *si, void *uc) if (g_instance.attr.attr_common.enable_ffic_log) { (void)gen_err_msg(sig, si, (ucontext_t *)uc); } + if (sig == SIGBUS) { + g_instance.sigbus_cxt.sigbus_addr = si->si_addr; + g_instance.sigbus_cxt.sigbus_code = si->si_code; + SIGBUS_handler(sig); + } } else { /* * Subsequent fatal error will go to here. If it comes from different thread, @@ -95,11 +95,6 @@ static void bbox_handler(int sig, siginfo_t *si, void *uc) { static volatile int64 first_tid = INVALID_TID; int64 cur_tid = (int64)pthread_self(); - if (sig == SIGBUS) { - g_instance.sigbus_cxt.sigbus_addr = si->si_addr; - g_instance.sigbus_cxt.sigbus_code = si->si_code; - SIGBUS_handler(sig); - } if (first_tid == INVALID_TID && __sync_bool_compare_and_swap(&first_tid, INVALID_TID, cur_tid)) { @@ -107,7 +102,11 @@ static void bbox_handler(int sig, siginfo_t *si, void *uc) if (g_instance.attr.attr_common.enable_ffic_log) { (void)gen_err_msg(sig, si, (ucontext_t *)uc); } - + if (sig == SIGBUS) { + g_instance.sigbus_cxt.sigbus_addr = si->si_addr; + g_instance.sigbus_cxt.sigbus_code = si->si_code; + SIGBUS_handler(sig); + } #ifndef ENABLE_MEMORY_CHECK sigset_t intMask; sigset_t oldMask; diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index c4b5499e8..28026179c 100755 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -4262,9 +4262,9 @@ void SIGBUS_handler(SIGNAL_ARGS) int buf_id; int si_code = g_instance.sigbus_cxt.sigbus_code; unsigned long long sigbus_addr = (unsigned long long)g_instance.sigbus_cxt.sigbus_addr; - gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL); if (si_code != SIGBUS_MCEERR_AR && si_code != SIGBUS_MCEERR_AO) { - ereport(PANIC, (errmsg("SIGBUS signal received, Gaussdb will shut down immediately"))); + ereport(PANIC, + (errcode(ERRCODE_UE_COMMON_ERROR), errmsg("SIGBUS signal received, Gaussdb will shut down immediately"))); } #ifdef __aarch64__ buffer_size = g_instance.attr.attr_storage.NBuffers * (Size)BLCKSZ + PG_CACHE_LINE_SIZE; @@ -4280,22 +4280,28 @@ void SIGBUS_handler(SIGNAL_ARGS) if (buf_desc->state & BM_DIRTY || buf_desc->state & BM_JUST_DIRTIED || buf_desc->state & BM_CHECKPOINT_NEEDED || buf_desc->state & BM_IO_IN_PROGRESS) { ereport(PANIC, - (errmsg("Uncorrected Error occurred at dirty page. The error address is: 0x%llx. Gaussdb will shut down immediately.", + (errcode(ERRCODE_UE_DIRTY_PAGE), + errmsg("Uncorrected Error occurred at dirty page. The error address is: 0x%llx. Gaussdb will shut " + "down immediately.", sigbus_addr))); } else { ereport(WARNING, - (errmsg( - "Uncorrected Error occurred at clean/free page. The error address is: 0x%llx. GaussDB will shutdown.", sigbus_addr))); + (errcode(ERRCODE_UE_CLEAN_PAGE), + errmsg("Uncorrected Error occurred at clean/free page. The error address is: 0x%llx. GaussDB will " + "shutdown.", + sigbus_addr))); pmdie(SIGBUS); } } else if (sigbus_addr == 0) { - ereport(PANIC, (errmsg("SIGBUS signal received, sigbus_addr is None. Gaussdb will shut down immediately"))); + ereport(PANIC, + (errcode(ERRCODE_UE_COMMON_ERROR), + errmsg("SIGBUS signal received, sigbus_addr is None. Gaussdb will shut down immediately"))); } else { ereport(PANIC, - (errmsg( - "SIGBUS signal received. The error address is: 0x%llx, Gaussdb will shut down immediately", sigbus_addr))); + (errcode(ERRCODE_UE_COMMON_ERROR), + errmsg("SIGBUS signal received. The error address is: 0x%llx, Gaussdb will shut down immediately", + sigbus_addr))); } - gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL); } void KillGraceThreads(void)