!1005 UCE故障bug修复,修改处理入口
Merge pull request !1005 from Ricardo_Cui/uce_fault
This commit is contained in:
@ -609,3 +609,8 @@ LL002 E ERRCODE_RELFILENODEMAP rel
|
||||
Section: Class TS - Timeseries Error
|
||||
TS000 E ERRCODE_TS_COMMON_ERROR timeseries_common_error
|
||||
TS001 E ERRCODE_TS_KEYTYPE_MISMATCH column_key_type_mismatch
|
||||
|
||||
Section: Class UE - Uncorrected Error & Warning
|
||||
UE000 E ERRCODE_UE_COMMON_ERROR uce_common_error
|
||||
UE001 E ERRCODE_UE_DIRTY_PAGE uce_dirty_page_error
|
||||
UE002 E ERRCODE_UE_CLEAN_PAGE uce_clean_page_error
|
||||
@ -69,6 +69,11 @@ static void coredump_handler(int sig, siginfo_t *si, void *uc)
|
||||
if (g_instance.attr.attr_common.enable_ffic_log) {
|
||||
(void)gen_err_msg(sig, si, (ucontext_t *)uc);
|
||||
}
|
||||
if (sig == SIGBUS) {
|
||||
g_instance.sigbus_cxt.sigbus_addr = si->si_addr;
|
||||
g_instance.sigbus_cxt.sigbus_code = si->si_code;
|
||||
SIGBUS_handler(sig);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Subsequent fatal error will go to here. If it comes from different thread,
|
||||
@ -97,7 +102,11 @@ static void bbox_handler(int sig, siginfo_t *si, void *uc)
|
||||
if (g_instance.attr.attr_common.enable_ffic_log) {
|
||||
(void)gen_err_msg(sig, si, (ucontext_t *)uc);
|
||||
}
|
||||
|
||||
if (sig == SIGBUS) {
|
||||
g_instance.sigbus_cxt.sigbus_addr = si->si_addr;
|
||||
g_instance.sigbus_cxt.sigbus_code = si->si_code;
|
||||
SIGBUS_handler(sig);
|
||||
}
|
||||
#ifndef ENABLE_MEMORY_CHECK
|
||||
sigset_t intMask;
|
||||
sigset_t oldMask;
|
||||
|
||||
@ -882,7 +882,6 @@ void* gs_signal_receiver_thread(void* args)
|
||||
sigaddset(&waitMask, SIGQUIT);
|
||||
sigaddset(&waitMask, SIGHUP);
|
||||
sigaddset(&waitMask, SIGUSR1);
|
||||
sigaddset(&waitMask, SIGBUS);
|
||||
|
||||
gs_signal_block_sigusr2();
|
||||
|
||||
@ -929,12 +928,6 @@ static void gs_res_signal_handler(int signo, siginfo_t* siginfo, void* context)
|
||||
CurrentMemoryContext = oldContext;
|
||||
return;
|
||||
}
|
||||
/* SIGILL, SIGFPE, SIGSEGV and SIGBUS all carry si_addr, but we only receive SIGBUS. Here si_addr is used to determine whether it
|
||||
* is a SIGBUS signal */
|
||||
if (siginfo->si_addr) {
|
||||
g_instance.sigbus_cxt.sigbus_addr = siginfo->si_addr;
|
||||
g_instance.sigbus_cxt.sigbus_code = siginfo->si_code;
|
||||
}
|
||||
|
||||
/* Honour the signal */
|
||||
gs_signal_handle();
|
||||
@ -1095,6 +1088,7 @@ sigset_t gs_signal_unblock_sigusr2(void)
|
||||
(void)sigdelset(&intMask, SIGUSR2);
|
||||
(void)sigdelset(&intMask, SIGPROF);
|
||||
(void)sigdelset(&intMask, SIGSEGV);
|
||||
(void)sigdelset(&intMask, SIGBUS);
|
||||
(void)sigdelset(&intMask, SIGFPE);
|
||||
(void)sigdelset(&intMask, SIGILL);
|
||||
(void)sigdelset(&intMask, SIGSYS);
|
||||
@ -1117,6 +1111,7 @@ sigset_t gs_signal_block_sigusr2(void)
|
||||
(void)sigdelset(&intMask, SIGFPE);
|
||||
(void)sigdelset(&intMask, SIGILL);
|
||||
(void)sigdelset(&intMask, SIGSYS);
|
||||
(void)sigdelset(&intMask, SIGBUS);
|
||||
|
||||
pthread_sigmask(SIG_SETMASK, &intMask, &oldMask);
|
||||
|
||||
|
||||
@ -334,7 +334,7 @@ static Port* ConnCreateToRecvGssock(pollfd* ufds, int idx, int* nSockets);
|
||||
static Port* ConnCreate(int serverFd);
|
||||
static void reset_shared(int port);
|
||||
static void SIGHUP_handler(SIGNAL_ARGS);
|
||||
static void SIGBUS_handler(SIGNAL_ARGS);
|
||||
void SIGBUS_handler(SIGNAL_ARGS);
|
||||
static void pmdie(SIGNAL_ARGS);
|
||||
static void startup_alarm(SIGNAL_ARGS);
|
||||
static void SetWalsndsNodeState(ClusterNodeState requester, ClusterNodeState others);
|
||||
@ -1927,7 +1927,6 @@ int PostmasterMain(int argc, char* argv[])
|
||||
(void)gspqsignal(SIGINT, pmdie); /* send SIGTERM and shut down */
|
||||
(void)gspqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */
|
||||
(void)gspqsignal(SIGTERM, pmdie); /* wait for children and shut down */
|
||||
(void)gspqsignal(SIGBUS, SIGBUS_handler); /* send SIGBUS and die or panic */
|
||||
|
||||
pqsignal(SIGALRM, SIG_IGN); /* ignored */
|
||||
pqsignal(SIGPIPE, SIG_IGN); /* ignored */
|
||||
@ -4257,15 +4256,15 @@ static void SIGHUP_handler(SIGNAL_ARGS)
|
||||
4. If the page is not dirty, execute pmdie to exit normally and print a warning message. If the page is dirty,
|
||||
print the PANIC log and exit
|
||||
*/
|
||||
static void SIGBUS_handler(SIGNAL_ARGS)
|
||||
void SIGBUS_handler(SIGNAL_ARGS)
|
||||
{
|
||||
uint64 buffer_size;
|
||||
int buf_id;
|
||||
int si_code = g_instance.sigbus_cxt.sigbus_code;
|
||||
unsigned long long sigbus_addr = (unsigned long long)g_instance.sigbus_cxt.sigbus_addr;
|
||||
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
||||
if (si_code != SIGBUS_MCEERR_AR && si_code != SIGBUS_MCEERR_AO) {
|
||||
ereport(PANIC, (errmsg("SIGBUS signal received, Gaussdb will shut down immediately")));
|
||||
ereport(PANIC,
|
||||
(errcode(ERRCODE_UE_COMMON_ERROR), errmsg("SIGBUS signal received, Gaussdb will shut down immediately")));
|
||||
}
|
||||
#ifdef __aarch64__
|
||||
buffer_size = g_instance.attr.attr_storage.NBuffers * (Size)BLCKSZ + PG_CACHE_LINE_SIZE;
|
||||
@ -4281,22 +4280,28 @@ static void SIGBUS_handler(SIGNAL_ARGS)
|
||||
if (buf_desc->state & BM_DIRTY || buf_desc->state & BM_JUST_DIRTIED || buf_desc->state & BM_CHECKPOINT_NEEDED ||
|
||||
buf_desc->state & BM_IO_IN_PROGRESS) {
|
||||
ereport(PANIC,
|
||||
(errmsg("Uncorrected Error occurred at dirty page. The error address is: 0x%llx. Gaussdb will shut down immediately.",
|
||||
(errcode(ERRCODE_UE_DIRTY_PAGE),
|
||||
errmsg("Uncorrected Error occurred at dirty page. The error address is: 0x%llx. Gaussdb will shut "
|
||||
"down immediately.",
|
||||
sigbus_addr)));
|
||||
} else {
|
||||
ereport(WARNING,
|
||||
(errmsg(
|
||||
"Uncorrected Error occurred at clean/free page. The error address is: 0x%llx. GaussDB will shutdown.", sigbus_addr)));
|
||||
(errcode(ERRCODE_UE_CLEAN_PAGE),
|
||||
errmsg("Uncorrected Error occurred at clean/free page. The error address is: 0x%llx. GaussDB will "
|
||||
"shutdown.",
|
||||
sigbus_addr)));
|
||||
pmdie(SIGBUS);
|
||||
}
|
||||
} else if (sigbus_addr == 0) {
|
||||
ereport(PANIC, (errmsg("SIGBUS signal received, sigbus_addr is None. Gaussdb will shut down immediately")));
|
||||
ereport(PANIC,
|
||||
(errcode(ERRCODE_UE_COMMON_ERROR),
|
||||
errmsg("SIGBUS signal received, sigbus_addr is None. Gaussdb will shut down immediately")));
|
||||
} else {
|
||||
ereport(PANIC,
|
||||
(errmsg(
|
||||
"SIGBUS signal received. The error address is: 0x%llx, Gaussdb will shut down immediately", sigbus_addr)));
|
||||
(errcode(ERRCODE_UE_COMMON_ERROR),
|
||||
errmsg("SIGBUS signal received. The error address is: 0x%llx, Gaussdb will shut down immediately",
|
||||
sigbus_addr)));
|
||||
}
|
||||
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
||||
}
|
||||
|
||||
void KillGraceThreads(void)
|
||||
|
||||
@ -109,7 +109,7 @@ void InitBufferPool(void)
|
||||
if (BBOX_BLACKLIST_SHARE_BUFFER) {
|
||||
bbox_blacklist_add(SHARED_BUFFER, t_thrd.storage_cxt.BufferBlocks, buffer_size);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* The array used to sort to-be-checkpointed buffer ids is located in
|
||||
* shared memory, to avoid having to allocate significant amounts of
|
||||
|
||||
@ -244,4 +244,5 @@ extern uint64_t mc_timers_us(void);
|
||||
extern bool SetDBStateFileState(DbState state, bool optional);
|
||||
extern void GPCResetAll();
|
||||
extern void initRandomState(TimestampTz start_time, TimestampTz stop_time);
|
||||
extern void SIGBUS_handler(SIGNAL_ARGS);
|
||||
#endif /* _POSTMASTER_H */
|
||||
|
||||
Reference in New Issue
Block a user