【bugfix】修复按需回放redo阶段备机读业务同时触发reform,导致主节点死锁,最后报错退出的问题

This commit is contained in:
congzhou2603
2024-09-14 09:18:33 +08:00
parent 92970291a6
commit c36364b1ba
3 changed files with 22 additions and 9 deletions

View File

@ -553,6 +553,9 @@ bool SSOndemandRequestPrimaryRedo(BufferTag tag)
return false;
} else if (SS_STANDBY_ONDEMAND_NORMAL || SS_PRIMARY_MODE) {
return true;
// retry after reform finish
} else if (SS_IN_REFORM) {
return false;
}
ereport(DEBUG1,

View File

@ -2171,6 +2171,16 @@ int CBOndemandRedoPageForStandby(void *block_key, int32 *redo_status)
return GS_SUCCESS;;
}
if (SS_IN_REFORM) {
ereport(WARNING, (errmodule(MOD_DMS),
errmsg("[SS][On-demand][%u/%u/%u/%d %d-%u] Reform happend when primary redo page for standby,"
"return ONDEMAND_REDO_FAIL.",
tag->rnode.spcNode, tag->rnode.dbNode,
tag->rnode.relNode, tag->rnode.bucketNode, tag->forkNum, tag->blockNum)));
*redo_status = ONDEMAND_REDO_FAIL;
return GS_SUCCESS;
}
Buffer buffer = InvalidBuffer;
uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount;
*redo_status = ONDEMAND_REDO_DONE;

View File

@ -6357,11 +6357,11 @@ retry:
* hold the content shared lock all the time, give the MES from the primary a chance to get it,
* and the timeout time of the primary and standby servers is modified to open the unlocking
* time window.
*/
*/
if (!dms_standby_retry_read && SS_STANDBY_MODE) {
dms_standby_retry_read = true;
mode = BUFFER_LOCK_EXCLUSIVE;
}
dms_standby_retry_read = true;
mode = BUFFER_LOCK_EXCLUSIVE;
}
pg_usleep(5000L);
goto retry;
} else if (dms_standby_retry_read) {
@ -6371,11 +6371,11 @@ retry:
*
* A good idea would be to add the ability to lock downgrade for LWLock.
*/
mode = origin_mode;
dms_standby_retry_read = false;
LWLockRelease(buf->content_lock);
goto retry;
}
mode = origin_mode;
dms_standby_retry_read = false;
LWLockRelease(buf->content_lock);
goto retry;
}
}
}