845 lines
33 KiB
C++
845 lines
33 KiB
C++
/*
|
|
* Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
*
|
|
* openGauss is licensed under Mulan PSL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|
* You may obtain a copy of Mulan PSL v2 at:
|
|
*
|
|
* http://license.coscl.org.cn/MulanPSL2
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PSL v2 for more details.
|
|
* -------------------------------------------------------------------------
|
|
*
|
|
* segxlog.cpp
|
|
*
|
|
* IDENTIFICATION
|
|
* src/gausskernel/storage/smgr/segment/segxlog.cpp
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/xlog_basic.h"
|
|
#include "access/xlogproc.h"
|
|
#include "access/xlogutils.h"
|
|
#include "access/multi_redo_api.h"
|
|
#include "access/double_write.h"
|
|
#include "catalog/storage_xlog.h"
|
|
#include "commands/tablespace.h"
|
|
#include "executor/executor.h"
|
|
#include "storage/smgr/segment.h"
|
|
|
|
/*
|
|
* Truncate segment size
|
|
*/
|
|
static void redo_truncate(XLogReaderState *record)
|
|
{
|
|
RedoBufferInfo buffer_info;
|
|
XLogRedoAction redo_action = XLogReadBufferForRedo(record, 0, &buffer_info);
|
|
if (redo_action == BLK_NEEDS_REDO) {
|
|
BlockNumber nblocks = *(BlockNumber *)XLogRecGetBlockData(record, 0, NULL);
|
|
|
|
Page page = buffer_info.pageinfo.page;
|
|
SegmentHead *seg_head = (SegmentHead *)PageGetContents(page);
|
|
seg_head->nblocks = nblocks;
|
|
PageSetLSN(page, buffer_info.lsn);
|
|
|
|
SegMarkBufferDirty(buffer_info.buf);
|
|
}
|
|
|
|
if (BufferIsValid(buffer_info.buf)) {
|
|
SegUnlockReleaseBuffer(buffer_info.buf);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Move a list of buckets.
|
|
*/
|
|
static void redo_move_buckets(Buffer buffer, const char *data)
|
|
{
|
|
xl_seg_bktentry_tag_t *bktentry;
|
|
uint32 nentry;
|
|
|
|
nentry = *(uint32 *)data;
|
|
bktentry = (xl_seg_bktentry_tag_t *)(data + sizeof(uint32));
|
|
|
|
BktHeadMapBlock *mapblock = (BktHeadMapBlock *)PageGetContents(BufferGetPage(buffer));
|
|
for (uint32 i = 0; i < nentry; i++) {
|
|
uint32 mapentry_id = bktentry[i].bktentry_id;
|
|
mapblock->head_block[mapentry_id] = bktentry[i].bktentry_header;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Record redis info for bucket relation.
|
|
*/
|
|
static void redo_bucket_add_redisinfo(Buffer buffer, const char *data)
|
|
{
|
|
SegRedisInfo *redis_info = (SegRedisInfo *)data;
|
|
|
|
if (redis_info->nwords == 0) {
|
|
BufferDesc *buf_desc = GetBufferDescriptor(buffer - 1);
|
|
Relation reln = CreateFakeRelcacheEntry(buf_desc->tag.rnode);
|
|
|
|
RelationOpenSmgr(reln);
|
|
flush_all_buffers(reln, InvalidOid);
|
|
FreeFakeRelcacheEntry(reln);
|
|
}
|
|
|
|
BktMainHead *head = (BktMainHead *)PageGetContents(BufferGetPage(buffer));
|
|
head->redis_info = *redis_info;
|
|
}
|
|
|
|
/*
 * Replay clearing one bit of a map page.
 *
 * Payload layout: a uint16 bit id followed by a df_map_page_t image. The
 * on-page first_page must match the logged one and the bit must currently
 * be in use; otherwise replay PANICs. The bookkeeping fields are taken from
 * the logged image and the bit is cleared.
 */
static void redo_unset_bitmap(Buffer buffer, const char *data)
{
    uint16 bit = *(uint16 *)data;
    df_map_page_t *logged = (df_map_page_t *)(data + sizeof(uint16));
    df_map_page_t *target = (df_map_page_t *)PageGetContents(BufferGetPage(buffer));

    if (target->first_page != logged->first_page) {
        ereport(PANIC,
                (errmsg("MapPage's first_page is not consistent, it's %u on disk but should be %u according to xlog",
                        target->first_page, logged->first_page)));
    }

    if (DF_MAP_FREE(target->bitmap, bit)) {
        ereport(PANIC, (errmsg("Try to unset bitmap which is free: %u", bit)));
    }

    target->free_bits = logged->free_bits;
    target->free_begin = logged->free_begin;
    target->dirty_last = logged->dirty_last;
    DF_MAP_UNSET(target->bitmap, bit);
}
|
|
|
|
/*
 * Replay setting one bit of a map page, then extend the data file so that it
 * covers the extent the bit stands for.
 *
 * Payload layout: a uint16 bit id followed by a df_map_page_t image. The
 * on-page first_page must match the logged one and the bit must currently
 * be free; otherwise replay PANICs.
 */
static void redo_set_bitmap(RedoBufferTag buftag, Buffer buffer, const char *data)
{
    uint16 bit = *(uint16 *)data;
    df_map_page_t *logged = (df_map_page_t *)(data + sizeof(uint16));
    df_map_page_t *target = (df_map_page_t *)PageGetContents(BufferGetPage(buffer));

    if (target->first_page != logged->first_page) {
        ereport(PANIC,
                (errmsg("MapPage's first_page is not consistent, it's %u on disk but should be %u according to xlog",
                        target->first_page, logged->first_page),
                 errhint("segment-page may have bug")));
    }
    if (DF_MAP_NOT_FREE(target->bitmap, bit)) {
        ereport(PANIC,
                (errmsg("Try to set bitmap which is not free: %u", bit), errhint("segment-page may have bug")));
    }

    target->free_bits = logged->free_bits;
    target->free_begin = logged->free_begin;
    target->dirty_last = logged->dirty_last;
    DF_MAP_SET(target->bitmap, bit);

    /* First block of the extent addressed by 'bit' ... */
    BlockNumber extent_start = target->first_page + EXTENT_TYPE_TO_SIZE(buftag.rnode.relNode) * bit;
    /* ... and the block just past its end, which is the size the file must reach. */
    BlockNumber extent_end = extent_start + EXTENT_TYPE_TO_SIZE(buftag.rnode.relNode);

    SegSpace *space = spc_open(buftag.rnode.spcNode, buftag.rnode.dbNode, false);
    SegmentCheck(space != NULL);
    spc_extend_file(space, buftag.rnode.relNode, buftag.forknum, extent_end);
}
|
|
|
|
/*
 * Replay an update of the map head's allocation bookkeeping: the allocated
 * extent count, the free group, that group's free page and the high water
 * mark are all taken from the logged XLogDataSpaceAllocateExtent.
 */
static void redo_maphead_allocated_extents(Buffer buffer, const char *data)
{
    XLogDataSpaceAllocateExtent *logged = (XLogDataSpaceAllocateExtent *)data;
    df_map_head_t *head = (df_map_head_t *)PageGetContents(BufferGetPage(buffer));

    head->high_water_mark = logged->hwm;
    head->allocated_extents = logged->allocated_extents;
    head->free_group = logged->free_group;
    head->groups[logged->free_group].free_page = logged->free_page;
}
|
|
|
|
/*
 * Replay the initialization of a segment head page.
 *
 * 'data' starts with an XLogRecPtr that is handed to
 * eg_init_segment_head_buffer_content together with the buffer's block
 * number. The buffer must belong to the segment-head extent type.
 */
static void redo_init_seghead(Buffer buffer, const char *data)
{
    BufferDesc *desc = GetBufferDescriptor(buffer - 1);

    SegmentCheck(desc->tag.rnode.relNode == EXTENT_SIZE_TO_TYPE(SEGMENT_HEAD_EXTENT_SIZE));
    eg_init_segment_head_buffer_content(buffer, desc->tag.blockNum, *(XLogRecPtr *)(data));
}
|
|
|
|
/*
 * Replay an update of a segment head's extent bookkeeping.
 *
 * The head's current nblocks must equal the logged nblocks, otherwise replay
 * PANICs. A level0/level1 slot is only written when its logged slot index is
 * non-negative. Once the head reports zero total blocks, the segment head
 * buffer is flushed immediately.
 */
static void redo_update_seghead(Buffer buffer, const char *data)
{
    XLogDataUpdateSegmentHead *logged = (XLogDataUpdateSegmentHead *)data;
    SegmentHead *seghead = (SegmentHead *)PageGetContents(BufferGetPage(buffer));

    if (logged->nblocks != seghead->nblocks) {
        ereport(PANIC,
                (errmsg("redo update seghead, but target head's nblocks is %u, but should be %u according to xlog",
                        seghead->nblocks, logged->nblocks),
                 errhint("segment-page may have bug")));
    }

    if (logged->level0_slot >= 0) {
        seghead->level0_slots[logged->level0_slot] = logged->level0_value;
    }
    if (logged->level1_slot >= 0) {
        seghead->level1_slots[logged->level1_slot] = logged->level1_value;
    }
    seghead->nextents = logged->nextents;
    seghead->total_blocks = logged->total_blocks;

    if (seghead->total_blocks != 0) {
        return;
    }

    /* All extents are freed: flush the segment header to disk. */
    SegmentCheck(seghead->nextents == 0);
    FlushOneSegmentBuffer(buffer);
}
|
|
|
|
/*
 * Replay the creation of a new level-0 page: initialize the page, reserve
 * room for one BMTLevel0Page by advancing pd_lower, stamp the magic and store
 * the logged first extent in slot 0.
 */
static void redo_new_level0_page(Buffer buffer, const char *data)
{
    Page page = BufferGetPage(buffer);

    SegPageInit(page, BLCKSZ);
    ((PageHeader)page)->pd_lower += sizeof(BMTLevel0Page);

    BMTLevel0Page *level0 = (BMTLevel0Page *)PageGetContents(page);
    level0->magic = BMTLEVEL0_MAGIC;
    level0->slots[0] = *(BlockNumber *)data;
}
|
|
|
|
/*
 * Replay adding an extent to a level-0 page: the logged extent is stored in
 * the logged slot of the BMTLevel0Page held in 'buffer'.
 */
static void redo_level0_page_add_extent(Buffer buffer, const char *data)
{
    XLogDataSetLevel0Page *logged = (XLogDataSetLevel0Page *)data;
    BMTLevel0Page *level0 = (BMTLevel0Page *)PageGetContents(BufferGetPage(buffer));

    level0->slots[logged->slot] = logged->extent;
}
|
|
|
|
/*
 * Replay linking a fork's segment head into the main segment head.
 *
 * Payload layout: an int fork number followed by the BlockNumber of that
 * fork's head, which is stored in fork_head[forknum].
 */
static void redo_seghead_add_fork_segment(Buffer buffer, const char *data)
{
    int fork = *(int *)data;
    BlockNumber fork_head_block = *(BlockNumber *)(data + sizeof(int));
    SegmentHead *main_head = (SegmentHead *)PageGetContents(BufferGetPage(buffer));

    main_head->fork_head[fork] = fork_head_block;
}
|
|
|
|
/*
 * Replay unlinking a segment head pointer: the BlockNumber located at the
 * logged byte offset (an off_t, relative to the page contents) is reset to
 * InvalidBlockNumber.
 */
static void redo_unlink_seghead_ptr(Buffer buffer, const char *data)
{
    off_t field_offset = *(off_t *)data;
    char *contents = (char *)PageGetContents(BufferGetPage(buffer));

    *(BlockNumber *)(contents + field_offset) = InvalidBlockNumber;
}
|
|
|
|
/*
 * Replay the initialization of a bucket main head page: initialize the page,
 * reserve room for one BktMainHead by advancing pd_lower, then set the magic,
 * the logged LSN and an empty redis info (invalid xid, zero words).
 */
static void redo_init_bucket_main_head(Buffer buffer, const char *data)
{
    Page page = BufferGetPage(buffer);

    SegPageInit(page, BLCKSZ);
    PageHeader page_header = (PageHeader)page;
    page_header->pd_lower += sizeof(BktMainHead);

    BktMainHead *head = (BktMainHead *)PageGetContents(page);
    head->lsn = *(XLogRecPtr *)data;
    head->magic = BUCKET_SEGMENT_MAGIC;
    head->redis_info.nwords = 0;
    head->redis_info.redis_xid = InvalidTransactionId;
}
|
|
|
|
/*
 * Replay freeing a bucket map block: the bkt_map entry named by the logged
 * uint32 id is reset to InvalidBlockNumber.
 */
static void redo_bucket_free_mapblock(Buffer buffer, const char *data)
{
    BktMainHead *head = (BktMainHead *)PageGetContents(BufferGetPage(buffer));
    uint32 freed_id = *(uint32 *)data;

    head->bkt_map[freed_id] = InvalidBlockNumber;
}
|
|
|
|
/*
 * Replay registering a bucket map block.
 *
 * Payload layout: a uint32 index into bkt_map followed by the BlockNumber to
 * store at that index.
 */
static void redo_bucket_add_mapblock(Buffer buffer, const char *data)
{
    uint32 map_index = *(uint32 *)data;
    BlockNumber map_blockno = *(BlockNumber *)(data + sizeof(uint32));
    BktMainHead *head = (BktMainHead *)PageGetContents(BufferGetPage(buffer));

    head->bkt_map[map_index] = map_blockno;
}
|
|
|
|
/*
 * Replay the initialization of a bucket map block: the logged XLogRecPtr is
 * forwarded to bucket_init_map_page for the given buffer.
 */
static void redo_bucket_init_mapblock(Buffer buffer, const char *data)
{
    bucket_init_map_page(buffer, *(XLogRecPtr *)data);
}
|
|
|
|
/*
 * Replay adding a bucket head to a map block.
 *
 * Payload layout: an int entry id followed by the BlockNumber of the bucket
 * head, which is stored in head_block[entry id].
 */
static void redo_bucket_add_bkthead(Buffer buffer, const char *data)
{
    int entry_id = *(int *)data;
    BlockNumber bucket_head = *(BlockNumber *)(data + sizeof(int));
    BktHeadMapBlock *map = (BktHeadMapBlock *)PageGetContents(BufferGetPage(buffer));

    map->head_block[entry_id] = bucket_head;
}
|
|
|
|
/*
 * Replay an update of the space high water mark and map group count.
 *
 * Both current values on the map head must equal the logged "old" values,
 * otherwise replay PANICs; they are then replaced by the logged "new" ones.
 */
static void redo_space_update_hwm(Buffer buffer, const char *data)
{
    XLogDataUpdateSpaceHWM *logged = (XLogDataUpdateSpaceHWM *)data;
    df_map_head_t *head = (df_map_head_t *)PageGetContents(BufferGetPage(buffer));

    if (head->high_water_mark != logged->old_hwm) {
        ereport(PANIC, (errmsg("update space high water mark, old hwm is %u, but should be %u according to xlog",
                               head->high_water_mark, logged->old_hwm),
                        errhint("segment-page may have bug")));
    }
    if (head->group_count != logged->old_groupcnt) {
        ereport(PANIC, (errmsg("update map group count, old count is %u, but should be %u according to xlog",
                               head->group_count, logged->old_groupcnt)));
    }

    head->group_count = logged->new_groupcnt;
    head->high_water_mark = logged->new_hwm;
}
|
|
|
|
/*
 * Replay setting an extent inverse pointer.
 *
 * Payload layout: a uint32 array index followed by an ExtentInversePointer
 * value, which is stored at that index of the pointer array in the page.
 */
static void redo_set_inverse_pointer(Buffer buffer, const char *data)
{
    uint32 index = *(uint32 *)data;
    ExtentInversePointer *logged_ptr = (ExtentInversePointer *)(data + sizeof(uint32));
    ExtentInversePointer *pointers = (ExtentInversePointer *)PageGetContents(BufferGetBlock(buffer));

    pointers[index] = *logged_ptr;
}
|
|
|
|
/*
 * Replay the segment head part of an extent move done while shrinking.
 *
 * Only extents whose id is below BMT_HEADER_LEVEL0_SLOTS live in the segment
 * head itself; for those the slot must hold the logged old extent and is
 * switched to the new one.
 */
static void redo_shrink_seghead_update(Buffer buffer, const char *data)
{
    XLogMoveExtent *move = (XLogMoveExtent *)data;
    SegmentHead *head = (SegmentHead *)PageGetContents(BufferGetBlock(buffer));
    int slot = move->extent_id;

    if (slot < BMT_HEADER_LEVEL0_SLOTS) {
        SegmentCheck(head->level0_slots[slot] == move->old_extent);
        head->level0_slots[slot] = move->new_extent;
    }

    SEGMENTTEST(SEGMENT_REDO_UPDATE_SEGHEAD, (errmsg("error happens when replaying segment head update in shrink")));
}
|
|
|
|
/*
 * Expands to the four fields of a RelFileNode-like value as a comma separated
 * argument list, matching a "[%u, %u, %u, %d]" format. The argument is
 * parenthesized so that any expression (e.g. a dereference) can be passed.
 */
#define REL_NODE_FORMAT(rnode) (rnode).spcNode, (rnode).dbNode, (rnode).relNode, (rnode).bucketNode
|
|
|
|
/* create hash table using shared memory when first needed */
|
|
struct HTAB* redo_create_remain_segs_htbl()
|
|
{
|
|
HASHCTL ctl;
|
|
|
|
errno_t errorno = memset_s(&ctl, sizeof(ctl), 0, sizeof(ctl));
|
|
securec_check(errorno, "", "");
|
|
|
|
ctl.keysize = sizeof(RemainExtentHashTag);
|
|
ctl.entrysize = sizeof(ExtentTag);
|
|
ctl.hash = tag_hash;
|
|
int flag = HASH_ELEM | HASH_FUNCTION;
|
|
|
|
return HeapMemInitHash("remain_segs", 1000, DF_MAP_GROUP_EXTENTS, &ctl, flag);
|
|
}
|
|
|
|
/*
 * Remember a freshly allocated segment in the remain_segs hash table.
 *
 * The key is built from the buffer's tag: its rnode with relNode replaced by
 * the buffer's block number, and the original relNode used as extent type.
 * A new entry is tagged ALLOC_SEGMENT with the given xid; an already present
 * entry is only reported with a WARNING. The whole operation runs under
 * remain_segs_lock.
 */
static void redo_xlog_log_alloc_seg(Buffer buffer, TransactionId xid)
{
    AutoMutexLock table_lock(&g_instance.xlog_cxt.remain_segs_lock);
    table_lock.lock();

    Assert(TransactionIdIsValid(xid));

    if (t_thrd.xlog_cxt.remain_segs == NULL) {
        t_thrd.xlog_cxt.remain_segs = redo_create_remain_segs_htbl();
    }

    BufferDesc *desc = GetBufferDescriptor(buffer - 1);

    RemainExtentHashTag key;
    key.rnode = desc->tag.rnode;
    key.rnode.relNode = desc->tag.blockNum;
    key.extentType = desc->tag.rnode.relNode;

    bool existed = false;
    ExtentTag *entry = (ExtentTag *)hash_search(t_thrd.xlog_cxt.remain_segs, (void *)&key, HASH_ENTER, &existed);
    if (!existed) {
        entry->remainExtentType = ALLOC_SEGMENT;
        entry->xid = xid;
        entry->forkNum = InvalidForkNumber;
        entry->lsn = InvalidXLogRecPtr;

        ereport(DEBUG5, (errmodule(MOD_SEGMENT_PAGE), errmsg("Segment [%u, %u, %u, %d] is alloced, cur xid %lu.",
                                                             REL_NODE_FORMAT(key.rnode), xid)));
    } else {
        ereport(WARNING, (errmsg("Segment [%u, %u, %u, %d] already existed in remain segs, Xid %lu,"
                                 "remainExtentType %u.",
                                 REL_NODE_FORMAT(key.rnode), entry->xid, entry->remainExtentType)));
    }

    table_lock.unLock();
}
|
|
|
|
/*
 * Drop a segment from the remain_segs hash table when its bitmap bit is freed.
 *
 * The payload always starts with a uint16 plus a df_map_page_t. Only when a
 * trailing BlockNumber follows is there a segment to forget; a payload
 * without it is ignored. The key is the buffer's rnode with relNode replaced
 * by that block number, and the original relNode used as extent type.
 *
 * @param buffer    buffer whose tag supplies the rnode and extent type
 * @param data      payload of the free-bitmap operation
 * @param data_len  length of 'data' in bytes
 */
static void redo_xlog_forget_alloc_seg(Buffer buffer, const char* data, int data_len)
{
    int first_part_data_len = sizeof(uint16) + sizeof(df_map_page_t);
    if (data_len <= first_part_data_len) {
        /* No trailing block number: nothing was recorded for this free. */
        Assert(data_len == first_part_data_len);
        return;
    }

    Assert(data_len == (sizeof(uint16) + sizeof(df_map_page_t) + sizeof(BlockNumber)));
    BlockNumber* blk_num = (BlockNumber *)(data + sizeof(uint16) + sizeof(df_map_page_t));
    BufferDesc *bufDesc = GetBufferDescriptor(buffer - 1);

    RemainExtentHashTag remainExtentHashTag;
    remainExtentHashTag.rnode = bufDesc->tag.rnode;
    remainExtentHashTag.rnode.relNode = *blk_num;
    remainExtentHashTag.extentType = bufDesc->tag.rnode.relNode;

    AutoMutexLock remain_segs_lock(&g_instance.xlog_cxt.remain_segs_lock);
    remain_segs_lock.lock();
    if (t_thrd.xlog_cxt.remain_segs == NULL) {
        t_thrd.xlog_cxt.remain_segs = redo_create_remain_segs_htbl();
    }

    bool found = false;
    ExtentTag* extentTag = (ExtentTag *)hash_search(t_thrd.xlog_cxt.remain_segs, (void *)&(remainExtentHashTag),
                                                    HASH_REMOVE, &found);
    /* The split literals now carry the separating space ("after drop", "found in"). */
    if (found) {
        ereport(DEBUG5, (errmodule(MOD_SEGMENT_PAGE), errmsg("Segment [%u, %u, %u, %d] is really freed after "
            "drop trxn committed, xid %lu, remainExtentType %u.", REL_NODE_FORMAT(remainExtentHashTag.rnode),
            extentTag->xid, extentTag->remainExtentType)));
    } else {
        ereport(DEBUG5, (errmodule(MOD_SEGMENT_PAGE), errmsg("Segment [%u, %u, %u, %d] is not found "
            "in remain segs htbl.", REL_NODE_FORMAT(remainExtentHashTag.rnode))));
    }
    remain_segs_lock.unLock();
}
|
|
|
|
/*
 * Remember the old extent of an extent move in the remain_segs hash table.
 *
 * The key is the logged logic rnode with relNode replaced by the old extent
 * and bucketNode set to SegmentBktId; the extent type is derived from the
 * extent id. A new entry is tagged SHRINK_EXTENT; an already present entry
 * is only reported with a WARNING. Runs under remain_segs_lock.
 */
static void redo_xlog_log_shrink_extent(Buffer buffer, const char* data)
{
    XLogMoveExtent *move = (XLogMoveExtent *)data;
    SegmentCheck(move->old_extent != InvalidBlockNumber);

    AutoMutexLock table_lock(&g_instance.xlog_cxt.remain_segs_lock);
    table_lock.lock();
    if (t_thrd.xlog_cxt.remain_segs == NULL) {
        t_thrd.xlog_cxt.remain_segs = redo_create_remain_segs_htbl();
    }

    bool existed = false;

    RemainExtentHashTag key;
    /* spcNode and dbNode come from the logic rnode unchanged */
    key.rnode = move->logic_rnode;
    /* relNode is the old extent that may be leaked */
    key.rnode.relNode = move->old_extent;
    key.rnode.bucketNode = SegmentBktId;
    /* the extent type follows from the extent id */
    key.extentType = EXTENT_SIZE_TO_TYPE(ExtentSizeByCount(move->extent_id));

    ExtentTag *entry = (ExtentTag *)hash_search(t_thrd.xlog_cxt.remain_segs, (void *)&key, HASH_ENTER, &existed);
    if (!existed) {
        entry->remainExtentType = SHRINK_EXTENT;
        entry->forkNum = move->forknum;

        entry->xid = InvalidTransactionId;
        entry->lsn = InvalidXLogRecPtr;
        ereport(DEBUG5, (errmodule(MOD_SEGMENT_PAGE), errmsg("Segment [%u, %u, %u, %d] Extent %u is replaced "
            "during shrinking in shrink extents.", REL_NODE_FORMAT(key.rnode), move->old_extent)));
    } else {
        ereport(WARNING, (errmsg("Segment [%u, %u, %u, %d] Extent %u should not be repeatedly founded, xid %lu,"
            "remainExtentType %u", REL_NODE_FORMAT(key.rnode), move->old_extent, entry->xid,
            entry->remainExtentType)));
    }

    table_lock.unLock();
}
|
|
|
|
/*
 * Maintain the remain_segs bookkeeping for one decoded atomic operation.
 *
 *  - SPCXLOG_INIT_SEGHEAD: record the segment, but only when the byte that
 *    follows the leading XLogRecPtr in 'data' is non-zero.
 *  - SPCXLOG_INIT_BUCKET_HEAD: record the segment.
 *  - SPCXLOG_FREE_BITMAP: forget the segment.
 *  - SPCXLOG_SHRINK_SEGHEAD_UPDATE: record the replaced extent.
 * Any other op code is ignored.
 */
void redo_xlog_deal_alloc_seg(uint8 opCode, Buffer buffer, const char* data, int data_len, TransactionId xid)
{
    switch (opCode) {
        case SPCXLOG_INIT_SEGHEAD: {
            unsigned char seg_head_flag = *(unsigned char *)(data + sizeof(XLogRecPtr));
            if (seg_head_flag != 0) {
                redo_xlog_log_alloc_seg(buffer, xid);
            }
            break;
        }
        case SPCXLOG_INIT_BUCKET_HEAD:
            redo_xlog_log_alloc_seg(buffer, xid);
            break;
        case SPCXLOG_FREE_BITMAP:
            redo_xlog_forget_alloc_seg(buffer, data, data_len);
            break;
        case SPCXLOG_SHRINK_SEGHEAD_UPDATE:
            redo_xlog_log_shrink_extent(buffer, data);
            break;
        default:
            break;
    }
}
|
|
|
|
/*
 * Apply one decoded atomic operation to the buffer in 'redo_buf' by routing
 * the op code to its redo handler. Any op code without its own case is
 * checked to be SPCXLOG_SHRINK_SEGHEAD_UPDATE and handled as such.
 */
void redo_atomic_xlog_dispatch(uint8 opCode, RedoBufferInfo *redo_buf, const char *data)
{
    Buffer buffer = redo_buf->buf;
    ereport(DEBUG5, (errmodule(MOD_SEGMENT_PAGE), errmsg("redo_atomic_xlog_dispatch opCode: %u", opCode)));

    switch (opCode) {
        case SPCXLOG_SET_BITMAP:
            redo_set_bitmap(redo_buf->blockinfo, buffer, data);
            break;
        case SPCXLOG_FREE_BITMAP:
            redo_unset_bitmap(buffer, data);
            break;
        case SPCXLOG_MAPHEAD_ALLOCATED_EXTENTS:
            redo_maphead_allocated_extents(buffer, data);
            break;
        case SPCXLOG_INIT_SEGHEAD:
            redo_init_seghead(buffer, data);
            break;
        case SPCXLOG_UPDATE_SEGHEAD:
            redo_update_seghead(buffer, data);
            break;
        case SPCXLOG_NEW_LEVEL0_PAGE:
            redo_new_level0_page(buffer, data);
            break;
        case SPCXLOG_LEVEL0_PAGE_ADD_EXTENT:
            redo_level0_page_add_extent(buffer, data);
            break;
        case SPCXLOG_SEGHEAD_ADD_FORK_SEGMENT:
            redo_seghead_add_fork_segment(buffer, data);
            break;
        case SPCXLOG_UNLINK_SEGHEAD_PTR:
            redo_unlink_seghead_ptr(buffer, data);
            break;
        case SPCXLOG_INIT_BUCKET_HEAD:
            redo_init_bucket_main_head(buffer, data);
            break;
        case SPCXLOG_BUCKET_FREE_MAPBLOCK:
            redo_bucket_free_mapblock(buffer, data);
            break;
        case SPCXLOG_BUCKET_ADD_MAPBLOCK:
            redo_bucket_add_mapblock(buffer, data);
            break;
        case SPCXLOG_BUCKET_INIT_MAPBLOCK:
            redo_bucket_init_mapblock(buffer, data);
            break;
        case SPCXLOG_BUCKET_ADD_BKTHEAD:
            redo_bucket_add_bkthead(buffer, data);
            break;
        case SPCXLOG_SPACE_UPDATE_HWM:
            redo_space_update_hwm(buffer, data);
            break;
        case SPCXLOG_SET_INVERSE_POINTER:
            redo_set_inverse_pointer(buffer, data);
            break;
        case SPCXLOG_SEG_MOVE_BUCKETS:
            redo_move_buckets(buffer, data);
            break;
        case SPCXLOG_BUCKET_ADD_REDISINFO:
            redo_bucket_add_redisinfo(buffer, data);
            break;
        default:
            /* The only remaining op code is the shrink segment-head update. */
            SegmentCheck(opCode == SPCXLOG_SHRINK_SEGHEAD_UPDATE);
            redo_shrink_seghead_update(buffer, data);
            break;
    }
}
|
|
|
|
/*
 * After an extent was moved, visit every logic block of that extent (up to
 * the logged nblocks) that try_get_moved_pagebuf can hand back.
 *
 * A buffer still mapped to the old physical block is flushed if dirty,
 * unpinned, and invalidated provided its tag still names the same logic
 * block. A buffer not mapped to the old block must be mapped to the new one
 * and is simply unpinned.
 */
void move_extent_flush_buffer(XLogMoveExtent *xlog_data)
{
    BlockNumber first_logic_blk = ExtentIdToLogicBlockNum(xlog_data->extent_id);

    for (int off = 0; off < ExtentSizeByCount(xlog_data->extent_id); off++) {
        BlockNumber logic_blk = first_logic_blk + off;
        if (logic_blk >= xlog_data->nblocks) {
            break;
        }

        Buffer buf = try_get_moved_pagebuf(&xlog_data->logic_rnode, xlog_data->forknum, logic_blk);
        if (!BufferIsValid(buf)) {
            continue;
        }

        BlockNumber old_phys_blk = xlog_data->old_extent + off;
        BlockNumber new_phys_blk = xlog_data->new_extent + off;
        BufferDesc *desc = BufferGetBufferDescriptor(buf);

        if (desc->extra->seg_blockno != old_phys_blk) {
            /* Get here only because standby read after we modify the segment head */
            SegmentCheck(desc->extra->seg_blockno == new_phys_blk);
            UnpinBuffer(desc, true);
            continue;
        }

        /* Sample the dirty bit under the header spin-lock, which is dropped before any IO. */
        uint32 state = LockBufHdr(desc);
        bool is_dirty = (state & BM_DIRTY) != 0;
        UnlockBufHdr(desc, state);

        if (is_dirty) {
            LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
            /* Flush data to the old block */
            FlushOneBufferIncludeDW(desc);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        /* The buffer must be unpinned before InvalidateBuffer */
        UnpinBuffer(desc, true);

        state = LockBufHdr(desc);
        bool same_page = RelFileNodeEquals(desc->tag.rnode, xlog_data->logic_rnode) &&
                         desc->tag.forkNum == xlog_data->forknum && desc->tag.blockNum == logic_blk;
        if (same_page) {
            InvalidateBuffer(desc);
        } else {
            UnlockBufHdr(desc, state);
        }
    }
}
|
|
|
|
/*
 * Replay an atomic segment operation record.
 *
 * The record's main data starts with the number of registered blocks. Each
 * block carries one or more encoded operations that are decoded and, when
 * the block needs redo, applied in order. All buffers stay locked until
 * every block has been processed. If one of the operations was a shrink
 * segment-head update, the buffers of the moved extent are flushed at the
 * very end.
 */
static void redo_atomic_xlog(XLogReaderState *record)
{
    int nbuffers = *(int *)XLogRecGetData(record);
    Buffer buffers[XLR_MAX_BLOCK_ID];

    /*
     * nbuffers comes straight from the record and indexes buffers[], which
     * has room for XLR_MAX_BLOCK_ID entries. Refuse a count that would
     * overrun the array instead of corrupting the stack.
     */
    if (nbuffers > XLR_MAX_BLOCK_ID) {
        ereport(PANIC, (errmsg("redo atomic xlog: buffer count %d exceeds the maximum %d",
                               nbuffers, (int)XLR_MAX_BLOCK_ID)));
    }

    bool is_need_log_remain_segs = IsNeedLogRemainSegs(record->EndRecPtr);

    /* Only meaningful when need_flush_buffer_for_shrink is set. */
    XLogMoveExtent move_extent_xlog;
    bool need_flush_buffer_for_shrink = false;

    for (int i = 0; i < nbuffers; i++) {
        RedoBufferInfo redo_buf;

        /* If the WILL_INIT flag is set, force to redo. */
        bool will_init = record->blocks[i].flags & BKPBLOCK_WILL_INIT;
        XLogRedoAction redo_action;
        if (will_init) {
            XLogInitBufferForRedo(record, i, &redo_buf);
            /*
             * If tablespace is dropped, XLogInitBufferForRedo will return an invalid buffer.
             * We do not make a directory in the place where the tablespace symlink would be like
             * heap-disk storage, otherwise space metadata block (like MapHead block) may be
             * inconsistent.
             */
            redo_action = BufferIsValid(redo_buf.buf) ? BLK_NEEDS_REDO : BLK_NOTFOUND;
        } else {
            redo_action = XLogReadBufferForRedo(record, i, &redo_buf);
        }
        buffers[i] = redo_buf.buf;

        Size len = 0;
        char *data = XLogRecGetBlockData(record, i, &len);
        SegmentCheck(len != 0);
        DecodedXLogBlockOp decoded_op = XLogAtomicDecodeBlockData(data, len);

        if (redo_action == BLK_NEEDS_REDO) {
            for (int j = 0; j < decoded_op.operations; j++) {
                redo_atomic_xlog_dispatch(decoded_op.op[j], &redo_buf, decoded_op.data[j]);
            }

            PageSetLSN(redo_buf.pageinfo.page, redo_buf.lsn);
            SegMarkBufferDirty(redo_buf.buf);
        }

        /* Bookkeeping below runs for every block, whether or not it needed redo. */
        for (int j = 0; j < decoded_op.operations; j++) {
            if (decoded_op.op[j] == SPCXLOG_SHRINK_SEGHEAD_UPDATE) {
                need_flush_buffer_for_shrink = true;
                move_extent_xlog = *(XLogMoveExtent *)(decoded_op.data[j]);
            }

            if (is_need_log_remain_segs) {
                /*
                 * NOTE(review): redo_buf.buf may be invalid here (e.g. the BLK_NOTFOUND
                 * case above) while some callees look up its buffer descriptor -- confirm
                 * that this combination cannot occur.
                 */
                redo_xlog_deal_alloc_seg(decoded_op.op[j], redo_buf.buf,
                                         decoded_op.data[j], decoded_op.data_len[j],
                                         XLogRecGetXid(record));
            }
        }
    }

    SEGMENTTEST(SEGMENT_REPLAY_ATOMIC_OP, (errmsg("error happens during replaying atomic xlog")));

    for (int i = 0; i < nbuffers; i++) {
        if (BufferIsValid(buffers[i])) {
            SegUnlockReleaseBuffer(buffers[i]);
        }
    }

    /* We must handle buffer flush after releasing the segment head buffer to avoid dead lock with standby-read */
    if (need_flush_buffer_for_shrink) {
        SEGMENTTEST(SEGMENT_FLUSH_MOVED_EXTENT_BUFFER, (errmsg("error happens just before flush moved extent buffer")));
        move_extent_flush_buffer(&move_extent_xlog);
    }
}
|
|
|
|
/*
 * Replay a segment extension.
 *
 * Block 0 is the segment head: its nblocks must equal the logged old value
 * (otherwise replay PANICs) and is advanced to the logged new value.
 * Block 1 is initialized as an all-zero page stamped with the record LSN;
 * it is skipped when no valid buffer could be obtained for it.
 */
static void redo_seghead_extend(XLogReaderState *record)
{
    RedoBufferInfo redo_buf;
    XLogRedoAction redo_action = XLogReadBufferForRedo(record, 0, &redo_buf);
    if (redo_action == BLK_NEEDS_REDO) {
        char *data = XLogRecGetBlockData(record, 0, NULL);
        XLogDataSegmentExtend *xlog_data = (XLogDataSegmentExtend *)data;

        SegmentHead *seghead = (SegmentHead *)PageGetContents(redo_buf.pageinfo.page);
        if (seghead->nblocks != xlog_data->old_nblocks) {
            ereport(PANIC, (errmsg("data inconsistent when redo seghead_extend, nblocks is %u on disk, but should be "
                                   "%u according to xlog",
                                   seghead->nblocks, xlog_data->old_nblocks)));
        }
        seghead->nblocks = xlog_data->new_nblocks;

        PageSetLSN(redo_buf.pageinfo.page, redo_buf.lsn);
        SegMarkBufferDirty(redo_buf.buf);
    }

    if (BufferIsValid(redo_buf.buf)) {
        SegUnlockReleaseBuffer(redo_buf.buf);
    }

    XLogInitBufferForRedo(record, 1, &redo_buf);
    if (BufferIsValid(redo_buf.buf)) {
        /* Check the secure-C return code as done for every other memset_s/memcpy_s in this file. */
        errno_t rc = memset_s(redo_buf.pageinfo.page, BLCKSZ, 0, BLCKSZ);
        securec_check(rc, "", "");
        PageSetLSN(redo_buf.pageinfo.page, redo_buf.lsn);
        MarkBufferDirty(redo_buf.buf);
        UnlockReleaseBuffer(redo_buf.buf);
    }
}
|
|
|
|
/*
|
|
* Ensure this function is executed alone.
|
|
*/
|
|
static void redo_create_extent_group(XLogReaderState *record)
|
|
{
|
|
char *data = XLogRecGetData(record);
|
|
RelFileNode *rnode = (RelFileNode *)data;
|
|
ForkNumber forknum = *(ForkNumber *)(data + sizeof(RelFileNode));
|
|
|
|
/* Create tablespace directory on the standby */
|
|
TablespaceCreateDbspace(rnode->spcNode, rnode->dbNode, true);
|
|
|
|
/* Create SegSpace object in memory */
|
|
SegSpace *spc = spc_init_space_node(rnode->spcNode, rnode->dbNode);
|
|
|
|
eg_init_data_files(&spc->extent_group[EXTENT_TYPE_TO_GROUPID(rnode->relNode)][forknum], true, record->EndRecPtr);
|
|
}
|
|
|
|
/*
 * Replay the initialization of a bitmap (map) page.
 *
 * The record's main data starts with the BlockNumber of the first page the
 * map page covers. The page is initialized, stamped with the record LSN and
 * marked dirty.
 *
 * XLogInitBufferForRedo can hand back an invalid buffer (this file notes
 * that this happens when the tablespace has been dropped); in that case
 * there is no page to initialize and the record is skipped.
 */
static void redo_init_map_page(XLogReaderState *record)
{
    RedoBufferInfo redo_buf;
    XLogInitBufferForRedo(record, 0, &redo_buf);
    if (!BufferIsValid(redo_buf.buf)) {
        return;
    }

    BlockNumber first_page = *(BlockNumber *)XLogRecGetData(record);

    eg_init_bitmap_page_content(redo_buf.pageinfo.page, first_page);
    PageSetLSN(redo_buf.pageinfo.page, redo_buf.lsn);
    SegMarkBufferDirty(redo_buf.buf);
    SegUnlockReleaseBuffer(redo_buf.buf);
}
|
|
|
|
/*
 * Replay the initialization of an inverse pointer page: the page is
 * initialized with SegPageInit, stamped with the record LSN and marked dirty.
 *
 * XLogInitBufferForRedo can hand back an invalid buffer (this file notes
 * that this happens when the tablespace has been dropped); in that case
 * there is no page to initialize and the record is skipped.
 */
static void redo_init_inverse_point_page(XLogReaderState *record)
{
    RedoBufferInfo redo_buf;
    XLogInitBufferForRedo(record, 0, &redo_buf);
    if (!BufferIsValid(redo_buf.buf)) {
        return;
    }

    SegPageInit(redo_buf.pageinfo.page, BLCKSZ);
    PageSetLSN(redo_buf.pageinfo.page, redo_buf.lsn);
    SegMarkBufferDirty(redo_buf.buf);
    SegUnlockReleaseBuffer(redo_buf.buf);
}
|
|
|
|
/*
 * Replay adding a new map group to the map head in block 0.
 *
 * The logged group count becomes the head's group count, and the last group
 * (index group_count - 1) is filled with the logged first map page and size,
 * with its free page reset to 0.
 */
static void redo_add_new_group(XLogReaderState *record)
{
    RedoBufferInfo head_buf;

    if (XLogReadBufferForRedo(record, 0, &head_buf) == BLK_NEEDS_REDO) {
        xl_new_map_group_info_t *logged = (xl_new_map_group_info_t *)XLogRecGetData(record);
        Page head_page = head_buf.pageinfo.page;
        df_map_head_t *head = (df_map_head_t *)PageGetContents(head_page);

        head->group_count = logged->group_count;
        SegmentCheck(head->group_count > 0);

        df_map_group_t *last_group = &head->groups[head->group_count - 1];
        last_group->free_page = 0;
        last_group->page_count = logged->group_size;
        last_group->first_map = logged->first_map_pageno;

        PageSetLSN(head_page, head_buf.lsn);
        SegMarkBufferDirty(head_buf.buf);
    }

    if (!BufferIsValid(head_buf.buf)) {
        return;
    }
    SegUnlockReleaseBuffer(head_buf.buf);
}
|
|
|
|
/*
 * Replay shrinking the data files of one extent group.
 *
 * When the space reports the EMPTY status the record is logged and skipped.
 * Otherwise the files are shrunk to the logged target size and buffers past
 * that size are forgotten, both for metadata (physical block numbers) and
 * for data (logical block numbers).
 */
static void redo_space_shrink(XLogReaderState *record)
{
    XLogDataSpaceShrink *shrink = (XLogDataSpaceShrink *)XLogRecGetData(record);
    SegSpace *space = spc_open(shrink->rnode.spcNode, shrink->rnode.dbNode, false);

    if (spc_status(space) == SpaceDataFileStatus::EMPTY) {
        ereport(LOG, (errmsg("redo space shrink, target space <%u, %u, %u> does not exist", shrink->rnode.spcNode,
                             shrink->rnode.dbNode, shrink->rnode.relNode)));
        return;
    }

    SegExtentGroup *group = &space->extent_group[EXTENT_TYPE_TO_GROUPID(shrink->rnode.relNode)][shrink->forknum];

    char *file_path = relpathperm(shrink->rnode, shrink->forknum);
    ereport(LOG, (errmsg("call space shrink files, filename: %s, xlog lsn: %lX", file_path, record->EndRecPtr)));
    pfree(file_path);

    spc_shrink_files(group, shrink->target_size, true);

    /* forget metadata buffer that uses physical block number */
    XLogTruncateRelation(group->rnode, group->forknum, shrink->target_size);
    /* forget data buffer that uses logical block number */
    XLogTruncateSegmentSpace(group->rnode, group->forknum, shrink->target_size);
}
|
|
|
|
/*
 * Replay dropping a segment space. The record's main data holds two Oids:
 * the tablespace node followed by the database node.
 */
static void redo_space_drop(XLogReaderState *record)
{
    Oid *nodes = (Oid *)((char *)XLogRecGetData(record));
    Oid spc_node = nodes[0];
    Oid db_node = nodes[1];

    spc_drop(spc_node, db_node, true);
    XLogDropSegmentSpace(spc_node, db_node);
}
|
|
|
|
/*
 * Write one logged page image straight to its physical block.
 *
 * BLCKSZ bytes of 'data' are copied into a local aligned page, which gets the
 * given LSN and a checksum for tag->blockNum. When double write is enabled
 * and at least one page writer is running, the page first goes through the
 * single-flush double write and the matching flush-state slot is marked once
 * the physical write is done; otherwise the page is written directly.
 */
void seg_redo_new_page_copy_and_flush(BufferTag *tag, char *data, XLogRecPtr lsn)
{
    char page[BLCKSZ] __attribute__((__aligned__(ALIGNOF_BUFFER))) = {0};

    errno_t er = memcpy_s(page, BLCKSZ, data, BLCKSZ);
    securec_check(er, "\0", "\0");

    PageSetLSN(page, lsn);
    PageSetChecksumInplace(page, tag->blockNum);

    if (FORCE_FINISH_ENABLED) {
        update_max_page_flush_lsn(lsn, t_thrd.proc_cxt.MyProcPid, false);
    }

    bool through_dw = dw_enabled() && pg_atomic_read_u32(&g_instance.ckpt_cxt_ctl->current_page_writer_count) > 0;
    if (!through_dw) {
        SegSpace *space = spc_open(tag->rnode.spcNode, tag->rnode.dbNode, false);
        SegmentCheck(space != NULL);
        seg_physical_write(space, tag->rnode, tag->forkNum, tag->blockNum, page, true);
        return;
    }

    bool used_old_file = false;
    uint16 dw_slot = seg_dw_single_flush_without_buffer(*tag, (Block)page, &used_old_file);
    t_thrd.proc->dw_pos = dw_slot;
    t_thrd.proc->flush_new_dw = !used_old_file;

    SegSpace *space = spc_open(tag->rnode.spcNode, tag->rnode.dbNode, false);
    SegmentCheck(space != NULL);
    seg_physical_write(space, tag->rnode, tag->forkNum, tag->blockNum, page, true);

    /* Mark the slot of whichever double write file received the page. */
    if (used_old_file) {
        g_instance.dw_single_cxt.recovery_buf.single_flush_state[dw_slot] = true;
    } else {
        g_instance.dw_single_cxt.single_flush_state[dw_slot] = true;
    }
    t_thrd.proc->dw_pos = -1;
}
|
|
|
|
|
|
/*
|
|
* This xlog only copy data to the new block, without modifying data in buffer. If the logic block being in the
|
|
* buffer pool, its pblk points to the old block. The buffer descriptor can not have the logic blocknumber and the new
|
|
* physical block number because we do not know whether we should use the old or thew new physical block for the same
|
|
* logic block, as later segment head modification can either success or fail.
|
|
*/
|
|
static void redo_new_page(XLogReaderState *record)
|
|
{
|
|
Assert(record != NULL);
|
|
BufferTag *tag = (BufferTag *)XLogRecGetData(record);
|
|
seg_redo_new_page_copy_and_flush(tag, (char *)XLogRecGetData(record) + sizeof(BufferTag), record->EndRecPtr);
|
|
}
|
|
|
|
/*
 * Redo entry point for segment-page records: the record's info bits (with
 * XLR_INFO_MASK removed) select the redo routine. An unknown value PANICs.
 */
void segpage_smgr_redo(XLogReaderState *record)
{
    uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    switch (info) {
        case XLOG_SEG_ATOMIC_OPERATION:
            redo_atomic_xlog(record);
            break;
        case XLOG_SEG_SEGMENT_EXTEND:
            redo_seghead_extend(record);
            break;
        case XLOG_SEG_CREATE_EXTENT_GROUP:
            redo_create_extent_group(record);
            break;
        case XLOG_SEG_INIT_MAPPAGE:
            redo_init_map_page(record);
            break;
        case XLOG_SEG_INIT_INVRSPTR_PAGE:
            redo_init_inverse_point_page(record);
            break;
        case XLOG_SEG_ADD_NEW_GROUP:
            redo_add_new_group(record);
            break;
        case XLOG_SEG_TRUNCATE:
            redo_truncate(record);
            break;
        case XLOG_SEG_SPACE_SHRINK:
            redo_space_shrink(record);
            break;
        case XLOG_SEG_SPACE_DROP:
            redo_space_drop(record);
            break;
        case XLOG_SEG_NEW_PAGE:
            redo_new_page(record);
            break;
        default:
            ereport(PANIC, (errmsg("smgr_redo: unknown op code %u", info)));
            break;
    }
}
|