387 lines
14 KiB
C
387 lines
14 KiB
C
/*
|
|
* Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
*
|
|
* openGauss is licensed under Mulan PSL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|
* You may obtain a copy of Mulan PSL v2 at:
|
|
*
|
|
* http://license.coscl.org.cn/MulanPSL2
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PSL v2 for more details.
|
|
* ---------------------------------------------------------------------------------------
|
|
*
|
|
* double_write.h
|
|
* Define some inline function of double write and export some interfaces.
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/include/access/double_write.h
|
|
*
|
|
* ---------------------------------------------------------------------------------------
|
|
*/
|
|
#ifndef DOUBLE_WRITE_H
|
|
#define DOUBLE_WRITE_H
|
|
|
|
#include "double_write_basic.h"
|
|
#include "storage/buf/buf_internals.h"
|
|
#include "storage/checksum_impl.h"
|
|
|
|
/*
 * Version identifiers for the buffer tag format.
 * NOTE(review): presumably the value stored in the buftag_ver field of a batch head -- confirm.
 */
typedef enum BufTagVer {
    ORIGIN_TAG = 0,
    HASHBUCKET_TAG = 1,
    PAGE_COMPRESS_TAG = 2
} BufTagVer;
|
|
|
|
typedef struct st_dw_batch {
|
|
dw_page_head_t head;
|
|
uint16 page_num; /* for batch head, number of data pages */
|
|
uint16 buftag_ver;
|
|
BufferTag buf_tag[0]; /* to locate the data pages in batch */
|
|
} dw_batch_t;
|
|
|
|
typedef struct st_dw_batch_nohbkt {
|
|
dw_page_head_t head;
|
|
uint16 page_num; /* for batch head, number of data pages */
|
|
uint16 buftag_ver;
|
|
BufferTagFirstVer buf_tag[0]; /* to locate the data pages in batch */
|
|
} dw_batch_first_ver;
|
|
|
|
typedef struct dw_single_first_flush_item {
|
|
uint16 dwn; /* double write number, updated when file header changed */
|
|
BufferTag buf_tag;
|
|
}dw_first_flush_item;
|
|
|
|
|
|
typedef struct dw_single_flush_item {
|
|
uint16 data_page_idx; /* from zero start, indicates the slot of the data page. */
|
|
uint16 dwn; /* double write number, updated when file header changed */
|
|
BufferTag buf_tag;
|
|
pg_crc32c crc; /* CRC of all above ... MUST BE LAST! */
|
|
}dw_single_flush_item;
|
|
|
|
/* Used by double_write to mark the buffers which are not flushed in the given buf_id array. */
static const int DW_INVALID_BUFFER_ID = -1;

/* The high bit of the 16-bit page number is stolen as the flag of hashbucket or segpage. */
#define IS_HASH_BKT_SEGPAGE_MASK (0x8000)

/*
 * Strip the hashbucket/segpage flag bit and yield the real page number.
 * The argument is parenthesized so that an expression argument (for example
 * "a | b") is masked as a whole instead of being split by operator precedence.
 */
#define GET_REL_PGAENUM(pagenum) ((pagenum) & ~IS_HASH_BKT_SEGPAGE_MASK)
|
|
|
|
/**
|
|
* Dirty data pages in one batch
|
|
* The number of data pages depends on the number of BufferTag one page can hold
|
|
*/
|
|
static const uint16 DW_BATCH_DATA_PAGE_MAX =
|
|
(uint16)((BLCKSZ - sizeof(dw_batch_t) - sizeof(dw_page_tail_t)) / sizeof(BufferTag));
|
|
|
|
static const uint16 DW_BATCH_DATA_PAGE_MAX_FOR_NOHBK =
|
|
(uint16)((BLCKSZ - sizeof(dw_batch_first_ver) - sizeof(dw_page_tail_t)) / sizeof(BufferTagFirstVer));
|
|
|
|
/* 1 head + data + 1 tail */
|
|
static const uint16 DW_EXTRA_FOR_ONE_BATCH = 2;
|
|
|
|
/* 1 head + data + [1 tail, 2 head] + data + 2 tail */
|
|
static const uint16 DW_EXTRA_FOR_TWO_BATCH = 3;
|
|
|
|
static const uint16 DW_BATCH_MIN = (1 + DW_EXTRA_FOR_ONE_BATCH);
|
|
|
|
static const uint16 DW_BATCH_MAX = (DW_BATCH_DATA_PAGE_MAX + DW_EXTRA_FOR_ONE_BATCH);
|
|
|
|
/* 2 batches at most for one perform */
|
|
static const uint16 DW_DIRTY_PAGE_MAX = (DW_BATCH_DATA_PAGE_MAX + DW_BATCH_DATA_PAGE_MAX);
|
|
|
|
static const uint16 DW_BUF_MAX = (DW_DIRTY_PAGE_MAX + DW_EXTRA_FOR_TWO_BATCH);
|
|
|
|
/* largest batch for the NOHBK (first-version tag) layout: data pages plus the extra pages of one batch */
static const uint16 DW_BATCH_MAX_FOR_NOHBK = (DW_EXTRA_FOR_ONE_BATCH + DW_BATCH_DATA_PAGE_MAX_FOR_NOHBK);

/* 2 batches at most for one perform */
static const uint16 DW_DIRTY_PAGE_MAX_FOR_NOHBK = (2 * DW_BATCH_DATA_PAGE_MAX_FOR_NOHBK);

/* buffer pages needed for two full NOHBK batches including their extra pages */
static const uint16 DW_BUF_MAX_FOR_NOHBK = (DW_EXTRA_FOR_TWO_BATCH + DW_DIRTY_PAGE_MAX_FOR_NOHBK);
|
|
|
|
|
|
/*
 * Selector macros: each yields the *_FOR_NOHBK constant when contain_hashbucket
 * is false and the regular constant otherwise.
 * The argument is parenthesized before negation so that an expression argument
 * such as "a || b" is negated as a whole rather than as "!a || b".
 */
#define GET_DW_BATCH_DATA_PAGE_MAX(contain_hashbucket) \
    (!(contain_hashbucket) ? DW_BATCH_DATA_PAGE_MAX_FOR_NOHBK : DW_BATCH_DATA_PAGE_MAX)

#define GET_DW_BATCH_MAX(contain_hashbucket) \
    (!(contain_hashbucket) ? DW_BATCH_MAX_FOR_NOHBK : DW_BATCH_MAX)

#define GET_DW_DIRTY_PAGE_MAX(contain_hashbucket) \
    (!(contain_hashbucket) ? DW_DIRTY_PAGE_MAX_FOR_NOHBK : DW_DIRTY_PAGE_MAX)

#define GET_DW_MEM_CTX_MAX_BLOCK_SIZE(contain_hashbucket) \
    (!(contain_hashbucket) ? DW_MEM_CTX_MAX_BLOCK_SIZE_FOR_NOHBK : DW_MEM_CTX_MAX_BLOCK_SIZE)
|
|
|
|
/*
|
|
* 1 block for alignment, 1 for file_head, 1 for reading data_page during recovery
|
|
* and DW_BUF_MAX for double_write buffer.
|
|
*/
|
|
static const uint32 DW_MEM_CTX_MAX_BLOCK_SIZE = ((1 + 1 + 1 + DW_BUF_MAX) * BLCKSZ);
|
|
|
|
static const uint32 DW_MEM_CTX_MAX_BLOCK_SIZE_FOR_NOHBK = ((1 + 1 + 1 + DW_BUF_MAX_FOR_NOHBK) * BLCKSZ);
|
|
|
|
const uint16 SINGLE_BLOCK_TAG_NUM = BLCKSZ / sizeof(dw_single_flush_item);
|
|
|
|
static const uint32 DW_BOOTSTRAP_VERSION = 91261;
|
|
const uint32 DW_SUPPORT_SINGLE_FLUSH_VERSION = 92266;
|
|
const uint32 DW_SUPPORT_NEW_SINGLE_FLUSH = 92433;
|
|
const uint32 DW_SUPPORT_MULTIFILE_FLUSH = 92568;
|
|
const uint32 DW_SUPPORT_BCM_VERSION = 92550;
|
|
const uint32 DW_SUPPORT_REABLE_DOUBLE_WRITE = 92590;
|
|
|
|
|
|
/* dw single flush file information, version is DW_SUPPORT_SINGLE_FLUSH_VERSION */
|
|
/* file head + storage buffer tag page + data page */
|
|
const int DW_SINGLE_FILE_SIZE = (1 + 161 + 32768) * BLCKSZ;
|
|
|
|
/* Reserve 8 bytes for bufferTag upgrade. now usepage num is 32768 * sizeof(dw_single_flush_item) / 8192 */
|
|
const int DW_SINGLE_BUFTAG_PAGE_NUM = 161;
|
|
const int DW_SINGLE_DIRTY_PAGE_NUM = 32768;
|
|
|
|
|
|
/* new dw single flush file, version is DW_SUPPORT_NEW_SINGLE_FLUSH */
|
|
/* file head + first version data page + file head + storage buffer tag page + second version data page */
|
|
const uint32 DW_NEW_SINGLE_FILE_SIZE = (32768 * BLCKSZ);
|
|
const uint16 DW_SECOND_BUFTAG_PAGE_NUM = 4;
|
|
const uint16 DW_SECOND_DATA_PAGE_NUM = (SINGLE_BLOCK_TAG_NUM * DW_SECOND_BUFTAG_PAGE_NUM);
|
|
const uint16 DW_FIRST_DATA_PAGE_NUM = (32768 - DW_SECOND_DATA_PAGE_NUM - DW_SECOND_BUFTAG_PAGE_NUM - 2);
|
|
const uint16 DW_SECOND_BUFTAG_START_IDX = 1 + DW_FIRST_DATA_PAGE_NUM + 1; /* two head */
|
|
const uint16 DW_SECOND_DATA_START_IDX = DW_SECOND_BUFTAG_START_IDX + DW_SECOND_BUFTAG_PAGE_NUM;
|
|
|
|
/*
 * Check whether a buffer state describes a valid dirty buffer.
 * When ENABLE_DMS and ENABLE_DSS_AIO both hold, the state is not inspected and
 * true is returned; otherwise BM_VALID and BM_DIRTY must both be set.
 */
inline bool dw_buf_valid_dirty(uint32 buf_state)
{
    if (!(ENABLE_DMS && ENABLE_DSS_AIO)) {
        return ((buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY));
    }

    return true;
}
|
|
|
|
/* Return true only when BM_VALID, BM_DIRTY and BM_CHECKPOINT_NEEDED are all set in buf_state. */
inline bool dw_buf_ckpt_needed(uint32 buf_state)
{
    const uint32 wanted_bits = (BM_VALID | BM_DIRTY | BM_CHECKPOINT_NEEDED);

    return ((buf_state & wanted_bits) == wanted_bits);
}
|
|
|
|
/*
 * Verify the checksum stored in the tail of a file head.
 * The checksum field is zeroed while the block checksum is recomputed and the
 * stored value is written back afterwards, so the head is unchanged on return.
 * @return true if the stored checksum equals the reduced recomputed checksum
 */
inline bool dw_verify_file_head_checksum(dw_file_head_t* file_head)
{
    const uint16 stored_cks = file_head->tail.checksum;

    file_head->tail.checksum = 0;
    const uint32 computed = pg_checksum_block((char*)file_head, sizeof(dw_file_head_t));
    file_head->tail.checksum = stored_cks;

    return (stored_cks == REDUCE_CKS2UINT16(computed));
}
|
|
|
|
/* A file head is valid when the dwn in its head and tail agree and its checksum verifies. */
inline bool dw_verify_file_head(dw_file_head_t* file_head)
{
    if (file_head->head.dwn != file_head->tail.dwn) {
        return false;
    }

    return dw_verify_file_head_checksum(file_head);
}
|
|
|
|
/*
 * Compute and store the checksum of a batch meta file structure.
 * The checksum field is zeroed first so that it does not feed into its own value.
 */
inline void dw_calc_meta_checksum(dw_batch_meta_file* meta)
{
    meta->checksum = 0;

    const uint32 computed = pg_checksum_block((char*)meta, sizeof(dw_batch_meta_file));
    meta->checksum = REDUCE_CKS2UINT16(computed);
}
|
|
|
|
|
|
/*
 * Compute and store the checksum of a file head in its tail.
 * The checksum field is zeroed first so that it does not feed into its own value.
 */
inline void dw_calc_file_head_checksum(dw_file_head_t* file_head)
{
    file_head->tail.checksum = 0;

    const uint32 computed = pg_checksum_block((char*)file_head, sizeof(dw_file_head_t));
    file_head->tail.checksum = REDUCE_CKS2UINT16(computed);
}
|
|
|
|
/*
 * Verify the page checksum of a batch page over the whole BLCKSZ block.
 * The checksum slot is zeroed while the checksum is recomputed and the stored
 * value is written back afterwards, so the page is unchanged on return.
 * @return true if the stored checksum equals the reduced recomputed checksum
 */
inline bool dw_verify_batch_checksum(dw_batch_t* batch)
{
    const uint16 stored_cks = DW_PAGE_CHECKSUM(batch);

    DW_PAGE_CHECKSUM(batch) = 0;
    const uint32 computed = pg_checksum_block((char*)batch, BLCKSZ);
    DW_PAGE_CHECKSUM(batch) = stored_cks;

    return (stored_cks == REDUCE_CKS2UINT16(computed));
}
|
|
|
|
/* A batch page is valid when the dwn in its head and in its page tail agree and its checksum verifies. */
inline bool dw_verify_page(dw_batch_t* page)
{
    if (page->head.dwn != DW_PAGE_TAIL(page)->dwn) {
        return false;
    }

    return dw_verify_batch_checksum(page);
}
|
|
|
|
/*
 * Compute the checksum of a batch page over the whole BLCKSZ block and store it
 * in the page's checksum slot. The slot is zeroed first so that it does not
 * feed into its own value.
 */
inline void dw_calc_batch_checksum(dw_batch_t* batch)
{
    DW_PAGE_CHECKSUM(batch) = 0;

    const uint32 computed = pg_checksum_block((char*)batch, BLCKSZ);
    DW_PAGE_CHECKSUM(batch) = REDUCE_CKS2UINT16(computed);
}
|
|
|
|
/*
 * Locate the tail page of a batch from its head page.
 * The tail lies one page past the data pages, i.e. (page_num + 1) blocks after
 * the head, where page_num is taken with its flag bit masked off.
 */
inline dw_batch_t* dw_batch_tail_page(dw_batch_t* head_page)
{
    const int data_pages = GET_REL_PGAENUM(head_page->page_num);
    char* batch_start = (char*)head_page;

    return (dw_batch_t*)(batch_start + BLCKSZ * (data_pages + 1));
}
|
|
|
|
/**
|
|
* verify the batch head and tail page, including dwn and checksum
|
|
* @param head_page batch head
|
|
* @param dwn double write number
|
|
* @return true dwn and checksum match
|
|
*/
|
|
inline bool dw_verify_batch(dw_batch_t* head_page, uint16 dwn)
|
|
{
|
|
if (head_page->head.dwn == dwn && dw_verify_page(head_page)) {
|
|
dw_batch_t* tail_page = dw_batch_tail_page(head_page);
|
|
return tail_page->head.dwn == dwn && dw_verify_page(tail_page);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Number of whole BLCKSZ pages between two addresses, measured from left to right. */
inline uint64 dw_page_distance(void* left, void* right)
{
    char* from = (char*)left;
    char* to = (char*)right;

    return (to - from) / BLCKSZ;
}
|
|
|
|
int64 dw_seek_file(int fd, int64 offset, int32 origin);
|
|
|
|
void dw_pread_file(int fd, void* buf, int size, int64 offset);
|
|
|
|
void dw_pwrite_file(int fd, const void* buf, int size, int64 offset, const char* fileName);
|
|
|
|
/**
 * Generate the double write file for the first boot of the database.
 */
void dw_bootstrap();

/**
 * Allocate memory, initialise the spin lock, assign the LWLock and run the
 * double write recovery. All half-written pages should be recovered once
 * this returns. It should be finished before the XLOG module starts, since
 * that module may replay redo log.
 */
void dw_init();

/* NOTE(review): no description in this header; presumably an extended variant of dw_init -- confirm. */
void dw_ext_init();
|
|
|
|
/**
|
|
* double write only work when incremental checkpoint enabled and double write enabled
|
|
* @return true if both enabled
|
|
*/
|
|
inline bool dw_enabled()
|
|
{
|
|
if (ENABLE_DMS && ENABLE_REFORM) {
|
|
if ((SS_STANDBY_PROMOTING && t_thrd.role != STARTUP && !g_instance.dms_cxt.dw_init) ||
|
|
(SS_STANDBY_MODE && !SS_STANDBY_PROMOTING) || SS_PRIMARY_DEMOTED ||
|
|
(SS_STANDBY_FAILOVER && t_thrd.role != STARTUP && !g_instance.dms_cxt.dw_init)) {
|
|
return false;
|
|
}
|
|
}
|
|
return (ENABLE_INCRE_CKPT && g_instance.attr.attr_storage.enable_double_write);
|
|
}
|
|
|
|
/**
|
|
* flush the buffers identified by the buf_id in buf_id_arr to double write file
|
|
* a token_id is returned, thus double write wish the caller to return it after the
|
|
* caller finish flushing the buffers to data file and forwarding the fsync request
|
|
* @param buf_id_arr the buffer id array which is used to get page from global buffer
|
|
* @param size the array size
|
|
*/
|
|
void dw_perform_batch_flush(uint32 size, CkptSortItem *dirty_buf_list, int thread_id, ThrdDwCxt* thrd_dw_cxt);
|
|
|
|
/**
 * Truncate the pages in the double write file after a checkpoint or before exit.
 * It waits for tokens, so that all related data file flushes and fsync requests
 * have been forwarded; it is then safe to fsync the data files and afterwards
 * to discard those pages from the double write file.
 */
void dw_truncate();
|
|
|
|
/**
 * Double write exits after XLOG has exited.
 * Data file flushing, the page writer and the checkpointer thread may still
 * be running at that point; this waits for them.
 * NOTE(review): `single` presumably selects the single flush path -- confirm.
 */
void dw_exit(bool single);
|
|
|
|
/**
|
|
* If double write is enabled and pagewriter is running,
|
|
* the dirty pages should only be flushed by pagewriter.
|
|
*/
|
|
inline bool dw_page_writer_running()
|
|
{
|
|
return (dw_enabled() && pg_atomic_read_u32(&g_instance.ckpt_cxt_ctl->current_page_writer_count) > 0);
|
|
}
|
|
|
|
/**
|
|
* If enable dms and aio, the aio_in_process should be false.
|
|
*/
|
|
inline bool dw_buf_valid_aio_finished(BufferDesc *buf_desc, uint32 buf_state)
|
|
{
|
|
if (!ENABLE_DMS || !ENABLE_DSS_AIO) {
|
|
return true;
|
|
}
|
|
|
|
return ((buf_state & BM_VALID) && ((buf_state & BM_DIRTY) || buf_desc->extra->aio_in_progress));
|
|
}
|
|
|
|
/* Declared here, defined elsewhere. */
extern bool free_space_enough(int buf_id);

/* Single flush file generation, recovery and truncation entry points; declared here, defined elsewhere. */
extern void dw_generate_single_file();
extern void dw_recovery_partial_write_single();
extern void dw_single_file_truncate(bool is_first);
extern void dw_generate_new_single_file();
extern void dw_cxt_init_single();
|
|
|
|
extern bool dw_verify_pg_checksum(PageHeader page_header, BlockNumber blockNum, bool dw_file);
|
|
extern void dw_log_recovery_page(int elevel, const char *state, BufferTag buf_tag);
|
|
extern bool dw_read_data_page(BufferTag buf_tag, SMgrRelation reln, char* data_block);
|
|
extern void dw_log_page_header(PageHeader page);
|
|
extern int buftag_compare(const void *pa, const void *pb);
|
|
extern void dw_encrypt_page(BufferTag tag, char* buf);
|
|
|
|
extern uint16 first_version_dw_single_flush(BufferDesc *buf_desc);
|
|
extern void dw_single_file_recycle(bool is_first);
|
|
extern bool backend_can_flush_dirty_page();
|
|
extern void dw_force_reset_single_file();
|
|
extern void reset_dw_pos_flag();
|
|
extern void clean_proc_dw_buf();
|
|
extern void init_proc_dw_buf();
|
|
extern void dw_prepare_file_head(char *file_head, uint16 start, uint16 dwn, int32 dw_version = -1);
|
|
extern void dw_set_pg_checksum(char *page, BlockNumber blockNum);
|
|
extern void dw_extend_file(int fd, const void *buf, int buf_size, int64 size,
|
|
int64 file_expect_size, bool single, char* file_name);
|
|
|
|
extern void dw_transfer_phybuffer_addr(const BufferDesc *buf_desc, BufferTag *buf_tag);
|
|
uint16 second_version_dw_single_flush(BufferTag tag, Block block, XLogRecPtr page_lsn,
|
|
bool encrypt, BufferTag phy_tag);
|
|
|
|
/* Segment single flush entry points and old-file helpers; declared here, defined elsewhere. */
extern uint16 seg_dw_single_flush_without_buffer(BufferTag tag, Block block, bool *flush_old_file);
extern uint16 seg_dw_single_flush(BufferDesc *buf_desc, bool *flush_old_file);
extern void wait_all_single_dw_finish_flush_old();
extern void wait_all_single_dw_finish_flush(bool is_first);
extern uint16 dw_single_flush_internal_old(BufferTag tag, Block block, XLogRecPtr page_lsn,
                                           BufferTag phy_tag, bool *dw_flush);
extern void dw_single_old_file_truncate();
|
|
|
|
extern void dw_recover_batch_meta_file(int fd, dw_batch_meta_file *batch_meta_file);
|
|
extern void dw_fetch_batch_file_name(int i, char* buf);
|
|
extern void wait_all_dw_page_finish_flush();
|
|
extern void dw_generate_meta_file(dw_batch_meta_file* batch_meta_file);
|
|
extern void dw_generate_batch_files(int batch_file_num, uint64 dw_file_size);
|
|
extern void dw_remove_batch_file(int dw_file_num);
|
|
extern void dw_remove_batch_meta_file();
|
|
extern void dw_recover_all_partial_write_batch(knl_g_dw_context *batch_cxt);
|
|
extern void dw_cxt_init_batch();
|
|
extern void dw_remove_file(const char* file_name);
|
|
extern int dw_open_file(const char* file_name);
|
|
extern int dw_create_file(const char* file_name);
|
|
extern void dw_upgrade_renable_double_write();
|
|
|
|
/*
 * Snapshot blocking hooks; declared here, defined elsewhere.
 * NOTE(review): presumably block/release double write around a snapshot and query that state -- confirm.
 */
extern void dw_blocked_for_snapshot();
extern void dw_released_after_snapshot();
extern bool is_dw_snapshot_blocked();
|
|
|
|
|
|
#endif /* DOUBLE_WRITE_H */
|