Fix read log failure during flashback
This commit is contained in:
@ -66,55 +66,56 @@ bool ObSimpleLogClusterTestBase::need_add_arb_server_ = false;
|
|||||||
constexpr int64_t timeout_ts_us = 3 * 1000 * 1000;
|
constexpr int64_t timeout_ts_us = 3 * 1000 * 1000;
|
||||||
|
|
||||||
|
|
||||||
//TEST_F(TestObSimpleLogClusterRestart, read_block_in_flashback)
|
TEST_F(TestObSimpleLogClusterRestart, read_block_in_flashback)
|
||||||
//{
|
{
|
||||||
// disable_hot_cache_ = true;
|
disable_hot_cache_ = true;
|
||||||
// SET_CASE_LOG_FILE(TEST_NAME, "read_block_in_flashback");
|
SET_CASE_LOG_FILE(TEST_NAME, "read_block_in_flashback");
|
||||||
// OB_LOGGER.set_log_level("TRACE");
|
OB_LOGGER.set_log_level("TRACE");
|
||||||
// const int64_t id = ATOMIC_AAF(&palf_id_, 1);
|
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
|
||||||
// int64_t leader_idx = 0;
|
int64_t leader_idx = 0;
|
||||||
// PalfHandleImplGuard leader;
|
PalfHandleImplGuard leader;
|
||||||
// PalfEnv *palf_env = NULL;
|
PalfEnv *palf_env = NULL;
|
||||||
// EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader));
|
EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader));
|
||||||
//
|
|
||||||
// EXPECT_EQ(OB_SUCCESS, submit_log(leader, 2 * 32 + 2, id, MAX_LOG_BODY_SIZE));
|
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 2 * 32 + 2, id, MAX_LOG_BODY_SIZE));
|
||||||
// EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.get_palf_handle_impl()->get_max_lsn()));
|
EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.get_palf_handle_impl()->get_max_lsn()));
|
||||||
//
|
|
||||||
// block_id_t min_block_id, max_block_id;
|
block_id_t min_block_id, max_block_id;
|
||||||
// LogStorage *log_storage = &leader.get_palf_handle_impl()->log_engine_.log_storage_;
|
LogStorage *log_storage = &leader.get_palf_handle_impl()->log_engine_.log_storage_;
|
||||||
// EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id));
|
EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id));
|
||||||
// EXPECT_EQ(2, max_block_id);
|
EXPECT_EQ(2, max_block_id);
|
||||||
// SCN scn;
|
SCN scn;
|
||||||
// char block_name_tmp[OB_MAX_FILE_NAME_LENGTH];
|
char block_name_tmp[OB_MAX_FILE_NAME_LENGTH];
|
||||||
// EXPECT_EQ(OB_SUCCESS, block_id_to_tmp_string(max_block_id, block_name_tmp, OB_MAX_FILE_NAME_LENGTH));
|
EXPECT_EQ(OB_SUCCESS, block_id_to_tmp_string(max_block_id, block_name_tmp, OB_MAX_FILE_NAME_LENGTH));
|
||||||
// char block_name[OB_MAX_FILE_NAME_LENGTH];
|
char block_name[OB_MAX_FILE_NAME_LENGTH];
|
||||||
// EXPECT_EQ(OB_SUCCESS, block_id_to_string(max_block_id, block_name, OB_MAX_FILE_NAME_LENGTH));
|
EXPECT_EQ(OB_SUCCESS, block_id_to_string(max_block_id, block_name, OB_MAX_FILE_NAME_LENGTH));
|
||||||
// ::renameat(log_storage->block_mgr_.dir_fd_, block_name, log_storage->block_mgr_.dir_fd_, block_name_tmp);
|
::renameat(log_storage->block_mgr_.dir_fd_, block_name, log_storage->block_mgr_.dir_fd_, block_name_tmp);
|
||||||
// EXPECT_EQ(-1, ::openat(log_storage->block_mgr_.dir_fd_, block_name, LOG_READ_FLAG));
|
EXPECT_EQ(-1, ::openat(log_storage->block_mgr_.dir_fd_, block_name, LOG_READ_FLAG));
|
||||||
// EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
||||||
// EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
||||||
//
|
|
||||||
// // 测试边界场景,read_log_tail_为文件中间,最后一个文件完全被flashback掉, 此时log_tail_是最后一个文件头
|
// 测试边界场景,read_log_tail_为文件中间,最后一个文件完全被flashback掉, 此时log_tail_是最后一个文件头
|
||||||
// log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||||
// EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
||||||
// EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
||||||
//
|
|
||||||
// // 测试边界场景,read_log_tail_最后一个文件头,最后一个文件完全被flashback掉
|
// 测试边界场景,read_log_tail_最后一个文件头,最后一个文件完全被flashback掉
|
||||||
// log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||||
// log_storage->readable_log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
log_storage->readable_log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||||
// EXPECT_EQ(OB_ITER_END, read_log(leader));
|
EXPECT_EQ(OB_ITER_END, read_log(leader));
|
||||||
// EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn));
|
EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn));
|
||||||
//
|
|
||||||
// // 测试边界场景,readable_log_tail_还没改变前检验是否可读通过,直接读文件时报错文件不存在。
|
// 不太好模拟这种场景,考虑引入debug sync
|
||||||
// log_storage->log_tail_ = LSN(3*PALF_BLOCK_SIZE);
|
// // 测试边界场景,readable_log_tail_还没改变前检验是否可读通过,直接读文件时报错文件不存在。
|
||||||
// log_storage->readable_log_tail_ = LSN(3*PALF_BLOCK_SIZE);
|
// log_storage->log_tail_ = LSN(3*PALF_BLOCK_SIZE);
|
||||||
// // 设置max_block_id_为1是为了构造check_read_out_of_bound返回OB_ERR_OUT_OF_UPPER_BOUND的场景
|
// log_storage->readable_log_tail_ = LSN(3*PALF_BLOCK_SIZE);
|
||||||
// log_storage->block_mgr_.max_block_id_ = 1;
|
// // 设置max_block_id_为1是为了构造check_read_out_of_bound返回OB_ERR_OUT_OF_UPPER_BOUND的场景
|
||||||
// // log_storage返回OB_ERR_OUT_OF_UPPER_BOUND, iterator将其转换为OB_ITER_END
|
// log_storage->block_mgr_.max_block_id_ = 1;
|
||||||
// EXPECT_EQ(OB_ITER_END, read_log(leader));
|
// // log_storage返回OB_ERR_OUT_OF_UPPER_BOUND, iterator将其转换为OB_ITER_END
|
||||||
// EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn));
|
// EXPECT_EQ(OB_ITER_END, read_log(leader));
|
||||||
//}
|
// EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn));
|
||||||
//
|
}
|
||||||
|
|
||||||
TEST_F(TestObSimpleLogClusterRestart, restart_when_first_log_block_is_empty)
|
TEST_F(TestObSimpleLogClusterRestart, restart_when_first_log_block_is_empty)
|
||||||
{
|
{
|
||||||
SET_CASE_LOG_FILE(TEST_NAME, "restart_when_first_log_block_is_empty");
|
SET_CASE_LOG_FILE(TEST_NAME, "restart_when_first_log_block_is_empty");
|
||||||
|
|||||||
@ -678,36 +678,51 @@ int LogStorage::check_read_out_of_bound_(const block_id_t &block_id,
|
|||||||
block_id_t max_block_id = LOG_INVALID_BLOCK_ID;
|
block_id_t max_block_id = LOG_INVALID_BLOCK_ID;
|
||||||
LSN readable_log_tail;
|
LSN readable_log_tail;
|
||||||
int64_t curr_flashback_version = OB_INVALID_TIMESTAMP;
|
int64_t curr_flashback_version = OB_INVALID_TIMESTAMP;
|
||||||
get_readable_log_tail_guarded_by_lock_(readable_log_tail, curr_flashback_version);
|
block_id_t readable_end_block_id = LOG_INVALID_BLOCK_ID;
|
||||||
block_id_t readable_end_block_id = lsn_2_block(readable_log_tail, logical_block_size_);
|
// get_block_id_range firstly, ensure that readable_end_block_id is smaller than or equal to max_block_id
|
||||||
|
// before write any new data.
|
||||||
|
if (OB_FAIL(get_block_id_range(min_block_id, max_block_id)) && OB_ENTRY_NOT_EXIST != ret) {
|
||||||
|
PALF_LOG(ERROR, "get_block_id_range failed", K(ret), K(min_block_id), K(max_block_id));
|
||||||
|
// get_readable_log_tail_guarded_by_lock_ is a barrier point, all read operations
|
||||||
|
// can read intact data if flashback_version is the same as curr_flashback_version
|
||||||
|
} else if (FALSE_IT(get_readable_log_tail_guarded_by_lock_(readable_log_tail, curr_flashback_version))) {
|
||||||
|
} else if (FALSE_IT(readable_end_block_id = lsn_2_block(readable_log_tail, logical_block_size_))) {
|
||||||
// if read data is concurrently with flashback, return OB_NEED_RETRY.
|
// if read data is concurrently with flashback, return OB_NEED_RETRY.
|
||||||
// to avoid unnecessary failure, only check flashback_version when read block need to be overwriting.
|
// to avoid unnecessary failure, only check flashback_version when read block need to be overwriting.
|
||||||
// NB: updating 'readable_log_tail_' and 'flashback_version_' is atomic, and the update is performed before
|
// NB: updating 'readable_log_tail_' and 'flashback_version_' is atomic, and the update is performed before
|
||||||
// overwriting.
|
// overwriting.
|
||||||
if (block_id >= readable_end_block_id && flashback_version != curr_flashback_version) {
|
} else if (block_id >= readable_end_block_id && flashback_version != curr_flashback_version) {
|
||||||
ret = OB_NEED_RETRY;
|
ret = OB_NEED_RETRY;
|
||||||
PALF_LOG(WARN, "there is flashbacking during read data, need read retry",
|
PALF_LOG(WARN, "there is flashbacking during read data, need read retry",
|
||||||
KPC(this), K(flashback_version), K(curr_flashback_version),
|
KPC(this), K(flashback_version), K(curr_flashback_version),
|
||||||
K(min_block_id), K(max_block_id), K(block_id));
|
K(min_block_id), K(max_block_id), K(block_id));
|
||||||
} else if (OB_FAIL(get_block_id_range(min_block_id, max_block_id))
|
// double check after read data. the block whose name is smaller than 'min_block_id' has been deleted
|
||||||
&& OB_ENTRY_NOT_EXIST != ret) {
|
// by GC or rebuild, and the data which was read successfully may be incomplete, therefore return OB_ERR_OUT_OF_LOWER_BOUND
|
||||||
PALF_LOG(ERROR, "get_block_id_range failed", K(ret), K(min_block_id), K(max_block_id));
|
|
||||||
} else if (min_block_id > block_id) {
|
} else if (min_block_id > block_id) {
|
||||||
ret = OB_ERR_OUT_OF_LOWER_BOUND;
|
ret = OB_ERR_OUT_OF_LOWER_BOUND;
|
||||||
PALF_LOG(INFO, "read something out of lower bound, the block may be deleted by GC or rebuild",
|
PALF_LOG(INFO, "read something out of lower bound, the block may be deleted by GC or rebuild",
|
||||||
K(min_block_id), K(max_block_id), K(block_id));
|
K(min_block_id), K(max_block_id), K(block_id));
|
||||||
// there is no possibility read data out of upper bound because we have checked flashback_version.
|
// there is no possibility of reading data out of upper bound because we have checked flashback_version and checked
|
||||||
|
// whether read_lsn is greater than readable_log_tail before 'check_read_out_of_bound_'.
|
||||||
} else if (block_id > max_block_id) {
|
} else if (block_id > max_block_id) {
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
PALF_LOG(ERROR, "unexpected error, the block to be read is greater than max_block_id",
|
PALF_LOG(ERROR, "unexpected error, the block to be read is greater than max_block_id",
|
||||||
K(min_block_id), K(max_block_id), K(block_id));
|
K(min_block_id), K(max_block_id), K(block_id));
|
||||||
}
|
}
|
||||||
// if there is no block named 'block_id' and 'block_id' is in the range [min_block_id, max_block_id]
|
if (OB_SUCC(ret) && no_such_block) {
|
||||||
// return OB_ERR_UNEXPECTED.
|
// if there is no block named 'block_id' and 'block_id' is in the range [min_block_id, max_block_id)
|
||||||
if (OB_SUCC(ret) && no_such_block
|
// return OB_ERR_UNEXPECTED.
|
||||||
&& min_block_id <= block_id && block_id <= max_block_id) {
|
if (min_block_id <= block_id && block_id < max_block_id) {
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
PALF_LOG(ERROR, "unexpected error, the block may be deleted by human", K(min_block_id), K(max_block_id), K(block_id));
|
PALF_LOG(ERROR, "unexpected error, the block may be deleted by human", KPC(this), K(flashback_version),
|
||||||
|
K(min_block_id), K(max_block_id), K(block_id));
|
||||||
|
// if max_block_id == block_id, it means that the block named 'block_id' is being renamed during
|
||||||
|
// flashback, therefore return OB_NEED_RETRY.
|
||||||
|
} else if (max_block_id == block_id) {
|
||||||
|
ret = OB_NEED_RETRY;
|
||||||
|
PALF_LOG(WARN, "in flashback, the block is renaming", KPC(this), K(flashback_version), K(min_block_id),
|
||||||
|
K(max_block_id), K(block_id));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user