Fix read log failure during flashback
This commit is contained in:
@ -66,45 +66,46 @@ bool ObSimpleLogClusterTestBase::need_add_arb_server_ = false;
|
||||
constexpr int64_t timeout_ts_us = 3 * 1000 * 1000;
|
||||
|
||||
|
||||
//TEST_F(TestObSimpleLogClusterRestart, read_block_in_flashback)
|
||||
//{
|
||||
// disable_hot_cache_ = true;
|
||||
// SET_CASE_LOG_FILE(TEST_NAME, "read_block_in_flashback");
|
||||
// OB_LOGGER.set_log_level("TRACE");
|
||||
// const int64_t id = ATOMIC_AAF(&palf_id_, 1);
|
||||
// int64_t leader_idx = 0;
|
||||
// PalfHandleImplGuard leader;
|
||||
// PalfEnv *palf_env = NULL;
|
||||
// EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader));
|
||||
//
|
||||
// EXPECT_EQ(OB_SUCCESS, submit_log(leader, 2 * 32 + 2, id, MAX_LOG_BODY_SIZE));
|
||||
// EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.get_palf_handle_impl()->get_max_lsn()));
|
||||
//
|
||||
// block_id_t min_block_id, max_block_id;
|
||||
// LogStorage *log_storage = &leader.get_palf_handle_impl()->log_engine_.log_storage_;
|
||||
// EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id));
|
||||
// EXPECT_EQ(2, max_block_id);
|
||||
// SCN scn;
|
||||
// char block_name_tmp[OB_MAX_FILE_NAME_LENGTH];
|
||||
// EXPECT_EQ(OB_SUCCESS, block_id_to_tmp_string(max_block_id, block_name_tmp, OB_MAX_FILE_NAME_LENGTH));
|
||||
// char block_name[OB_MAX_FILE_NAME_LENGTH];
|
||||
// EXPECT_EQ(OB_SUCCESS, block_id_to_string(max_block_id, block_name, OB_MAX_FILE_NAME_LENGTH));
|
||||
// ::renameat(log_storage->block_mgr_.dir_fd_, block_name, log_storage->block_mgr_.dir_fd_, block_name_tmp);
|
||||
// EXPECT_EQ(-1, ::openat(log_storage->block_mgr_.dir_fd_, block_name, LOG_READ_FLAG));
|
||||
// EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
||||
// EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
||||
//
|
||||
// // 测试边界场景,read_log_tail_为文件中间,最后一个文件完全被flashback掉, 此时log_tail_是最后一个文件头
|
||||
// log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||
// EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
||||
// EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
||||
//
|
||||
// // 测试边界场景,read_log_tail_最后一个文件头,最后一个文件完全被flashback掉
|
||||
// log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||
// log_storage->readable_log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||
// EXPECT_EQ(OB_ITER_END, read_log(leader));
|
||||
// EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn));
|
||||
//
|
||||
TEST_F(TestObSimpleLogClusterRestart, read_block_in_flashback)
|
||||
{
|
||||
disable_hot_cache_ = true;
|
||||
SET_CASE_LOG_FILE(TEST_NAME, "read_block_in_flashback");
|
||||
OB_LOGGER.set_log_level("TRACE");
|
||||
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
|
||||
int64_t leader_idx = 0;
|
||||
PalfHandleImplGuard leader;
|
||||
PalfEnv *palf_env = NULL;
|
||||
EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader));
|
||||
|
||||
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 2 * 32 + 2, id, MAX_LOG_BODY_SIZE));
|
||||
EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.get_palf_handle_impl()->get_max_lsn()));
|
||||
|
||||
block_id_t min_block_id, max_block_id;
|
||||
LogStorage *log_storage = &leader.get_palf_handle_impl()->log_engine_.log_storage_;
|
||||
EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id));
|
||||
EXPECT_EQ(2, max_block_id);
|
||||
SCN scn;
|
||||
char block_name_tmp[OB_MAX_FILE_NAME_LENGTH];
|
||||
EXPECT_EQ(OB_SUCCESS, block_id_to_tmp_string(max_block_id, block_name_tmp, OB_MAX_FILE_NAME_LENGTH));
|
||||
char block_name[OB_MAX_FILE_NAME_LENGTH];
|
||||
EXPECT_EQ(OB_SUCCESS, block_id_to_string(max_block_id, block_name, OB_MAX_FILE_NAME_LENGTH));
|
||||
::renameat(log_storage->block_mgr_.dir_fd_, block_name, log_storage->block_mgr_.dir_fd_, block_name_tmp);
|
||||
EXPECT_EQ(-1, ::openat(log_storage->block_mgr_.dir_fd_, block_name, LOG_READ_FLAG));
|
||||
EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
||||
EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
||||
|
||||
// Boundary case: read_log_tail_ is in the middle of a file and the last file has been entirely flashed back; at this point log_tail_ is at the head of the last file
|
||||
log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||
EXPECT_EQ(OB_NEED_RETRY, read_log(leader));
|
||||
EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn));
|
||||
|
||||
// Boundary case: read_log_tail_ is at the head of the last file, and the last file has been entirely flashed back
|
||||
log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||
log_storage->readable_log_tail_ = LSN(2*PALF_BLOCK_SIZE);
|
||||
EXPECT_EQ(OB_ITER_END, read_log(leader));
|
||||
EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn));
|
||||
|
||||
// This scenario is hard to simulate; consider introducing debug sync
|
||||
// // 测试边界场景,readable_log_tail_还没改变前检验是否可读通过,直接读文件时报错文件不存在。
|
||||
// log_storage->log_tail_ = LSN(3*PALF_BLOCK_SIZE);
|
||||
// log_storage->readable_log_tail_ = LSN(3*PALF_BLOCK_SIZE);
|
||||
@ -113,8 +114,8 @@ constexpr int64_t timeout_ts_us = 3 * 1000 * 1000;
|
||||
// // log_storage返回OB_ERR_OUT_OF_UPPER_BOUND, iterator将其转换为OB_ITER_END
|
||||
// EXPECT_EQ(OB_ITER_END, read_log(leader));
|
||||
// EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn));
|
||||
//}
|
||||
//
|
||||
}
|
||||
|
||||
TEST_F(TestObSimpleLogClusterRestart, restart_when_first_log_block_is_empty)
|
||||
{
|
||||
SET_CASE_LOG_FILE(TEST_NAME, "restart_when_first_log_block_is_empty");
|
||||
|
@ -678,36 +678,51 @@ int LogStorage::check_read_out_of_bound_(const block_id_t &block_id,
|
||||
block_id_t max_block_id = LOG_INVALID_BLOCK_ID;
|
||||
LSN readable_log_tail;
|
||||
int64_t curr_flashback_version = OB_INVALID_TIMESTAMP;
|
||||
get_readable_log_tail_guarded_by_lock_(readable_log_tail, curr_flashback_version);
|
||||
block_id_t readable_end_block_id = lsn_2_block(readable_log_tail, logical_block_size_);
|
||||
block_id_t readable_end_block_id = LOG_INVALID_BLOCK_ID;
|
||||
// call get_block_id_range first, to ensure that readable_end_block_id is smaller than or equal to max_block_id
|
||||
// before write any new data.
|
||||
if (OB_FAIL(get_block_id_range(min_block_id, max_block_id)) && OB_ENTRY_NOT_EXIST != ret) {
|
||||
PALF_LOG(ERROR, "get_block_id_range failed", K(ret), K(min_block_id), K(max_block_id));
|
||||
// get_readable_log_tail_guarded_by_lock_ is a barrier point, all read operations
|
||||
// can read intact data if flashback_version is the same as curr_flashback_version
|
||||
} else if (FALSE_IT(get_readable_log_tail_guarded_by_lock_(readable_log_tail, curr_flashback_version))) {
|
||||
} else if (FALSE_IT(readable_end_block_id = lsn_2_block(readable_log_tail, logical_block_size_))) {
|
||||
// if read data is concurrently with flashback, return OB_NEED_RETRY.
|
||||
// to avoid unnecessary failures, only check flashback_version when the block being read is one that will be overwritten.
|
||||
// NB: updating 'readable_log_tail_' and 'flashback_version_' is atomic, and the update is performed before
|
||||
// overwriting.
|
||||
if (block_id >= readable_end_block_id && flashback_version != curr_flashback_version) {
|
||||
} else if (block_id >= readable_end_block_id && flashback_version != curr_flashback_version) {
|
||||
ret = OB_NEED_RETRY;
|
||||
PALF_LOG(WARN, "there is flashbacking during read data, need read retry",
|
||||
KPC(this), K(flashback_version), K(curr_flashback_version),
|
||||
K(min_block_id), K(max_block_id), K(block_id));
|
||||
} else if (OB_FAIL(get_block_id_range(min_block_id, max_block_id))
|
||||
&& OB_ENTRY_NOT_EXIST != ret) {
|
||||
PALF_LOG(ERROR, "get_block_id_range failed", K(ret), K(min_block_id), K(max_block_id));
|
||||
// double check after read data. the block whose name is smaller than 'min_block_id' has been deleted
|
||||
// by GC or rebuild, and the data which was read successfully may lack integrity, therefore return OB_ERR_OUT_OF_LOWER_BOUND
|
||||
} else if (min_block_id > block_id) {
|
||||
ret = OB_ERR_OUT_OF_LOWER_BOUND;
|
||||
PALF_LOG(INFO, "read something out of lower bound, the block may be deleted by GC or rebuild",
|
||||
K(min_block_id), K(max_block_id), K(block_id));
|
||||
// there is no possibility read data out of upper bound because we have checked flashback_version.
|
||||
// it is impossible to read data beyond the upper bound because we have checked flashback_version and checked
|
||||
// whether read_lsn is greater than readable_log_tail before 'check_read_out_of_bound_'.
|
||||
} else if (block_id > max_block_id) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
PALF_LOG(ERROR, "unexpected error, the block to be read is greater than max_block_id",
|
||||
K(min_block_id), K(max_block_id), K(block_id));
|
||||
}
|
||||
// if there is no block whose names with 'block_id' and 'block_id' is in range of [min_block_id, max_block_id]
|
||||
if (OB_SUCC(ret) && no_such_block) {
|
||||
// if there is no block named 'block_id' while 'block_id' is in the range [min_block_id, max_block_id),
|
||||
// return OB_ERR_UNEXPECTED.
|
||||
if (OB_SUCC(ret) && no_such_block
|
||||
&& min_block_id <= block_id && block_id <= max_block_id) {
|
||||
if (min_block_id <= block_id && block_id < max_block_id) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
PALF_LOG(ERROR, "unexpected error, the block may be deleted by human", K(min_block_id), K(max_block_id), K(block_id));
|
||||
PALF_LOG(ERROR, "unexpected error, the block may be deleted by human", KPC(this), K(flashback_version),
|
||||
K(min_block_id), K(max_block_id), K(block_id));
|
||||
// if max_block_id == block_id, it means that the block named 'block_id' is being renamed during
|
||||
// flashback, therefore return OB_NEED_RETRY.
|
||||
} else if (max_block_id == block_id) {
|
||||
ret = OB_NEED_RETRY;
|
||||
PALF_LOG(WARN, "in flashback, the block is renaming", KPC(this), K(flashback_version), K(min_block_id),
|
||||
K(max_block_id), K(block_id));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
Reference in New Issue
Block a user