From 4d44fedfa1a9bf4a19daaf925795c74a604cce31 Mon Sep 17 00:00:00 2001 From: HaHaJeff Date: Tue, 11 Jul 2023 14:25:42 +0000 Subject: [PATCH] remove defensive code which will cause restart to fail in a corner case --- mittest/logservice/CMakeLists.txt | 1 + .../test_ob_simple_log_basic_func.cpp | 99 ---- .../logservice/test_ob_simple_log_restart.cpp | 542 ++++++++++++++++++ ...test_ob_simple_log_single_replica_func.cpp | 247 -------- src/logservice/palf/log_engine.cpp | 7 +- src/logservice/palf/palf_env_impl.cpp | 6 +- src/logservice/palf/palf_env_impl.h | 2 +- 7 files changed, 554 insertions(+), 350 deletions(-) create mode 100644 mittest/logservice/test_ob_simple_log_restart.cpp diff --git a/mittest/logservice/CMakeLists.txt b/mittest/logservice/CMakeLists.txt index 3d4e359fd1..61e45dc5cf 100644 --- a/mittest/logservice/CMakeLists.txt +++ b/mittest/logservice/CMakeLists.txt @@ -37,6 +37,7 @@ ob_unittest_clog(test_ob_simple_log_throttling_arb test_ob_simple_log_throttling ob_unittest_clog(test_ob_simple_log_config_change_mock_ele test_ob_simple_log_config_change_mock_ele.cpp) ob_unittest_clog(test_ob_simple_log_arb_mock_ele test_ob_simple_log_arb_mock_ele.cpp) ob_unittest_clog(test_ob_simple_log_flashback_arb test_ob_simple_log_flashback_arb.cpp) +ob_unittest_clog(test_ob_simple_log_restart test_ob_simple_log_restart.cpp) add_subdirectory(archiveservice) diff --git a/mittest/logservice/test_ob_simple_log_basic_func.cpp b/mittest/logservice/test_ob_simple_log_basic_func.cpp index 73c2de2a4e..e3d3485650 100644 --- a/mittest/logservice/test_ob_simple_log_basic_func.cpp +++ b/mittest/logservice/test_ob_simple_log_basic_func.cpp @@ -142,105 +142,6 @@ TEST_F(TestObSimpleLogClusterBasicFunc, test_max_padding_size) PALF_LOG(INFO, "end test submit_log", K(id), K(guard)); } -TEST_F(TestObSimpleLogClusterBasicFunc, restart_and_clear_tmp_files) -{ - SET_CASE_LOG_FILE(TEST_NAME, "restart_and_clear_tmp_files"); - ObTimeGuard guard("restart_and_clear_tmp_files", 0); - const int64_t
id = ATOMIC_AAF(&palf_id_, 1); - int64_t leader_idx = 0; - std::string logserver_dir; - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); - guard.click("create"); - logserver_dir = leader.palf_env_impl_->log_dir_; - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, leader_idx, 1 * 1024 * 1024)); - guard.click("submit_log"); - while (leader.palf_handle_impl_->get_end_lsn() - < LSN(100 * 1024 * 1024ul)) { - usleep(100 * 1000); - } - } - const std::string base_dir = logserver_dir; - const std::string tmp_1_dir = base_dir + "/10000.tmp/log/"; - const std::string mkdir_tmp_1 = "mkdir -p " + tmp_1_dir; - const std::string dir_2 = base_dir + "/10000000"; - const std::string dir_normal_file = base_dir + "/10000000/log/1"; - const std::string dir_normal_file1 = base_dir + "/10000000/meta/"; - const std::string mkdir_2 = "mkdir -p " + dir_normal_file; - const std::string mkdir_3 = "mkdir -p " + dir_normal_file1; - system(mkdir_tmp_1.c_str()); - system(mkdir_2.c_str()); - system(mkdir_3.c_str()); - int ret = OB_SUCCESS; - guard.click("mkdir"); - EXPECT_EQ(OB_ERR_UNEXPECTED, restart_paxos_groups()); - CLOG_LOG(INFO, "after restart_paxos_groups after exist tmp dir"); - guard.click("restart"); - const std::string rm_dir_2 = "rm -rf " + dir_2; - system(rm_dir_2.c_str()); - guard.click("rm_dir"); - if (OB_FAIL(restart_paxos_groups())) { - PALF_LOG(ERROR, "restart_paxos_groups failed", K(ret)); - } else { - { - CLOG_LOG(INFO, "after restart_paxos_groups after remove tmp dir"); - guard.click("restart"); - bool result = false; - EXPECT_EQ(OB_SUCCESS, - common::FileDirectoryUtils::is_exists(tmp_1_dir.c_str(), result)); - EXPECT_EQ(result, false); - PalfHandleImplGuard leader1; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader1, leader_idx)); - guard.click("get_leader"); - LogStorage *log_storage = - &leader1.palf_handle_impl_->log_engine_.log_storage_; - LSN lsn_origin_log_tail = log_storage->get_log_tail_guarded_by_lock_(); - 
EXPECT_EQ(OB_SUCCESS, submit_log(leader1, 10, leader_idx, 1 * 1024 * 1024)); - while (log_storage->log_tail_ == lsn_origin_log_tail) { - usleep(1 * 1000); - PALF_LOG(INFO, "log_tail is same", KPC(log_storage), K(lsn_origin_log_tail)); - } - guard.click("submit_log"); - EXPECT_EQ(OB_ITER_END, read_log(leader1)); - guard.click("read_log"); - PALF_LOG(INFO, "finish read_log", KPC(log_storage), K(lsn_origin_log_tail), KPC(leader1.palf_handle_impl_)); - } - // 验证tenant下有临时文件的场景,该临时文件需要归还给log_pool - { - PalfHandleImplGuard leader1; - int64_t leader_idx1 = 0; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader1, leader_idx1)); - std::string palf_log_dir = leader1.palf_handle_impl_->log_engine_.log_storage_.block_mgr_.log_dir_; - ObISimpleLogServer *i_server = get_cluster()[leader_idx1]; - ObSimpleLogServer *server = dynamic_cast(i_server); - std::string log_pool = server->log_block_pool_.log_pool_path_; - const block_id_t min_block_id = server->log_block_pool_.min_block_id_; - char src[1024] = {'\0'}; - char dest[1024] = {'\0'}; - block_id_to_tmp_string(10000, dest, 1024); - block_id_to_string(min_block_id, src, 1024); - std::string src_str = log_pool + "/" + src; - std::string dest_str = palf_log_dir + "/" + dest; - std::string mv_system = "mv " + src_str + " " + dest_str; - system(mv_system.c_str()); - bool result1 = false; - EXPECT_EQ(OB_SUCCESS, - common::FileDirectoryUtils::is_exists(dest_str.c_str(), result1)); - EXPECT_EQ(true, result1); - leader1.reset(); - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - EXPECT_EQ(OB_SUCCESS, - common::FileDirectoryUtils::is_exists(dest_str.c_str(), result1)); - EXPECT_EQ(false, result1); - } - EXPECT_EQ(OB_SUCCESS, remove_dir()); - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - } - EXPECT_EQ(OB_SUCCESS, ret); - PALF_LOG(INFO, "end test restart", K(id), K(guard)); -} - bool check_locate_correct(const std::vector &lsn_array, const std::vector &scn_array, const share::SCN input_scn, const LSN result_lsn, diff --git 
a/mittest/logservice/test_ob_simple_log_restart.cpp b/mittest/logservice/test_ob_simple_log_restart.cpp new file mode 100644 index 0000000000..f636bdc8d1 --- /dev/null +++ b/mittest/logservice/test_ob_simple_log_restart.cpp @@ -0,0 +1,542 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "lib/ob_define.h" +#include "lib/ob_errno.h" +#include +#include +#include +#include +#define private public +#define protected public +#include "env/ob_simple_log_cluster_env.h" +#undef private +#undef protected +#include "logservice/palf/log_reader_utils.h" +#include "logservice/palf/log_define.h" +#include "logservice/palf/log_group_entry_header.h" +#include "logservice/palf/log_io_worker.h" +#include "logservice/palf/lsn.h" + +const std::string TEST_NAME = "log_restart"; +using namespace oceanbase::common; +using namespace oceanbase; +namespace oceanbase +{ +using namespace logservice; +namespace unittest +{ +class TestObSimpleLogClusterRestart : public ObSimpleLogClusterTestEnv +{ +public: + TestObSimpleLogClusterRestart() : ObSimpleLogClusterTestEnv() + { + int ret = init(); + if (OB_SUCCESS != ret) { + throw std::runtime_error("TestObSimpleLogClusterLogEngine init failed"); + } + } + ~TestObSimpleLogClusterRestart() + { + destroy(); + } + int init() + { + return OB_SUCCESS; + } + void destroy() + {} + int64_t id_; + PalfHandleImplGuard leader_; +}; + +int64_t ObSimpleLogClusterTestBase::member_cnt_ = 1; +int64_t ObSimpleLogClusterTestBase::node_cnt_ = 1; +std::string 
ObSimpleLogClusterTestBase::test_name_ = TEST_NAME; +bool ObSimpleLogClusterTestBase::need_add_arb_server_ = false; +constexpr int64_t timeout_ts_us = 3 * 1000 * 1000; + + +TEST_F(TestObSimpleLogClusterRestart, read_block_in_flashback) +{ + disable_hot_cache_ = true; + SET_CASE_LOG_FILE(TEST_NAME, "read_block_in_flashback"); + OB_LOGGER.set_log_level("TRACE"); + const int64_t id = ATOMIC_AAF(&palf_id_, 1); + int64_t leader_idx = 0; + PalfHandleImplGuard leader; + PalfEnv *palf_env = NULL; + EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); + + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 2 * 32 + 2, id, MAX_LOG_BODY_SIZE)); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.get_palf_handle_impl()->get_max_lsn())); + + block_id_t min_block_id, max_block_id; + LogStorage *log_storage = &leader.get_palf_handle_impl()->log_engine_.log_storage_; + EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); + EXPECT_EQ(2, max_block_id); + SCN scn; + char block_name_tmp[OB_MAX_FILE_NAME_LENGTH]; + EXPECT_EQ(OB_SUCCESS, block_id_to_tmp_string(max_block_id, block_name_tmp, OB_MAX_FILE_NAME_LENGTH)); + char block_name[OB_MAX_FILE_NAME_LENGTH]; + EXPECT_EQ(OB_SUCCESS, block_id_to_string(max_block_id, block_name, OB_MAX_FILE_NAME_LENGTH)); + ::renameat(log_storage->block_mgr_.dir_fd_, block_name, log_storage->block_mgr_.dir_fd_, block_name_tmp); + EXPECT_EQ(-1, ::openat(log_storage->block_mgr_.dir_fd_, block_name, LOG_READ_FLAG)); + EXPECT_EQ(OB_NEED_RETRY, read_log(leader)); + EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn)); + + // 测试边界场景,read_log_tail_为文件中间,最后一个文件完全被flashback掉, 此时log_tail_是最后一个文件头 + log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE); + EXPECT_EQ(OB_NEED_RETRY, read_log(leader)); + EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn)); + + // 测试边界场景,read_log_tail_最后一个文件头,最后一个文件完全被flashback掉 + log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE); + 
log_storage->readable_log_tail_ = LSN(2*PALF_BLOCK_SIZE); + EXPECT_EQ(OB_ITER_END, read_log(leader)); + EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn)); + + // 测试边界场景,readable_log_tail_还没改变前检验是否可读通过,直接读文件时报错文件不存在。 + log_storage->log_tail_ = LSN(3*PALF_BLOCK_SIZE); + log_storage->readable_log_tail_ = LSN(3*PALF_BLOCK_SIZE); + // 设置max_block_id_为1是为了构造check_read_out_of_bound返回OB_ERR_OUT_OF_UPPER_BOUND的场景 + log_storage->block_mgr_.max_block_id_ = 1; + // log_storage返回OB_ERR_OUT_OF_UPPER_BOUND, iterator将其转换为OB_ITER_END + EXPECT_EQ(OB_ITER_END, read_log(leader)); + EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn)); +} + +TEST_F(TestObSimpleLogClusterRestart, restart_when_first_log_block_is_empty) +{ + SET_CASE_LOG_FILE(TEST_NAME, "restart_when_first_log_block_is_empty"); + OB_LOGGER.set_log_level("TRACE"); + const int64_t id = ATOMIC_AAF(&palf_id_, 1); + int64_t leader_idx = 0; + // 创建日志流后不写入任何数据 + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); + } + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + // 测试truncate场景 + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + EXPECT_EQ(LSN(PALF_INITIAL_LSN_VAL), leader.palf_handle_impl_->get_max_lsn()); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); + EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->log_engine_.truncate(LSN(0))); + } + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + LSN rebuild_lsn(2*PALF_BLOCK_SIZE); + // 测试rebuild场景 + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + EXPECT_EQ(LSN(PALF_INITIAL_LSN_VAL), leader.palf_handle_impl_->get_max_lsn()); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 40, id, MAX_LOG_BODY_SIZE)); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, 
leader.palf_handle_impl_->get_max_lsn())); + PalfBaseInfo base_info; + base_info.generate_by_default(); + base_info.curr_lsn_ = rebuild_lsn; + base_info.prev_log_info_.accum_checksum_ = 10000; + base_info.prev_log_info_.log_id_ = 100; + base_info.prev_log_info_.lsn_ = rebuild_lsn - 4096; + base_info.prev_log_info_.log_proposal_id_ = 2; + base_info.prev_log_info_.scn_ = leader.palf_handle_impl_->get_max_scn(); + + leader.palf_handle_impl_->state_mgr_.role_ = FOLLOWER; + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->disable_sync()); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->advance_base_info(base_info, true)); + while (leader.palf_handle_impl_->log_engine_.log_storage_.get_end_lsn() != rebuild_lsn) { + sleep(1); + PALF_LOG(INFO, "has not finish rebuild", K(leader.palf_handle_impl_->log_engine_.log_storage_)); + } + } + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + // 测试flashback场景 + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + EXPECT_EQ(rebuild_lsn, leader.palf_handle_impl_->get_max_lsn()); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_max_lsn())); + int64_t mode_version; + switch_append_to_flashback(leader, mode_version); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->flashback(mode_version, SCN::min_scn(), 10*1000*1000)); + } + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + EXPECT_EQ(rebuild_lsn, leader.palf_handle_impl_->get_max_lsn()); + int64_t mode_version; + switch_flashback_to_append(leader, mode_version); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_max_lsn())); + } +} + +TEST_F(TestObSimpleLogClusterRestart, test_restart) +{ + SET_CASE_LOG_FILE(TEST_NAME, "test_restart"); + 
int64_t id = ATOMIC_AAF(&palf_id_, 1); + int64_t leader_idx = 0; + char meta_fd[OB_MAX_FILE_NAME_LENGTH] = {'\0'}; + char log_fd[OB_MAX_FILE_NAME_LENGTH] = {'\0'}; + ObServerLogBlockMgr *pool = NULL; + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); + wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); + LogEngine *log_engine = &leader.palf_handle_impl_->log_engine_; + char *meta_log_dir = log_engine->log_meta_storage_.block_mgr_.log_dir_; + char *log_dir = log_engine->log_storage_.block_mgr_.log_dir_; + EXPECT_EQ(OB_SUCCESS, get_log_pool(leader_idx, pool)); + char *pool_dir = pool->log_pool_path_; + snprintf(meta_fd, OB_MAX_FILE_NAME_LENGTH, "mv %s/%d %s/%d", meta_log_dir, 0, pool_dir, 10000000); + snprintf(log_fd, OB_MAX_FILE_NAME_LENGTH, "mv %s/%d %s/%d", log_dir, 0, pool_dir, 100000001); + system(meta_fd); + } + OB_LOGGER.set_log_level("TRACE"); + sleep(1); + EXPECT_EQ(OB_ERR_UNEXPECTED, restart_paxos_groups()); + system(log_fd); + PALF_LOG(INFO, "first restart_paxos_groups, after meta dir is empty while log dir is not"); + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + + // 验证切文件过程中宕机重启 + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 33, id, MAX_LOG_BODY_SIZE)); + wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); + block_id_t min_block_id, max_block_id; + LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; + LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; + EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); + EXPECT_EQ(1, max_block_id); + // 模拟只switch block,但没有更新manifest, 此时manifest依旧是1, 宕机重启后由于2号文件为空,manifest会被更新为2 + 
EXPECT_EQ(OB_SUCCESS, log_storage->truncate(LSN(PALF_BLOCK_SIZE))); + EXPECT_EQ(OB_SUCCESS, log_storage->update_manifest_(1)); + EXPECT_EQ(PALF_BLOCK_SIZE, log_storage->curr_block_writable_size_); + EXPECT_EQ(1, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); + } + PALF_LOG(INFO, "second restart_paxos_groups after restart in process of switch block"); + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + //检查manifest是否为3 + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); + LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; + EXPECT_EQ(2, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); + } + PALF_LOG(INFO, "third restart_paxos_groups"); + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + // 验证重启后新建日志流 + { + PalfHandleImplGuard leader; + id = ATOMIC_AAF(&palf_id_, 1); + EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 66, id, MAX_LOG_BODY_SIZE)); + wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); + EXPECT_EQ(OB_ITER_END, read_log(leader)); + } + // 验证truncate或flashback过程中,修改完manifest后,删除文件前宕机重启(删除1个文件) + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + block_id_t min_block_id, max_block_id; + // 此时manifest为3 + LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; + LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; + EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); + EXPECT_EQ(2, max_block_id); + EXPECT_EQ(3, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); + // truncate 或 flashback会先更新manifest为2 + EXPECT_EQ(OB_SUCCESS, log_storage->update_manifest_(2)); + EXPECT_EQ(2, lsn_2_block(meta_storage->log_block_header_.min_lsn_, 
PALF_BLOCK_SIZE)); + } + PALF_LOG(INFO, "fourth restart_paxos_groups after modify manifest while not delete block"); + // 验证truncate或flashback过程中,修改完manifest后,truncaet/flashback正好将最后一个文件清空 + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + block_id_t min_block_id, max_block_id; + // 此时manifest为2 + LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; + LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; + EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); + EXPECT_EQ(2, max_block_id); + // 尽管manifest为2,但在这种场景下,2号文件是可以删除的 + EXPECT_EQ(2, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); + EXPECT_EQ(OB_SUCCESS, log_storage->truncate(LSN(2*PALF_BLOCK_SIZE))); + EXPECT_EQ(OB_SUCCESS, log_storage->update_manifest_(2)); + } + PALF_LOG(INFO, "five restart_paxos_groups after modify manifest and last block is empty"); + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + block_id_t min_block_id, max_block_id; + // 重启之后,由于磁盘上最大的文件为2,同时该文件为空,此时会更新manifest为3 + LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; + LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; + EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); + EXPECT_EQ(2, max_block_id); + EXPECT_EQ(3, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); + wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); + EXPECT_EQ(3, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); + } + PALF_LOG(INFO, "six restart_paxos_groups"); + // 验证base lsn 大于持久化的committed 位点 + { + PalfHandleImplGuard leader; + 
EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; + LogIOWorker *iow = leader.palf_handle_impl_->log_engine_.log_io_worker_; + int64_t epoch = leader.palf_handle_impl_->log_engine_.palf_epoch_; + int64_t palf_id = leader.palf_handle_impl_->palf_id_; + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 29, id, MAX_LOG_BODY_SIZE)); + EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); + // 预期log_tail接近文件2的尾部 + EXPECT_LE(LSN(3*PALF_BLOCK_SIZE) - log_storage->log_tail_, 5*1024*1024); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_end_lsn())); + sleep(1); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, 1000)); + wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); + EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); + IOTaskConsumeCond cond(palf_id, epoch); + EXPECT_EQ(OB_SUCCESS, iow->submit_io_task(&cond)); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id, MAX_LOG_BODY_SIZE)); + while (1) { + if (leader.palf_handle_impl_->sw_.last_submit_end_lsn_ < leader.palf_handle_impl_->get_max_lsn()) { + usleep(5000); + leader.palf_handle_impl_->sw_.freeze_mode_ = FEEDBACK_FREEZE_MODE; + leader.palf_handle_impl_->sw_.feedback_freeze_last_log_(); + PALF_LOG(INFO, "has log in sw", "last_submit_end_lsn", leader.palf_handle_impl_->sw_.last_submit_end_lsn_, + "max_lsn", leader.palf_handle_impl_->get_max_lsn()); + } else { + break; + } + } + cond.cond_.signal(); + EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); + PALF_LOG(INFO, "after wait_lsn_until_flushed", "end_lsn:", leader.palf_handle_impl_->get_end_lsn(), + "max_lsn:", leader.palf_handle_impl_->get_end_lsn()); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_max_lsn())); + 
EXPECT_GE(leader.palf_handle_impl_->get_max_lsn(), LSN(3*PALF_BLOCK_SIZE)); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_base_lsn(LSN(3*PALF_BLOCK_SIZE))); + } + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + PALF_LOG(INFO, "seven restart_paxos_groups after committed lsn is smaller than base lsn"); + // 验证rebuild过程中持久化palf_base_info后,宕机重启 + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; + LogIOWorker *iow = leader.palf_handle_impl_->log_engine_.log_io_worker_; + int64_t epoch = leader.palf_handle_impl_->log_engine_.palf_epoch_; + int64_t palf_id = leader.palf_handle_impl_->palf_id_; + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id, MAX_LOG_BODY_SIZE)); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_end_lsn())); + sleep(1); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, 1000)); + wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); + EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); + IOTaskConsumeCond cond(palf_id, epoch); + EXPECT_EQ(OB_SUCCESS, iow->submit_io_task(&cond)); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id, MAX_LOG_BODY_SIZE)); + while (1) { + if (leader.palf_handle_impl_->sw_.last_submit_end_lsn_ < leader.palf_handle_impl_->get_max_lsn()) { + usleep(5000); + leader.palf_handle_impl_->sw_.freeze_mode_ = FEEDBACK_FREEZE_MODE; + leader.palf_handle_impl_->sw_.feedback_freeze_last_log_(); + PALF_LOG(INFO, "has log in sw", "last_submit_end_lsn", leader.palf_handle_impl_->sw_.last_submit_end_lsn_, + "max_lsn", leader.palf_handle_impl_->get_max_lsn()); + } else { + break; + } + } + cond.cond_.signal(); + EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); + PALF_LOG(INFO, "after wait_lsn_until_flushed", "end_lsn:", leader.palf_handle_impl_->get_end_lsn(), + 
"max_lsn:", leader.palf_handle_impl_->get_end_lsn()); + EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_max_lsn())); + EXPECT_GE(leader.palf_handle_impl_->get_max_lsn(), LSN(3*PALF_BLOCK_SIZE)); + PalfBaseInfo base_info; + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_base_info( + leader.palf_handle_impl_->get_max_lsn(), base_info)); + LogSnapshotMeta snapshot; + base_info.prev_log_info_.lsn_ = LSN(10*PALF_BLOCK_SIZE - 10*1024); + EXPECT_EQ(OB_SUCCESS, snapshot.generate(LSN(10*PALF_BLOCK_SIZE), base_info.prev_log_info_)); + FlushMetaCbCtx meta_ctx; + meta_ctx.type_ = SNAPSHOT_META; + meta_ctx.base_lsn_ = snapshot.base_lsn_; + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->log_engine_.submit_flush_snapshot_meta_task(meta_ctx, snapshot)); + sleep(2); + } + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + PALF_LOG(INFO, "seven restart_paxos_groups after committed lsn is smaller than base lsn"); + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + EXPECT_EQ(LSN(10*PALF_BLOCK_SIZE), leader.palf_handle_impl_->get_max_lsn()); + EXPECT_EQ(LSN(10*PALF_BLOCK_SIZE), leader.palf_handle_impl_->log_engine_.log_storage_.get_end_lsn()); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id, 1000)); + EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); + } +} + +TEST_F(TestObSimpleLogClusterRestart, advance_base_lsn_with_restart) +{ + SET_CASE_LOG_FILE(TEST_NAME, "advance_base_lsn_with_restart"); + OB_LOGGER.set_log_level("INFO"); + const int64_t id = ATOMIC_AAF(&palf_id_, 1); + PALF_LOG(INFO, "start advance_base_lsn", K(id)); + int64_t leader_idx = 0; + int64_t log_ts = 1; + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id)); + sleep(2); + LSN log_tail = + leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_; + for (int64_t i = 0; i < 4096; 
i++) { + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->enable_vote()); + } + while (LSN(4096 * 4096 + log_tail.val_) != + leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_) + { + sleep(1); + } + } + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_base_lsn(LSN(0))); + } +} + +TEST_F(TestObSimpleLogClusterRestart, restart_and_clear_tmp_files) +{ + SET_CASE_LOG_FILE(TEST_NAME, "restart_and_clear_tmp_files"); + ObTimeGuard guard("restart_and_clear_tmp_files", 0); + const int64_t id = ATOMIC_AAF(&palf_id_, 1); + int64_t leader_idx = 0; + std::string logserver_dir; + { + PalfHandleImplGuard leader; + EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); + guard.click("create"); + logserver_dir = leader.palf_env_impl_->log_dir_; + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, leader_idx, 1 * 1024 * 1024)); + guard.click("submit_log"); + while (leader.palf_handle_impl_->get_end_lsn() + < LSN(100 * 1024 * 1024ul)) { + usleep(100 * 1000); + } + } + const std::string base_dir = logserver_dir; + const std::string tmp_1_dir = base_dir + "/10000.tmp/log/"; + const std::string mkdir_tmp_1 = "mkdir -p " + tmp_1_dir; + const std::string dir_2 = base_dir + "/10000000"; + const std::string dir_normal_file = base_dir + "/10000000/log/1"; + const std::string dir_normal_file1 = base_dir + "/10000000/meta/"; + const std::string mkdir_2 = "mkdir -p " + dir_normal_file; + const std::string mkdir_3 = "mkdir -p " + dir_normal_file1; + system(mkdir_tmp_1.c_str()); + system(mkdir_2.c_str()); + system(mkdir_3.c_str()); + int ret = OB_SUCCESS; + guard.click("mkdir"); + EXPECT_EQ(OB_ERR_UNEXPECTED, restart_paxos_groups()); + CLOG_LOG(INFO, "after restart_paxos_groups after exist tmp dir"); + guard.click("restart"); + const std::string rm_dir_2 = "rm -rf " + dir_2; + system(rm_dir_2.c_str()); + guard.click("rm_dir"); + if 
(OB_FAIL(restart_paxos_groups())) { + PALF_LOG(ERROR, "restart_paxos_groups failed", K(ret)); + } else { + { + CLOG_LOG(INFO, "after restart_paxos_groups after remove tmp dir"); + guard.click("restart"); + bool result = false; + EXPECT_EQ(OB_SUCCESS, + common::FileDirectoryUtils::is_exists(tmp_1_dir.c_str(), result)); + EXPECT_EQ(result, false); + PalfHandleImplGuard leader1; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader1, leader_idx)); + guard.click("get_leader"); + LogStorage *log_storage = + &leader1.palf_handle_impl_->log_engine_.log_storage_; + LSN lsn_origin_log_tail = log_storage->get_log_tail_guarded_by_lock_(); + EXPECT_EQ(OB_SUCCESS, submit_log(leader1, 10, leader_idx, 1 * 1024 * 1024)); + while (log_storage->log_tail_ == lsn_origin_log_tail) { + usleep(1 * 1000); + PALF_LOG(INFO, "log_tail is same", KPC(log_storage), K(lsn_origin_log_tail)); + } + guard.click("submit_log"); + EXPECT_EQ(OB_ITER_END, read_log(leader1)); + guard.click("read_log"); + PALF_LOG(INFO, "finish read_log", KPC(log_storage), K(lsn_origin_log_tail), KPC(leader1.palf_handle_impl_)); + } + // 验证tenant下有临时文件的场景,该临时文件需要归还给log_pool + { + PalfHandleImplGuard leader1; + int64_t leader_idx1 = 0; + EXPECT_EQ(OB_SUCCESS, get_leader(id, leader1, leader_idx1)); + std::string palf_log_dir = leader1.palf_handle_impl_->log_engine_.log_storage_.block_mgr_.log_dir_; + ObISimpleLogServer *i_server = get_cluster()[leader_idx1]; + ObSimpleLogServer *server = dynamic_cast(i_server); + std::string log_pool = server->log_block_pool_.log_pool_path_; + const block_id_t min_block_id = server->log_block_pool_.min_block_id_; + char src[1024] = {'\0'}; + char dest[1024] = {'\0'}; + block_id_to_tmp_string(10000, dest, 1024); + block_id_to_string(min_block_id, src, 1024); + std::string src_str = log_pool + "/" + src; + std::string dest_str = palf_log_dir + "/" + dest; + std::string mv_system = "mv " + src_str + " " + dest_str; + system(mv_system.c_str()); + bool result1 = false; + EXPECT_EQ(OB_SUCCESS, + 
common::FileDirectoryUtils::is_exists(dest_str.c_str(), result1)); + EXPECT_EQ(true, result1); + leader1.reset(); + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + EXPECT_EQ(OB_SUCCESS, + common::FileDirectoryUtils::is_exists(dest_str.c_str(), result1)); + EXPECT_EQ(false, result1); + } + EXPECT_EQ(OB_SUCCESS, remove_dir()); + EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); + } + EXPECT_EQ(OB_SUCCESS, ret); + PALF_LOG(INFO, "end test restart", K(id), K(guard)); +} + +} // namespace unittest +} // namespace oceanbase + +int main(int argc, char **argv) +{ + RUN_SIMPLE_LOG_CLUSTER_TEST(TEST_NAME); +} diff --git a/mittest/logservice/test_ob_simple_log_single_replica_func.cpp b/mittest/logservice/test_ob_simple_log_single_replica_func.cpp index 28f7b666a3..e4d793b27a 100644 --- a/mittest/logservice/test_ob_simple_log_single_replica_func.cpp +++ b/mittest/logservice/test_ob_simple_log_single_replica_func.cpp @@ -191,38 +191,6 @@ TEST_F(TestObSimpleLogClusterSingleReplica, delete_paxos_group) PALF_LOG(INFO, "end test delete_paxos_group", K(id)); } -TEST_F(TestObSimpleLogClusterSingleReplica, advance_base_lsn) -{ - SET_CASE_LOG_FILE(TEST_NAME, "advance_base_lsn"); - OB_LOGGER.set_log_level("INFO"); - const int64_t id = ATOMIC_AAF(&palf_id_, 1); - PALF_LOG(INFO, "start advance_base_lsn", K(id)); - int64_t leader_idx = 0; - int64_t log_ts = 1; - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id)); - sleep(2); - LSN log_tail = - leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_; - for (int64_t i = 0; i < 4096; i++) { - EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->enable_vote()); - } - while (LSN(4096 * 4096 + log_tail.val_) != - leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_) - { - sleep(1); - } - } - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, 
leader_idx)); - EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_base_lsn(LSN(0))); - } -} - TEST_F(TestObSimpleLogClusterSingleReplica, single_replica_flashback) { SET_CASE_LOG_FILE(TEST_NAME, "single_replica_flashback"); @@ -717,172 +685,6 @@ TEST_F(TestObSimpleLogClusterSingleReplica, test_meta) } } -TEST_F(TestObSimpleLogClusterSingleReplica, test_restart) -{ - SET_CASE_LOG_FILE(TEST_NAME, "test_restart"); - int64_t id = ATOMIC_AAF(&palf_id_, 1); - int64_t leader_idx = 0; - char meta_fd[OB_MAX_FILE_NAME_LENGTH] = {'\0'}; - char log_fd[OB_MAX_FILE_NAME_LENGTH] = {'\0'}; - ObServerLogBlockMgr *pool = NULL; - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); - wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); - LogEngine *log_engine = &leader.palf_handle_impl_->log_engine_; - char *meta_log_dir = log_engine->log_meta_storage_.block_mgr_.log_dir_; - char *log_dir = log_engine->log_storage_.block_mgr_.log_dir_; - EXPECT_EQ(OB_SUCCESS, get_log_pool(leader_idx, pool)); - char *pool_dir = pool->log_pool_path_; - snprintf(meta_fd, OB_MAX_FILE_NAME_LENGTH, "mv %s/%d %s/%d", meta_log_dir, 0, pool_dir, 10000000); - snprintf(log_fd, OB_MAX_FILE_NAME_LENGTH, "mv %s/%d %s/%d", log_dir, 0, pool_dir, 100000001); - system(meta_fd); - } - OB_LOGGER.set_log_level("TRACE"); - sleep(1); - EXPECT_EQ(OB_ERR_UNEXPECTED, restart_paxos_groups()); - system(log_fd); - PALF_LOG(INFO, "first restart_paxos_groups, after meta dir is empty while log dir is not"); - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - - // 验证切文件过程中宕机重启 - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 33, id, MAX_LOG_BODY_SIZE)); - wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); - block_id_t 
min_block_id, max_block_id; - LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; - LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; - EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); - EXPECT_EQ(1, max_block_id); - // 模拟只switch block,但没有更新manifest, 此时manifest依旧是1, 宕机重启后由于2号文件为空,manifest会被更新为2 - EXPECT_EQ(OB_SUCCESS, log_storage->truncate(LSN(PALF_BLOCK_SIZE))); - EXPECT_EQ(OB_SUCCESS, log_storage->update_manifest_(1)); - EXPECT_EQ(PALF_BLOCK_SIZE, log_storage->curr_block_writable_size_); - EXPECT_EQ(1, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); - } - PALF_LOG(INFO, "second restart_paxos_groups after restart in process of switch block"); - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); - //检查manifest是否为3 - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); - LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; - EXPECT_EQ(2, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); - } - PALF_LOG(INFO, "third restart_paxos_groups"); - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - // 验证重启后新建日志流 - { - PalfHandleImplGuard leader; - id = ATOMIC_AAF(&palf_id_, 1); - EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 66, id, MAX_LOG_BODY_SIZE)); - wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); - EXPECT_EQ(OB_ITER_END, read_log(leader)); - } - // 验证truncate或flashback过程中,修改完manifest后,删除文件前宕机重启(删除1个文件) - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); - block_id_t min_block_id, max_block_id; - // 此时manifest为3 - LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; - LogStorage *meta_storage = 
&leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; - EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); - EXPECT_EQ(2, max_block_id); - EXPECT_EQ(3, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); - // truncate 或 flashback会先更新manifest为2 - EXPECT_EQ(OB_SUCCESS, log_storage->update_manifest_(2)); - EXPECT_EQ(2, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); - } - PALF_LOG(INFO, "fourth restart_paxos_groups after modify manifest while not delete block"); - // 验证truncate或flashback过程中,修改完manifest后,truncaet/flashback正好将最后一个文件清空 - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); - block_id_t min_block_id, max_block_id; - // 此时manifest为2 - LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; - LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; - EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); - EXPECT_EQ(2, max_block_id); - // 尽管manifest为2,但在这种场景下,2号文件是可以删除的 - EXPECT_EQ(2, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); - EXPECT_EQ(OB_SUCCESS, log_storage->truncate(LSN(2*PALF_BLOCK_SIZE))); - EXPECT_EQ(OB_SUCCESS, log_storage->update_manifest_(2)); - } - PALF_LOG(INFO, "five restart_paxos_groups after modify manifest and last block is empty"); - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); - block_id_t min_block_id, max_block_id; - // 重启之后,由于磁盘上最大的文件为2,同时该文件为空,此时会更新manifest为3 - LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; - LogStorage *meta_storage = &leader.get_palf_handle_impl()->log_engine_.log_meta_storage_; - EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); - EXPECT_EQ(2, max_block_id); - 
EXPECT_EQ(3, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); - wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); - EXPECT_EQ(3, lsn_2_block(meta_storage->log_block_header_.min_lsn_, PALF_BLOCK_SIZE)); - } - PALF_LOG(INFO, "six restart_paxos_groups"); - // 验证base lsn 大于持久化的committed 位点 - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); - LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; - LogIOWorker *iow = leader.palf_handle_impl_->log_engine_.log_io_worker_; - int64_t epoch = leader.palf_handle_impl_->log_engine_.palf_epoch_; - int64_t palf_id = leader.palf_handle_impl_->palf_id_; - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 29, id, MAX_LOG_BODY_SIZE)); - EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); - // 预期log_tail接近文件2的尾部 - EXPECT_LE(LSN(3*PALF_BLOCK_SIZE) - log_storage->log_tail_, 5*1024*1024); - EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_end_lsn())); - sleep(1); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id, 1000)); - wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader); - EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); - IOTaskConsumeCond cond(palf_id, epoch); - EXPECT_EQ(OB_SUCCESS, iow->submit_io_task(&cond)); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id, MAX_LOG_BODY_SIZE)); - while (1) { - if (leader.palf_handle_impl_->sw_.last_submit_end_lsn_ < leader.palf_handle_impl_->get_max_lsn()) { - usleep(5000); - leader.palf_handle_impl_->sw_.freeze_mode_ = FEEDBACK_FREEZE_MODE; - leader.palf_handle_impl_->sw_.feedback_freeze_last_log_(); - PALF_LOG(INFO, "has log in sw", "last_submit_end_lsn", leader.palf_handle_impl_->sw_.last_submit_end_lsn_, - "max_lsn", leader.palf_handle_impl_->get_max_lsn()); - } else { - 
break; - } - } - cond.cond_.signal(); - EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); - PALF_LOG(INFO, "after wait_lsn_until_flushed", "end_lsn:", leader.palf_handle_impl_->get_end_lsn(), - "max_lsn:", leader.palf_handle_impl_->get_end_lsn()); - EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_max_lsn())); - EXPECT_GE(leader.palf_handle_impl_->get_max_lsn(), LSN(3*PALF_BLOCK_SIZE)); - EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_base_lsn(LSN(3*PALF_BLOCK_SIZE))); - } - EXPECT_EQ(OB_SUCCESS, restart_paxos_groups()); - PALF_LOG(INFO, "seven restart_paxos_groups after committed lsn is smaller than base lsn"); - { - PalfHandleImplGuard leader; - EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx)); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id, 1000)); - EXPECT_EQ(OB_SUCCESS, wait_lsn_until_flushed(leader.palf_handle_impl_->get_max_lsn(), leader)); - } -} - TEST_F(TestObSimpleLogClusterSingleReplica, test_iterator) { SET_CASE_LOG_FILE(TEST_NAME, "test_iterator"); @@ -1998,55 +1800,6 @@ TEST_F(TestObSimpleLogClusterSingleReplica, test_iterator_with_flashback) } } -TEST_F(TestObSimpleLogClusterSingleReplica, read_block_in_flashback) -{ - disable_hot_cache_ = true; - SET_CASE_LOG_FILE(TEST_NAME, "read_block_in_flashback"); - OB_LOGGER.set_log_level("TRACE"); - const int64_t id = ATOMIC_AAF(&palf_id_, 1); - int64_t leader_idx = 0; - PalfHandleImplGuard leader; - PalfEnv *palf_env = NULL; - EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, leader_idx, leader)); - - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 2 * 32 + 2, id, MAX_LOG_BODY_SIZE)); - EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.get_palf_handle_impl()->get_max_lsn())); - - block_id_t min_block_id, max_block_id; - LogStorage *log_storage = &leader.get_palf_handle_impl()->log_engine_.log_storage_; - EXPECT_EQ(OB_SUCCESS, log_storage->get_block_id_range(min_block_id, max_block_id)); - EXPECT_EQ(2, 
max_block_id); - SCN scn; - char block_name_tmp[OB_MAX_FILE_NAME_LENGTH]; - EXPECT_EQ(OB_SUCCESS, block_id_to_tmp_string(max_block_id, block_name_tmp, OB_MAX_FILE_NAME_LENGTH)); - char block_name[OB_MAX_FILE_NAME_LENGTH]; - EXPECT_EQ(OB_SUCCESS, block_id_to_string(max_block_id, block_name, OB_MAX_FILE_NAME_LENGTH)); - ::renameat(log_storage->block_mgr_.dir_fd_, block_name, log_storage->block_mgr_.dir_fd_, block_name_tmp); - EXPECT_EQ(-1, ::openat(log_storage->block_mgr_.dir_fd_, block_name, LOG_READ_FLAG)); - EXPECT_EQ(OB_NEED_RETRY, read_log(leader)); - EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn)); - - // 测试边界场景,read_log_tail_为文件中间,最后一个文件完全被flashback掉, 此时log_tail_是最后一个文件头 - log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE); - EXPECT_EQ(OB_NEED_RETRY, read_log(leader)); - EXPECT_EQ(OB_NEED_RETRY, log_storage->get_block_min_scn(max_block_id, scn)); - - // 测试边界场景,read_log_tail_最后一个文件头,最后一个文件完全被flashback掉 - log_storage->log_tail_ = LSN(2*PALF_BLOCK_SIZE); - log_storage->readable_log_tail_ = LSN(2*PALF_BLOCK_SIZE); - EXPECT_EQ(OB_ITER_END, read_log(leader)); - EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn)); - - // 测试边界场景,readable_log_tail_还没改变前检验是否可读通过,直接读文件时报错文件不存在。 - log_storage->log_tail_ = LSN(3*PALF_BLOCK_SIZE); - log_storage->readable_log_tail_ = LSN(3*PALF_BLOCK_SIZE); - // 设置max_block_id_为1是为了构造check_read_out_of_bound返回OB_ERR_OUT_OF_UPPER_BOUND的场景 - log_storage->block_mgr_.max_block_id_ = 1; - // log_storage返回OB_ERR_OUT_OF_UPPER_BOUND, iterator将其转换为OB_ITER_END - EXPECT_EQ(OB_ITER_END, read_log(leader)); - EXPECT_EQ(OB_ERR_OUT_OF_UPPER_BOUND, log_storage->get_block_min_scn(max_block_id, scn)); -} - } // namespace unittest } // namespace oceanbase diff --git a/src/logservice/palf/log_engine.cpp b/src/logservice/palf/log_engine.cpp index e282b53e5b..d7b5e61863 100644 --- a/src/logservice/palf/log_engine.cpp +++ b/src/logservice/palf/log_engine.cpp @@ -1455,7 +1455,7 @@ int 
LogEngine::try_clear_up_holes_and_check_storage_integrity_( block_id_t min_block_id = LOG_INVALID_BLOCK_ID; block_id_t max_block_id = LOG_INVALID_BLOCK_ID; int64_t logical_block_size = 0; - + const LSN log_storage_tail = log_storage_.get_end_lsn(); if (OB_FAIL(log_storage_.get_logical_block_size(logical_block_size))) { PALF_LOG(WARN, "get_logical_block_size failed", K(ret), K_(palf_id), K_(is_inited)); } else if (FALSE_IT(base_block_id = lsn_2_block(base_lsn, logical_block_size))) { @@ -1475,7 +1475,10 @@ int LogEngine::try_clear_up_holes_and_check_storage_integrity_( } else { ret = OB_SUCCESS; } - } else if (!last_group_entry_header.is_valid()) { + // If log_storage_ is not empty but last_group_entry_header is invalid, unexpected error. + // For rebuild, the base_lsn may be greater than the log_tail of LogStorage because we + // update LogSnapshotMeta firstly. + } else if (log_storage_.get_end_lsn() != base_lsn && !last_group_entry_header.is_valid()) { ret = OB_ERR_UNEXPECTED; PALF_LOG(ERROR, "unexpected error, LogStorage are not empty bus last log entry is invalid", K(last_entry_begin_lsn), K(expected_next_block_id), K(last_group_entry_header)); diff --git a/src/logservice/palf/palf_env_impl.cpp b/src/logservice/palf/palf_env_impl.cpp index 65908db32d..29c9cce47e 100644 --- a/src/logservice/palf/palf_env_impl.cpp +++ b/src/logservice/palf/palf_env_impl.cpp @@ -170,7 +170,7 @@ PalfEnvImpl::PalfEnvImpl() : palf_meta_lock_(common::ObLatchIds::PALF_ENV_LOCK), log_updater_(), monitor_(NULL), disk_options_wrapper_(), - check_disk_print_log_interval_(OB_INVALID_TIMESTAMP), + disk_not_enough_print_interval_(OB_INVALID_TIMESTAMP), self_(), palf_handle_impl_map_(64), // 指定min_size=64 last_palf_epoch_(0), @@ -333,6 +333,7 @@ void PalfEnvImpl::destroy() election_timer_.destroy(); log_alloc_mgr_ = NULL; monitor_ = NULL; + disk_not_enough_print_interval_ = OB_INVALID_TIMESTAMP; self_.reset(); log_dir_[0] = '\0'; tmp_log_dir_[0] = '\0'; @@ -719,6 +720,8 @@ int 
PalfEnvImpl::try_recycle_blocks() ATOMIC_STORE(&diskspace_enough_, curr_diskspace_enough); } if ((true == need_recycle && false == has_recycled && false == is_shrinking) || false == diskspace_enough_) { + constexpr int64_t INTERVAL = 1*1000*1000; + if (palf_reach_time_interval(INTERVAL, disk_not_enough_print_interval_)) { int tmp_ret = OB_LOG_OUTOF_DISK_SPACE; LOG_DBA_ERROR(OB_LOG_OUTOF_DISK_SPACE, "msg", "log disk space is almost full", "ret", tmp_ret, "total_size(MB)", disk_opts_for_recycling_blocks.log_disk_usage_limit_size_/MB, @@ -733,6 +736,7 @@ int PalfEnvImpl::try_recycle_blocks() "maximum_log_stream", palf_id, "oldest_log_stream", oldest_palf_id, "oldest_scn", oldest_scn); + } } else { if (REACH_TIME_INTERVAL(2 * 1000 * 1000L)) { PALF_LOG(INFO, "LOG_DISK_OPTION", K(disk_options_wrapper_)); diff --git a/src/logservice/palf/palf_env_impl.h b/src/logservice/palf/palf_env_impl.h index 7b9e4176b6..312209225b 100644 --- a/src/logservice/palf/palf_env_impl.h +++ b/src/logservice/palf/palf_env_impl.h @@ -346,7 +346,7 @@ private: PalfMonitorCb *monitor_; PalfDiskOptionsWrapper disk_options_wrapper_; - int64_t check_disk_print_log_interval_; + int64_t disk_not_enough_print_interval_; char log_dir_[common::MAX_PATH_SIZE]; char tmp_log_dir_[common::MAX_PATH_SIZE];