Fix missing log callback caused by the log disk of the arb replica being full

This commit is contained in:
HaHaJeff 2024-02-10 06:34:00 +00:00 committed by ob-robot
parent 5ba5ccbace
commit 7325f3b22f
7 changed files with 107 additions and 16 deletions

View File

@ -284,6 +284,49 @@ TEST_F(TestObSimpleMutilArbServer, create_mutil_cluster)
EXPECT_FALSE(palf_env_mgr->is_cluster_placeholder_exists(cluster_ids[0]));
}
// Restart test for an arbitration server whose meta storage tail sits at 2MB.
//
// Steps:
//   1. Create a PalfEnvLite for (cluster_ids[0], tenant 1) and a palf handle with id 1.
//   2. Append log meta repeatedly until log_meta_storage_.log_tail_ reaches LSN(2MB),
//      then explicitly switch the meta storage to block 1.
//   3. Revert the handles and restart server 0, expecting OB_SUCCESS.
//
// Pointers that are dereferenced later are checked with fatal ASSERT_* so a
// failed lookup fails this case instead of crashing the whole test binary.
TEST_F(TestObSimpleMutilArbServer, restart_arb)
{
  SET_CASE_LOG_FILE(TEST_NAME, "restart_arb");
  OB_LOGGER.set_log_level("TRACE");
  ObISimpleLogServer *iserver = get_cluster()[0];
  EXPECT_EQ(true, iserver->is_arb_server());
  ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(iserver);
  // dynamic_cast yields nullptr when server 0 is not an ObSimpleArbServer.
  ASSERT_NE(nullptr, arb_server);
  palflite::PalfEnvLiteMgr *palf_env_mgr = &arb_server->palf_env_mgr_;
  std::vector<int64_t> cluster_ids = {2, 3, 4, 5, 6, 7};
  arbserver::GCMsgEpoch epoch = arbserver::GCMsgEpoch(1, 1);
  // test add tenant without cluster, generate placeholder
  EXPECT_EQ(OB_SUCCESS, palf_env_mgr->create_palf_env_lite(palflite::PalfEnvKey(cluster_ids[0], 1)));
  EXPECT_TRUE(palf_env_mgr->is_cluster_placeholder_exists(cluster_ids[0]));
  palflite::PalfEnvLite *palf_env_lite = NULL;
  IPalfHandleImpl *ipalf_handle_impl = NULL;
  {
    PalfBaseInfo info; info.generate_by_default();
    AccessMode mode(palf::AccessMode::APPEND);
    // Fatal on failure: palf_env_lite is dereferenced right below.
    ASSERT_EQ(OB_SUCCESS, palf_env_mgr->get_palf_env_lite(palflite::PalfEnvKey(cluster_ids[0], 1), palf_env_lite));
    ASSERT_NE(nullptr, palf_env_lite);
    EXPECT_EQ(OB_SUCCESS, palf_env_lite->create_palf_handle_impl(1, mode, info, ipalf_handle_impl));
    palflite::PalfHandleLite *palf_handle_lite = dynamic_cast<palflite::PalfHandleLite*>(ipalf_handle_impl);
    ASSERT_NE(nullptr, palf_handle_lite);
    LogEngine *log_engine = &palf_handle_lite->log_engine_;
    LogMeta log_meta = log_engine->log_meta_;
    // Number of appends still needed to reach 2MB.
    // NOTE(review): assumes every append_log_meta_ advances the tail by 4096 bytes
    // and that 2MB is the meta block size (cf. PALF_META_BLOCK_SIZE) - confirm.
    int count = (2 * 1024 * 1024 - log_engine->log_meta_storage_.log_tail_.val_) / 4096;
    while (count > 0) {
      EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_meta));
      count --;
    }
    // Poll until the tail is observed at exactly 2MB.
    while (log_engine->log_meta_storage_.log_tail_ != LSN(2*1024*1024)) {
      sleep(1);
    }
    sleep(1);
    EXPECT_EQ(2*1024*1024, log_engine->log_meta_storage_.log_tail_);
    // Switch to block 1 while the tail is still at offset 0 of that block.
    EXPECT_EQ(OB_SUCCESS, log_engine->log_meta_storage_.block_mgr_.switch_next_block(1));
  }
  palf_env_lite->revert_palf_handle_impl(ipalf_handle_impl);
  palf_env_mgr->revert_palf_env_lite(palf_env_lite);
  EXPECT_EQ(OB_SUCCESS, restart_server(0));
}
} // end unittest
} // end oceanbase

View File

@ -262,7 +262,7 @@ TEST_F(TestObSimpleLogDiskMgr, update_disk_options_restart)
int64_t log_disk_usage, total_log_disk_size;
EXPECT_EQ(OB_SUCCESS, get_palf_env(leader_idx, palf_env));
usleep(2*ObLooper::INTERVAL_US);
usleep(1000*1000 + BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS);
usleep(2*1000*1000 + BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS);
EXPECT_EQ(true, palf_env->palf_env_impl_.diskspace_enough_);
EXPECT_EQ(PalfDiskOptionsWrapper::Status::SHRINKING_STATUS,
palf_env->palf_env_impl_.disk_options_wrapper_.status_);
@ -286,7 +286,7 @@ TEST_F(TestObSimpleLogDiskMgr, update_disk_options_restart)
// 于是日志盘变为正常状态
EXPECT_EQ(OB_SUCCESS, update_disk_options(leader_idx, 16));
// 在下一轮GC任务运行后,本地持久化的log_disk_size也会变为16*PALF_PHY_BLOCK_SIZE
usleep(1000*1000+palf::BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS);
usleep(2*1000*1000+palf::BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS);
// 经过一轮GC后,会变为NORMAL_STATUS
EXPECT_EQ(true, palf_env->palf_env_impl_.diskspace_enough_);
EXPECT_EQ(PalfDiskOptionsWrapper::Status::NORMAL_STATUS,
@ -294,7 +294,7 @@ TEST_F(TestObSimpleLogDiskMgr, update_disk_options_restart)
EXPECT_EQ(16*PALF_PHY_BLOCK_SIZE,
palf_env->palf_env_impl_.disk_options_wrapper_.disk_opts_for_recycling_blocks_.log_disk_usage_limit_size_);
// 后台线程会完成缩容操作,最终本地持久化的变为16*PALF_PHY_BLOCK_SIZE
usleep(ObLooper::INTERVAL_US*2);
usleep(2*1000*1000+ObLooper::INTERVAL_US*2);
EXPECT_EQ(OB_SUCCESS, get_disk_options(leader_idx, opts));
EXPECT_EQ(opts.log_disk_usage_limit_size_, 16*PALF_PHY_BLOCK_SIZE);
}
@ -325,7 +325,7 @@ TEST_F(TestObSimpleLogDiskMgr, overshelling)
if (opts.log_disk_usage_limit_size_ == 16*PALF_PHY_BLOCK_SIZE) {
EXPECT_EQ(16*PALF_PHY_BLOCK_SIZE, log_disk_size_used_for_tenants);
// 缩容不会立马生效
usleep(ObLooper::INTERVAL_US*2);
usleep(2*1000*1000+ObLooper::INTERVAL_US*2);
EXPECT_EQ(15*PALF_PHY_BLOCK_SIZE, log_pool->min_log_disk_size_for_all_tenants_);
} else {
PALF_LOG(INFO, "update_disk_options successfully", K(log_disk_size_used_for_tenants), K(opts));
@ -353,7 +353,7 @@ TEST_F(TestObSimpleLogDiskMgr, overshelling)
EXPECT_EQ(OB_SUCCESS, wait_until_has_committed(leader, leader.palf_handle_impl_->get_max_lsn()));
EXPECT_EQ(OB_SUCCESS, update_disk_options(leader_idx, 10));
// 缩容一定不会成功,租户日志盘规格依旧为上限值
usleep(ObLooper::INTERVAL_US * 2);
usleep(2*1000*1000+ObLooper::INTERVAL_US * 2);
EXPECT_EQ(OB_SUCCESS, get_palf_env(leader_idx, palf_env));
EXPECT_EQ(PalfDiskOptionsWrapper::Status::SHRINKING_STATUS,
palf_env->palf_env_impl_.disk_options_wrapper_.status_);
@ -419,7 +419,7 @@ TEST_F(TestObSimpleLogDiskMgr, hidden_sys)
EXPECT_EQ(0, disk_opts.log_disk_usage_limit_size_);
EXPECT_EQ(PalfDiskOptionsWrapper::Status::SHRINKING_STATUS,
palf_env->palf_env_impl_.disk_options_wrapper_.status_);
usleep(palf::BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS + 5*10000);
usleep(palf::BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS + 5*100000);
usleep(ObLooper::INTERVAL_US * 2);
EXPECT_EQ(PalfDiskOptionsWrapper::Status::NORMAL_STATUS,
palf_env->palf_env_impl_.disk_options_wrapper_.status_);

View File

@ -418,20 +418,27 @@ TEST_F(TestObSimpleLogClusterRestart, advance_base_lsn_with_restart)
sleep(2);
LSN log_tail =
leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_;
for (int64_t i = 0; i < 4096; i++) {
int count = (LSN(PALF_META_BLOCK_SIZE) - log_tail)/4096;
for (int64_t i = 0; i < count; i++) {
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->enable_vote());
}
while (LSN(4096 * 4096 + log_tail.val_) !=
while (LSN(PALF_META_BLOCK_SIZE) !=
leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_)
{
sleep(1);
}
sleep(1);
EXPECT_EQ(LSN(PALF_META_BLOCK_SIZE), leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_);
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->log_engine_.log_meta_storage_.block_mgr_.switch_next_block(1));
}
EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
{
PalfHandleImplGuard leader;
EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
EXPECT_LT(LSN(PALF_META_BLOCK_SIZE), leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_);
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_base_lsn(LSN(0)));
sleep(1);
EXPECT_LT(LSN(PALF_META_BLOCK_SIZE) + 4096, leader.palf_handle_impl_->log_engine_.log_meta_storage_.log_tail_);
}
}

View File

@ -67,7 +67,7 @@ int LogBlockMgr::init(const char *log_dir,
PALF_LOG(ERROR, "init curr_writable_handler_ failed", K(ret), K(log_dir));
} else if (OB_FAIL(do_scan_dir_(log_dir, initial_block_id, log_block_pool))) {
PALF_LOG(ERROR, "do_scan_dir_ failed", K(ret), K(log_dir));
} else if (OB_FAIL(try_recovery_last_block_(log_dir))) {
} else if (OB_FAIL(try_recovery_last_block_(log_dir, log_block_size))) {
PALF_LOG(ERROR, "try_recovery_last_block_ failed", K(ret), KPC(this));
} else {
MEMCPY(log_dir_, log_dir, OB_MAX_FILE_NAME_LENGTH);
@ -503,7 +503,8 @@ bool LogBlockMgr::empty_() const
return min_block_id_ == max_block_id_;
}
int LogBlockMgr::try_recovery_last_block_(const char *log_dir)
int LogBlockMgr::try_recovery_last_block_(const char *log_dir,
const int64_t log_block_size)
{
int ret = OB_SUCCESS;
int64_t file_size = 0;
@ -516,9 +517,9 @@ int LogBlockMgr::try_recovery_last_block_(const char *log_dir)
PALF_LOG(WARN, "convert_to_normal_block failed", K(ret), K(block_id));
} else if (OB_FAIL(FileDirectoryUtils::get_file_size(block_path, file_size))) {
PALF_LOG(WARN, "get_file_size failed", K(ret), K(block_path));
} else if (file_size == PALF_PHY_BLOCK_SIZE) {
} else if (file_size == log_block_size) {
PALF_LOG(INFO, "last block no need to recovery", K(block_id));
} else if (-1 == ::truncate(block_path, PALF_PHY_BLOCK_SIZE)) {
} else if (-1 == ::truncate(block_path, log_block_size)) {
ret = convert_sys_errno();
PALF_LOG(ERROR, "ftruncate failed", K(ret), KPC(this), K(file_size));
} else {

View File

@ -92,7 +92,7 @@ private:
int do_scan_dir_(const char *dir, const block_id_t initial_block_id, ILogBlockPool *log_block_pool);
int do_rename_and_fsync_(const char *block_path, const char *tmp_block_path);
bool empty_() const;
int try_recovery_last_block_(const char *log_dir);
int try_recovery_last_block_(const char *log_dir, const int64_t log_block_size);
int check_after_truncate_(const char *block_path, const offset_t offset);
const int64_t SLEEP_TS_US = 1 * 1000;

View File

@ -85,11 +85,15 @@ int LogStorage::load_manifest_for_meta_storage(block_id_t &expected_next_block_i
{
int ret = OB_SUCCESS;
block_id_t log_tail_block_id = lsn_2_block(log_tail_, logical_block_size_);
// if last block is full, last_block_id will be the next block id of 'last block'
// NB: nowdays, there is no possible which last block is empty but the header of this block is valid.
block_id_t last_block_id = (0 == curr_block_writable_size_ ? log_tail_block_id - 1 : log_tail_block_id);
block_id_t log_tail_offset = lsn_2_offset(log_tail_, logical_block_size_);
// if last block is full or empty, last_block_id will be the next block id of 'last block',
// the valid block header is in prev block.
block_id_t last_block_id = (0 == log_tail_offset ? log_tail_block_id - 1 : log_tail_block_id);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
} else if (0 == log_tail_offset && 0 == log_tail_block_id) {
ret = OB_ERR_UNEXPECTED;
PALF_LOG(ERROR, "unexpected error, there is no valid meta at first block", KPC(this));
// NB: nowadays, we do not support switching blocks when updating the manifest fails, therefore
// we don't need to handle this case.
//

View File

@ -63,11 +63,47 @@ TEST(TestPalfEnvLiteMgr, test_load_cluster_placeholder)
}
}
// Checks the directory helpers of PalfEnvLiteMgr for repeated calls:
//   - check_and_prepare_dir is expected to succeed twice on the same directory;
//   - remove_dir followed by remove_dir_while_exist is expected to succeed,
//     i.e. the second call succeeds after the directory has already been removed.
//
// The base directory is reset with "rm -rf" / "mkdir -p" (as test_create_block
// does) so leftovers of an aborted earlier run cannot affect this run.
TEST(TestPalfEnvLiteMgr, test_create_delete_palf)
{
  PalfEnvLiteMgr mgr;
  const std::string base_dir = "create_delete_palf";
  strcpy(mgr.base_dir_, base_dir.c_str());
  const std::string mkdir_cmd = "mkdir -p " + base_dir;
  const std::string rmdir_cmd = "rm -rf " + base_dir;
  // Start from a clean, existing base directory.
  EXPECT_EQ(0, system(rmdir_cmd.c_str()));
  EXPECT_EQ(0, system(mkdir_cmd.c_str()));
  const std::string log_dir = "runlin_test";
  EXPECT_EQ(OB_SUCCESS, mgr.check_and_prepare_dir(log_dir.c_str()));
  EXPECT_EQ(OB_SUCCESS, mgr.check_and_prepare_dir(log_dir.c_str()));
  EXPECT_EQ(OB_SUCCESS, mgr.remove_dir(log_dir.c_str()));
  EXPECT_EQ(OB_SUCCESS, mgr.remove_dir_while_exist(log_dir.c_str()));
}
// Checks DummyBlockPool::create_block_at / remove_block_at for repeated calls:
//   - creating block "1" twice in the same directory is expected to succeed both times;
//   - removing it once succeeds, removing it again is expected to return
//     OB_NO_SUCH_FILE_OR_DIRECTORY.
//
// The directory descriptor is verified with a fatal assertion before use and
// closed at the end so the test does not leak it.
TEST(TestPalfEnvLiteMgr, test_create_block)
{
  DummyBlockPool dbp;
  int dir_fd = -1;
  const std::string test_dir = "test_create_block";
  const std::string mkdir_cmd = "mkdir -p " + test_dir;
  const std::string rmdir_cmd = "rm -rf " + test_dir;
  // Start from a clean, existing test directory.
  EXPECT_EQ(0, system(rmdir_cmd.c_str()));
  const int64_t block_size = 2 * 1024 * 1024;
  EXPECT_EQ(0, system(mkdir_cmd.c_str()));
  dir_fd = ::open(test_dir.c_str(), O_DIRECTORY | O_RDONLY);
  // Fatal: every call below needs a valid directory descriptor.
  ASSERT_NE(-1, dir_fd);
  const std::string block_path = "1";
  EXPECT_EQ(OB_SUCCESS, dbp.create_block_at(dir_fd, block_path.c_str(), block_size));
  EXPECT_EQ(OB_SUCCESS, dbp.create_block_at(dir_fd, block_path.c_str(), block_size));
  EXPECT_EQ(OB_SUCCESS, dbp.remove_block_at(dir_fd, block_path.c_str()));
  EXPECT_EQ(OB_NO_SUCH_FILE_OR_DIRECTORY, dbp.remove_block_at(dir_fd, block_path.c_str()));
  EXPECT_EQ(0, ::close(dir_fd));
}
} // end of unittest
} // end of oceanbase
int main(int argc, char **argv)
{
system("rm -rf test_palf_env_lite_mgr.log*");
OB_LOGGER.set_file_name("test_palf_env_lite_mgr.log", true);
OB_LOGGER.set_log_level("INFO");
PALF_LOG(INFO, "begin unittest::test_palf_env_lite_mgr");