Fix restart failure caused by the Arb block size being 2MB

This commit is contained in:
HaHaJeff
2023-05-22 06:17:39 +00:00
committed by ob-robot
parent 12df65c37c
commit 2bb8488595
4 changed files with 158 additions and 9 deletions

View File

@ -434,7 +434,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_arb_with_highest_version)
leader.reset();
// block_net, so the two F replicas can't reach majority
block_net(another_f_idx, leader_idx);
restart_paxos_groups();
EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
const int64_t restart_finish_time_us_ = common::ObTimeUtility::current_time();
PalfHandleImplGuard new_leader;
@ -515,6 +515,128 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_defensive)
PALF_LOG(INFO, "end test_2f1a_defensive", K(id));
}
int get_palf_handle_lite(const int64_t tenant_id,
const int64_t palf_id,
ObSimpleArbServer *server,
IPalfHandleImplGuard &handle_guard)
{
int ret = OB_SUCCESS;
PalfEnvLiteGuard env_guard;
if (NULL == server) {
ret = OB_INVALID_ARGUMENT;
} else if (OB_FAIL(server->get_palf_env_lite(tenant_id, env_guard))) {
PALF_LOG(ERROR, "get_palf_env_lite failed", K(tenant_id), K(palf_id));
} else if (OB_FAIL(env_guard.palf_env_lite_->get_palf_handle_impl(palf_id, handle_guard))) {
PALF_LOG(ERROR, "get_palf_handle_impl failed", K(tenant_id), K(palf_id));
} else {
}
return ret;
}
using namespace palflite;
// Checks that a 2F1A paxos group can be restarted repeatedly while the meta
// storage of the arbitration replica grows across logical block boundaries.
//
// Stages (each one is followed by restart_paxos_groups()):
//   1. append meta until the meta tail is exactly one logical block;
//   2. append one more meta entry after the restart;
//   3. append meta until the tail reaches 32 logical blocks;
//   4. append meta until the tail reaches 34 logical blocks plus 16KB.
//
// The append loops use ASSERT_EQ: with a non-fatal EXPECT_EQ a persistent
// append failure would never advance the tail and the loop would spin forever.
TEST_F(TestObSimpleLogClusterArbService, test_multi_meta_block)
{
  SET_CASE_LOG_FILE(TEST_NAME, "test_multi_meta_block");
  OB_LOGGER.set_log_level("DEBUG");
  MockLocCB loc_cb;
  int ret = OB_SUCCESS;
  PALF_LOG(INFO, "begin test_multi_meta_block");
  int64_t leader_idx = 0;
  int64_t arb_replica_idx = -1;
  PalfHandleImplGuard leader;
  std::vector<PalfHandleImplGuard*> palf_list;
  const int64_t id = ATOMIC_AAF(&palf_id_, 1);
  // Both results are dereferenced right below, so a failure here must be fatal.
  ASSERT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
  ASSERT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
  // Set the location callback on the other F replica so that it can find the leader.
  const int64_t another_f_idx = (leader_idx+1)%3;
  loc_cb.leader_ = leader.palf_handle_impl_->self_;
  palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
  sleep(2);

  // Stage 1: fill the meta storage of the arb replica up to one logical block.
  ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
  IPalfHandleImplGuard arb_guard;
  ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
  PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
  ASSERT_TRUE(NULL != arb_palf);
  LogEngine *log_engine = &arb_palf->log_engine_;
  LogStorage *meta_storage = &log_engine->log_meta_storage_;
  while (meta_storage->log_tail_ < LSN(meta_storage->logical_block_size_)) {
    ASSERT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
  }
  const LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
  ASSERT_EQ(meta_tail, LSN(log_engine->log_meta_storage_.logical_block_size_));
  revert_cluster_palf_handle_guard(palf_list);
  arb_guard.reset();
  leader.reset();
  EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());

  // Stage 2: the group must work after the restart, append one more meta entry.
  {
    ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
    PalfHandleImplGuard leader;
    EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
    EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
    IPalfHandleImplGuard arb_guard;
    ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
    PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
    ASSERT_TRUE(NULL != arb_palf);
    LogEngine *log_engine = &arb_palf->log_engine_;
    // NOTE(review): the tail is captured before the append below, confirm
    // whether the check was meant to look at the tail after the append.
    LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
    EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
    ASSERT_NE(meta_tail, LSN(log_engine->log_meta_storage_.logical_block_size_));
  }
  EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());

  // Stage 3: grow the meta storage to 32 logical blocks.
  {
    ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
    PalfHandleImplGuard leader;
    EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
    EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
    IPalfHandleImplGuard arb_guard;
    ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
    PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
    ASSERT_TRUE(NULL != arb_palf);
    LogEngine *log_engine = &arb_palf->log_engine_;
    LogStorage *meta_storage = &log_engine->log_meta_storage_;
    while (meta_storage->log_tail_ < LSN(32 * meta_storage->logical_block_size_)) {
      ASSERT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
    }
  }
  EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());

  // Stage 4: grow the meta storage to 34 logical blocks plus 16KB, so the tail
  // is not aligned to the logical block size.
  {
    ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
    PalfHandleImplGuard leader;
    EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
    EXPECT_EQ(OB_SUCCESS, submit_log(leader, 4000, id));
    IPalfHandleImplGuard arb_guard;
    ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
    PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
    ASSERT_TRUE(NULL != arb_palf);
    LogEngine *log_engine = &arb_palf->log_engine_;
    LogStorage *meta_storage = &log_engine->log_meta_storage_;
    while (meta_storage->log_tail_ < LSN(34 * meta_storage->logical_block_size_ + 4*4*1024)) {
      ASSERT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
    }
  }
  delete_paxos_group(id);
  PALF_LOG(INFO, "end test_multi_meta_block", K(id));
}
} // end unittest
} // end oceanbase

View File

@ -786,6 +786,8 @@ int LogEngine::get_total_used_disk_space(int64_t &total_used_size_byte,
block_id_t max_block_id = LOG_INVALID_BLOCK_ID;
int64_t log_storage_used = 0;
int64_t meta_storage_used = 0;
int64_t log_storage_logical_block_size = 0;
int64_t meta_storage_logical_block_size = 0;
// calc log storage used
if (OB_FAIL(get_block_id_range(min_block_id, max_block_id))
&& OB_ENTRY_NOT_EXIST != ret) {
@ -793,10 +795,14 @@ int LogEngine::get_total_used_disk_space(int64_t &total_used_size_byte,
} else if (OB_ENTRY_NOT_EXIST == ret) {
log_storage_used = 0;
ret = OB_SUCCESS;
} else if (OB_FAIL(log_storage_.get_logical_block_size(log_storage_logical_block_size))) {
PALF_LOG(WARN, "LogStorage get_logical_block_size failed", KPC(this));
} else if (OB_FAIL(log_meta_storage_.get_logical_block_size(meta_storage_logical_block_size))) {
PALF_LOG(WARN, "MetaStorage get_logical_block_size failed", KPC(this));
} else {
//usage calculation should be precise to avoid stopping writing when actually no need
log_storage_used = (max_block_id - min_block_id) * (PALF_BLOCK_SIZE + MAX_INFO_BLOCK_SIZE)
+ lsn_2_offset(log_storage_.get_end_lsn(), PALF_BLOCK_SIZE) + MAX_INFO_BLOCK_SIZE;
log_storage_used = (max_block_id - min_block_id) * (log_storage_logical_block_size + MAX_INFO_BLOCK_SIZE)
+ lsn_2_offset(log_storage_.get_end_lsn(), log_storage_logical_block_size) + MAX_INFO_BLOCK_SIZE;
PALF_LOG(TRACE, "log_storage_used size", K(min_block_id), K(max_block_id), K(log_storage_used));
}
// calc meta storage used
@ -804,7 +810,7 @@ int LogEngine::get_total_used_disk_space(int64_t &total_used_size_byte,
} else if (OB_FAIL(log_meta_storage_.get_block_id_range(min_block_id, max_block_id))) {
PALF_LOG(WARN, "get_block_id_range failed", K(ret), KPC(this));
} else {
meta_storage_used = PALF_META_BLOCK_SIZE + MAX_INFO_BLOCK_SIZE;
meta_storage_used = meta_storage_logical_block_size + MAX_INFO_BLOCK_SIZE;
total_used_size_byte = log_storage_used + meta_storage_used;
const int64_t unrecyclable_meta_size = meta_storage_used;
@ -1428,12 +1434,16 @@ int LogEngine::try_clear_up_holes_and_check_storage_integrity_(
const LSN base_lsn = log_meta_.get_log_snapshot_meta().base_lsn_;
const LogInfo prev_log_info = log_meta_.get_log_snapshot_meta().prev_log_info_;
const bool prev_log_info_is_valid = prev_log_info.is_valid();
const block_id_t base_block_id = lsn_2_block(base_lsn, PALF_BLOCK_SIZE);
// 'expected_next_block_id': either it's the empty block or it does not exist.
block_id_t base_block_id = LOG_INVALID_BLOCK_ID;
block_id_t min_block_id = LOG_INVALID_BLOCK_ID;
block_id_t max_block_id = LOG_INVALID_BLOCK_ID;
int64_t logical_block_size = 0;
if (OB_FAIL(log_storage_.get_block_id_range(min_block_id, max_block_id))
if (OB_FAIL(log_storage_.get_logical_block_size(logical_block_size))) {
PALF_LOG(WARN, "get_logical_block_size failed", K(ret), K_(palf_id), K_(is_inited));
} else if (FALSE_IT(base_block_id = lsn_2_block(base_lsn, logical_block_size))) {
} else if (OB_FAIL(log_storage_.get_block_id_range(min_block_id, max_block_id))
&& OB_ENTRY_NOT_EXIST != ret) {
PALF_LOG(ERROR, "get_block_id_range failed", K(ret), K_(palf_id), K_(is_inited));
} else if (OB_ENTRY_NOT_EXIST == ret) {

View File

@ -78,7 +78,7 @@ int LogStorage::init(const char *base_dir, const char *sub_dir, const LSN &base_
int LogStorage::load_manifest_for_meta_storage(block_id_t &expected_next_block_id)
{
int ret = OB_SUCCESS;
block_id_t log_tail_block_id = lsn_2_block(log_tail_, PALF_BLOCK_SIZE);
block_id_t log_tail_block_id = lsn_2_block(log_tail_, logical_block_size_);
// if last block is full, last_block_id will be the next block id of 'last block'
// NB: nowadays, it is not possible for the last block to be empty while the header of this block is valid.
block_id_t last_block_id = (0 == curr_block_writable_size_ ? log_tail_block_id - 1 : log_tail_block_id);
@ -94,7 +94,7 @@ int LogStorage::load_manifest_for_meta_storage(block_id_t &expected_next_block_i
read_block_header_(last_block_id, log_block_header_))) {
PALF_LOG(WARN, "read_block_header_ failed", K(ret), KPC(this));
} else {
expected_next_block_id= lsn_2_block(log_block_header_.get_min_lsn(), PALF_BLOCK_SIZE);
expected_next_block_id= lsn_2_block(log_block_header_.get_min_lsn(), logical_block_size_);
PALF_LOG(INFO, "load_manifest_for_meta_storage success", K(ret), KPC(this), K(expected_next_block_id));
}
return ret;
@ -527,7 +527,10 @@ int LogStorage::update_manifest_used_for_meta_storage(const block_id_t expected_
// log error in LogBlockMgr because 'log_tail_block_id' is not same as 'curr_writable_block_id'(LogBlockMgr)
// assume 'log_tail_' is equal to PALF_PHY_BLOCK_SIZE, 'log_tail_block_id' is 1, however
// 'curr_writable_block_id' is 0.
if (OB_FAIL(update_block_header_(last_block_id, LSN(expected_max_block_id*PALF_BLOCK_SIZE), SCN::min_scn()))) {
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
PALF_LOG(WARN, "LogMetaStorage not inited", KPC(this), K(expected_max_block_id));
} else if (OB_FAIL(update_block_header_(last_block_id, LSN(expected_max_block_id*logical_block_size_), SCN::min_scn()))) {
PALF_LOG(WARN, "append_block_header_ failed", K(ret), KPC(this), K(last_block_id), K(log_tail_block_id));
} else {
PALF_LOG(INFO, "update_manifest_used_for_meta_storage success", K(ret), KPC(this));
@ -887,5 +890,17 @@ int LogStorage::update_manifest_(const block_id_t expected_next_block_id, const
{
return update_manifest_cb_(expected_next_block_id, in_restart);
}
// Reports the logical block size used by this storage.
//
// @param [out] logical_block_size: set to 'logical_block_size_' on success,
//              left untouched on failure.
//
// @return OB_SUCCESS, or OB_NOT_INIT when the storage has not been inited.
int LogStorage::get_logical_block_size(int64_t &logical_block_size) const
{
  int ret = OB_SUCCESS;
  const bool not_inited = IS_NOT_INIT;
  if (!not_inited) {
    logical_block_size = logical_block_size_;
  } else {
    ret = OB_NOT_INIT;
    PALF_LOG(WARN, "LogStorage not init", KPC(this));
  }
  return ret;
}
} // end namespace palf
} // end namespace oceanbase

View File

@ -115,6 +115,8 @@ public:
int update_manifest_used_for_meta_storage(const block_id_t expected_max_block_id);
int get_logical_block_size(int64_t &logical_block_size) const;
TO_STRING_KV(K_(log_tail),
K_(readable_log_tail),
K_(log_block_header),