Bugfix: inspect_bad_block cannot atomically increase MacroBlock's refcnt.

This commit is contained in:
ND501 2024-02-10 06:50:51 +00:00 committed by ob-robot
parent 39e144d7dd
commit f8c2bab01d
3 changed files with 70 additions and 25 deletions

View File

@ -497,10 +497,12 @@ int64_t ObBlockManager::get_used_macro_block_count() const
}
int ObBlockManager::get_macro_block_info(const MacroBlockId &macro_id,
ObMacroBlockInfo &macro_block_info) const
ObMacroBlockInfo &macro_block_info,
ObMacroBlockHandle &macro_block_handle)
{
int ret = OB_SUCCESS;
BlockInfo block_info;
bool has_inc_ref = false;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -508,14 +510,44 @@ int ObBlockManager::get_macro_block_info(const MacroBlockId &macro_id,
} else if (OB_UNLIKELY(!macro_id.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument, ", K(ret), K(macro_id));
} else if (OB_FAIL(block_map_.get(macro_id, block_info))) {
//BUG, should not happen
LOG_ERROR("fatal error, this block should be in block map", K(ret), K(macro_id));
} else {
macro_block_info.is_free_ = 0 == block_info.ref_cnt_;
macro_block_info.ref_cnt_ = block_info.ref_cnt_;
macro_block_info.access_time_ = block_info.last_write_time_;
ObBucketHashWLockGuard lock_guard(bucket_lock_, macro_id.hash());
if (OB_FAIL(block_map_.get(macro_id, block_info)) && ret != OB_HASH_NOT_EXIST) {
// BUG, should not happen
LOG_ERROR("fatal error, this block should be in block map", K(ret), K(macro_id));
} else if (OB_UNLIKELY(OB_SUCCESS == ret && block_info.ref_cnt_ < 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("fatal error, invalid refcnt", K(ret), K(macro_id), K(block_info));
} else if (OB_UNLIKELY(OB_HASH_NOT_EXIST == ret || 0 == block_info.ref_cnt_)) {
// set `is_free_` to true, skip this MacroBlock in upper layer.
ret = OB_SUCCESS;
macro_block_info.is_free_ = true;
} else {
macro_block_info.is_free_ = false;
macro_block_info.ref_cnt_ = block_info.ref_cnt_;
macro_block_info.access_time_ = block_info.last_write_time_;
block_info.access_time_ = ObTimeUtility::fast_current_time();
block_info.ref_cnt_++;
if (OB_FAIL(block_map_.insert_or_update(macro_id, block_info))) {
LOG_ERROR("update block info fail", K(ret), K(macro_id), K(block_info));
} else {
has_inc_ref = true;
LOG_DEBUG("debug ref_cnt: inc_ref in memory", K(ret), K(macro_id), K(block_info), K(lbt()));
}
}
}
if (OB_SUCC(ret) && !macro_block_info.is_free_) {
if (OB_FAIL(macro_block_handle.set_macro_block_id(macro_id))) {
LOG_ERROR("fatal error, fail to set macro block id", K(ret), K(macro_id), K(macro_block_info));
}
}
if (has_inc_ref) {
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(dec_ref(macro_id))) {
LOG_ERROR("fail to decrease reference count", K(ret), K(macro_id));
}
}
return ret;
}
@ -696,9 +728,9 @@ int ObBlockManager::inc_ref(const MacroBlockId &macro_id)
} else {
LOG_ERROR("get block_info fail", K(ret), K(macro_id));
}
} else if (OB_UNLIKELY(0 == block_info.ref_cnt_ && is_mark_sweep_enabled())) {
} else if (OB_UNLIKELY(block_info.ref_cnt_ < 0 || (0 == block_info.ref_cnt_ && is_mark_sweep_enabled()))) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("ref cnt shouldn't be 0", K(ret), K(macro_id), K(block_info));
LOG_ERROR("un-expected MacroBlock refcnt", K(ret), K(macro_id), K(block_info));
}
if (OB_SUCC(ret)) {
@ -1595,15 +1627,15 @@ void ObBlockManager::InspectBadBlockTask::runTimerTask()
inspect_bad_block();
}
int ObBlockManager::InspectBadBlockTask::check_block(const MacroBlockId &macro_id)
int ObBlockManager::InspectBadBlockTask::check_block(ObMacroBlockHandle &macro_block_handle)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!macro_id.is_valid())) {
if (OB_UNLIKELY(!macro_block_handle.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid arguments", K(ret), K(macro_id));
LOG_WARN("invalid arguments", K(ret), K(macro_block_handle));
} else {
const MacroBlockId &macro_id = macro_block_handle.get_macro_id();
ObMacroBlockReadInfo read_info;
ObMacroBlockHandle macro_handle;
common::ObArenaAllocator allocator(ObModIds::OB_SSTABLE_BLOCK_FILE);
read_info.io_timeout_ms_ =
std::max(GCONF._data_storage_io_timeout / 1000, DEFAULT_IO_WAIT_TIME_MS);
@ -1616,14 +1648,14 @@ int ObBlockManager::InspectBadBlockTask::check_block(const MacroBlockId &macro_i
if (OB_ISNULL(read_info.buf_ = reinterpret_cast<char*>(allocator.alloc(read_info.size_)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
STORAGE_LOG(WARN, "failed to alloc macro read info buffer", K(ret), K(read_info.size_));
} else if (OB_FAIL(ObBlockManager::async_read_block(read_info, macro_handle))) {
} else if (OB_FAIL(ObBlockManager::async_read_block(read_info, macro_block_handle))) {
LOG_WARN("async read block failed", K(ret), K(macro_id), K(read_info));
} else if (OB_FAIL(macro_handle.wait())) {
} else if (OB_FAIL(macro_block_handle.wait())) {
LOG_WARN("io wait failed", K(ret), K(macro_id), K(read_info));
} else if (macro_handle.get_data_size() != read_info.size_) {
} else if (macro_block_handle.get_data_size() != read_info.size_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("buf size is too small", K(ret), K(macro_id),
K(macro_handle.get_data_size()), K(read_info.size_));
K(macro_block_handle.get_data_size()), K(read_info.size_));
} else if (OB_FAIL(ObSSTableMacroBlockChecker::check(read_info.buf_,
read_info.size_, ObMacroBlockCheckLevel::CHECK_LEVEL_PHYSICAL))) {
LOG_ERROR("fail to check sstable macro block", K(ret), K(macro_id),
@ -1784,8 +1816,11 @@ void ObBlockManager::InspectBadBlockTask::inspect_bad_block()
last_macro_idx_ = (last_macro_idx_ + 1) % total_used_macro_block_count;
const MacroBlockId &macro_id = macro_ids.at(last_macro_idx_);
ObMacroBlockInfo block_info;
if (OB_FAIL(blk_mgr_.get_macro_block_info(macro_id, block_info))) {
ObMacroBlockHandle macro_block_handle;
if (OB_FAIL(blk_mgr_.get_macro_block_info(macro_id, block_info, macro_block_handle))) {
LOG_WARN("fail to get macro block info", K(ret), K(macro_id), K(last_macro_idx_));
} else if (OB_UNLIKELY(block_info.is_free_)) {
// do nothing, this MacroBlock has been released. skip this MacroBlock and continue.
} else if (!block_info.is_free_ && block_info.ref_cnt_ > 0
#ifdef ERRSIM
&& (begin_time - block_info.access_time_) > static_cast<int64_t>(10_s)) {
@ -1795,7 +1830,7 @@ void ObBlockManager::InspectBadBlockTask::inspect_bad_block()
#endif
++check_count;
LOG_INFO("check macro block", K(block_info), "time_interval", begin_time - block_info.access_time_);
if (OB_FAIL(check_block(macro_id))) {
if (OB_FAIL(check_block(macro_block_handle))) {
LOG_WARN("found a bad block", K(ret), K(macro_id));
}
}

View File

@ -339,7 +339,9 @@ private:
private:
void update_partial_status(const ObMacroBlockMarkerStatus &tmp_status);
int get_macro_block_info(const MacroBlockId &macro_id, ObMacroBlockInfo &macro_block_info) const;
int get_macro_block_info(const MacroBlockId &macro_id,
ObMacroBlockInfo &macro_block_info,
ObMacroBlockHandle &macro_block_handle);
bool is_bad_block(const MacroBlockId &macro_block_id);
int mark_macro_blocks(
MacroBlkIdMap &mark_info,
@ -452,7 +454,7 @@ private:
void reset();
private:
int check_block(const MacroBlockId &macro_id);
int check_block(ObMacroBlockHandle &macro_block_handle);
void inspect_bad_block();
private:

View File

@ -190,7 +190,9 @@ TEST_F(TestRefCnt, test_inc_and_dec_ref)
ASSERT_EQ(OB_SUCCESS, ret);
}
ObMacroBlockInfo block_info;
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info);
ObMacroBlockHandle block_handle;
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info, block_handle);
block_handle.reset();
ASSERT_EQ(OB_SUCCESS, ret);
ASSERT_EQ(1, block_info.ref_cnt_);
}
@ -229,7 +231,9 @@ TEST_F(TestRefCnt, test_overmuch_dec_ref)
}
ASSERT_EQ(OB_ERR_SYS, ret);
ObMacroBlockInfo block_info;
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info);
ObMacroBlockHandle block_handle;
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info, block_handle);
block_handle.reset();
ASSERT_EQ(OB_SUCCESS, ret);
ASSERT_EQ(0, block_info.ref_cnt_);
}
@ -250,7 +254,9 @@ TEST_F(TestRefCnt, test_1_0_1)
ASSERT_EQ(OB_SUCCESS, ret);
}
ObMacroBlockInfo block_info;
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info);
ObMacroBlockHandle block_handle;
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info, block_handle);
block_handle.reset();
ASSERT_EQ(OB_SUCCESS, ret);
ASSERT_EQ(1, block_info.ref_cnt_);
@ -265,7 +271,9 @@ TEST_F(TestRefCnt, test_1_0_1)
ASSERT_EQ(OB_SUCCESS, ret);
}
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info);
ObMacroBlockHandle tmp_block_handle;
ret = OB_SERVER_BLOCK_MGR.get_macro_block_info(macro_id, block_info, tmp_block_handle);
tmp_block_handle.reset();
ASSERT_EQ(OB_SUCCESS, ret);
ASSERT_EQ(1, block_info.ref_cnt_);