Avoid restarting the observer when incorrect data is read from disk.

This commit is contained in:
HaHaJeff
2023-05-25 12:11:10 +00:00
committed by ob-robot
parent 5a1f13815f
commit 88513f9ed3
3 changed files with 24 additions and 9 deletions

View File

@ -749,7 +749,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.config_meta_.curr_.config_version_); palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.config_meta_.curr_.config_version_);
// waiting for leader revoke // waiting for leader revoke
while (leader.palf_handle_impl_->state_mgr_.role_ == common::ObRole::LEADER) { while (leader.palf_handle_impl_->state_mgr_.role_ == LEADER) {
sleep(1); sleep(1);
} }
@ -758,7 +758,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
unblock_all_net(leader_idx); unblock_all_net(leader_idx);
// waiting for leader takeover // waiting for leader takeover
while (leader.palf_handle_impl_->state_mgr_.role_ != common::ObRole::LEADER) { while (!leader.palf_handle_impl_->state_mgr_.is_leader_active()) {
sleep(1); sleep(1);
} }
// waiting for upgrading // waiting for upgrading

View File

@ -203,8 +203,15 @@ int ObServerLogBlockMgr::resize_(const int64_t new_size_byte)
K(resize_block_cnt), "free_block_cnt:", free_size_byte / BLOCK_SIZE); K(resize_block_cnt), "free_block_cnt:", free_size_byte / BLOCK_SIZE);
} else if (OB_FAIL( } else if (OB_FAIL(
do_resize_(old_log_pool_meta, resize_block_cnt, new_log_pool_meta))) { do_resize_(old_log_pool_meta, resize_block_cnt, new_log_pool_meta))) {
if (OB_ALLOCATE_DISK_SPACE_FAILED == ret) {
LOG_DBA_ERROR(OB_ALLOCATE_DISK_SPACE_FAILED,
"possible reason",
"may be diskspace is not enough, please check the configuration about log disk",
"expected log disk size(MB)", (new_size_byte+1024*1024-1)/1024/1024);
} else {
CLOG_LOG(ERROR, "do_resize_ failed", K(ret), KPC(this), K(old_log_pool_meta), CLOG_LOG(ERROR, "do_resize_ failed", K(ret), KPC(this), K(old_log_pool_meta),
K(new_log_pool_meta)); K(new_log_pool_meta));
}
} else { } else {
int64_t cost_ts = ObTimeUtility::current_time() - start_ts; int64_t cost_ts = ObTimeUtility::current_time() - start_ts;
CLOG_LOG(INFO, "resize success", K(ret), KPC(this), K(new_size_byte), K(aligned_new_size_byte), CLOG_LOG(INFO, "resize success", K(ret), KPC(this), K(new_size_byte), K(aligned_new_size_byte),

View File

@ -358,6 +358,13 @@ private:
padding_entry_size_ = 0; padding_entry_size_ = 0;
padding_entry_scn_.reset(); padding_entry_scn_.reset();
} }
bool need_clean_cache_(const int ret) const
{
  // Reports whether `ret` is one of the error codes after which cached data should be dropped.
  // NB: several storage devices cannot guarantee linearly consistent reads while a 4K region is
  // being overwritten; for these errors the cache of IteratorStorage should be cleaned so that
  // the data is re-read from disk on the next attempt.
  const bool is_invalid_data = (OB_INVALID_DATA == ret);
  const bool is_checksum_error = (OB_CHECKSUM_ERROR == ret);
  return is_invalid_data || is_checksum_error;
}
private: private:
static constexpr int MAX_READ_TIMES_IN_EACH_NEXT = 2; static constexpr int MAX_READ_TIMES_IN_EACH_NEXT = 2;
// In each `next_entry` round, need read data from `LogStorage` directly, // In each `next_entry` round, need read data from `LogStorage` directly,
@ -622,13 +629,14 @@ int LogIteratorImpl<ENTRY>::next(const share::SCN &replayable_point_scn,
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
PALF_LOG(WARN, "invalid argument", K(replayable_point_scn), KPC(this)); PALF_LOG(WARN, "invalid argument", K(replayable_point_scn), KPC(this));
} else if (OB_FAIL(get_next_entry_(replayable_point_scn, info))) { } else if (OB_FAIL(get_next_entry_(replayable_point_scn, info))) {
// NB: if the data which has been corrupted, clean cache. // NB: if the data which has been corrupted or accum_checksum_ is not match, clean cache.
// NB: if the accum_checksum_ is not match, return OB_CHECKSUM_ERROR. if (need_clean_cache_(ret)) {
if (OB_INVALID_DATA == ret) { PALF_LOG(WARN, "read invalid data, need clean cache, maybe storage device cann't guarantee linear consistency reading",
PALF_LOG(WARN, "read invalid data, need clean cache", K(ret), KPC(this)); K(ret), KPC(this));
// NB: several storage devices cannot guarantee linear consistency reading in scenarios where 4K is overwritten,
// therefore, we should clean the cache of IteratorStorage, and re-read data from disk in next time.
log_storage_->reuse(log_storage_->get_lsn(curr_read_pos_)); log_storage_->reuse(log_storage_->get_lsn(curr_read_pos_));
curr_read_buf_end_pos_ = curr_read_buf_start_pos_ = curr_read_pos_ = 0; curr_read_buf_end_pos_ = curr_read_buf_start_pos_ = curr_read_pos_ = 0;
PALF_LOG(WARN, "read invalid data, has clean cache", K(ret), KPC(this));
} }
if (OB_ITER_END != ret) { if (OB_ITER_END != ret) {
PALF_LOG(WARN, "get_next_entry_ failed", K(ret), KPC(this)); PALF_LOG(WARN, "get_next_entry_ failed", K(ret), KPC(this));