Avoid restarting the observer when incorrect data is read from disk.
This commit is contained in:
@ -749,7 +749,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
|
||||
palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.config_meta_.curr_.config_version_);
|
||||
|
||||
// waiting for leader revoke
|
||||
while (leader.palf_handle_impl_->state_mgr_.role_ == common::ObRole::LEADER) {
|
||||
while (leader.palf_handle_impl_->state_mgr_.role_ == LEADER) {
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
@ -758,7 +758,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
|
||||
unblock_all_net(leader_idx);
|
||||
|
||||
// waiting for leader takeover
|
||||
while (leader.palf_handle_impl_->state_mgr_.role_ != common::ObRole::LEADER) {
|
||||
while (!leader.palf_handle_impl_->state_mgr_.is_leader_active()) {
|
||||
sleep(1);
|
||||
}
|
||||
// waiting for upgrading
|
||||
|
||||
@ -203,8 +203,15 @@ int ObServerLogBlockMgr::resize_(const int64_t new_size_byte)
|
||||
K(resize_block_cnt), "free_block_cnt:", free_size_byte / BLOCK_SIZE);
|
||||
} else if (OB_FAIL(
|
||||
do_resize_(old_log_pool_meta, resize_block_cnt, new_log_pool_meta))) {
|
||||
if (OB_ALLOCATE_DISK_SPACE_FAILED == ret) {
|
||||
LOG_DBA_ERROR(OB_ALLOCATE_DISK_SPACE_FAILED,
|
||||
"possible reason",
|
||||
"may be diskspace is not enough, please check the configuration about log disk",
|
||||
"expected log disk size(MB)", (new_size_byte+1024*1024-1)/1024/1024);
|
||||
} else {
|
||||
CLOG_LOG(ERROR, "do_resize_ failed", K(ret), KPC(this), K(old_log_pool_meta),
|
||||
K(new_log_pool_meta));
|
||||
}
|
||||
} else {
|
||||
int64_t cost_ts = ObTimeUtility::current_time() - start_ts;
|
||||
CLOG_LOG(INFO, "resize success", K(ret), KPC(this), K(new_size_byte), K(aligned_new_size_byte),
|
||||
|
||||
@ -358,6 +358,13 @@ private:
|
||||
padding_entry_size_ = 0;
|
||||
padding_entry_scn_.reset();
|
||||
}
|
||||
|
||||
bool need_clean_cache_(const int ret) const
|
||||
{
|
||||
// NB: several storage devices cannot guarantee linear consistency reading in scenarios where 4K is overwritten,
|
||||
// therefore, we should clean the cache of IteratorStorage, and re-read data from disk in next time.
|
||||
return OB_INVALID_DATA == ret || OB_CHECKSUM_ERROR == ret;
|
||||
}
|
||||
private:
|
||||
static constexpr int MAX_READ_TIMES_IN_EACH_NEXT = 2;
|
||||
// In each `next_entry` round, need read data from `LogStorage` directly,
|
||||
@ -622,13 +629,14 @@ int LogIteratorImpl<ENTRY>::next(const share::SCN &replayable_point_scn,
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
PALF_LOG(WARN, "invalid argument", K(replayable_point_scn), KPC(this));
|
||||
} else if (OB_FAIL(get_next_entry_(replayable_point_scn, info))) {
|
||||
// NB: if the data which has been corrupted, clean cache.
|
||||
// NB: if the accum_checksum_ is not match, return OB_CHECKSUM_ERROR.
|
||||
if (OB_INVALID_DATA == ret) {
|
||||
PALF_LOG(WARN, "read invalid data, need clean cache", K(ret), KPC(this));
|
||||
// NB: if the data which has been corrupted or accum_checksum_ is not match, clean cache.
|
||||
if (need_clean_cache_(ret)) {
|
||||
PALF_LOG(WARN, "read invalid data, need clean cache, maybe storage device cann't guarantee linear consistency reading",
|
||||
K(ret), KPC(this));
|
||||
// NB: several storage devices cannot guarantee linear consistency reading in scenarios where 4K is overwritten,
|
||||
// therefore, we should clean the cache of IteratorStorage, and re-read data from disk in next time.
|
||||
log_storage_->reuse(log_storage_->get_lsn(curr_read_pos_));
|
||||
curr_read_buf_end_pos_ = curr_read_buf_start_pos_ = curr_read_pos_ = 0;
|
||||
PALF_LOG(WARN, "read invalid data, has clean cache", K(ret), KPC(this));
|
||||
}
|
||||
if (OB_ITER_END != ret) {
|
||||
PALF_LOG(WARN, "get_next_entry_ failed", K(ret), KPC(this));
|
||||
|
||||
Reference in New Issue
Block a user