Avoid restarting observer when incorrect data is read from disk.

2023-05-25 12:11:10 +00:00
parent 5a1f13815f
commit 88513f9ed3
3 changed files with 24 additions and 9 deletions
--- a/mittest/logservice/test_ob_simple_log_arb.cpp
+++ b/mittest/logservice/test_ob_simple_log_arb.cpp
@ -749,7 +749,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
            palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.config_meta_.curr_.config_version_);

  // waiting for leader revoke
-  while (leader.palf_handle_impl_->state_mgr_.role_ == common::ObRole::LEADER) {
+  while (leader.palf_handle_impl_->state_mgr_.role_ == LEADER) {
    sleep(1);
  }

@ -758,7 +758,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
  unblock_all_net(leader_idx);

  // waiting for leader takeover
-  while (leader.palf_handle_impl_->state_mgr_.role_ != common::ObRole::LEADER) {
+  while (!leader.palf_handle_impl_->state_mgr_.is_leader_active()) {
    sleep(1);
  }
  // waiting for upgrading
--- a/src/logservice/ob_server_log_block_mgr.cpp
+++ b/src/logservice/ob_server_log_block_mgr.cpp
@ -203,8 +203,15 @@ int ObServerLogBlockMgr::resize_(const int64_t new_size_byte)
             K(resize_block_cnt), "free_block_cnt:", free_size_byte / BLOCK_SIZE);
  } else if (OB_FAIL(
                 do_resize_(old_log_pool_meta, resize_block_cnt, new_log_pool_meta))) {
+    if (OB_ALLOCATE_DISK_SPACE_FAILED == ret) {
+      LOG_DBA_ERROR(OB_ALLOCATE_DISK_SPACE_FAILED,
+                    "possible reason",
+                    "may be diskspace is not enough, please check the configuration about log disk",
+                    "expected log disk size(MB)", (new_size_byte+1024*1024-1)/1024/1024);
+    } else {
      CLOG_LOG(ERROR, "do_resize_ failed", K(ret), KPC(this), K(old_log_pool_meta),
               K(new_log_pool_meta));
+    }
  } else {
    int64_t cost_ts = ObTimeUtility::current_time() - start_ts;
    CLOG_LOG(INFO, "resize success", K(ret), KPC(this), K(new_size_byte), K(aligned_new_size_byte),
--- a/src/logservice/palf/log_iterator_impl.h
+++ b/src/logservice/palf/log_iterator_impl.h
@ -358,6 +358,13 @@ private:
    padding_entry_size_ = 0;
    padding_entry_scn_.reset();
  }
+
+  bool need_clean_cache_(const int ret) const
+  {
+    // NB: several storage devices cannot guarantee linearly consistent reads when a 4K block is overwritten; therefore, on
+    // OB_INVALID_DATA or OB_CHECKSUM_ERROR, the cache of IteratorStorage should be cleaned and data re-read from disk next time.
+    return OB_INVALID_DATA == ret || OB_CHECKSUM_ERROR == ret;
+  }
 private:
 static constexpr int MAX_READ_TIMES_IN_EACH_NEXT = 2;
  // In each `next_entry` round, need read data from `LogStorage` directly,
@ -622,13 +629,14 @@ int LogIteratorImpl<ENTRY>::next(const share::SCN &replayable_point_scn,
    ret = OB_INVALID_ARGUMENT;
    PALF_LOG(WARN, "invalid argument", K(replayable_point_scn), KPC(this));
  } else if (OB_FAIL(get_next_entry_(replayable_point_scn, info))) {
-    // NB: if the data which has been corrupted, clean cache.
-    // NB: if the accum_checksum_ is not match, return OB_CHECKSUM_ERROR.
-    if (OB_INVALID_DATA == ret) {
-      PALF_LOG(WARN, "read invalid data, need clean cache", K(ret), KPC(this));
+    // NB: if the data has been corrupted or accum_checksum_ does not match, clean cache.
+    if (need_clean_cache_(ret)) {
+      PALF_LOG(WARN, "read invalid data, need clean cache, maybe storage device cann't guarantee linear consistency reading",
+               K(ret), KPC(this));
+    // NB: several storage devices cannot guarantee linear consistency reading in scenarios where 4K is overwritten,
+    // therefore, we should clean the cache of IteratorStorage, and re-read data from disk in next time.
      log_storage_->reuse(log_storage_->get_lsn(curr_read_pos_));
      curr_read_buf_end_pos_ = curr_read_buf_start_pos_ = curr_read_pos_ = 0;
-      PALF_LOG(WARN, "read invalid data, has clean cache", K(ret), KPC(this));
    }
    if (OB_ITER_END != ret) {
      PALF_LOG(WARN, "get_next_entry_ failed", K(ret), KPC(this));