diff --git a/mittest/logservice/test_ob_simple_log_disk_mgr.cpp b/mittest/logservice/test_ob_simple_log_disk_mgr.cpp index ffdbe4d7c0..924eb7b50f 100644 --- a/mittest/logservice/test_ob_simple_log_disk_mgr.cpp +++ b/mittest/logservice/test_ob_simple_log_disk_mgr.cpp @@ -77,8 +77,8 @@ TEST_F(TestObSimpleLogDiskMgr, out_of_disk_space) share::SCN create_scn = share::SCN::base_scn(); EXPECT_EQ(OB_SUCCESS, get_palf_env(server_idx, palf_env)); EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, create_scn, leader_idx, leader)); - update_disk_options(leader_idx, MIN_DISK_SIZE_PER_PALF_INSTANCE/PALF_PHY_BLOCK_SIZE); - EXPECT_EQ(OB_SUCCESS, submit_log(leader, 6*31+1, id, MAX_LOG_BODY_SIZE)); + update_disk_options(leader_idx, MIN_DISK_SIZE_PER_PALF_INSTANCE/PALF_PHY_BLOCK_SIZE + 2); + EXPECT_EQ(OB_SUCCESS, submit_log(leader, 8*31+1, id, MAX_LOG_BODY_SIZE)); LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_; while (LSN(6*PALF_BLOCK_SIZE) > log_storage->log_tail_) { usleep(500); @@ -92,6 +92,9 @@ TEST_F(TestObSimpleLogDiskMgr, out_of_disk_space) PALF_LOG(INFO, "out of disk max_lsn", K(max_lsn)); usleep(palf::BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS + 5*10000); EXPECT_EQ(OB_LOG_OUTOF_DISK_SPACE, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); + // shrinking 后继续停写 + update_disk_options(leader_idx, MIN_DISK_SIZE_PER_PALF_INSTANCE/PALF_PHY_BLOCK_SIZE); + EXPECT_EQ(OB_LOG_OUTOF_DISK_SPACE, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE)); usleep(ObLooper::INTERVAL_US*2); } diff --git a/src/logservice/palf/log_loop_thread.cpp b/src/logservice/palf/log_loop_thread.cpp index 228bb8915e..4f5054ac8f 100644 --- a/src/logservice/palf/log_loop_thread.cpp +++ b/src/logservice/palf/log_loop_thread.cpp @@ -142,6 +142,8 @@ void LogLoopThread::log_loop_() PALF_LOG_RET(WARN, tmp_ret, "for_each try_freeze_log_func failed", K(tmp_ret)); } + palf_env_impl_->period_calc_disk_usage(); + const int64_t round_cost_time = ObTimeUtility::current_time() - start_ts; int32_t sleep_ts = run_interval_ - static_cast(round_cost_time); if (sleep_ts < 0) { diff --git a/src/logservice/palf/palf_env_impl.cpp b/src/logservice/palf/palf_env_impl.cpp index 5a3ed01f96..714bce8437 100644 --- a/src/logservice/palf/palf_env_impl.cpp +++ b/src/logservice/palf/palf_env_impl.cpp @@ -186,7 +186,8 @@ PalfEnvImpl::PalfEnvImpl() : palf_meta_lock_(common::ObLatchIds::PALF_ENV_LOCK), log_updater_(), monitor_(NULL), disk_options_wrapper_(), - disk_not_enough_print_interval_(OB_INVALID_TIMESTAMP), + disk_not_enough_print_interval_in_gc_thread_(OB_INVALID_TIMESTAMP), + disk_not_enough_print_interval_in_loop_thread_(OB_INVALID_TIMESTAMP), self_(), palf_handle_impl_map_(64), // 指定min_size=64 last_palf_epoch_(0), @@ -350,7 +351,8 @@ void PalfEnvImpl::destroy() election_timer_.destroy(); log_alloc_mgr_ = NULL; monitor_ = NULL; - disk_not_enough_print_interval_ = OB_INVALID_TIMESTAMP; + disk_not_enough_print_interval_in_gc_thread_ = OB_INVALID_TIMESTAMP; + disk_not_enough_print_interval_in_loop_thread_ = OB_INVALID_TIMESTAMP; self_.reset(); log_dir_[0] = '\0'; tmp_log_dir_[0] = '\0'; @@ -715,11 +717,6 @@ int PalfEnvImpl::try_recycle_blocks() const bool need_recycle = usable_disk_size_to_recycle_blocks >= total_used_size_byte ? false : true; const bool is_shrinking = disk_options_wrapper_.is_shrinking(); - // Assume that, recycle speed is higher than write speed, therefor, the abnormal case - // is that, after each 'recycle_blocks_', the 'total_used_size_byte' is one PALF_BLOCK_SIZE - // more than 'usable_disk_size'. - const bool curr_diskspace_enough = - usable_disk_limit_size_to_stop_writing >= total_used_size_byte ? true : false; constexpr int64_t MB = 1024 * 1024LL; const int64_t print_error_log_disk_size = disk_opts_for_stopping_writing.log_disk_usage_limit_size_ @@ -732,7 +729,8 @@ int PalfEnvImpl::try_recycle_blocks() // 2. the snapshot of status is SHRINKING_STATUS. bool has_recycled = false; int64_t oldest_palf_id = INVALID_PALF_ID; - if (OB_SUCC(ret) && PalfDiskOptionsWrapper::Status::SHRINKING_STATUS == status) { + const bool in_shrinking = (PalfDiskOptionsWrapper::Status::SHRINKING_STATUS == status); + if (OB_SUCC(ret) && in_shrinking) { if (total_used_size_byte <= usable_disk_size_to_recycle_blocks) { disk_options_wrapper_.change_to_normal(sequence); PALF_LOG(INFO, "change_to_normal success", K(disk_options_wrapper_), @@ -749,31 +747,32 @@ int PalfEnvImpl::try_recycle_blocks() } } - // step3. reset diskspace_enough_. - if (diskspace_enough_ != curr_diskspace_enough) { - ATOMIC_STORE(&diskspace_enough_, curr_diskspace_enough); - } - + // step3. try print error log // NB: print error log when: - // 1. write-stop. - // 2. the used log disk space exceeded the log disk recycle threshold(stop-write PalfDiskOptions) and there is no recycable block. - if ((false == diskspace_enough_) || (true == need_print_error_log && false == has_recycled)) { + // 1. write-stop.(i.e. set 'diskspace_enough_' to true when the disk usage execeed than the 'log_disk_throttling_percentage_' in disk_opts_for_stopping_writing); + // 2. the used log disk space exceeded the log disk recycle threshold and there is no recycable block(in shrinking log disk status, disk_opts_for_stopping_writing is not + // same with disk_opts_for_recycling_blocks). + if (!check_disk_space_enough() || (true == need_print_error_log && false == has_recycled)) { constexpr int64_t INTERVAL = 1*1000*1000; - if (palf_reach_time_interval(INTERVAL, disk_not_enough_print_interval_)) { + if (palf_reach_time_interval(INTERVAL, disk_not_enough_print_interval_in_gc_thread_)) { int tmp_ret = OB_LOG_OUTOF_DISK_SPACE; + const int64_t log_disk_usage_limit_size = disk_opts_for_stopping_writing.log_disk_usage_limit_size_; + const int64_t log_disk_warn_percent = disk_opts_for_stopping_writing.log_disk_utilization_threshold_; + const int64_t log_disk_limit_percent = disk_opts_for_stopping_writing.log_disk_utilization_limit_threshold_; LOG_DBA_ERROR(OB_LOG_OUTOF_DISK_SPACE, "msg", "log disk space is almost full", "ret", tmp_ret, - "total_size(MB)", disk_opts_for_recycling_blocks.log_disk_usage_limit_size_/MB, + "total_size(MB)", log_disk_usage_limit_size/MB, "used_size(MB)", total_used_size_byte/MB, - "used_percent(%)", (total_used_size_byte* 100) / (disk_opts_for_stopping_writing.log_disk_usage_limit_size_ + 1), - "warn_size(MB)", (total_size_to_recycle_blocks*disk_opts_for_recycling_blocks.log_disk_utilization_threshold_)/100/MB, - "warn_percent(%)", disk_opts_for_recycling_blocks.log_disk_utilization_threshold_, - "limit_size(MB)", (total_size_to_recycle_blocks*disk_opts_for_recycling_blocks.log_disk_utilization_limit_threshold_)/100/MB, - "limit_percent(%)", disk_opts_for_recycling_blocks.log_disk_utilization_limit_threshold_, + "used_percent(%)", (total_used_size_byte*100) / (log_disk_usage_limit_size+1), + "warn_size(MB)", (log_disk_usage_limit_size*log_disk_warn_percent)/100/MB, + "warn_percent(%)", log_disk_warn_percent, + "limit_size(MB)", (log_disk_usage_limit_size*log_disk_limit_percent)/100/MB, + "limit_percent(%)", log_disk_limit_percent, "total_unrecyclable_size_byte(MB)", total_unrecyclable_size_byte/MB, "maximum_used_size(MB)", maximum_used_size/MB, "maximum_log_stream", palf_id, "oldest_log_stream", oldest_palf_id, - "oldest_scn", oldest_scn); + "oldest_scn", oldest_scn, + "in_shrinking", in_shrinking); } } else { if (REACH_TIME_INTERVAL(2 * 1000 * 1000L)) { @@ -1302,6 +1301,48 @@ int PalfEnvImpl::get_throttling_options(PalfThrottleOptions &options) return ret; } +void PalfEnvImpl::period_calc_disk_usage() +{ + int ret = OB_SUCCESS; + constexpr int64_t MB = 1024 * 1024; + PalfDiskOptions disk_options = disk_options_wrapper_.get_disk_opts_for_stopping_writing(); + int64_t used_size_byte = 0; + int64_t total_usable_size_byte = 0; + if (OB_FAIL(get_disk_usage_(used_size_byte))) { + PALF_LOG(WARN, "get_disk_usage_ failed", K(ret)); + } else { + const int64_t log_disk_usage_limit_size = disk_options.log_disk_usage_limit_size_; + const int64_t log_disk_limit_percent = disk_options.log_disk_utilization_limit_threshold_; + const int64_t log_disk_warn_percent = disk_options.log_disk_utilization_threshold_; + const int64_t usable_disk_limit_size_to_stop_writing = + log_disk_usage_limit_size * log_disk_limit_percent / 100LL; + const bool curr_diskspace_enough = + usable_disk_limit_size_to_stop_writing >= used_size_byte ? true : false; + const int64_t warn_siz = + log_disk_usage_limit_size * log_disk_warn_percent / 100LL; + if (diskspace_enough_ != curr_diskspace_enough) { + ATOMIC_STORE(&diskspace_enough_, curr_diskspace_enough); + } + // NB: print error log when: + // 1. write-stop. + if (!curr_diskspace_enough) { + constexpr int64_t INTERVAL = 1*1000*1000; + if (palf_reach_time_interval(INTERVAL, disk_not_enough_print_interval_in_loop_thread_)) { + int tmp_ret = OB_LOG_OUTOF_DISK_SPACE; + LOG_DBA_ERROR(OB_LOG_OUTOF_DISK_SPACE, "msg", "log disk space is almost full", "ret", tmp_ret, + "total_size(MB)", log_disk_usage_limit_size/MB, + "used_size(MB)", used_size_byte/MB, + "used_percent(%)", (used_size_byte*100) / (log_disk_usage_limit_size + 1), + "warn_size(MB)", warn_siz/MB, + "warn_percent(%)", log_disk_warn_percent, + "limit_size(MB)", usable_disk_limit_size_to_stop_writing/MB, + "limit_percent(%)", log_disk_limit_percent); + } + } + } + +} + int PalfEnvImpl::init_log_io_worker_config_(const int log_writer_parallelism, const int64_t tenant_id, LogIOWorkerConfig &config) diff --git a/src/logservice/palf/palf_env_impl.h b/src/logservice/palf/palf_env_impl.h index d007316b75..49df504d3a 100644 --- a/src/logservice/palf/palf_env_impl.h +++ b/src/logservice/palf/palf_env_impl.h @@ -202,6 +202,7 @@ public: // should be removed in version 4.2.0.0 virtual int update_replayable_point(const SCN &replayable_scn) = 0; virtual int get_throttling_options(PalfThrottleOptions &option) = 0; + virtual void period_calc_disk_usage() = 0; VIRTUAL_TO_STRING_KV("IPalfEnvImpl", "Dummy"); }; @@ -272,6 +273,7 @@ public: int64_t get_tenant_id() override final; int update_replayable_point(const SCN &replayable_scn) override final; int get_throttling_options(PalfThrottleOptions &option); + void period_calc_disk_usage() override final; INHERIT_TO_STRING_KV("IPalfEnvImpl", IPalfEnvImpl, K_(self), K_(log_dir), K_(disk_options_wrapper), KPC(log_alloc_mgr_)); // =================== disk space management ================== @@ -371,7 +373,8 @@ private: PalfMonitorCb *monitor_; PalfDiskOptionsWrapper disk_options_wrapper_; - int64_t disk_not_enough_print_interval_; + int64_t disk_not_enough_print_interval_in_gc_thread_; + int64_t disk_not_enough_print_interval_in_loop_thread_; char log_dir_[common::MAX_PATH_SIZE]; char tmp_log_dir_[common::MAX_PATH_SIZE];