Fixed log disk usage exceeding 100% because recycling blocks hung.
This commit is contained in:

committed by
ant-ob-hengtang

parent
a17991664f
commit
7ed5eaeb8f
@ -77,8 +77,8 @@ TEST_F(TestObSimpleLogDiskMgr, out_of_disk_space)
|
||||
share::SCN create_scn = share::SCN::base_scn();
|
||||
EXPECT_EQ(OB_SUCCESS, get_palf_env(server_idx, palf_env));
|
||||
EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, create_scn, leader_idx, leader));
|
||||
update_disk_options(leader_idx, MIN_DISK_SIZE_PER_PALF_INSTANCE/PALF_PHY_BLOCK_SIZE);
|
||||
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 6*31+1, id, MAX_LOG_BODY_SIZE));
|
||||
update_disk_options(leader_idx, MIN_DISK_SIZE_PER_PALF_INSTANCE/PALF_PHY_BLOCK_SIZE + 2);
|
||||
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 8*31+1, id, MAX_LOG_BODY_SIZE));
|
||||
LogStorage *log_storage = &leader.palf_handle_impl_->log_engine_.log_storage_;
|
||||
while (LSN(6*PALF_BLOCK_SIZE) > log_storage->log_tail_) {
|
||||
usleep(500);
|
||||
@ -92,6 +92,9 @@ TEST_F(TestObSimpleLogDiskMgr, out_of_disk_space)
|
||||
PALF_LOG(INFO, "out of disk max_lsn", K(max_lsn));
|
||||
usleep(palf::BlockGCTimerTask::BLOCK_GC_TIMER_INTERVAL_MS + 5*10000);
|
||||
EXPECT_EQ(OB_LOG_OUTOF_DISK_SPACE, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE));
|
||||
// writing remains stopped after shrinking
|
||||
update_disk_options(leader_idx, MIN_DISK_SIZE_PER_PALF_INSTANCE/PALF_PHY_BLOCK_SIZE);
|
||||
EXPECT_EQ(OB_LOG_OUTOF_DISK_SPACE, submit_log(leader, 1, id, MAX_LOG_BODY_SIZE));
|
||||
usleep(ObLooper::INTERVAL_US*2);
|
||||
}
|
||||
|
||||
|
@ -142,6 +142,8 @@ void LogLoopThread::log_loop_()
|
||||
PALF_LOG_RET(WARN, tmp_ret, "for_each try_freeze_log_func failed", K(tmp_ret));
|
||||
}
|
||||
|
||||
palf_env_impl_->period_calc_disk_usage();
|
||||
|
||||
const int64_t round_cost_time = ObTimeUtility::current_time() - start_ts;
|
||||
int32_t sleep_ts = run_interval_ - static_cast<const int32_t>(round_cost_time);
|
||||
if (sleep_ts < 0) {
|
||||
|
@ -186,7 +186,8 @@ PalfEnvImpl::PalfEnvImpl() : palf_meta_lock_(common::ObLatchIds::PALF_ENV_LOCK),
|
||||
log_updater_(),
|
||||
monitor_(NULL),
|
||||
disk_options_wrapper_(),
|
||||
disk_not_enough_print_interval_(OB_INVALID_TIMESTAMP),
|
||||
disk_not_enough_print_interval_in_gc_thread_(OB_INVALID_TIMESTAMP),
|
||||
disk_not_enough_print_interval_in_loop_thread_(OB_INVALID_TIMESTAMP),
|
||||
self_(),
|
||||
palf_handle_impl_map_(64), // 指定min_size=64
|
||||
last_palf_epoch_(0),
|
||||
@ -350,7 +351,8 @@ void PalfEnvImpl::destroy()
|
||||
election_timer_.destroy();
|
||||
log_alloc_mgr_ = NULL;
|
||||
monitor_ = NULL;
|
||||
disk_not_enough_print_interval_ = OB_INVALID_TIMESTAMP;
|
||||
disk_not_enough_print_interval_in_gc_thread_ = OB_INVALID_TIMESTAMP;
|
||||
disk_not_enough_print_interval_in_loop_thread_ = OB_INVALID_TIMESTAMP;
|
||||
self_.reset();
|
||||
log_dir_[0] = '\0';
|
||||
tmp_log_dir_[0] = '\0';
|
||||
@ -715,11 +717,6 @@ int PalfEnvImpl::try_recycle_blocks()
|
||||
const bool need_recycle =
|
||||
usable_disk_size_to_recycle_blocks >= total_used_size_byte ? false : true;
|
||||
const bool is_shrinking = disk_options_wrapper_.is_shrinking();
|
||||
// Assume that the recycle speed is higher than the write speed; therefore, the abnormal case
|
||||
// is that, after each 'recycle_blocks_', the 'total_used_size_byte' is one PALF_BLOCK_SIZE
|
||||
// more than 'usable_disk_size'.
|
||||
const bool curr_diskspace_enough =
|
||||
usable_disk_limit_size_to_stop_writing >= total_used_size_byte ? true : false;
|
||||
constexpr int64_t MB = 1024 * 1024LL;
|
||||
const int64_t print_error_log_disk_size =
|
||||
disk_opts_for_stopping_writing.log_disk_usage_limit_size_
|
||||
@ -732,7 +729,8 @@ int PalfEnvImpl::try_recycle_blocks()
|
||||
// 2. the snapshot of status is SHRINKING_STATUS.
|
||||
bool has_recycled = false;
|
||||
int64_t oldest_palf_id = INVALID_PALF_ID;
|
||||
if (OB_SUCC(ret) && PalfDiskOptionsWrapper::Status::SHRINKING_STATUS == status) {
|
||||
const bool in_shrinking = (PalfDiskOptionsWrapper::Status::SHRINKING_STATUS == status);
|
||||
if (OB_SUCC(ret) && in_shrinking) {
|
||||
if (total_used_size_byte <= usable_disk_size_to_recycle_blocks) {
|
||||
disk_options_wrapper_.change_to_normal(sequence);
|
||||
PALF_LOG(INFO, "change_to_normal success", K(disk_options_wrapper_),
|
||||
@ -749,31 +747,32 @@ int PalfEnvImpl::try_recycle_blocks()
|
||||
}
|
||||
}
|
||||
|
||||
// step3. reset diskspace_enough_.
|
||||
if (diskspace_enough_ != curr_diskspace_enough) {
|
||||
ATOMIC_STORE(&diskspace_enough_, curr_diskspace_enough);
|
||||
}
|
||||
|
||||
// step3. try print error log
|
||||
// NB: print error log when:
|
||||
// 1. write-stop.
|
||||
// 2. the used log disk space exceeded the log disk recycle threshold(stop-write PalfDiskOptions) and there is no recyclable block.
|
||||
if ((false == diskspace_enough_) || (true == need_print_error_log && false == has_recycled)) {
|
||||
// 1. write-stop (i.e. 'diskspace_enough_' is set to false when the disk usage exceeds the 'log_disk_throttling_percentage_' in disk_opts_for_stopping_writing);
|
||||
// 2. the used log disk space exceeded the log disk recycle threshold and there is no recyclable block (in shrinking log disk status, disk_opts_for_stopping_writing is not
|
||||
// the same as disk_opts_for_recycling_blocks).
|
||||
if (!check_disk_space_enough() || (true == need_print_error_log && false == has_recycled)) {
|
||||
constexpr int64_t INTERVAL = 1*1000*1000;
|
||||
if (palf_reach_time_interval(INTERVAL, disk_not_enough_print_interval_)) {
|
||||
if (palf_reach_time_interval(INTERVAL, disk_not_enough_print_interval_in_gc_thread_)) {
|
||||
int tmp_ret = OB_LOG_OUTOF_DISK_SPACE;
|
||||
const int64_t log_disk_usage_limit_size = disk_opts_for_stopping_writing.log_disk_usage_limit_size_;
|
||||
const int64_t log_disk_warn_percent = disk_opts_for_stopping_writing.log_disk_utilization_threshold_;
|
||||
const int64_t log_disk_limit_percent = disk_opts_for_stopping_writing.log_disk_utilization_limit_threshold_;
|
||||
LOG_DBA_ERROR(OB_LOG_OUTOF_DISK_SPACE, "msg", "log disk space is almost full", "ret", tmp_ret,
|
||||
"total_size(MB)", disk_opts_for_recycling_blocks.log_disk_usage_limit_size_/MB,
|
||||
"total_size(MB)", log_disk_usage_limit_size/MB,
|
||||
"used_size(MB)", total_used_size_byte/MB,
|
||||
"used_percent(%)", (total_used_size_byte* 100) / (disk_opts_for_stopping_writing.log_disk_usage_limit_size_ + 1),
|
||||
"warn_size(MB)", (total_size_to_recycle_blocks*disk_opts_for_recycling_blocks.log_disk_utilization_threshold_)/100/MB,
|
||||
"warn_percent(%)", disk_opts_for_recycling_blocks.log_disk_utilization_threshold_,
|
||||
"limit_size(MB)", (total_size_to_recycle_blocks*disk_opts_for_recycling_blocks.log_disk_utilization_limit_threshold_)/100/MB,
|
||||
"limit_percent(%)", disk_opts_for_recycling_blocks.log_disk_utilization_limit_threshold_,
|
||||
"used_percent(%)", (total_used_size_byte*100) / (log_disk_usage_limit_size+1),
|
||||
"warn_size(MB)", (log_disk_usage_limit_size*log_disk_warn_percent)/100/MB,
|
||||
"warn_percent(%)", log_disk_warn_percent,
|
||||
"limit_size(MB)", (log_disk_usage_limit_size*log_disk_limit_percent)/100/MB,
|
||||
"limit_percent(%)", log_disk_limit_percent,
|
||||
"total_unrecyclable_size_byte(MB)", total_unrecyclable_size_byte/MB,
|
||||
"maximum_used_size(MB)", maximum_used_size/MB,
|
||||
"maximum_log_stream", palf_id,
|
||||
"oldest_log_stream", oldest_palf_id,
|
||||
"oldest_scn", oldest_scn);
|
||||
"oldest_scn", oldest_scn,
|
||||
"in_shrinking", in_shrinking);
|
||||
}
|
||||
} else {
|
||||
if (REACH_TIME_INTERVAL(2 * 1000 * 1000L)) {
|
||||
@ -1302,6 +1301,48 @@ int PalfEnvImpl::get_throttling_options(PalfThrottleOptions &options)
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Recomputes whether the log disk still has room for writing and publishes the
// result in 'diskspace_enough_'. The current disk usage is compared against the
// stop-writing limit derived from the stop-writing disk options; while the usage
// is above that limit, an OB_LOG_OUTOF_DISK_SPACE DBA error is logged, throttled
// through 'disk_not_enough_print_interval_in_loop_thread_'.
// If get_disk_usage_() fails, only a warning is logged and 'diskspace_enough_'
// is left untouched.
void PalfEnvImpl::period_calc_disk_usage()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
constexpr int64_t MB = 1024 * 1024;
|
||||
// Use the options that govern stopping writing.
PalfDiskOptions disk_options = disk_options_wrapper_.get_disk_opts_for_stopping_writing();
|
||||
int64_t used_size_byte = 0;
|
||||
// NOTE(review): 'total_usable_size_byte' is never read in this function.
int64_t total_usable_size_byte = 0;
|
||||
if (OB_FAIL(get_disk_usage_(used_size_byte))) {
|
||||
PALF_LOG(WARN, "get_disk_usage_ failed", K(ret));
|
||||
} else {
|
||||
const int64_t log_disk_usage_limit_size = disk_options.log_disk_usage_limit_size_;
|
||||
const int64_t log_disk_limit_percent = disk_options.log_disk_utilization_limit_threshold_;
|
||||
const int64_t log_disk_warn_percent = disk_options.log_disk_utilization_threshold_;
|
||||
// Stop-writing limit in bytes: limit size scaled by the limit percentage.
const int64_t usable_disk_limit_size_to_stop_writing =
|
||||
log_disk_usage_limit_size * log_disk_limit_percent / 100LL;
|
||||
// Space is considered enough while the usage does not exceed the stop-writing limit.
const bool curr_diskspace_enough =
|
||||
usable_disk_limit_size_to_stop_writing >= used_size_byte ? true : false;
|
||||
// Warning size in bytes, only used for the error log below.
// NOTE(review): 'warn_siz' looks like a misspelling of 'warn_size'.
const int64_t warn_siz =
|
||||
log_disk_usage_limit_size * log_disk_warn_percent / 100LL;
|
||||
// Store the flag only when its value actually changes.
if (diskspace_enough_ != curr_diskspace_enough) {
|
||||
ATOMIC_STORE(&diskspace_enough_, curr_diskspace_enough);
|
||||
}
|
||||
// NB: print error log when:
|
||||
// 1. write-stop.
|
||||
if (!curr_diskspace_enough) {
|
||||
// Throttling interval for the error log (presumably microseconds, i.e. 1 second - TODO confirm).
constexpr int64_t INTERVAL = 1*1000*1000;
|
||||
if (palf_reach_time_interval(INTERVAL, disk_not_enough_print_interval_in_loop_thread_)) {
|
||||
int tmp_ret = OB_LOG_OUTOF_DISK_SPACE;
|
||||
// The '+ 1' in the used_percent divisor presumably guards against division by zero - TODO confirm.
LOG_DBA_ERROR(OB_LOG_OUTOF_DISK_SPACE, "msg", "log disk space is almost full", "ret", tmp_ret,
|
||||
"total_size(MB)", log_disk_usage_limit_size/MB,
|
||||
"used_size(MB)", used_size_byte/MB,
|
||||
"used_percent(%)", (used_size_byte*100) / (log_disk_usage_limit_size + 1),
|
||||
"warn_size(MB)", warn_siz/MB,
|
||||
"warn_percent(%)", log_disk_warn_percent,
|
||||
"limit_size(MB)", usable_disk_limit_size_to_stop_writing/MB,
|
||||
"limit_percent(%)", log_disk_limit_percent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int PalfEnvImpl::init_log_io_worker_config_(const int log_writer_parallelism,
|
||||
const int64_t tenant_id,
|
||||
LogIOWorkerConfig &config)
|
||||
|
@ -202,6 +202,7 @@ public:
|
||||
// should be removed in version 4.2.0.0
|
||||
virtual int update_replayable_point(const SCN &replayable_scn) = 0;
|
||||
virtual int get_throttling_options(PalfThrottleOptions &option) = 0;
|
||||
virtual void period_calc_disk_usage() = 0;
|
||||
VIRTUAL_TO_STRING_KV("IPalfEnvImpl", "Dummy");
|
||||
|
||||
};
|
||||
@ -272,6 +273,7 @@ public:
|
||||
int64_t get_tenant_id() override final;
|
||||
int update_replayable_point(const SCN &replayable_scn) override final;
|
||||
int get_throttling_options(PalfThrottleOptions &option);
|
||||
void period_calc_disk_usage() override final;
|
||||
INHERIT_TO_STRING_KV("IPalfEnvImpl", IPalfEnvImpl, K_(self), K_(log_dir), K_(disk_options_wrapper),
|
||||
KPC(log_alloc_mgr_));
|
||||
// =================== disk space management ==================
|
||||
@ -371,7 +373,8 @@ private:
|
||||
PalfMonitorCb *monitor_;
|
||||
|
||||
PalfDiskOptionsWrapper disk_options_wrapper_;
|
||||
int64_t disk_not_enough_print_interval_;
|
||||
int64_t disk_not_enough_print_interval_in_gc_thread_;
|
||||
int64_t disk_not_enough_print_interval_in_loop_thread_;
|
||||
|
||||
char log_dir_[common::MAX_PATH_SIZE];
|
||||
char tmp_log_dir_[common::MAX_PATH_SIZE];
|
||||
|
Reference in New Issue
Block a user