adjust to avoid frequent freeze due to clog disk usage
This commit is contained in:
parent
c15ddf7c6d
commit
2616e12ec1
@ -29,9 +29,7 @@ namespace checkpoint
|
||||
{
|
||||
|
||||
ObCheckpointExecutor::ObCheckpointExecutor()
|
||||
: wait_advance_checkpoint_(false),
|
||||
last_set_wait_advance_checkpoint_time_(0),
|
||||
update_checkpoint_enabled_(false)
|
||||
: update_checkpoint_enabled_(false)
|
||||
{
|
||||
reset();
|
||||
}
|
||||
@ -185,7 +183,6 @@ int ObCheckpointExecutor::update_clog_checkpoint()
|
||||
STORAGE_LOG(ERROR, "set base lsn failed", K(ret), K(clog_checkpoint_lsn), K(ls_id));
|
||||
}
|
||||
} else {
|
||||
ATOMIC_STORE(&wait_advance_checkpoint_, false);
|
||||
FLOG_INFO("[CHECKPOINT] update clog checkpoint successfully",
|
||||
K(clog_checkpoint_lsn), K(checkpoint_ts), K(ls_id),
|
||||
K(service_type));
|
||||
@ -268,43 +265,6 @@ int ObCheckpointExecutor::get_checkpoint_info(ObIArray<ObCheckpointVTInfo> &chec
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool ObCheckpointExecutor::need_flush()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
bool need_flush = false;
|
||||
int64_t end_log_ts = 0;
|
||||
if (OB_FAIL(loghandler_->get_end_ts_ns(end_log_ts))) {
|
||||
STORAGE_LOG(WARN, "get_end_ts_ns failed", K(ret));
|
||||
} else if (end_log_ts -
|
||||
ls_->get_clog_checkpoint_ts() > MAX_NEED_REPLAY_CLOG_INTERVAL) {
|
||||
STORAGE_LOG(INFO, "over max need replay clog interval",
|
||||
K(end_log_ts), K(ls_->get_clog_checkpoint_ts()));
|
||||
need_flush = true;
|
||||
}
|
||||
|
||||
return need_flush;
|
||||
}
|
||||
|
||||
bool ObCheckpointExecutor::is_wait_advance_checkpoint()
|
||||
{
|
||||
if (ATOMIC_LOAD(&wait_advance_checkpoint_)) {
|
||||
if (ObTimeUtility::current_time() - last_set_wait_advance_checkpoint_time_ > 10 * 1000 * 1000) {
|
||||
ATOMIC_STORE(&wait_advance_checkpoint_, false);
|
||||
}
|
||||
}
|
||||
|
||||
return ATOMIC_LOAD(&wait_advance_checkpoint_);
|
||||
}
|
||||
|
||||
void ObCheckpointExecutor::set_wait_advance_checkpoint(int64_t checkpoint_log_ts)
|
||||
{
|
||||
ObSpinLockGuard guard(lock_);
|
||||
if (checkpoint_log_ts == ls_->get_clog_checkpoint_ts()) {
|
||||
ATOMIC_STORE(&wait_advance_checkpoint_, true);
|
||||
last_set_wait_advance_checkpoint_time_ = ObTimeUtility::current_time();
|
||||
}
|
||||
}
|
||||
|
||||
int64_t ObCheckpointExecutor::get_cannot_recycle_log_size()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
@ -73,13 +73,6 @@ public:
|
||||
// for __all_virtual_checkpoint
|
||||
int get_checkpoint_info(ObIArray<ObCheckpointVTInfo> &checkpoint_array);
|
||||
|
||||
// avoid needing to replay too many logs
|
||||
bool need_flush();
|
||||
|
||||
bool is_wait_advance_checkpoint();
|
||||
|
||||
void set_wait_advance_checkpoint(int64_t checkpoint_log_ts);
|
||||
|
||||
int64_t get_cannot_recycle_log_size();
|
||||
|
||||
void get_min_rec_log_ts(int &log_type, int64_t &min_rec_log_ts) const;
|
||||
@ -88,7 +81,6 @@ public:
|
||||
|
||||
private:
|
||||
static const int64_t CLOG_GC_PERCENT = 60;
|
||||
static const int64_t MAX_NEED_REPLAY_CLOG_INTERVAL = (int64_t)60 * 60 * 1000 * 1000 * 1000; //ns
|
||||
|
||||
ObLS *ls_;
|
||||
logservice::ObILogHandler *loghandler_;
|
||||
@ -98,9 +90,6 @@ private:
|
||||
// when the public interfaces are invoked
|
||||
mutable common::ObSpinLock lock_;
|
||||
|
||||
// avoid frequent freeze when clog_used_over_threshold
|
||||
bool wait_advance_checkpoint_;
|
||||
int64_t last_set_wait_advance_checkpoint_time_;
|
||||
bool update_checkpoint_enabled_;
|
||||
};
|
||||
|
||||
|
@ -27,13 +27,13 @@ enum ObCommonCheckpointType
|
||||
{
|
||||
INVALID_BASE_TYPE = 0,
|
||||
|
||||
DATA_CHECKPOINT_TYPE = 1,
|
||||
TX_CTX_MEMTABLE_TYPE = 1,
|
||||
|
||||
TX_CTX_MEMTABLE_TYPE = 2,
|
||||
TX_DATA_MEMTABLE_TYPE = 2,
|
||||
|
||||
TX_DATA_MEMTABLE_TYPE = 3,
|
||||
LOCK_MEMTABLE_TYPE = 3,
|
||||
|
||||
LOCK_MEMTABLE_TYPE = 4,
|
||||
DATA_CHECKPOINT_TYPE = 4,
|
||||
|
||||
// for unittest
|
||||
TEST_COMMON_CHECKPOINT = 5,
|
||||
|
@ -224,7 +224,9 @@ int ObDataCheckpoint::flush(int64_t recycle_log_ts, bool need_freeze)
|
||||
int ret = OB_SUCCESS;
|
||||
if (need_freeze) {
|
||||
if (get_rec_log_ts() <= recycle_log_ts) {
|
||||
if (OB_FAIL(ls_->logstream_freeze())) {
|
||||
if (!is_flushing() &&
|
||||
!has_prepared_flush_checkpoint() &&
|
||||
OB_FAIL(ls_->logstream_freeze())) {
|
||||
STORAGE_LOG(WARN, "minor freeze failed", K(ret), K(ls_->get_ls_id()));
|
||||
}
|
||||
}
|
||||
@ -486,6 +488,11 @@ int ObDataCheckpoint::unlink_from_prepare(ObFreezeCheckpoint *ob_freeze_checkpoi
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool ObDataCheckpoint::has_prepared_flush_checkpoint()
|
||||
{
|
||||
return !prepare_list_.is_empty();
|
||||
}
|
||||
|
||||
int ObDataCheckpoint::get_freezecheckpoint_info(
|
||||
ObIArray<checkpoint::ObFreezeCheckpointVTInfo> &freeze_checkpoint_array)
|
||||
{
|
||||
|
@ -116,6 +116,8 @@ public:
|
||||
|
||||
bool is_flushing() const;
|
||||
|
||||
bool has_prepared_flush_checkpoint();
|
||||
|
||||
private:
|
||||
// traversal prepare_list to flush memtable
|
||||
// case1: some memtable flush failed when ls freeze
|
||||
|
@ -1212,17 +1212,16 @@ int ObLS::flush_if_need_(const bool need_flush)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
int64_t clog_checkpoint_ts = get_clog_checkpoint_ts();
|
||||
if ((!need_flush && !checkpoint_executor_.need_flush()) || checkpoint_executor_.is_wait_advance_checkpoint()) {
|
||||
STORAGE_LOG(INFO, "the ls no need flush to advance_checkpoint", K(get_ls_id()));
|
||||
if (!need_flush) {
|
||||
STORAGE_LOG(INFO, "the ls no need flush to advance_checkpoint",
|
||||
K(get_ls_id()),
|
||||
K(need_flush));
|
||||
} else if (OB_FAIL(checkpoint_executor_.advance_checkpoint_by_flush())) {
|
||||
STORAGE_LOG(WARN, "advance_checkpoint_by_flush failed", KR(ret), K(get_ls_id()));
|
||||
} else {
|
||||
checkpoint_executor_.set_wait_advance_checkpoint(clog_checkpoint_ts);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int ObLS::try_update_uppder_trans_version()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
@ -171,10 +171,10 @@ void ObCheckPointService::ObCheckpointTask::runTimerTask()
|
||||
}
|
||||
}
|
||||
|
||||
bool ObCheckPointService::clog_disk_usage_over_threshold_(int64_t &threshold)
|
||||
bool ObCheckPointService::get_disk_usage_threshold_(int64_t &threshold)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
int clog_disk_usage_over_threshold = false;
|
||||
bool get_disk_usage_threshold_success = false;
|
||||
// avoid clog disk full
|
||||
logservice::ObLogService *log_service = nullptr;
|
||||
PalfEnv *palf_env = nullptr;
|
||||
@ -189,13 +189,13 @@ bool ObCheckPointService::clog_disk_usage_over_threshold_(int64_t &threshold)
|
||||
int64_t total_size = 0;
|
||||
if (OB_FAIL(palf_env->get_disk_usage(used_size, total_size))) {
|
||||
STORAGE_LOG(WARN, "get_disk_usage failed", K(ret), K(used_size), K(total_size));
|
||||
} else if (used_size > (threshold = (total_size * NEED_FLUSH_CLOG_DISK_PERCENT / 100))) {
|
||||
STORAGE_LOG(INFO, "clog disk is not enough",
|
||||
K(used_size), K(total_size));
|
||||
clog_disk_usage_over_threshold = true;
|
||||
} else {
|
||||
threshold = total_size * NEED_FLUSH_CLOG_DISK_PERCENT / 100;
|
||||
get_disk_usage_threshold_success = true;
|
||||
}
|
||||
}
|
||||
return clog_disk_usage_over_threshold;
|
||||
|
||||
return get_disk_usage_threshold_success;
|
||||
}
|
||||
|
||||
bool ObCheckPointService::cannot_recycle_log_over_threshold_(const int64_t threshold)
|
||||
@ -327,7 +327,7 @@ void ObCheckPointService::ObCheckClogDiskUsageTask::runTimerTask()
|
||||
int ret = OB_SUCCESS;
|
||||
int64_t threshold_size = INT64_MAX;
|
||||
bool need_flush = false;
|
||||
if (checkpoint_service_.clog_disk_usage_over_threshold_(threshold_size)) {
|
||||
if (checkpoint_service_.get_disk_usage_threshold_(threshold_size)) {
|
||||
if (checkpoint_service_.cannot_recycle_log_over_threshold_(threshold_size)) {
|
||||
need_flush = true;
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ public:
|
||||
check_clog_disk_usage_task_(*this)
|
||||
{}
|
||||
|
||||
static const int64_t NEED_FLUSH_CLOG_DISK_PERCENT = 60;
|
||||
static const int64_t NEED_FLUSH_CLOG_DISK_PERCENT = 30;
|
||||
static int mtl_init(ObCheckPointService *&m);
|
||||
int init();
|
||||
int start();
|
||||
@ -60,7 +60,7 @@ private:
|
||||
// the thread which is used to deal with checkpoint task.
|
||||
ObLSFreezeThread freeze_thread_;
|
||||
|
||||
bool clog_disk_usage_over_threshold_(int64_t &threshold);
|
||||
bool get_disk_usage_threshold_(int64_t &threshold);
|
||||
bool cannot_recycle_log_over_threshold_(const int64_t threshold);
|
||||
int flush_if_need_(bool need_flush);
|
||||
// reduce the risk of clog full due to checkpoint long interval
|
||||
|
Loading…
x
Reference in New Issue
Block a user