BUGFIX: fix deadlock between advance_checkpoint_by_flush and create_ls
This commit is contained in:
@ -72,13 +72,9 @@ static int advance_checkpoint_by_flush(const uint64_t tenant_id, const share::Ob
|
|||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
const int64_t advance_checkpoint_timeout = GCONF._advance_checkpoint_timeout;
|
const int64_t advance_checkpoint_timeout = GCONF._advance_checkpoint_timeout;
|
||||||
LOG_INFO("backup advance checkpoint timeout", K(tenant_id), K(advance_checkpoint_timeout));
|
LOG_INFO("backup advance checkpoint timeout", K(tenant_id), K(advance_checkpoint_timeout));
|
||||||
checkpoint::ObCheckpointExecutor *checkpoint_executor = NULL;
|
|
||||||
if (start_scn < 0) {
|
if (start_scn < 0) {
|
||||||
ret = OB_INVALID_ARGUMENT;
|
ret = OB_INVALID_ARGUMENT;
|
||||||
LOG_WARN("get invalid args", K(ret), K(start_scn));
|
LOG_WARN("get invalid args", K(ret), K(start_scn));
|
||||||
} else if (OB_ISNULL(checkpoint_executor = ls->get_checkpoint_executor())) {
|
|
||||||
ret = OB_ERR_UNEXPECTED;
|
|
||||||
LOG_WARN("checkpoint executor should not be null", K(ret), KPC(ls));
|
|
||||||
} else {
|
} else {
|
||||||
ObLSMetaPackage ls_meta_package;
|
ObLSMetaPackage ls_meta_package;
|
||||||
int64_t i = 0;
|
int64_t i = 0;
|
||||||
@ -88,7 +84,7 @@ static int advance_checkpoint_by_flush(const uint64_t tenant_id, const share::Ob
|
|||||||
if (cur_ts - start_ts > advance_checkpoint_timeout) {
|
if (cur_ts - start_ts > advance_checkpoint_timeout) {
|
||||||
ret = OB_BACKUP_ADVANCE_CHECKPOINT_TIMEOUT;
|
ret = OB_BACKUP_ADVANCE_CHECKPOINT_TIMEOUT;
|
||||||
LOG_WARN("backup advance checkpoint by flush timeout", K(ret), K(tenant_id), K(ls_id), K(start_scn));
|
LOG_WARN("backup advance checkpoint by flush timeout", K(ret), K(tenant_id), K(ls_id), K(start_scn));
|
||||||
} else if (OB_FAIL(checkpoint_executor->advance_checkpoint_by_flush(start_scn))) {
|
} else if (OB_FAIL(ls->advance_checkpoint_by_flush(start_scn))) {
|
||||||
if (OB_NO_NEED_UPDATE == ret) {
|
if (OB_NO_NEED_UPDATE == ret) {
|
||||||
// clog checkpoint ts has passed start log ts
|
// clog checkpoint ts has passed start log ts
|
||||||
ret = OB_SUCCESS;
|
ret = OB_SUCCESS;
|
||||||
|
|||||||
@ -200,48 +200,52 @@ int ObCheckpointExecutor::update_clog_checkpoint()
|
|||||||
|
|
||||||
int ObCheckpointExecutor::advance_checkpoint_by_flush(int64_t recycle_ts) {
|
int ObCheckpointExecutor::advance_checkpoint_by_flush(int64_t recycle_ts) {
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
int tmp_ret = OB_SUCCESS;
|
|
||||||
|
|
||||||
// calcu recycle_ts according to clog disk situation
|
ObSpinLockGuard guard(lock_);
|
||||||
if (recycle_ts == 0) {
|
if (update_checkpoint_enabled_) {
|
||||||
LSN end_lsn;
|
int tmp_ret = OB_SUCCESS;
|
||||||
int64_t calcu_recycle_ts = INT64_MAX;
|
|
||||||
if (OB_FAIL(loghandler_->get_end_lsn(end_lsn))) {
|
// calcu recycle_ts according to clog disk situation
|
||||||
STORAGE_LOG(WARN, "get end lsn failed", K(ret), K(ls_->get_ls_id()));
|
if (recycle_ts == 0) {
|
||||||
} else {
|
LSN end_lsn;
|
||||||
LSN clog_checkpoint_lsn = ls_->get_clog_base_lsn();
|
int64_t calcu_recycle_ts = INT64_MAX;
|
||||||
LSN calcu_recycle_lsn = clog_checkpoint_lsn
|
if (OB_FAIL(loghandler_->get_end_lsn(end_lsn))) {
|
||||||
+ ((end_lsn - clog_checkpoint_lsn) * CLOG_GC_PERCENT / 100);
|
STORAGE_LOG(WARN, "get end lsn failed", K(ret), K(ls_->get_ls_id()));
|
||||||
if (OB_FAIL(loghandler_->locate_by_lsn_coarsely(calcu_recycle_lsn, recycle_ts))) {
|
|
||||||
STORAGE_LOG(WARN, "locate_by_lsn_coarsely failed", K(calcu_recycle_ts), K(calcu_recycle_lsn),
|
|
||||||
K(recycle_ts), K(ls_->get_ls_id()));
|
|
||||||
} else {
|
} else {
|
||||||
STORAGE_LOG(INFO, "advance checkpoint by flush to avoid clog disk full",
|
LSN clog_checkpoint_lsn = ls_->get_clog_base_lsn();
|
||||||
K(recycle_ts), K(end_lsn), K(clog_checkpoint_lsn),
|
LSN calcu_recycle_lsn = clog_checkpoint_lsn
|
||||||
K(calcu_recycle_lsn), K(ls_->get_ls_id()));
|
+ ((end_lsn - clog_checkpoint_lsn) * CLOG_GC_PERCENT / 100);
|
||||||
|
if (OB_FAIL(loghandler_->locate_by_lsn_coarsely(calcu_recycle_lsn, recycle_ts))) {
|
||||||
|
STORAGE_LOG(WARN, "locate_by_lsn_coarsely failed", K(calcu_recycle_ts), K(calcu_recycle_lsn),
|
||||||
|
K(recycle_ts), K(ls_->get_ls_id()));
|
||||||
|
} else {
|
||||||
|
STORAGE_LOG(INFO, "advance checkpoint by flush to avoid clog disk full",
|
||||||
|
K(recycle_ts), K(end_lsn), K(clog_checkpoint_lsn),
|
||||||
|
K(calcu_recycle_lsn), K(ls_->get_ls_id()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// the log of end_log_lsn and the log of clog_checkpoint_lsn may be in a block
|
||||||
|
if (recycle_ts < ls_->get_clog_checkpoint_ts()) {
|
||||||
|
recycle_ts = INT64_MAX;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the log of end_log_lsn and the log of clog_checkpoint_lsn may be in a block
|
|
||||||
if (recycle_ts < ls_->get_clog_checkpoint_ts()) {
|
|
||||||
recycle_ts = INT64_MAX;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (OB_SUCC(ret)) {
|
if (OB_SUCC(ret)) {
|
||||||
if (recycle_ts < ls_->get_clog_checkpoint_ts()) {
|
if (recycle_ts < ls_->get_clog_checkpoint_ts()) {
|
||||||
ret = OB_NO_NEED_UPDATE;
|
ret = OB_NO_NEED_UPDATE;
|
||||||
STORAGE_LOG(WARN, "recycle_ts should not smaller than checkpoint_log_ts",
|
STORAGE_LOG(WARN, "recycle_ts should not smaller than checkpoint_log_ts",
|
||||||
K(recycle_ts), K(ls_->get_clog_checkpoint_ts()), K(ls_->get_ls_id()));
|
K(recycle_ts), K(ls_->get_clog_checkpoint_ts()), K(ls_->get_ls_id()));
|
||||||
} else {
|
} else {
|
||||||
STORAGE_LOG(INFO, "start flush",
|
STORAGE_LOG(INFO, "start flush",
|
||||||
K(recycle_ts),
|
K(recycle_ts),
|
||||||
K(ls_->get_clog_checkpoint_ts()),
|
K(ls_->get_clog_checkpoint_ts()),
|
||||||
K(ls_->get_ls_id()));
|
K(ls_->get_ls_id()));
|
||||||
for (int i = 1; i < ObLogBaseType::MAX_LOG_BASE_TYPE; i++) {
|
for (int i = 1; i < ObLogBaseType::MAX_LOG_BASE_TYPE; i++) {
|
||||||
if (OB_NOT_NULL(handlers_[i])
|
if (OB_NOT_NULL(handlers_[i])
|
||||||
&& OB_SUCCESS != (tmp_ret = (handlers_[i]->flush(recycle_ts)))) {
|
&& OB_SUCCESS != (tmp_ret = (handlers_[i]->flush(recycle_ts)))) {
|
||||||
STORAGE_LOG(WARN, "handler flush failed", K(recycle_ts), K(tmp_ret),
|
STORAGE_LOG(WARN, "handler flush failed", K(recycle_ts), K(tmp_ret),
|
||||||
K(i), K(ls_->get_ls_id()));
|
K(i), K(ls_->get_ls_id()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1037,9 +1037,6 @@ int ObStartPrepareMigrationTask::wait_ls_checkpoint_ts_push_()
|
|||||||
LOG_WARN("failed to get ls saved info", K(ret), KPC(ls), KPC(ctx_));
|
LOG_WARN("failed to get ls saved info", K(ret), KPC(ls), KPC(ctx_));
|
||||||
} else if (!saved_info.is_empty()) {
|
} else if (!saved_info.is_empty()) {
|
||||||
LOG_INFO("saved info is not empty, no need wait ls checkpoint ts push", K(saved_info), KPC(ctx_));
|
LOG_INFO("saved info is not empty, no need wait ls checkpoint ts push", K(saved_info), KPC(ctx_));
|
||||||
} else if (OB_ISNULL(checkpoint_executor = ls->get_checkpoint_executor())) {
|
|
||||||
ret = OB_ERR_UNEXPECTED;
|
|
||||||
LOG_WARN("checkpoint executor should not be NULL", K(ret), KPC(ctx_), KP(checkpoint_executor));
|
|
||||||
} else {
|
} else {
|
||||||
const int64_t wait_checkpoint_push_start_ts = ObTimeUtility::current_time();
|
const int64_t wait_checkpoint_push_start_ts = ObTimeUtility::current_time();
|
||||||
while (OB_SUCC(ret)) {
|
while (OB_SUCC(ret)) {
|
||||||
@ -1059,7 +1056,7 @@ int ObStartPrepareMigrationTask::wait_ls_checkpoint_ts_push_()
|
|||||||
const int64_t cost_ts = ObTimeUtility::current_time() - wait_checkpoint_push_start_ts;
|
const int64_t cost_ts = ObTimeUtility::current_time() - wait_checkpoint_push_start_ts;
|
||||||
LOG_INFO("succeed wait clog checkpoint ts push", "cost", cost_ts, "ls_id", ctx_->arg_.ls_id_);
|
LOG_INFO("succeed wait clog checkpoint ts push", "cost", cost_ts, "ls_id", ctx_->arg_.ls_id_);
|
||||||
break;
|
break;
|
||||||
} else if (OB_FAIL(checkpoint_executor->advance_checkpoint_by_flush(ctx_->log_sync_scn_))) {
|
} else if (OB_FAIL(ls->advance_checkpoint_by_flush(ctx_->log_sync_scn_))) {
|
||||||
if (OB_NO_NEED_UPDATE == ret) {
|
if (OB_NO_NEED_UPDATE == ret) {
|
||||||
ret = OB_SUCCESS;
|
ret = OB_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -1153,6 +1153,14 @@ int ObLS::force_tablet_freeze(const ObTabletID &tablet_id)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lock-guarded entry point for advancing the checkpoint by flush.
//
// Builds an ObLSLockGuard over lock_ and, while that guard is in scope,
// forwards recycle_ts unchanged to
// checkpoint_executor_.advance_checkpoint_by_flush(), returning its result
// code as-is.
//
// NOTE(review): read_lock = LSLOCKALL with write_lock = 0 presumably requests
// every LS lock bit in read mode and none in write mode -- confirm against
// ObLSLockGuard. Per the commit title this serialization is what avoids the
// deadlock with create_ls; the lock ordering itself is not visible here.
int ObLS::advance_checkpoint_by_flush(int64_t recycle_ts)
|
||||||
|
{
|
||||||
|
int64_t read_lock = LSLOCKALL;
|
||||||
|
int64_t write_lock = 0;
|
||||||
|
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
|
||||||
|
return checkpoint_executor_.advance_checkpoint_by_flush(recycle_ts);
|
||||||
|
}
|
||||||
|
|
||||||
int ObLS::get_ls_meta_package_and_tablet_ids(ObLSMetaPackage &meta_package, common::ObIArray<common::ObTabletID> &tablet_ids)
|
int ObLS::get_ls_meta_package_and_tablet_ids(ObLSMetaPackage &meta_package, common::ObIArray<common::ObTabletID> &tablet_ids)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
|
|||||||
@ -605,6 +605,7 @@ public:
|
|||||||
|
|
||||||
// ObCheckpointExecutor interface:
|
// ObCheckpointExecutor interface:
|
||||||
DELEGATE_WITH_RET(checkpoint_executor_, get_checkpoint_info, int);
|
DELEGATE_WITH_RET(checkpoint_executor_, get_checkpoint_info, int);
|
||||||
|
int advance_checkpoint_by_flush(int64_t recycle_ts);
|
||||||
|
|
||||||
// ObDataCheckpoint interface:
|
// ObDataCheckpoint interface:
|
||||||
DELEGATE_WITH_RET(data_checkpoint_, get_freezecheckpoint_info, int);
|
DELEGATE_WITH_RET(data_checkpoint_, get_freezecheckpoint_info, int);
|
||||||
|
|||||||
@ -50,8 +50,7 @@ int ObAdvanceLSCkptTask::try_advance_ls_ckpt_ts()
|
|||||||
ret = OB_INVALID_ARGUMENT;
|
ret = OB_INVALID_ARGUMENT;
|
||||||
}
|
}
|
||||||
TRANS_LOG(WARN, "get ls faild", K(ret), K(MTL(ObLSService *)));
|
TRANS_LOG(WARN, "get ls faild", K(ret), K(MTL(ObLSService *)));
|
||||||
} else if (ls_handle.get_ls()->get_checkpoint_executor()->advance_checkpoint_by_flush(
|
} else if (ls_handle.get_ls()->advance_checkpoint_by_flush(target_ckpt_ts_)) {
|
||||||
target_ckpt_ts_)) {
|
|
||||||
TRANS_LOG(WARN, "advance checkpoint ts failed", K(ret), K(ls_id_), K(target_ckpt_ts_));
|
TRANS_LOG(WARN, "advance checkpoint ts failed", K(ret), K(ls_id_), K(target_ckpt_ts_));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -355,10 +355,7 @@ int ObCheckPointService::do_minor_freeze()
|
|||||||
ObLS *ls = nullptr;
|
ObLS *ls = nullptr;
|
||||||
int ls_cnt = 0;
|
int ls_cnt = 0;
|
||||||
for (; OB_SUCC(iter->get_next(ls)); ++ls_cnt) {
|
for (; OB_SUCC(iter->get_next(ls)); ++ls_cnt) {
|
||||||
ObCheckpointExecutor *checkpoint_executor = nullptr;
|
if (OB_SUCCESS != (tmp_ret = (ls->advance_checkpoint_by_flush(INT64_MAX)))) {
|
||||||
if (OB_ISNULL(checkpoint_executor = ls->get_checkpoint_executor())) {
|
|
||||||
STORAGE_LOG(WARN, "checkpoint_executor should not be null", K(ls->get_ls_id()));
|
|
||||||
} else if (OB_SUCCESS != (tmp_ret = (checkpoint_executor->advance_checkpoint_by_flush(INT64_MAX)))) {
|
|
||||||
STORAGE_LOG(WARN, "advance_checkpoint_by_flush failed", K(tmp_ret), K(ls->get_ls_id()));
|
STORAGE_LOG(WARN, "advance_checkpoint_by_flush failed", K(tmp_ret), K(ls->get_ls_id()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user