Fix backup running concurrently with two major freezes, and with a transfer that changes the turn ID while an I/O error changes the retry ID

This commit is contained in:
oceanoverflow
2024-01-26 06:42:18 +00:00
committed by ob-robot
parent 718216ee0c
commit 380d486f80
5 changed files with 44 additions and 4 deletions

View File

@ -591,6 +591,8 @@ class ObString;
ACT(HANG_IN_CLONE_SYS_RELEASE_RESOURCE,)\
ACT(HANG_IN_CLONE_SYS_SUCCESS,)\
ACT(HANG_IN_CLONE_SYS_FAILED_STATUS,)\
ACT(BEFORE_BACKUP_PREFETCH_TASK,)\
ACT(BEFORE_BACKUP_DATA_TASK,)\
ACT(MAX_DEBUG_SYNC_POINT,)
DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF);

View File

@ -1861,3 +1861,7 @@ DEF_BOOL(enable_rpc_authentication_bypass, OB_CLUSTER_PARAMETER, "True",
// Tenant-level integer parameter: default "8192", accepted range [8192, 65536].
// Declared DYNAMIC_EFFECTIVE in the TENANT section.
DEF_INT(max_partition_num, OB_TENANT_PARAMETER, "8192", "[8192, 65536]",
"set max partition num in mysql mode",
ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
// ERRSIM-only cluster parameter. A non-zero value replaces the batch size that
// ObBackupMacroBlockTaskMgr::init() would otherwise use; the default 0 leaves
// the caller-supplied batch size untouched.
// NOTE: the two description literals are concatenated by the compiler, so the
// first one must end with ". " to keep the sentence separate from "Range: ...".
ERRSIM_DEF_INT(errsim_backup_task_batch_size, OB_CLUSTER_PARAMETER, "0", "[0,)",
"the batch size backup task receive in errsim mode. "
"Range: [0,) in integer",
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));

View File

@ -113,7 +113,9 @@ int ObBackupMetaIndexFuser::fuse(const MERGE_ITER_ARRAY &iter_array)
{
int ret = OB_SUCCESS;
ObBackupMetaIndex output;
int64_t largest_retry = -1;
output.reset();
int64_t largest_turn_id = -1;
int64_t largest_retry_id = -1;
for (int64_t i = 0; OB_SUCC(ret) && i < iter_array.count(); ++i) {
ObBackupMetaIndexIterator *iter = iter_array.at(i);
if (OB_ISNULL(iter)) {
@ -123,13 +125,20 @@ int ObBackupMetaIndexFuser::fuse(const MERGE_ITER_ARRAY &iter_array)
continue;
} else if (OB_FAIL(iter->get_cur_index(output))) {
LOG_WARN("failed to get cur index", K(ret));
} else if (output.retry_id_ > largest_retry) {
} else if (output.turn_id_ > largest_turn_id) {
largest_turn_id = output.turn_id_;
largest_retry_id = output.retry_id_;
result_ = output;
largest_retry = output.retry_id_;
} else if (output.turn_id_ == largest_turn_id) {
if (output.retry_id_ > largest_retry_id) {
largest_turn_id = output.turn_id_;
largest_retry_id = output.retry_id_;
result_ = output;
}
}
}
if (OB_SUCC(ret)) {
if (-1 == largest_retry) {
if (-1 == largest_retry_id) {
ret = OB_ITER_END;
}
}

View File

@ -76,6 +76,7 @@ namespace backup {
}
#endif
// Error-injection points for the backup task code in this file.
ERRSIM_POINT_DEF(EN_LS_BACKUP_FAILED);
// Read in ObLSBackupDataTask::process() (ERRSIM builds only) to inject a
// failure into one specific major-backup data task before it runs.
ERRSIM_POINT_DEF(EN_BACKUP_DATA_TASK_FAILED);
static int get_ls_handle(const uint64_t tenant_id, const share::ObLSID &ls_id, storage::ObLSHandle &ls_handle)
{
@ -2009,6 +2010,12 @@ int ObPrefetchBackupInfoTask::process()
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
bool need_report_error = false;
#ifdef ERRSIM
if (backup_data_type_.is_major_backup() && 1002 == param_.ls_id_.id() && 1 == param_.turn_id_ && 1 == param_.retry_id_) {
SERVER_EVENT_SYNC_ADD("backup_errsim", "before_backup_prefetch_task");
DEBUG_SYNC(BEFORE_BACKUP_PREFETCH_TASK);
}
#endif
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("prefetch backup info task is not inited", K(ret));
@ -2577,6 +2584,19 @@ int ObLSBackupDataTask::process()
}
}
#endif
#ifdef ERRSIM
if (OB_SUCC(ret)) {
if (backup_data_type_.is_major_backup() && 1002 == param_.ls_id_.id() && 1 == param_.turn_id_ && 0 == param_.retry_id_ && 1 == task_id_) {
ret = EN_BACKUP_DATA_TASK_FAILED ? : OB_SUCCESS;
if (OB_FAIL(ret)) {
SERVER_EVENT_SYNC_ADD("backup_errsim", "before_backup_data_task");
DEBUG_SYNC(BEFORE_BACKUP_DATA_TASK);
}
}
}
#endif
if (OB_FAIL(ret)) {
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;

View File

@ -2464,6 +2464,11 @@ int ObBackupMacroBlockTaskMgr::init(const share::ObBackupDataType &backup_data_t
} else {
backup_data_type_ = backup_data_type;
batch_size_ = batch_size;
#ifdef ERRSIM
if (0 != GCONF.errsim_backup_task_batch_size) {
batch_size_ = GCONF.errsim_backup_task_batch_size;
}
#endif
max_task_id_ = 0;
cur_task_id_ = 0;
is_inited_ = true;