Modify the wait logic of the clone executor and the retry logic of the clone scheduler in the CLONE_SYS_RELEASE_RESOURCE status
This commit is contained in:
parent
abef1a89a4
commit
240f690249
@ -28,6 +28,7 @@
|
||||
#ifdef OB_BUILD_TDE_SECURITY
|
||||
#include "share/ob_master_key_getter.h"
|
||||
#endif
|
||||
#include "lib/utility/ob_tracepoint.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
@ -261,6 +262,7 @@ int ObCloneScheduler::process_user_clone_job(const share::ObCloneJob &job)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_LOCK_ERROR);
|
||||
int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -273,7 +275,10 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
|
||||
const ObTenantSnapshotUtil::TenantSnapshotOp op = ObTenantCloneJobType::RESTORE == job_type ?
|
||||
ObTenantSnapshotUtil::RESTORE_OP :
|
||||
ObTenantSnapshotUtil::FORK_OP;
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_LOCK_ERROR)) {
|
||||
ret = ERRSIM_CLONE_LOCK_ERROR;
|
||||
LOG_WARN("mock clone lock failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -306,7 +311,7 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
|
||||
if (ObTenantSnapStatus::CREATING == original_global_state_status) {
|
||||
ret = OB_SUCCESS;
|
||||
need_wait = true;
|
||||
LOG_INFO("need wait for current tenant restore operation", KR(ret), K(source_tenant_id));
|
||||
LOG_INFO("need wait for current tenant snapshot creation", KR(ret), K(source_tenant_id));
|
||||
} else {
|
||||
LOG_WARN("GLOBAL_STATE snapshot lock conflict", KR(ret), K(source_tenant_id),
|
||||
K(original_global_state_status));
|
||||
@ -315,10 +320,10 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
|
||||
} else if (OB_FAIL(ObTenantSnapshotUtil::check_tenant_has_no_conflict_tasks(source_tenant_id))) {
|
||||
LOG_WARN("fail to check tenant has conflict tasks", KR(ret), K(source_tenant_id));
|
||||
} else if (ObTenantCloneJobType::RESTORE == job_type &&
|
||||
OB_FAIL(ObTenantSnapshotUtil::add_restore_tenant_task(trans, source_tenant_id,
|
||||
snapshot_id))) {
|
||||
// if job_type is FORK, the snapshot will be updated as RESTORE when it is created successful
|
||||
LOG_WARN("failed to add restore tenant snapshot task", KR(ret), K(source_tenant_id), K(snapshot_id));
|
||||
OB_FAIL(ObTenantSnapshotUtil::add_clone_tenant_task(trans, source_tenant_id,
|
||||
snapshot_id))) {
|
||||
// if job_type is FORK, the snapshot will be updated as CLONING when it is created successful
|
||||
LOG_WARN("failed to add clone tenant snapshot task", KR(ret), K(source_tenant_id), K(snapshot_id));
|
||||
}
|
||||
if (trans.is_started()) {
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
@ -342,10 +347,6 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
|
||||
}
|
||||
}
|
||||
|
||||
// if (FAILEDx(wait_source_relative_task_finished_(source_tenant_id))) {
|
||||
// LOG_WARN("wait source relative task finished failed", KR(ret), KR(source_tenant_id));
|
||||
// }
|
||||
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
|
||||
LOG_WARN("fail to update job status", KR(ret), KR(tmp_ret), K(job));
|
||||
@ -354,6 +355,7 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_RESOURCE_POOL_ERROR);
|
||||
int ObCloneScheduler::clone_create_resource_pool(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -362,7 +364,10 @@ int ObCloneScheduler::clone_create_resource_pool(const share::ObCloneJob &job)
|
||||
uint64_t resource_pool_id = job.get_resource_pool_id();
|
||||
const int64_t job_id = job.get_job_id();
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_RESOURCE_POOL_ERROR)) {
|
||||
ret = ERRSIM_CLONE_RESOURCE_POOL_ERROR;
|
||||
LOG_WARN("mock clone resource pool failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -402,6 +407,7 @@ int ObCloneScheduler::clone_create_resource_pool(const share::ObCloneJob &job)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_CREATE_SNAPSHOT_ERROR);
|
||||
int ObCloneScheduler::clone_create_snapshot_for_fork_tenant(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -416,7 +422,10 @@ int ObCloneScheduler::clone_create_snapshot_for_fork_tenant(const share::ObClone
|
||||
ObSqlString snapshot_name;
|
||||
ObTenantSnapshotID tenant_snapshot_id;
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_CREATE_SNAPSHOT_ERROR)) {
|
||||
ret = ERRSIM_CLONE_CREATE_SNAPSHOT_ERROR;
|
||||
LOG_WARN("mock clone create snapshot failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -498,6 +507,7 @@ int ObCloneScheduler::clone_create_snapshot_for_fork_tenant(const share::ObClone
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_WAIT_CREATE_SNAPSHOT_ERROR);
|
||||
int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -510,7 +520,10 @@ int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::Ob
|
||||
ObMySQLTransaction trans;
|
||||
bool need_wait = false;
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_WAIT_CREATE_SNAPSHOT_ERROR)) {
|
||||
ret = ERRSIM_CLONE_WAIT_CREATE_SNAPSHOT_ERROR;
|
||||
LOG_WARN("mock clone wait create snapshot failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -534,14 +547,14 @@ int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::Ob
|
||||
} else if (ObTenantSnapStatus::CREATING == item.get_status() ||
|
||||
ObTenantSnapStatus::DECIDED == item.get_status()) {
|
||||
need_wait = true;
|
||||
} else if (ObTenantSnapStatus::RESTORING == item.get_status()) {
|
||||
} else if (ObTenantSnapStatus::CLONING == item.get_status()) {
|
||||
// no need to update snapshot status
|
||||
} else if (ObTenantSnapStatus::NORMAL != item.get_status()) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("invalid status for fork tenant snapshot", KR(ret), K(source_tenant_id),
|
||||
K(tenant_snapshot_id), K(item));
|
||||
} else if (OB_FAIL(rootserver::ObTenantSnapshotUtil::add_restore_tenant_task(trans, item))) {
|
||||
LOG_WARN("fail to update fork tenant snapshot to restoring", KR(ret), K(item));
|
||||
} else if (OB_FAIL(rootserver::ObTenantSnapshotUtil::add_clone_tenant_task(trans, item))) {
|
||||
LOG_WARN("fail to update fork tenant snapshot to cloning", KR(ret), K(item));
|
||||
}
|
||||
|
||||
if (trans.is_started()) {
|
||||
@ -572,6 +585,7 @@ int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::Ob
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_CREATE_TENANT_ERROR);
|
||||
int ObCloneScheduler::clone_create_tenant(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -583,7 +597,10 @@ int ObCloneScheduler::clone_create_tenant(const share::ObCloneJob &job)
|
||||
const int64_t timeout = GCONF._ob_ddl_timeout;
|
||||
ObTenantCloneTableOperator clone_op;
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_CREATE_TENANT_ERROR)) {
|
||||
ret = ERRSIM_CLONE_CREATE_TENANT_ERROR;
|
||||
LOG_WARN("mock clone create tenant failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -621,6 +638,7 @@ int ObCloneScheduler::clone_create_tenant(const share::ObCloneJob &job)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_WAIT_CREATE_TENANT_ERROR);
|
||||
int ObCloneScheduler::clone_wait_tenant_restore_finish(const ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -629,7 +647,11 @@ int ObCloneScheduler::clone_wait_tenant_restore_finish(const ObCloneJob &job)
|
||||
const uint64_t clone_tenant_id = job.get_clone_tenant_id();
|
||||
bool need_wait = false;
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_WAIT_CREATE_TENANT_ERROR)) {
|
||||
ret = ERRSIM_CLONE_WAIT_CREATE_TENANT_ERROR;
|
||||
need_wait = OB_EAGAIN == ret ? true : false;
|
||||
LOG_WARN("mock clone wait create tenant failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -678,11 +700,12 @@ int ObCloneScheduler::clone_wait_tenant_restore_finish(const ObCloneJob &job)
|
||||
if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
|
||||
LOG_WARN("fail to update job status", KR(ret), KR(tmp_ret), K(job));
|
||||
}
|
||||
LOG_INFO("[RESTORE] clone wait tenant restore finish", KR(ret), K(job));
|
||||
}
|
||||
LOG_INFO("[RESTORE] clone wait tenant restore finish", KR(ret), K(job));
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_RELEASE_RESOURCE_ERROR);
|
||||
int ObCloneScheduler::clone_release_resource(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -691,8 +714,13 @@ int ObCloneScheduler::clone_release_resource(const share::ObCloneJob &job)
|
||||
const ObTenantSnapshotID snapshot_id = job.get_tenant_snapshot_id();
|
||||
const ObTenantCloneJobType job_type = job.get_job_type();
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
bool need_retry = false;
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_RELEASE_RESOURCE_ERROR)) {
|
||||
ret = ERRSIM_CLONE_RELEASE_RESOURCE_ERROR;
|
||||
need_retry = OB_EAGAIN == ret ? true : false;
|
||||
LOG_WARN("mock clone release resource failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -708,16 +736,19 @@ int ObCloneScheduler::clone_release_resource(const share::ObCloneJob &job)
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", KR(ret), K(job));
|
||||
} else if (OB_FAIL(ObTenantCloneUtil::release_source_tenant_resource_of_clone_job(*sql_proxy_, job))) {
|
||||
need_retry = true;
|
||||
LOG_WARN("failed to release source tenant resource", KR(ret), K(job));
|
||||
}
|
||||
|
||||
if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
|
||||
if (OB_FAIL(ret) && need_retry) {
|
||||
} else if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
|
||||
LOG_WARN("fail to update job status", KR(ret), KR(tmp_ret), K(job));
|
||||
}
|
||||
LOG_INFO("[RESTORE] clone_release_resource", KR(ret), K(job));
|
||||
LOG_INFO("[RESTORE] clone_release_resource", KR(ret), K(need_retry), K(job));
|
||||
return ret;
|
||||
}
|
||||
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_SYS_FINISH_ERROR);
|
||||
int ObCloneScheduler::clone_sys_finish(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -727,7 +758,10 @@ int ObCloneScheduler::clone_sys_finish(const share::ObCloneJob &job)
|
||||
const uint64_t source_tenant_id = job.get_source_tenant_id();
|
||||
const ObTenantSnapshotID &snapshot_id = job.get_tenant_snapshot_id();
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_SYS_FINISH_ERROR)) {
|
||||
ret = ERRSIM_CLONE_SYS_FINISH_ERROR;
|
||||
LOG_WARN("mock clone sys finish failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -1116,6 +1150,7 @@ int ObCloneScheduler::clone_user_finish(const share::ObCloneJob &job)
|
||||
// 1. for clone_tenant, gc the resource of resource_pool and clone_tenant
|
||||
// 2. for source_tenant, release global_lock and tenant snapshot
|
||||
// 3. for sys_tenant, finish the clone job
|
||||
ERRSIM_POINT_DEF(ERRSIM_CLONE_RECYCLE_FAILED_JOB_ERROR);
|
||||
int ObCloneScheduler::clone_recycle_failed_job(const share::ObCloneJob &job)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -1123,7 +1158,10 @@ int ObCloneScheduler::clone_recycle_failed_job(const share::ObCloneJob &job)
|
||||
const uint64_t source_tenant_id = job.get_source_tenant_id();
|
||||
const ObTenantCloneStatus job_status = job.get_status();
|
||||
|
||||
if (IS_NOT_INIT) {
|
||||
if (OB_UNLIKELY(ERRSIM_CLONE_RECYCLE_FAILED_JOB_ERROR)) {
|
||||
ret = ERRSIM_CLONE_RECYCLE_FAILED_JOB_ERROR;
|
||||
LOG_WARN("mock clone recycle failed job failed", KR(ret), K(job));
|
||||
} else if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not inited", KR(ret));
|
||||
} else if (has_set_stop()) {
|
||||
@ -1139,10 +1177,8 @@ int ObCloneScheduler::clone_recycle_failed_job(const share::ObCloneJob &job)
|
||||
} else if (!job_status.is_sys_failed_status()) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("try to recycle a processing or successful job", KR(ret), K(job));
|
||||
} else if (ObTenantCloneStatus::Status::CLONE_SYS_RELEASE_RESOURCE_FAIL != job_status &&
|
||||
} else if (job_status.is_sys_release_clone_resource_status() &&
|
||||
OB_FAIL(ObTenantCloneUtil::release_clone_tenant_resource_of_clone_job(job))) {
|
||||
// CLONE_SYS_RELEASE_RESOURCE means the clone_tenant has been created and restored successful.
|
||||
// thus, if the clone_job is failed in this status, we just need to release the according snapshot.
|
||||
LOG_WARN("fail to release resource of clone tenant", KR(ret), K(job));
|
||||
} else if (OB_FAIL(ObTenantCloneUtil::release_source_tenant_resource_of_clone_job(*sql_proxy_, job))) {
|
||||
LOG_WARN("fail to release resource of source tenant", KR(ret), K(job));
|
||||
|
@ -421,14 +421,14 @@ int ObTenantCloneUtil::release_source_tenant_resource_of_clone_job(common::ObISQ
|
||||
} else {
|
||||
LOG_WARN("fail to get global_lock", KR(ret), K(clone_job));
|
||||
}
|
||||
} else if (ObTenantSnapStatus::RESTORING != global_lock.get_status()) {
|
||||
} else if (ObTenantSnapStatus::CLONING != global_lock.get_status()) {
|
||||
is_already_unlocked = true;
|
||||
LOG_INFO("global lock has been released", KR(ret), K(clone_job));
|
||||
} else if (OB_FAIL(ObTenantSnapshotUtil::unlock_tenant_snapshot_simulated_mutex_from_clone_release_task(
|
||||
trans,
|
||||
source_tenant_id,
|
||||
job_id,
|
||||
ObTenantSnapStatus::RESTORING,
|
||||
ObTenantSnapStatus::CLONING,
|
||||
is_already_unlocked))) {
|
||||
LOG_WARN("fail to unlock", KR(ret), K(clone_job), K(global_lock));
|
||||
}
|
||||
@ -532,7 +532,7 @@ int ObTenantCloneUtil::cancel_clone_job(common::ObISQLClient &sql_client,
|
||||
ObTenantCloneTableOperator clone_op;
|
||||
ObCloneJob clone_job;
|
||||
ObMySQLTransaction trans;
|
||||
static const char *err_msg = "clone job has been canceled";
|
||||
ObSqlString err_msg;
|
||||
const ObTenantCloneStatus next_status(ObTenantCloneStatus::Status::CLONE_SYS_CANCELED);
|
||||
|
||||
if (OB_UNLIKELY(clone_tenant_name.empty())) {
|
||||
@ -560,7 +560,9 @@ int ObTenantCloneUtil::cancel_clone_job(common::ObISQLClient &sql_client,
|
||||
clone_job.get_status(), /*old_status*/
|
||||
next_status))) {
|
||||
LOG_WARN("fail to update job status", KR(ret), K(clone_tenant_name), K(clone_job));
|
||||
} else if (OB_FAIL(clone_op.update_job_failed_info(clone_job.get_job_id(), OB_CANCELED, err_msg))) {
|
||||
} else if (OB_FAIL(err_msg.append_fmt("clone job has been canceled in %s status",
|
||||
ObTenantCloneStatus::get_clone_status_str(clone_job.get_status())))) {
|
||||
} else if (OB_FAIL(clone_op.update_job_failed_info(clone_job.get_job_id(), OB_CANCELED, err_msg.string()))) {
|
||||
LOG_WARN("fail to update job failed info", KR(ret), K(clone_job));
|
||||
}
|
||||
|
||||
|
@ -190,7 +190,7 @@ int ObTenantSnapshotScheduler::get_tenant_snapshot_jobs_(
|
||||
uint64_t user_tenant_id = gen_user_tenant_id(MTL_ID());
|
||||
ObArbitrationServiceStatus arbitration_service_status;
|
||||
int64_t paxos_replica_num = OB_INVALID_COUNT;
|
||||
int64_t restore_job_num = 0;
|
||||
int64_t clone_job_num = 0;
|
||||
|
||||
if (OB_ISNULL(GCTX.schema_service_)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
@ -242,8 +242,8 @@ int ObTenantSnapshotScheduler::get_tenant_snapshot_jobs_(
|
||||
} else if (OB_FAIL(delete_jobs.push_back(delete_job))) {
|
||||
LOG_WARN("push back failed", KR(ret), K(item), K(delete_job));
|
||||
}
|
||||
} else if (ObTenantSnapStatus::RESTORING == items.at(i).get_status()) {
|
||||
restore_job_num++;
|
||||
} else if (ObTenantSnapStatus::CLONING == items.at(i).get_status()) {
|
||||
clone_job_num++;
|
||||
} else if (ObTenantSnapStatus::FAILED == items.at(i).get_status()) {
|
||||
// when a tenant snapshot is created failed,
|
||||
// for the normal tenant snapshot, it will be setted as DELETING and be deleted directly;
|
||||
@ -259,10 +259,10 @@ int ObTenantSnapshotScheduler::get_tenant_snapshot_jobs_(
|
||||
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if ((create_jobs.count() > 1)
|
||||
|| (create_jobs.count() + restore_job_num > 1)) {
|
||||
|| (create_jobs.count() + clone_job_num > 1)) {
|
||||
//only one creation job/restoration job can exist at a time, num > 1 is illegal!
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("unexpected tenant snapshot count", KR(ret), K(create_jobs), K(restore_job_num));
|
||||
LOG_ERROR("unexpected tenant snapshot count", KR(ret), K(create_jobs), K(clone_job_num));
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -1028,8 +1028,8 @@ int ObTenantSnapshotScheduler::finish_create_tenant_snapshot_(
|
||||
true /*for update*/,
|
||||
global_lock))) {
|
||||
LOG_WARN("failed to get special tenant snapshot item", KR(ret), K(user_tenant_id));
|
||||
} else if (ObTenantSnapStatus::RESTORING == global_lock.get_status()) {
|
||||
// For fork tenant (a job type of tenant cloning), the status of global_lock is set as RESTORING at beginning.
|
||||
} else if (ObTenantSnapStatus::CLONING == global_lock.get_status()) {
|
||||
// For fork tenant (a job type of tenant cloning), the status of global_lock is set as CLONING at beginning.
|
||||
// in this case, the global_lock should be unlocked after cloning tenant is finished
|
||||
} else if (OB_FAIL(ObTenantSnapshotUtil::unlock_tenant_snapshot_simulated_mutex_from_snapshot_task(
|
||||
trans,
|
||||
@ -1077,8 +1077,8 @@ int ObTenantSnapshotScheduler::create_tenant_snapshot_fail_(const ObCreateSnapsh
|
||||
global_lock))) {
|
||||
LOG_WARN("failed to get special tenant snapshot item", KR(ret), K(user_tenant_id));
|
||||
} else {
|
||||
if (ObTenantSnapStatus::RESTORING == global_lock.get_status()) {
|
||||
// For fork tenant (a job type of tenant cloning), the status of global_lock is set as RESTORING at beginning.
|
||||
if (ObTenantSnapStatus::CLONING == global_lock.get_status()) {
|
||||
// For fork tenant (a job type of tenant cloning), the status of global_lock is set as CLONING at beginning.
|
||||
// in this case, when creating snapshot failed,
|
||||
// the snapshot and global_lock should only be released by clone job
|
||||
if (OB_FAIL(table_op.update_tenant_snap_item(tenant_snapshot_id,
|
||||
|
@ -672,10 +672,10 @@ int ObTenantSnapshotUtil::lock_(ObMySQLTransaction &trans,
|
||||
if (CREATE_OP == op) {
|
||||
new_status = ObTenantSnapStatus::CREATING;
|
||||
} else { // FORK_OP == op
|
||||
new_status = ObTenantSnapStatus::RESTORING;
|
||||
new_status = ObTenantSnapStatus::CLONING;
|
||||
}
|
||||
} else if (RESTORE_OP == op) {
|
||||
new_status = ObTenantSnapStatus::RESTORING;
|
||||
new_status = ObTenantSnapStatus::CLONING;
|
||||
} else {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected tenant snapshot operation", KR(ret), K(tenant_id), K(op));
|
||||
@ -835,9 +835,9 @@ int ObTenantSnapshotUtil::get_tenant_snapshot_info(common::ObISQLClient &sql_cli
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
|
||||
const uint64_t tenant_id,
|
||||
const share::ObTenantSnapshotID &tenant_snapshot_id)
|
||||
int ObTenantSnapshotUtil::add_clone_tenant_task(ObMySQLTransaction &trans,
|
||||
const uint64_t tenant_id,
|
||||
const share::ObTenantSnapshotID &tenant_snapshot_id)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObTenantSnapshotTableOperator table_op;
|
||||
@ -853,18 +853,18 @@ int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
|
||||
} else if (ObTenantSnapStatus::NORMAL != snap_item.get_status()) {
|
||||
ret = OB_OP_NOT_ALLOW;
|
||||
LOG_WARN("not allowed for current snapshot operation", KR(ret), K(tenant_id), K(snap_item.get_status()));
|
||||
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operation on the same tenant snapshot, restore tenant");
|
||||
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operation on the same tenant snapshot, clone tenant");
|
||||
} else if (OB_FAIL(table_op.update_tenant_snap_item(tenant_snapshot_id,
|
||||
ObTenantSnapStatus::NORMAL, /*old_status*/
|
||||
ObTenantSnapStatus::RESTORING /*new_status*/))) {
|
||||
ObTenantSnapStatus::CLONING /*new_status*/))) {
|
||||
LOG_WARN("update tenant snapshot status failed", KR(ret), K(tenant_id), K(tenant_snapshot_id));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
|
||||
const ObTenantSnapItem &snap_item)
|
||||
int ObTenantSnapshotUtil::add_clone_tenant_task(ObMySQLTransaction &trans,
|
||||
const ObTenantSnapItem &snap_item)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObTenantSnapshotTableOperator table_op;
|
||||
@ -875,12 +875,12 @@ int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
|
||||
} else if (ObTenantSnapStatus::NORMAL != snap_item.get_status()) {
|
||||
ret = OB_OP_NOT_ALLOW;
|
||||
LOG_WARN("not allowed for current snapshot operation", KR(ret), K(snap_item));
|
||||
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operation on the same tenant snapshot, restore tenant");
|
||||
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operation on the same tenant snapshot, clone tenant");
|
||||
} else if (OB_FAIL(table_op.init(snap_item.get_tenant_id(), &trans))) {
|
||||
LOG_WARN("failed to init table op", KR(ret), K(snap_item));
|
||||
} else if (OB_FAIL(table_op.update_tenant_snap_item(snap_item.get_tenant_snapshot_id(),
|
||||
ObTenantSnapStatus::NORMAL, /*old_status*/
|
||||
ObTenantSnapStatus::RESTORING /*new_status*/))) {
|
||||
ObTenantSnapStatus::CLONING /*new_status*/))) {
|
||||
LOG_WARN("update tenant snapshot status failed", KR(ret), K(snap_item));
|
||||
}
|
||||
|
||||
|
@ -82,8 +82,8 @@ public:
|
||||
{
|
||||
CREATE_OP = 0,
|
||||
DROP_OP = 1,
|
||||
RESTORE_OP = 2,
|
||||
FORK_OP = 3,
|
||||
RESTORE_OP = 2, // a type of clone job
|
||||
FORK_OP = 3, // a type of clone job
|
||||
MAX,
|
||||
};
|
||||
public:
|
||||
@ -137,11 +137,11 @@ public:
|
||||
const uint64_t source_tenant_id,
|
||||
const share::ObTenantSnapshotID &snapshot_id,
|
||||
share::ObTenantSnapItem &item);
|
||||
static int add_restore_tenant_task(ObMySQLTransaction &trans,
|
||||
const uint64_t tenant_id,
|
||||
const share::ObTenantSnapshotID &tenant_snapshot_id);
|
||||
static int add_restore_tenant_task(ObMySQLTransaction &trans,
|
||||
const share::ObTenantSnapItem &snap_item);
|
||||
static int add_clone_tenant_task(ObMySQLTransaction &trans,
|
||||
const uint64_t tenant_id,
|
||||
const share::ObTenantSnapshotID &tenant_snapshot_id);
|
||||
static int add_clone_tenant_task(ObMySQLTransaction &trans,
|
||||
const share::ObTenantSnapItem &snap_item);
|
||||
static int generate_tenant_snapshot_name(const uint64_t tenant_id,
|
||||
ObSqlString &tenant_snapshot_name,
|
||||
bool is_inner = false);
|
||||
|
@ -216,6 +216,20 @@ bool ObTenantCloneStatus::is_sys_release_resource_status() const
|
||||
return b_ret;
|
||||
}
|
||||
|
||||
// Reports whether status_ falls inside the half-open range
// [CLONE_SYS_CREATE_INNER_RESOURCE_POOL_FAIL, CLONE_SYS_RELEASE_RESOURCE_FAIL).
//
// @return true when status_ is at or after CLONE_SYS_CREATE_INNER_RESOURCE_POOL_FAIL
//         and strictly before CLONE_SYS_RELEASE_RESOURCE_FAIL; false otherwise.
bool ObTenantCloneStatus::is_sys_release_clone_resource_status() const
|
||||
{
|
||||
bool b_ret = false;
|
||||
|
||||
if (ObTenantCloneStatus::Status::CLONE_SYS_CREATE_INNER_RESOURCE_POOL_FAIL <= status_ &&
|
||||
ObTenantCloneStatus::Status::CLONE_SYS_RELEASE_RESOURCE_FAIL > status_) {
|
||||
// CLONE_SYS_RELEASE_RESOURCE means the clone_tenant has been created and restored successfully.
|
||||
// Thus, if the clone_job is in, or has failed in, that status, only the corresponding snapshot
// needs to be released; this is why the upper bound above is exclusive.
|
||||
b_ret = true;
|
||||
}
|
||||
|
||||
return b_ret;
|
||||
}
|
||||
|
||||
ObCloneJob::ObCloneJob() :
|
||||
trace_id_(),
|
||||
tenant_id_(OB_INVALID_TENANT_ID),
|
||||
|
@ -93,6 +93,7 @@ public:
|
||||
bool is_sys_processing_status() const;
|
||||
bool is_sys_valid_snapshot_status_for_fork() const;
|
||||
bool is_sys_release_resource_status() const;
|
||||
bool is_sys_release_clone_resource_status() const;
|
||||
|
||||
TO_STRING_KV(K_(status));
|
||||
|
||||
|
@ -357,7 +357,7 @@ const char* ObTenantSnapshotTableOperator::TENANT_SNAP_STATUS_ARRAY[] =
|
||||
"CREATING",
|
||||
"DECIDED",
|
||||
"NORMAL",
|
||||
"RESTORING",
|
||||
"CLONING",
|
||||
"DELETING",
|
||||
"FAILED",
|
||||
};
|
||||
@ -366,7 +366,7 @@ const char* ObTenantSnapshotTableOperator::LS_SNAP_STATUS_ARRAY[] =
|
||||
{
|
||||
"CREATING",
|
||||
"NORMAL",
|
||||
"RESTORING",
|
||||
"CLONING",
|
||||
"FAILED",
|
||||
};
|
||||
|
||||
|
@ -30,7 +30,7 @@ enum class ObTenantSnapStatus : int64_t
|
||||
CREATING = 0,
|
||||
DECIDED,
|
||||
NORMAL,
|
||||
RESTORING,
|
||||
CLONING,
|
||||
DELETING,
|
||||
FAILED,
|
||||
MAX,
|
||||
@ -40,7 +40,7 @@ enum class ObLSSnapStatus : int64_t
|
||||
{
|
||||
CREATING = 0,
|
||||
NORMAL,
|
||||
RESTORING,
|
||||
CLONING,
|
||||
FAILED,
|
||||
MAX,
|
||||
};
|
||||
@ -233,7 +233,7 @@ private:
|
||||
ObTenantSnapType type_;
|
||||
int64_t create_time_;
|
||||
uint64_t data_version_;
|
||||
// when the status_ is RESTORING, the clone_job id will be owner_job_id of "global_lock"(snapshot_id == 0)
|
||||
// when the status_ is CLONING, the clone_job id will be owner_job_id of "global_lock"(snapshot_id == 0)
|
||||
// for the other status or the other snapshot, the owner_job_id always be OB_INVALID_ID
|
||||
int64_t owner_job_id_;
|
||||
};
|
||||
|
@ -79,9 +79,7 @@ int ObCloneTenantExecutor::wait_clone_tenant_finished_(ObExecContext &ctx,
|
||||
const int64_t abs_timeout = ObTimeUtility::current_time() + OB_MAX_USER_SPECIFIED_TIMEOUT; // 102 years
|
||||
THIS_WORKER.set_timeout_ts(abs_timeout);
|
||||
|
||||
if (OB_UNLIKELY(ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR)) {
|
||||
ret = ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR;
|
||||
} else if (OB_UNLIKELY(job_id < 0)) {
|
||||
if (OB_UNLIKELY(job_id < 0)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", KR(ret), K(job_id));
|
||||
} else if (OB_ISNULL(ctx.get_physical_plan_ctx())) {
|
||||
@ -100,16 +98,17 @@ int ObCloneTenantExecutor::wait_clone_tenant_finished_(ObExecContext &ctx,
|
||||
// the according record will be moved to __all_clone_job_history from __all_clone_job;
|
||||
// if the clone job is failed,
|
||||
// the according record will be set as failed status in __all_clone_job and
|
||||
// will be moved to __all_clone_job_history after user executes the "recycle" sql
|
||||
// will be moved to __all_clone_job_history after the related resource is recycled
|
||||
bool clone_over = false;
|
||||
while (OB_SUCC(ret) && !clone_over) {
|
||||
job.reset();
|
||||
ob_usleep(2 * 1000 * 1000L); // 2s
|
||||
ObTenantCloneTableOperator table_op;
|
||||
ObMySQLTransaction trans;
|
||||
bool exist_in_history = false;
|
||||
|
||||
if (THIS_WORKER.is_timeout()) {
|
||||
if (OB_UNLIKELY(ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR)) {
|
||||
ret = ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR;
|
||||
} else if (THIS_WORKER.is_timeout()) {
|
||||
ret = OB_TIMEOUT;
|
||||
LOG_WARN("wait clone tenant timeout", KR(ret), K(job_id));
|
||||
} else if (OB_FAIL(ctx.check_status())) {
|
||||
@ -119,36 +118,25 @@ int ObCloneTenantExecutor::wait_clone_tenant_finished_(ObExecContext &ctx,
|
||||
} else if (OB_FAIL(table_op.init(OB_SYS_TENANT_ID, &trans))) {
|
||||
LOG_WARN("failed to init table op", KR(ret));
|
||||
} else if (OB_FAIL(table_op.get_sys_clone_job_history(job_id, job))) {
|
||||
if (OB_ENTRY_NOT_EXIST == ret) {
|
||||
if (OB_ENTRY_NOT_EXIST == ret) { // clone job is running
|
||||
ret = OB_SUCCESS;
|
||||
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
if (OB_TMP_FAIL(ObTenantCloneUtil::notify_clone_scheduler(OB_SYS_TENANT_ID))) {
|
||||
LOG_WARN("notify clone scheduler failed", KR(tmp_ret));
|
||||
}
|
||||
} else {
|
||||
LOG_WARN("failed to get clone job history", KR(ret), K(job_id));
|
||||
}
|
||||
} else if (job.get_status().is_sys_success_status()) {
|
||||
clone_over = true;
|
||||
LOG_INFO("clone tenant successful", K(job));
|
||||
} else if (job.get_status().is_sys_failed_status()) {
|
||||
ret = OB_ERR_CLONE_TENANT;
|
||||
LOG_WARN("clone tenant failed", KR(ret), K(job));
|
||||
} else {
|
||||
exist_in_history = true;
|
||||
if (job.get_status().is_sys_success_status()) {
|
||||
clone_over = true;
|
||||
LOG_INFO("clone tenant successful", K(job));
|
||||
} else if (job.get_status().is_sys_failed_status()) {
|
||||
ret = OB_ERR_CLONE_TENANT;
|
||||
LOG_WARN("clone tenant failed", KR(ret), K(job));
|
||||
} else {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected status", KR(ret), K(job));
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) && !exist_in_history) {
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
if (OB_FAIL(table_op.get_clone_job_by_job_id(job_id, job))) {
|
||||
LOG_WARN("failed to get clone job", KR(ret), K(job));
|
||||
} else if (job.get_status().is_sys_failed_status()) {
|
||||
ret = OB_ERR_CLONE_TENANT;
|
||||
LOG_WARN("clone tenant failed", KR(ret), K(job));
|
||||
} else if (OB_TMP_FAIL(ObTenantCloneUtil::notify_clone_scheduler(OB_SYS_TENANT_ID))) {
|
||||
// clone job is running
|
||||
LOG_WARN("notify clone scheduler failed", KR(tmp_ret));
|
||||
}
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected status", KR(ret), K(job));
|
||||
}
|
||||
|
||||
if (OB_UNLIKELY(OB_TIMEOUT == ret)) {
|
||||
|
@ -298,7 +298,7 @@ int ObLSSnapshot::get_tablet_meta_entry(blocksstable::MacroBlockId &tablet_meta_
|
||||
|
||||
if (!meta_existed_) {
|
||||
ret = OB_STATE_NOT_MATCH;
|
||||
LOG_WARN("ObLSSnapshot's meta not exsited", KR(ret), KPC(this));
|
||||
LOG_WARN("ObLSSnapshot's meta not existed", KR(ret), KPC(this));
|
||||
} else if (OB_FAIL(meta_handler_->get_ls_snapshot(tenant_snapshot_id_,
|
||||
ls_id_,
|
||||
tablet_meta_entry))) {
|
||||
|
@ -83,7 +83,7 @@ bool ObLSSnapshotReportInfo::scn_range_is_valid(const ObTenantSnapItem &tenant_s
|
||||
int ret = OB_SUCCESS;
|
||||
bool bret = true;
|
||||
|
||||
if ((ObTenantSnapStatus::RESTORING == tenant_snap_item.get_status() ||
|
||||
if ((ObTenantSnapStatus::CLONING == tenant_snap_item.get_status() ||
|
||||
ObTenantSnapStatus::NORMAL == tenant_snap_item.get_status())) {
|
||||
if (begin_interval_scn_ < tenant_snap_item.get_clog_start_scn() ||
|
||||
end_interval_scn_ > tenant_snap_item.get_snapshot_scn()) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user