modify wait logic of clone executor and retry logic of clone scheduler in CLONE_SYS_RELEASE_RESOURCE

wanyue-wy 2023-12-22 15:47:19 +00:00 committed by ob-robot
parent abef1a89a4
commit 240f690249
12 changed files with 137 additions and 96 deletions
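
This patch makes the CLONE_SYS_RELEASE_RESOURCE step retryable on the scheduler side and keeps the clone executor polling while the job is still in flight, instead of treating a transient release failure as terminal. It also adds ERRSIM injection points to the clone scheduler steps, renames the tenant snapshot status RESTORING to CLONING, and renames add_restore_tenant_task to add_clone_tenant_task. A minimal sketch of the new retry decision in clone_release_resource, condensed from the diff below (the init/stop/argument checks around it are elided):

bool need_retry = false;
if (OB_FAIL(ObTenantCloneUtil::release_source_tenant_resource_of_clone_job(*sql_proxy_, job))) {
  need_retry = true;  // transient failure: keep the job in CLONE_SYS_RELEASE_RESOURCE
  LOG_WARN("failed to release source tenant resource", KR(ret), K(job));
}
if (OB_FAIL(ret) && need_retry) {
  // do not advance or fail the job status; the next scheduling round retries this step
} else if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
  LOG_WARN("fail to update job status", KR(ret), KR(tmp_ret), K(job));
}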

View File

@ -28,6 +28,7 @@
#ifdef OB_BUILD_TDE_SECURITY
#include "share/ob_master_key_getter.h"
#endif
#include "lib/utility/ob_tracepoint.h"
namespace oceanbase
{
@ -261,6 +262,7 @@ int ObCloneScheduler::process_user_clone_job(const share::ObCloneJob &job)
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_LOCK_ERROR);
int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -273,7 +275,10 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
const ObTenantSnapshotUtil::TenantSnapshotOp op = ObTenantCloneJobType::RESTORE == job_type ?
ObTenantSnapshotUtil::RESTORE_OP :
ObTenantSnapshotUtil::FORK_OP;
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_LOCK_ERROR)) {
ret = ERRSIM_CLONE_LOCK_ERROR;
LOG_WARN("mock clone lock failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -306,7 +311,7 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
if (ObTenantSnapStatus::CREATING == original_global_state_status) {
ret = OB_SUCCESS;
need_wait = true;
LOG_INFO("need wait for current tenant restore operation", KR(ret), K(source_tenant_id));
LOG_INFO("need wait for current tenant snapshot creation", KR(ret), K(source_tenant_id));
} else {
LOG_WARN("GLOBAL_STATE snapshot lock conflict", KR(ret), K(source_tenant_id),
K(original_global_state_status));
@ -315,10 +320,10 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
} else if (OB_FAIL(ObTenantSnapshotUtil::check_tenant_has_no_conflict_tasks(source_tenant_id))) {
LOG_WARN("fail to check tenant has conflict tasks", KR(ret), K(source_tenant_id));
} else if (ObTenantCloneJobType::RESTORE == job_type &&
OB_FAIL(ObTenantSnapshotUtil::add_restore_tenant_task(trans, source_tenant_id,
snapshot_id))) {
// if job_type is FORK, the snapshot will be updated as RESTORE when it is created successful
LOG_WARN("failed to add restore tenant snapshot task", KR(ret), K(source_tenant_id), K(snapshot_id));
OB_FAIL(ObTenantSnapshotUtil::add_clone_tenant_task(trans, source_tenant_id,
snapshot_id))) {
// if job_type is FORK, the snapshot will be updated to CLONING when it is created successfully
LOG_WARN("failed to add clone tenant snapshot task", KR(ret), K(source_tenant_id), K(snapshot_id));
}
if (trans.is_started()) {
int tmp_ret = OB_SUCCESS;
@ -342,10 +347,6 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
}
}
// if (FAILEDx(wait_source_relative_task_finished_(source_tenant_id))) {
// LOG_WARN("wait source relative task finished failed", KR(ret), KR(source_tenant_id));
// }
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
LOG_WARN("fail to update job status", KR(ret), KR(tmp_ret), K(job));
@ -354,6 +355,7 @@ int ObCloneScheduler::clone_lock(const share::ObCloneJob &job)
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_RESOURCE_POOL_ERROR);
int ObCloneScheduler::clone_create_resource_pool(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -362,7 +364,10 @@ int ObCloneScheduler::clone_create_resource_pool(const share::ObCloneJob &job)
uint64_t resource_pool_id = job.get_resource_pool_id();
const int64_t job_id = job.get_job_id();
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_RESOURCE_POOL_ERROR)) {
ret = ERRSIM_CLONE_RESOURCE_POOL_ERROR;
LOG_WARN("mock clone resource pool failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -402,6 +407,7 @@ int ObCloneScheduler::clone_create_resource_pool(const share::ObCloneJob &job)
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_CREATE_SNAPSHOT_ERROR);
int ObCloneScheduler::clone_create_snapshot_for_fork_tenant(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -416,7 +422,10 @@ int ObCloneScheduler::clone_create_snapshot_for_fork_tenant(const share::ObClone
ObSqlString snapshot_name;
ObTenantSnapshotID tenant_snapshot_id;
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_CREATE_SNAPSHOT_ERROR)) {
ret = ERRSIM_CLONE_CREATE_SNAPSHOT_ERROR;
LOG_WARN("mock clone create snapshot failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -498,6 +507,7 @@ int ObCloneScheduler::clone_create_snapshot_for_fork_tenant(const share::ObClone
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_WAIT_CREATE_SNAPSHOT_ERROR);
int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -510,7 +520,10 @@ int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::Ob
ObMySQLTransaction trans;
bool need_wait = false;
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_WAIT_CREATE_SNAPSHOT_ERROR)) {
ret = ERRSIM_CLONE_WAIT_CREATE_SNAPSHOT_ERROR;
LOG_WARN("mock clone wait create snapshot failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -534,14 +547,14 @@ int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::Ob
} else if (ObTenantSnapStatus::CREATING == item.get_status() ||
ObTenantSnapStatus::DECIDED == item.get_status()) {
need_wait = true;
} else if (ObTenantSnapStatus::RESTORING == item.get_status()) {
} else if (ObTenantSnapStatus::CLONING == item.get_status()) {
// no need to update snapshot status
} else if (ObTenantSnapStatus::NORMAL != item.get_status()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid status for fork tenant snapshot", KR(ret), K(source_tenant_id),
K(tenant_snapshot_id), K(item));
} else if (OB_FAIL(rootserver::ObTenantSnapshotUtil::add_restore_tenant_task(trans, item))) {
LOG_WARN("fail to update fork tenant snapshot to restoring", KR(ret), K(item));
} else if (OB_FAIL(rootserver::ObTenantSnapshotUtil::add_clone_tenant_task(trans, item))) {
LOG_WARN("fail to update fork tenant snapshot to cloning", KR(ret), K(item));
}
if (trans.is_started()) {
@ -572,6 +585,7 @@ int ObCloneScheduler::clone_wait_create_snapshot_for_fork_tenant(const share::Ob
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_CREATE_TENANT_ERROR);
int ObCloneScheduler::clone_create_tenant(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -583,7 +597,10 @@ int ObCloneScheduler::clone_create_tenant(const share::ObCloneJob &job)
const int64_t timeout = GCONF._ob_ddl_timeout;
ObTenantCloneTableOperator clone_op;
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_CREATE_TENANT_ERROR)) {
ret = ERRSIM_CLONE_CREATE_TENANT_ERROR;
LOG_WARN("mock clone create tenant failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -621,6 +638,7 @@ int ObCloneScheduler::clone_create_tenant(const share::ObCloneJob &job)
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_WAIT_CREATE_TENANT_ERROR);
int ObCloneScheduler::clone_wait_tenant_restore_finish(const ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -629,7 +647,11 @@ int ObCloneScheduler::clone_wait_tenant_restore_finish(const ObCloneJob &job)
const uint64_t clone_tenant_id = job.get_clone_tenant_id();
bool need_wait = false;
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_WAIT_CREATE_TENANT_ERROR)) {
ret = ERRSIM_CLONE_WAIT_CREATE_TENANT_ERROR;
need_wait = OB_EAGAIN == ret ? true : false;
LOG_WARN("mock clone wait create tenant failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -678,11 +700,12 @@ int ObCloneScheduler::clone_wait_tenant_restore_finish(const ObCloneJob &job)
if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
LOG_WARN("fail to update job status", KR(ret), KR(tmp_ret), K(job));
}
LOG_INFO("[RESTORE] clone wait tenant restore finish", KR(ret), K(job));
}
LOG_INFO("[RESTORE] clone wait tenant restore finish", KR(ret), K(job));
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_RELEASE_RESOURCE_ERROR);
int ObCloneScheduler::clone_release_resource(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -691,8 +714,13 @@ int ObCloneScheduler::clone_release_resource(const share::ObCloneJob &job)
const ObTenantSnapshotID snapshot_id = job.get_tenant_snapshot_id();
const ObTenantCloneJobType job_type = job.get_job_type();
int tmp_ret = OB_SUCCESS;
bool need_retry = false;
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_RELEASE_RESOURCE_ERROR)) {
ret = ERRSIM_CLONE_RELEASE_RESOURCE_ERROR;
need_retry = OB_EAGAIN == ret ? true : false;
LOG_WARN("mock clone release resource failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -708,16 +736,19 @@ int ObCloneScheduler::clone_release_resource(const share::ObCloneJob &job)
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(job));
} else if (OB_FAIL(ObTenantCloneUtil::release_source_tenant_resource_of_clone_job(*sql_proxy_, job))) {
need_retry = true;
LOG_WARN("failed to release source tenant resource", KR(ret), K(job));
}
if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
if (OB_FAIL(ret) && need_retry) {
} else if (OB_TMP_FAIL(try_update_job_status_(ret, job))) {
LOG_WARN("fail to update job status", KR(ret), KR(tmp_ret), K(job));
}
LOG_INFO("[RESTORE] clone_release_resource", KR(ret), K(job));
LOG_INFO("[RESTORE] clone_release_resource", KR(ret), K(need_retry), K(job));
return ret;
}
ERRSIM_POINT_DEF(ERRSIM_CLONE_SYS_FINISH_ERROR);
int ObCloneScheduler::clone_sys_finish(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -727,7 +758,10 @@ int ObCloneScheduler::clone_sys_finish(const share::ObCloneJob &job)
const uint64_t source_tenant_id = job.get_source_tenant_id();
const ObTenantSnapshotID &snapshot_id = job.get_tenant_snapshot_id();
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_SYS_FINISH_ERROR)) {
ret = ERRSIM_CLONE_SYS_FINISH_ERROR;
LOG_WARN("mock clone sys finish failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -1116,6 +1150,7 @@ int ObCloneScheduler::clone_user_finish(const share::ObCloneJob &job)
// 1. for clone_tenant, gc the resource of resource_pool and clone_tenant
// 2. for source_tenant, release global_lock and tenant snapshot
// 3. for sys_tenant, finish the clone job
ERRSIM_POINT_DEF(ERRSIM_CLONE_RECYCLE_FAILED_JOB_ERROR);
int ObCloneScheduler::clone_recycle_failed_job(const share::ObCloneJob &job)
{
int ret = OB_SUCCESS;
@ -1123,7 +1158,10 @@ int ObCloneScheduler::clone_recycle_failed_job(const share::ObCloneJob &job)
const uint64_t source_tenant_id = job.get_source_tenant_id();
const ObTenantCloneStatus job_status = job.get_status();
if (IS_NOT_INIT) {
if (OB_UNLIKELY(ERRSIM_CLONE_RECYCLE_FAILED_JOB_ERROR)) {
ret = ERRSIM_CLONE_RECYCLE_FAILED_JOB_ERROR;
LOG_WARN("mock clone recycle failed job failed", KR(ret), K(job));
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", KR(ret));
} else if (has_set_stop()) {
@ -1139,10 +1177,8 @@ int ObCloneScheduler::clone_recycle_failed_job(const share::ObCloneJob &job)
} else if (!job_status.is_sys_failed_status()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("try to recycle a processing or successful job", KR(ret), K(job));
} else if (ObTenantCloneStatus::Status::CLONE_SYS_RELEASE_RESOURCE_FAIL != job_status &&
} else if (job_status.is_sys_release_clone_resource_status() &&
OB_FAIL(ObTenantCloneUtil::release_clone_tenant_resource_of_clone_job(job))) {
// CLONE_SYS_RELEASE_RESOURCE means the clone_tenant has been created and restored successful.
// thus, if the clone_job is failed in this status, we just need to release the according snapshot.
LOG_WARN("fail to release resource of clone tenant", KR(ret), K(job));
} else if (OB_FAIL(ObTenantCloneUtil::release_source_tenant_resource_of_clone_job(*sql_proxy_, job))) {
LOG_WARN("fail to release resource of source tenant", KR(ret), K(job));

View File

@ -421,14 +421,14 @@ int ObTenantCloneUtil::release_source_tenant_resource_of_clone_job(common::ObISQ
} else {
LOG_WARN("fail to get global_lock", KR(ret), K(clone_job));
}
} else if (ObTenantSnapStatus::RESTORING != global_lock.get_status()) {
} else if (ObTenantSnapStatus::CLONING != global_lock.get_status()) {
is_already_unlocked = true;
LOG_INFO("global lock has been released", KR(ret), K(clone_job));
} else if (OB_FAIL(ObTenantSnapshotUtil::unlock_tenant_snapshot_simulated_mutex_from_clone_release_task(
trans,
source_tenant_id,
job_id,
ObTenantSnapStatus::RESTORING,
ObTenantSnapStatus::CLONING,
is_already_unlocked))) {
LOG_WARN("fail to unlock", KR(ret), K(clone_job), K(global_lock));
}
@ -532,7 +532,7 @@ int ObTenantCloneUtil::cancel_clone_job(common::ObISQLClient &sql_client,
ObTenantCloneTableOperator clone_op;
ObCloneJob clone_job;
ObMySQLTransaction trans;
static const char *err_msg = "clone job has been canceled";
ObSqlString err_msg;
const ObTenantCloneStatus next_status(ObTenantCloneStatus::Status::CLONE_SYS_CANCELED);
if (OB_UNLIKELY(clone_tenant_name.empty())) {
@ -560,7 +560,9 @@ int ObTenantCloneUtil::cancel_clone_job(common::ObISQLClient &sql_client,
clone_job.get_status(), /*old_status*/
next_status))) {
LOG_WARN("fail to update job status", KR(ret), K(clone_tenant_name), K(clone_job));
} else if (OB_FAIL(clone_op.update_job_failed_info(clone_job.get_job_id(), OB_CANCELED, err_msg))) {
} else if (OB_FAIL(err_msg.append_fmt("clone job has been canceled in %s status",
ObTenantCloneStatus::get_clone_status_str(clone_job.get_status())))) {
} else if (OB_FAIL(clone_op.update_job_failed_info(clone_job.get_job_id(), OB_CANCELED, err_msg.string()))) {
LOG_WARN("fail to update job failed info", KR(ret), K(clone_job));
}

View File

@ -190,7 +190,7 @@ int ObTenantSnapshotScheduler::get_tenant_snapshot_jobs_(
uint64_t user_tenant_id = gen_user_tenant_id(MTL_ID());
ObArbitrationServiceStatus arbitration_service_status;
int64_t paxos_replica_num = OB_INVALID_COUNT;
int64_t restore_job_num = 0;
int64_t clone_job_num = 0;
if (OB_ISNULL(GCTX.schema_service_)) {
ret = OB_INVALID_ARGUMENT;
@ -242,8 +242,8 @@ int ObTenantSnapshotScheduler::get_tenant_snapshot_jobs_(
} else if (OB_FAIL(delete_jobs.push_back(delete_job))) {
LOG_WARN("push back failed", KR(ret), K(item), K(delete_job));
}
} else if (ObTenantSnapStatus::RESTORING == items.at(i).get_status()) {
restore_job_num++;
} else if (ObTenantSnapStatus::CLONING == items.at(i).get_status()) {
clone_job_num++;
} else if (ObTenantSnapStatus::FAILED == items.at(i).get_status()) {
// when a tenant snapshot is created failed,
// for the normal tenant snapshot, it will be setted as DELETING and be deleted directly;
@ -259,10 +259,10 @@ int ObTenantSnapshotScheduler::get_tenant_snapshot_jobs_(
if (OB_FAIL(ret)) {
} else if ((create_jobs.count() > 1)
|| (create_jobs.count() + restore_job_num > 1)) {
|| (create_jobs.count() + clone_job_num > 1)) {
//only one creation job/restoration job can exist at a time, num > 1 is illegal!
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("unexpected tenant snapshot count", KR(ret), K(create_jobs), K(restore_job_num));
LOG_ERROR("unexpected tenant snapshot count", KR(ret), K(create_jobs), K(clone_job_num));
}
return ret;
@ -1028,8 +1028,8 @@ int ObTenantSnapshotScheduler::finish_create_tenant_snapshot_(
true /*for update*/,
global_lock))) {
LOG_WARN("failed to get special tenant snapshot item", KR(ret), K(user_tenant_id));
} else if (ObTenantSnapStatus::RESTORING == global_lock.get_status()) {
// For fork tenant (a job type of tenant cloning), the status of global_lock is set as RESTORING at beginning.
} else if (ObTenantSnapStatus::CLONING == global_lock.get_status()) {
// For fork tenant (a job type of tenant cloning), the status of global_lock is set to CLONING at the beginning.
// in this case, the global_lock should be unlocked after cloning tenant is finished
} else if (OB_FAIL(ObTenantSnapshotUtil::unlock_tenant_snapshot_simulated_mutex_from_snapshot_task(
trans,
@ -1077,8 +1077,8 @@ int ObTenantSnapshotScheduler::create_tenant_snapshot_fail_(const ObCreateSnapsh
global_lock))) {
LOG_WARN("failed to get special tenant snapshot item", KR(ret), K(user_tenant_id));
} else {
if (ObTenantSnapStatus::RESTORING == global_lock.get_status()) {
// For fork tenant (a job type of tenant cloning), the status of global_lock is set as RESTORING at beginning.
if (ObTenantSnapStatus::CLONING == global_lock.get_status()) {
// For fork tenant (a job type of tenant cloning), the status of global_lock is set to CLONING at the beginning.
// in this case, when creating snapshot failed,
// the snapshot and global_lock should only be released by clone job
if (OB_FAIL(table_op.update_tenant_snap_item(tenant_snapshot_id,

View File

@ -672,10 +672,10 @@ int ObTenantSnapshotUtil::lock_(ObMySQLTransaction &trans,
if (CREATE_OP == op) {
new_status = ObTenantSnapStatus::CREATING;
} else { // FORK_OP == op
new_status = ObTenantSnapStatus::RESTORING;
new_status = ObTenantSnapStatus::CLONING;
}
} else if (RESTORE_OP == op) {
new_status = ObTenantSnapStatus::RESTORING;
new_status = ObTenantSnapStatus::CLONING;
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected tenant snapshot operation", KR(ret), K(tenant_id), K(op));
@ -835,9 +835,9 @@ int ObTenantSnapshotUtil::get_tenant_snapshot_info(common::ObISQLClient &sql_cli
return ret;
}
int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
const uint64_t tenant_id,
const share::ObTenantSnapshotID &tenant_snapshot_id)
int ObTenantSnapshotUtil::add_clone_tenant_task(ObMySQLTransaction &trans,
const uint64_t tenant_id,
const share::ObTenantSnapshotID &tenant_snapshot_id)
{
int ret = OB_SUCCESS;
ObTenantSnapshotTableOperator table_op;
@ -853,18 +853,18 @@ int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
} else if (ObTenantSnapStatus::NORMAL != snap_item.get_status()) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("not allowed for current snapshot operation", KR(ret), K(tenant_id), K(snap_item.get_status()));
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operation on the same tenant snapshot, restore tenant");
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operations on the same tenant snapshot, clone tenant");
} else if (OB_FAIL(table_op.update_tenant_snap_item(tenant_snapshot_id,
ObTenantSnapStatus::NORMAL, /*old_status*/
ObTenantSnapStatus::RESTORING /*new_status*/))) {
ObTenantSnapStatus::CLONING /*new_status*/))) {
LOG_WARN("update tenant snapshot status failed", KR(ret), K(tenant_id), K(tenant_snapshot_id));
}
return ret;
}
int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
const ObTenantSnapItem &snap_item)
int ObTenantSnapshotUtil::add_clone_tenant_task(ObMySQLTransaction &trans,
const ObTenantSnapItem &snap_item)
{
int ret = OB_SUCCESS;
ObTenantSnapshotTableOperator table_op;
@ -875,12 +875,12 @@ int ObTenantSnapshotUtil::add_restore_tenant_task(ObMySQLTransaction &trans,
} else if (ObTenantSnapStatus::NORMAL != snap_item.get_status()) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("not allowed for current snapshot operation", KR(ret), K(snap_item));
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operation on the same tenant snapshot, restore tenant");
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "there may be other operations on the same tenant snapshot, clone tenant");
} else if (OB_FAIL(table_op.init(snap_item.get_tenant_id(), &trans))) {
LOG_WARN("failed to init table op", KR(ret), K(snap_item));
} else if (OB_FAIL(table_op.update_tenant_snap_item(snap_item.get_tenant_snapshot_id(),
ObTenantSnapStatus::NORMAL, /*old_status*/
ObTenantSnapStatus::RESTORING /*new_status*/))) {
ObTenantSnapStatus::CLONING /*new_status*/))) {
LOG_WARN("update tenant snapshot status failed", KR(ret), K(snap_item));
}

View File

@ -82,8 +82,8 @@ public:
{
CREATE_OP = 0,
DROP_OP = 1,
RESTORE_OP = 2,
FORK_OP = 3,
RESTORE_OP = 2, // a type of clone job
FORK_OP = 3, // a type of clone job
MAX,
};
public:
@ -137,11 +137,11 @@ public:
const uint64_t source_tenant_id,
const share::ObTenantSnapshotID &snapshot_id,
share::ObTenantSnapItem &item);
static int add_restore_tenant_task(ObMySQLTransaction &trans,
const uint64_t tenant_id,
const share::ObTenantSnapshotID &tenant_snapshot_id);
static int add_restore_tenant_task(ObMySQLTransaction &trans,
const share::ObTenantSnapItem &snap_item);
static int add_clone_tenant_task(ObMySQLTransaction &trans,
const uint64_t tenant_id,
const share::ObTenantSnapshotID &tenant_snapshot_id);
static int add_clone_tenant_task(ObMySQLTransaction &trans,
const share::ObTenantSnapItem &snap_item);
static int generate_tenant_snapshot_name(const uint64_t tenant_id,
ObSqlString &tenant_snapshot_name,
bool is_inner = false);

View File

@ -216,6 +216,20 @@ bool ObTenantCloneStatus::is_sys_release_resource_status() const
return b_ret;
}
bool ObTenantCloneStatus::is_sys_release_clone_resource_status() const
{
bool b_ret = false;
if (ObTenantCloneStatus::Status::CLONE_SYS_CREATE_INNER_RESOURCE_POOL_FAIL <= status_ &&
ObTenantCloneStatus::Status::CLONE_SYS_RELEASE_RESOURCE_FAIL > status_) {
// CLONE_SYS_RELEASE_RESOURCE means the clone_tenant has been created and restored successfully.
// thus, if the clone_job is in, or has failed in, this status, we only need to release the corresponding snapshot.
b_ret = true;
}
return b_ret;
}
ObCloneJob::ObCloneJob() :
trace_id_(),
tenant_id_(OB_INVALID_TENANT_ID),

View File

@ -93,6 +93,7 @@ public:
bool is_sys_processing_status() const;
bool is_sys_valid_snapshot_status_for_fork() const;
bool is_sys_release_resource_status() const;
bool is_sys_release_clone_resource_status() const;
TO_STRING_KV(K_(status));

View File

@ -357,7 +357,7 @@ const char* ObTenantSnapshotTableOperator::TENANT_SNAP_STATUS_ARRAY[] =
"CREATING",
"DECIDED",
"NORMAL",
"RESTORING",
"CLONING",
"DELETING",
"FAILED",
};
@ -366,7 +366,7 @@ const char* ObTenantSnapshotTableOperator::LS_SNAP_STATUS_ARRAY[] =
{
"CREATING",
"NORMAL",
"RESTORING",
"CLONING",
"FAILED",
};

View File

@ -30,7 +30,7 @@ enum class ObTenantSnapStatus : int64_t
CREATING = 0,
DECIDED,
NORMAL,
RESTORING,
CLONING,
DELETING,
FAILED,
MAX,
@ -40,7 +40,7 @@ enum class ObLSSnapStatus : int64_t
{
CREATING = 0,
NORMAL,
RESTORING,
CLONING,
FAILED,
MAX,
};
@ -233,7 +233,7 @@ private:
ObTenantSnapType type_;
int64_t create_time_;
uint64_t data_version_;
// when the status_ is RESTORING, the clone_job id will be owner_job_id of "global_lock"(snapshot_id == 0)
// when the status_ is CLONING, the clone_job id will be owner_job_id of "global_lock"(snapshot_id == 0)
// for the other status or the other snapshot, the owner_job_id always be OB_INVALID_ID
int64_t owner_job_id_;
};

View File

@ -79,9 +79,7 @@ int ObCloneTenantExecutor::wait_clone_tenant_finished_(ObExecContext &ctx,
const int64_t abs_timeout = ObTimeUtility::current_time() + OB_MAX_USER_SPECIFIED_TIMEOUT; // 102 years
THIS_WORKER.set_timeout_ts(abs_timeout);
if (OB_UNLIKELY(ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR)) {
ret = ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR;
} else if (OB_UNLIKELY(job_id < 0)) {
if (OB_UNLIKELY(job_id < 0)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(job_id));
} else if (OB_ISNULL(ctx.get_physical_plan_ctx())) {
@ -100,16 +98,17 @@ int ObCloneTenantExecutor::wait_clone_tenant_finished_(ObExecContext &ctx,
// the according record will be moved to __all_clone_job_history from __all_clone_job;
// if the clone job is failed,
// the according record will be set as failed status in __all_clone_job and
// will be moved to __all_clone_job_history after user executes the "recycle" sql
// will be moved to __all_clone_job_history after the related resource is recycled
bool clone_over = false;
while (OB_SUCC(ret) && !clone_over) {
job.reset();
ob_usleep(2 * 1000 * 1000L); // 2s
ObTenantCloneTableOperator table_op;
ObMySQLTransaction trans;
bool exist_in_history = false;
if (THIS_WORKER.is_timeout()) {
if (OB_UNLIKELY(ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR)) {
ret = ERRSIM_WAIT_CLONE_TENANT_FINISHED_ERROR;
} else if (THIS_WORKER.is_timeout()) {
ret = OB_TIMEOUT;
LOG_WARN("wait clone tenant timeout", KR(ret), K(job_id));
} else if (OB_FAIL(ctx.check_status())) {
@ -119,36 +118,25 @@ int ObCloneTenantExecutor::wait_clone_tenant_finished_(ObExecContext &ctx,
} else if (OB_FAIL(table_op.init(OB_SYS_TENANT_ID, &trans))) {
LOG_WARN("failed to init table op", KR(ret));
} else if (OB_FAIL(table_op.get_sys_clone_job_history(job_id, job))) {
if (OB_ENTRY_NOT_EXIST == ret) {
if (OB_ENTRY_NOT_EXIST == ret) { // clone job is running
ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(ObTenantCloneUtil::notify_clone_scheduler(OB_SYS_TENANT_ID))) {
LOG_WARN("notify clone scheduler failed", KR(tmp_ret));
}
} else {
LOG_WARN("failed to get clone job history", KR(ret), K(job_id));
}
} else if (job.get_status().is_sys_success_status()) {
clone_over = true;
LOG_INFO("clone tenant successful", K(job));
} else if (job.get_status().is_sys_failed_status()) {
ret = OB_ERR_CLONE_TENANT;
LOG_WARN("clone tenant failed", KR(ret), K(job));
} else {
exist_in_history = true;
if (job.get_status().is_sys_success_status()) {
clone_over = true;
LOG_INFO("clone tenant successful", K(job));
} else if (job.get_status().is_sys_failed_status()) {
ret = OB_ERR_CLONE_TENANT;
LOG_WARN("clone tenant failed", KR(ret), K(job));
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status", KR(ret), K(job));
}
}
if (OB_SUCC(ret) && !exist_in_history) {
int tmp_ret = OB_SUCCESS;
if (OB_FAIL(table_op.get_clone_job_by_job_id(job_id, job))) {
LOG_WARN("failed to get clone job", KR(ret), K(job));
} else if (job.get_status().is_sys_failed_status()) {
ret = OB_ERR_CLONE_TENANT;
LOG_WARN("clone tenant failed", KR(ret), K(job));
} else if (OB_TMP_FAIL(ObTenantCloneUtil::notify_clone_scheduler(OB_SYS_TENANT_ID))) {
// clone job is running
LOG_WARN("notify clone scheduler failed", KR(tmp_ret));
}
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status", KR(ret), K(job));
}
if (OB_UNLIKELY(OB_TIMEOUT == ret)) {
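
On the executor side, wait_clone_tenant_finished_ now decides completion solely from __all_clone_job_history: while the record is absent (OB_ENTRY_NOT_EXIST) the job is treated as still running, the clone scheduler is nudged, and the loop keeps polling; the old fallback that re-read __all_clone_job and surfaced a failed status before recycling is removed. A condensed sketch of the new loop body (the timeout, ctx and transaction handling shown above are elided):

while (OB_SUCC(ret) && !clone_over) {
  ob_usleep(2 * 1000 * 1000L);  // poll every 2s
  if (OB_FAIL(table_op.get_sys_clone_job_history(job_id, job))) {
    if (OB_ENTRY_NOT_EXIST == ret) {  // still running, or failed but not yet recycled
      ret = OB_SUCCESS;
      (void)ObTenantCloneUtil::notify_clone_scheduler(OB_SYS_TENANT_ID);
    }
  } else if (job.get_status().is_sys_success_status()) {
    clone_over = true;  // the record only reaches history once the job has finished
  } else if (job.get_status().is_sys_failed_status()) {
    ret = OB_ERR_CLONE_TENANT;  // failed job has been recycled and moved to history
  } else {
    ret = OB_ERR_UNEXPECTED;
  }
}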

View File

@ -298,7 +298,7 @@ int ObLSSnapshot::get_tablet_meta_entry(blocksstable::MacroBlockId &tablet_meta_
if (!meta_existed_) {
ret = OB_STATE_NOT_MATCH;
LOG_WARN("ObLSSnapshot's meta not exsited", KR(ret), KPC(this));
LOG_WARN("ObLSSnapshot's meta not existed", KR(ret), KPC(this));
} else if (OB_FAIL(meta_handler_->get_ls_snapshot(tenant_snapshot_id_,
ls_id_,
tablet_meta_entry))) {

View File

@ -83,7 +83,7 @@ bool ObLSSnapshotReportInfo::scn_range_is_valid(const ObTenantSnapItem &tenant_s
int ret = OB_SUCCESS;
bool bret = true;
if ((ObTenantSnapStatus::RESTORING == tenant_snap_item.get_status() ||
if ((ObTenantSnapStatus::CLONING == tenant_snap_item.get_status() ||
ObTenantSnapStatus::NORMAL == tenant_snap_item.get_status())) {
if (begin_interval_scn_ < tenant_snap_item.get_clog_start_scn() ||
end_interval_scn_ > tenant_snap_item.get_snapshot_scn()) {