Fix the unit deletion process when unit still has tenant snapshots

This commit is contained in:
obdev 2024-02-08 02:02:30 +00:00 committed by ob-robot
parent 6f58aeece7
commit e5c95cc904
12 changed files with 336 additions and 105 deletions

View File

@ -216,6 +216,9 @@ int ObBaseLogWriter::append_log(ObIBaseLogItem &log_item, const uint64_t timeout
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_STDERR("The ObBaseLogWriter has not been inited.\n");
} else if (has_stopped_) {
ret = OB_NOT_RUNNING;
LOG_STDERR("The ObBaseLogWriter is not running.\n");
} else {
int64_t abs_time = ObTimeUtility::current_time() + timeout_us;
while (OB_SUCC(ret)) {

View File

@ -566,7 +566,7 @@ int ObMultiTenant::init(ObAddr myaddr,
MTL_BIND2(mtl_new_default, ObOptStatMonitorManager::mtl_init, ObOptStatMonitorManager::mtl_start, ObOptStatMonitorManager::mtl_stop, ObOptStatMonitorManager::mtl_wait, mtl_destroy_default);
MTL_BIND2(mtl_new_default, ObTenantSrs::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default);
MTL_BIND2(mtl_new_default, table::ObTableApiSessPoolMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default);
MTL_BIND2(mtl_new_default, ObTenantSnapshotService::mtl_init, mtl_start_default, mtl_stop_default, nullptr, mtl_destroy_default);
MTL_BIND2(mtl_new_default, ObTenantSnapshotService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default);
MTL_BIND2(mtl_new_default, ObIndexUsageInfoMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default);
MTL_BIND2(mtl_new_default, storage::ObTabletMemtableMgrPool::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default);
}

View File

@ -513,6 +513,28 @@ int ObTenantNodeBalancer::fetch_effective_tenants(const TenantUnits &old_tenants
// check ls service safe to destroy.
is_released = MTL(ObLSService *)->safe_to_destroy();
}
bool is_tenant_snapshot_released = false;
if (is_user_tenant(tenant_config.tenant_id_)) {
MTL(ObTenantSnapshotService*)->notify_unit_is_deleting();
if (OB_FAIL(MTL(ObTenantSnapshotService*)->
check_all_tenant_snapshot_released(is_tenant_snapshot_released))) {
LOG_WARN("fail to check_all_tenant_snapshot_released", K(ret), K(tenant_config));
} else if (!is_tenant_snapshot_released) {
// can not release now. dump some debug info
const uint64_t interval = 180 * 1000 * 1000; // 180s
if (!is_tenant_snapshot_released && REACH_TIME_INTERVAL(interval)) {
MTL(ObTenantSnapshotService*)->dump_all_tenant_snapshot_info();
}
LOG_INFO("[DELETE_TENANT] tenant has been dropped, tenant snapshot is still waiting for gc",
K(tenant_config));
}
if (OB_SUCC(ret)) {
is_released = is_released && is_tenant_snapshot_released;
} else {
is_released = false;
}
}
}
if (OB_SUCC(ret)) {

View File

@ -301,7 +301,11 @@ void ObTenantCloneService::handle_copy_all_tablet_meta_(const ObCloneJob& job,
{
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
uint64_t source_tenant_id = OB_INVALID_TENANT_ID;
ObTenantSnapshotID tenant_snapshot_id;
ObLSID ls_id;
blocksstable::MacroBlockId tablet_meta_entry;
bool has_inc_clone_ref = false;
@ -315,45 +319,48 @@ void ObTenantCloneService::handle_copy_all_tablet_meta_(const ObCloneJob& job,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls tablet count is not zero", KR(ret), KPC(ls), KPC(ls->get_tablet_svr()));
} else {
source_tenant_id = job.get_source_tenant_id();
tenant_snapshot_id = job.get_tenant_snapshot_id();
ls_id = ls->get_ls_id();
}
if (OB_SUCC(ret)) {
MAKE_TENANT_SWITCH_SCOPE_GUARD(guard);
if (OB_FAIL(guard.switch_to(job.get_source_tenant_id(), false))) {
LOG_WARN("fail to switch to tenant", KR(ret), K(job.get_source_tenant_id()));
} else if (OB_FAIL(MTL(ObTenantSnapshotService*)->start_clone(job.get_tenant_snapshot_id(),
if (OB_FAIL(guard.switch_to(source_tenant_id, false))) {
LOG_WARN("fail to switch to tenant",
KR(ret), K(source_tenant_id), K(tenant_snapshot_id), K(ls_id));
} else if (OB_FAIL(MTL(ObTenantSnapshotService*)->start_clone(tenant_snapshot_id,
ls_id,
tablet_meta_entry))) {
LOG_WARN("fail to start_clone",
KR(ret), K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id));
LOG_WARN("fail to start_clone", KR(ret), K(source_tenant_id), K(tenant_snapshot_id), K(ls_id));
} else {
has_inc_clone_ref = true;
FLOG_INFO("inc snapshot clone ref succ",
K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id), K(tablet_meta_entry));
K(source_tenant_id), K(tenant_snapshot_id), K(ls_id));
}
}
if (OB_SUCC(ret)) {
if (OB_FAIL(meta_handler_->create_all_tablet(&startup_accel_handler_, tablet_meta_entry))) {
LOG_WARN("fail to create_all_tablet",
KR(ret), K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id), K(tablet_meta_entry));
KR(ret), K(source_tenant_id), K(tenant_snapshot_id), K(ls_id), K(tablet_meta_entry));
} else {
FLOG_INFO("create_all_tablet succ",
K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id), K(tablet_meta_entry));
K(source_tenant_id), K(tenant_snapshot_id), K(ls_id), K(tablet_meta_entry));
}
}
if (has_inc_clone_ref) {
MAKE_TENANT_SWITCH_SCOPE_GUARD(guard);
if (OB_TMP_FAIL(guard.switch_to(job.get_source_tenant_id(), false))) {
LOG_ERROR("fail to switch to tenant", KR(ret), K(job.get_source_tenant_id()));
LOG_WARN("fail to switch to tenant",
KR(ret), K(source_tenant_id), K(tenant_snapshot_id), K(ls_id), K(tablet_meta_entry));
} else if (OB_TMP_FAIL(MTL(ObTenantSnapshotService*)->end_clone(job.get_tenant_snapshot_id()))) {
LOG_ERROR("fail to end_clone",
KR(ret), K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id), K(tablet_meta_entry));
LOG_WARN("fail to end_clone",
KR(ret), K(source_tenant_id), K(tenant_snapshot_id), K(ls_id), K(tablet_meta_entry));
} else {
FLOG_INFO("dec snapshot clone ref succ",
K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id), K(tablet_meta_entry));
K(source_tenant_id), K(tenant_snapshot_id), K(ls_id), K(tablet_meta_entry));
}
}
@ -361,10 +368,10 @@ void ObTenantCloneService::handle_copy_all_tablet_meta_(const ObCloneJob& job,
next_status = ObLSRestoreStatus::Status::CLONE_COPY_LS_META;
next_loop = true;
FLOG_INFO("handle_copy_all_tablet_meta_ succ",
K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id), K(tablet_meta_entry));
K(source_tenant_id), K(tenant_snapshot_id), K(ls_id), K(tablet_meta_entry));
} else if (OB_EAGAIN == ret) {
FLOG_INFO("handle_copy_all_tablet_meta_ eagain",
K(job.get_source_tenant_id()), K(job.get_tenant_snapshot_id()), K(ls_id), K(tablet_meta_entry));
K(source_tenant_id), K(tenant_snapshot_id), K(ls_id), K(tablet_meta_entry));
} else {
next_status = ObLSRestoreStatus::Status::CLONE_FAILED;
next_loop = false;

View File

@ -130,12 +130,12 @@ int ObTenantSnapshot::try_start_create_tenant_snapshot_dag(ObArray<ObLSID>& crea
return ret;
}
int ObTenantSnapshot::try_start_gc_tenant_snapshot_dag(bool &gc_all_tenant_snapshot,
int ObTenantSnapshot::try_start_gc_tenant_snapshot_dag(const bool tenant_has_been_dropped,
bool &gc_tenant_snapshot,
ObArray<ObLSID> &gc_ls_id_arr,
common::ObCurTraceId::TraceId& trace_id)
{
int ret = OB_SUCCESS;
bool need_gc = false;
ObTenantSnapshotSvrInfo svr_info;
lib::ObMutexGuard snapshot_guard(mutex_);
@ -143,8 +143,9 @@ int ObTenantSnapshot::try_start_gc_tenant_snapshot_dag(bool &gc_all_tenant_snaps
// 1. no entry for this snapshot in __all_tenant_snapshot(but we see it in local storage);
// 2. snapshot status in __all_tenant_snapshot is DELETING.
// or gc ls_snapshot only
gc_all_tenant_snapshot = false;
gc_tenant_snapshot = false;
gc_ls_id_arr.reset();
trace_id.reset();
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ObTenantSnapshot is not init", KR(ret));
@ -155,37 +156,43 @@ int ObTenantSnapshot::try_start_gc_tenant_snapshot_dag(bool &gc_all_tenant_snaps
} else if (has_unfinished_dag_()) {
ret = OB_EAGAIN;
LOG_INFO("ObTenantSnapshot has unfinished dag", KR(ret), KPC(this));
} else if (tenant_has_been_dropped) {
gc_tenant_snapshot = true;
FLOG_INFO("tenant has been dropped, need gc", KPC(this));
} else if (OB_FAIL(ObTenantSnapshotMetaTable::acquire_tenant_snapshot_svr_info(tenant_snapshot_id_,
svr_info))) {
if (OB_TENANT_SNAPSHOT_NOT_EXIST == ret) {
ret = OB_SUCCESS;
need_gc = true;
gc_tenant_snapshot = true;
LOG_INFO("tenant snapshot not exist, need gc", K(tenant_snapshot_id_));
}
} else if (ObTenantSnapStatus::DELETING == svr_info.get_tenant_snap_item().get_status()) {
need_gc = true;
gc_tenant_snapshot = true;
LOG_INFO("tenant snapshot status is DELETING, need gc", K(tenant_snapshot_id_));
}
if (OB_SUCC(ret)) {
if (need_gc) { // gc tenant snapshot (with corresponding ls_snapshot)
gc_all_tenant_snapshot = true;
if (gc_tenant_snapshot) { // gc tenant snapshot (with corresponding ls_snapshot)
is_running_ = false;
} else if (OB_FAIL(get_need_gc_ls_snapshot_arr_(svr_info.get_ls_snap_item_arr(),
gc_ls_id_arr))) {
LOG_WARN("fail to get_need_gc_ls_snapshot_arr_", KR(ret), K(svr_info));
} else if (gc_ls_id_arr.count() > 0) {
need_gc = true;
gc_all_tenant_snapshot = false; // gc ls_snapshot only (maybe cause by ls transfer)
LOG_INFO("ls snapshot need gc", KR(ret), K(gc_ls_id_arr));
} else {
if (OB_FAIL(get_need_gc_ls_snapshot_arr_(svr_info.get_ls_snap_item_arr(),
gc_ls_id_arr))) {
LOG_WARN("fail to get_need_gc_ls_snapshot_arr_", KR(ret), K(svr_info));
} else if (gc_ls_id_arr.count() > 0) {
LOG_INFO("ls snapshot need gc", K(gc_ls_id_arr));
}
}
}
if (OB_SUCC(ret)) {
if (need_gc) {
ObTenantSnapshotMetaTable::acquire_tenant_snapshot_trace_id(tenant_snapshot_id_,
ObTenantSnapOperation::DELETE,
trace_id);
if (gc_tenant_snapshot || gc_ls_id_arr.count() > 0) {
if (!tenant_has_been_dropped) {
ObTenantSnapshotMetaTable::acquire_tenant_snapshot_trace_id(tenant_snapshot_id_,
ObTenantSnapOperation::DELETE,
trace_id);
} else {
trace_id.init(GCTX.self_addr());
}
gc_dag_start_();
} else {
ret = OB_NO_NEED_UPDATE;
@ -196,7 +203,7 @@ int ObTenantSnapshot::try_start_gc_tenant_snapshot_dag(bool &gc_all_tenant_snaps
return ret;
}
int ObTenantSnapshot::execute_gc_tenant_snapshot_dag(const bool gc_all_tenant_snapshot, const ObArray<ObLSID> &gc_ls_id_arr)
int ObTenantSnapshot::execute_gc_tenant_snapshot_dag(const bool gc_tenant_snapshot, const ObArray<ObLSID> &gc_ls_id_arr)
{
int ret = OB_SUCCESS;
{
@ -214,7 +221,7 @@ int ObTenantSnapshot::execute_gc_tenant_snapshot_dag(const bool gc_all_tenant_sn
}
}
if (OB_SUCC(ret)) {
if (gc_all_tenant_snapshot) {
if (gc_tenant_snapshot) {
LOG_INFO("gc_tenant_snapshot_ with ls_snapshot", K(tenant_snapshot_id_));
if (OB_FAIL(gc_tenant_snapshot_())) {
LOG_WARN("fail to gc_tenant_snapshot_", KR(ret), KPC(this));

View File

@ -30,7 +30,7 @@ class ObLSSnapshot;
class ObTenantMetaSnapshotHandler;
class ObTenantSnapshotVTInfo;
typedef common::LinkHashValue<ObTenantSnapshotID> ObTenantSnapshotValue;
typedef common::LinkHashValue<share::ObTenantSnapshotID> ObTenantSnapshotValue;
class ObTenantSnapshot : public ObTenantSnapshotValue
{
public:
@ -49,7 +49,7 @@ public:
~ObTenantSnapshot() {}
int destroy();
int init(const ObTenantSnapshotID& tenant_snapshot_id,
int init(const share::ObTenantSnapshotID& tenant_snapshot_id,
ObLSSnapshotMgr* ls_snapshot_manager,
ObTenantMetaSnapshotHandler* meta_handler);
@ -68,16 +68,18 @@ public:
public:
int is_valid() const { return tenant_snapshot_id_.is_valid(); }
ObTenantSnapshotID get_tenant_snapshot_id() const { return tenant_snapshot_id_; }
share::ObTenantSnapshotID get_tenant_snapshot_id() const { return tenant_snapshot_id_; }
int load();
int try_start_create_tenant_snapshot_dag(ObArray<ObLSID>& creating_ls_id_arr,
int try_start_create_tenant_snapshot_dag(common::ObArray<share::ObLSID>& creating_ls_id_arr,
common::ObCurTraceId::TraceId& trace_id);
int try_start_gc_tenant_snapshot_dag(bool &gc_all_tenant_snapshot,
ObArray<ObLSID> &gc_ls_id_arr,
int try_start_gc_tenant_snapshot_dag(const bool tenant_has_been_dropped,
bool &gc_tenant_snapshot,
common::ObArray<share::ObLSID> &gc_ls_id_arr,
common::ObCurTraceId::TraceId& trace_id);
int execute_create_tenant_snapshot_dag(const ObArray<ObLSID> &creating_ls_id_arr);
int execute_gc_tenant_snapshot_dag(const bool gc_all_tenant_snapshot, const ObArray<ObLSID> &gc_ls_id_arr);
int execute_create_tenant_snapshot_dag(const common::ObArray<share::ObLSID> &creating_ls_id_arr);
int execute_gc_tenant_snapshot_dag(const bool gc_tenant_snapshot, const common::ObArray<share::ObLSID> &gc_ls_id_arr);
int finish_create_tenant_snapshot_dag();
int finish_gc_tenant_snapshot_dag();
@ -85,7 +87,7 @@ public:
bool is_stopped();
int get_tenant_snapshot_vt_info(ObTenantSnapshotVTInfo &info);
int get_ls_snapshot_tablet_meta_entry(const ObLSID &ls_id,
int get_ls_snapshot_tablet_meta_entry(const share::ObLSID &ls_id,
blocksstable::MacroBlockId &tablet_meta_entry);
int inc_clone_ref();
@ -104,26 +106,26 @@ private:
template<class Fn> class ForEachFilterFunctor
{
public:
explicit ForEachFilterFunctor(const ObTenantSnapshotID &tenant_snapshot_id, Fn& fn)
explicit ForEachFilterFunctor(const share::ObTenantSnapshotID &tenant_snapshot_id, Fn& fn)
: tenant_snapshot_id_(tenant_snapshot_id), fn_(fn) {}
~ForEachFilterFunctor() {}
bool operator()(const ObLSSnapshotMapKey &snapshot_key, ObLSSnapshot* ls_snapshot);
private:
const ObTenantSnapshotID tenant_snapshot_id_;
const share::ObTenantSnapshotID tenant_snapshot_id_;
Fn &fn_;
};
template<class Fn> class RemoveIfFilterFunctor
{
public:
explicit RemoveIfFilterFunctor(const ObTenantSnapshotID &tenant_snapshot_id, Fn& fn)
explicit RemoveIfFilterFunctor(const share::ObTenantSnapshotID &tenant_snapshot_id, Fn& fn)
: tenant_snapshot_id_(tenant_snapshot_id), fn_(fn) {}
~RemoveIfFilterFunctor() {}
bool operator()(const ObLSSnapshotMapKey &snapshot_key, ObLSSnapshot* ls_snapshot);
private:
const ObTenantSnapshotID tenant_snapshot_id_;
const share::ObTenantSnapshotID tenant_snapshot_id_;
Fn &fn_;
};
template <typename Fn> int for_each_(Fn &fn);
@ -136,29 +138,28 @@ private:
int create_dag_finish_();
int gc_dag_start_();
int gc_dag_finish_();
void build_all_snapshots_(const ObArray<ObLSID>& creating_ls_id_arr);
void build_all_snapshots_(const common::ObArray<share::ObLSID>& creating_ls_id_arr);
int build_tenant_snapshot_meta_();
void build_all_ls_snapshots_(const ObArray<ObLSID>& creating_ls_id_arr);
int build_one_ls_snapshot_(const ObLSID& creating_ls_id);
void build_all_ls_snapshots_(const common::ObArray<share::ObLSID>& creating_ls_id_arr);
int build_one_ls_snapshot_(const share::ObLSID& creating_ls_id);
int build_one_ls_snapshot_meta_(ObLSSnapshot* ls_snapshot);
void report_one_ls_snapshot_build_rlt_(ObLSSnapshot* ls_snapshot, const int ls_ret);
int report_create_ls_snapshot_succ_rlt_(ObLSSnapshot* ls_snapshot);
int report_create_ls_snapshot_fail_rlt_(const ObLSID& ls_id);
int report_create_ls_snapshot_fail_rlt_(const share::ObLSID& ls_id);
bool has_unfinished_dag_() { return has_unfinished_create_dag_ || has_unfinished_gc_dag_; }
int gc_tenant_snapshot_();
void notify_ls_snapshots_tenant_gc_();
int gc_ls_snapshots_(const ObArray<ObLSID> &gc_ls_id_arr);
int gc_ls_snapshots_(const common::ObArray<share::ObLSID> &gc_ls_id_arr);
int destroy_all_ls_snapshots_();
int get_need_gc_ls_snapshot_arr_(
const ObArray<ObTenantSnapLSReplicaSimpleItem>& item_arr,
ObArray<ObLSID>& gc_ls_id_arr);
const common::ObArray<share::ObTenantSnapLSReplicaSimpleItem>& item_arr,
common::ObArray<share::ObLSID>& gc_ls_id_arr);
private:
bool is_inited_;
bool is_running_;
ObTenantSnapshotID tenant_snapshot_id_;
share::ObTenantSnapshotID tenant_snapshot_id_;
bool has_unfinished_create_dag_;
bool has_unfinished_gc_dag_;

View File

@ -293,5 +293,20 @@ int ObTenantSnapshotMgr::has_tenant_snapshot_stopped(bool& has_tenant_snapshot_s
return ret;
}
// Reports the number of entries currently held in tenant_snapshot_map_.
//
// @param[out] cnt  set to tenant_snapshot_map_.count() on success; left at
//                  INT64_MAX when the manager has not been initialized.
// @return OB_SUCCESS, or OB_NOT_INIT if the manager is not initialized.
int ObTenantSnapshotMgr::get_tenant_snapshot_cnt(int64_t& cnt)
{
  int ret = OB_SUCCESS;

  if (IS_NOT_INIT) {
    // Never report a small count from an uninitialized manager.
    cnt = INT64_MAX;
    ret = OB_NOT_INIT;
    LOG_WARN("ObTenantSnapshotMgr has not been inited.", KR(ret), KPC(this));
  } else {
    cnt = tenant_snapshot_map_.count();
  }

  return ret;
}
}
}

View File

@ -105,6 +105,7 @@ public:
template <typename Fn> int for_each(Fn &fn) { return tenant_snapshot_map_.for_each(fn); }
template <typename Fn> int remove_if(Fn &fn) { return tenant_snapshot_map_.remove_if(fn); }
int get_tenant_snapshot_cnt(int64_t& cnt);
private:
int create_tenant_snapshot_(const share::ObTenantSnapshotID &tenant_snapshot_id,
ObTenantSnapshot *&tenant_snapshot);

View File

@ -31,6 +31,7 @@ ObTenantSnapshotService::ObTenantSnapshotService()
: is_inited_(false),
is_running_(false),
meta_loaded_(false),
unit_is_deleting_(false),
tenant_snapshot_mgr_(),
ls_snapshot_mgr_(),
meta_handler_(),
@ -75,6 +76,7 @@ int ObTenantSnapshotService::init()
meta_loaded_ = false;
running_mode_ = RUNNING_MODE::INVALID;
is_inited_ = true;
unit_is_deleting_ = false;
}
return ret;
@ -288,6 +290,25 @@ int ObTenantSnapshotService::clone_running_env_check_()
return ret;
}
int ObTenantSnapshotService::check_if_tenant_has_been_dropped_(bool &has_dropped)
{
int ret = OB_SUCCESS;
const uint64_t tenant_id = MTL_ID();
schema::ObMultiVersionSchemaService *schema_service = GCTX.schema_service_;
schema::ObSchemaGetterGuard guard;
has_dropped = false;
if (OB_ISNULL(schema_service)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("schema_service is null", KR(ret));
} else if (OB_FAIL(schema_service->get_tenant_schema_guard(OB_SYS_TENANT_ID, guard))) {
LOG_WARN("fail to get schema guard", KR(ret), K(tenant_id));
} else if (OB_FAIL(guard.check_if_tenant_has_been_dropped(tenant_id, has_dropped))) {
LOG_WARN("fail to check if tenant has been dropped", KR(ret), K(tenant_id));
}
return ret;
}
int ObTenantSnapshotService::decide_running_mode_(enum RUNNING_MODE& running_mode)
{
int ret = OB_SUCCESS;
@ -296,7 +317,14 @@ int ObTenantSnapshotService::decide_running_mode_(enum RUNNING_MODE& running_mod
const share::schema::ObTenantSchema *tenant_schema = NULL;
ObTenantStatus tenant_status = TENANT_STATUS_MAX;
if (OB_FAIL(get_tenant_status_(tenant_status))) {
bool has_dropped = false;
if (unit_is_deleting_) {
running_mode = GC;
} else if (OB_FAIL(check_if_tenant_has_been_dropped_(has_dropped))) {
LOG_WARN("fail to check_if_tenant_has_been_dropped_", KR(ret));
} else if (has_dropped) {
running_mode = GC;
} else if (OB_FAIL(get_tenant_status_(tenant_status))) {
LOG_WARN("fail to get_tenant_status_", KR(ret));
} else if (TENANT_STATUS_NORMAL == tenant_status) {
running_mode = NORMAL;
@ -461,14 +489,14 @@ int ObTenantSnapshotService::schedule_create_tenant_snapshot_dag_(const ObTenant
int ObTenantSnapshotService::schedule_gc_tenant_snapshot_dag_(const ObTenantSnapshotID &tenant_snapshot_id,
const ObArray<ObLSID> &gc_ls_id_arr,
const bool gc_all_tenant_snapshot,
const bool gc_tenant_snapshot,
const common::ObCurTraceId::TraceId& trace_id)
{
int ret = OB_SUCCESS;
ObTenantSnapshotGCParam param(tenant_snapshot_id,
gc_ls_id_arr,
gc_all_tenant_snapshot,
gc_tenant_snapshot,
trace_id,
&tenant_snapshot_mgr_);
ObTenantDagScheduler *dag_scheduler = MTL(ObTenantDagScheduler*);
@ -504,15 +532,6 @@ void ObTenantSnapshotService::run_in_normal_mode_()
LOG_INFO("fail to normal_running_env_check_", KR(ret));
}
if (OB_SUCC(ret) && !meta_loaded_) {
if (OB_FAIL(load_())) {
LOG_ERROR("fail to load slog meta", KR(ret), KPC(this));
} else {
meta_loaded_ = true;
LOG_INFO("ObTenantSnapshotService load slog meta succ", KR(ret), KPC(this));
}
}
if (OB_SUCC(ret) && meta_loaded_) {
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(try_gc_tenant_snapshot_())) {
@ -524,6 +543,30 @@ void ObTenantSnapshotService::run_in_normal_mode_()
}
}
// One GC-mode iteration of the service loop.
//
// Preconditions checked here (any failure just ends this iteration):
//   1. running_mode_ is GC;
//   2. the tenant's min data version can be read;
//   3. that data version is at least 4.3.0.0.
// When they hold and the snapshot meta is loaded, one round of
// try_gc_tenant_snapshot_() is attempted; its failure is only logged.
void ObTenantSnapshotService::run_in_gc_mode_()
{
  int ret = OB_SUCCESS;
  uint64_t data_version = 0;

  if (GC != ATOMIC_LOAD(&running_mode_)) {
    ret = OB_STATE_NOT_MATCH;
    LOG_INFO("the running mode is not GC", KR(ret), KPC(this));
  } else if (OB_FAIL(GET_MIN_DATA_VERSION(MTL_ID(), data_version))) {
    LOG_WARN("get_min_data_version failed", KR(ret), KPC(this));
  } else if (OB_UNLIKELY(data_version < DATA_VERSION_4_3_0_0)) {
    ret = OB_NOT_SUPPORTED;
    LOG_INFO("ObTenantSnapshotService does not work before data version upgrade to 4_3_0_0",
             KR(ret), KPC(this), K(data_version));
  } else if (meta_loaded_) {
    // All checks passed (ret is still OB_SUCCESS here); GC errors must not
    // overwrite ret, so they go through tmp_ret.
    int tmp_ret = OB_SUCCESS;
    if (OB_TMP_FAIL(try_gc_tenant_snapshot_())) {
      LOG_WARN("fail to try_gc_tenant_snapshot_", KR(tmp_ret));
    }
  }
}
int ObTenantSnapshotService::start_clone(const ObTenantSnapshotID &tenant_snapshot_id,
const ObLSID &ls_id,
blocksstable::MacroBlockId &tablet_meta_entry)
@ -590,7 +633,12 @@ int ObTenantSnapshotService::try_gc_tenant_snapshot_()
{
int ret = OB_SUCCESS;
TryGcTenantSnapshotFunctor fn;
bool tenant_has_been_dropped = false;
if (GC == running_mode_) {
tenant_has_been_dropped = true;
}
TryGcTenantSnapshotFunctor fn(tenant_has_been_dropped);
if (OB_FAIL(tenant_snapshot_mgr_.for_each(fn))) {
LOG_WARN("fail to add all try_gc dag task", KR(ret));
}
@ -602,39 +650,44 @@ bool ObTenantSnapshotService::TryGcTenantSnapshotFunctor::operator()(
const ObTenantSnapshotID &tenant_snapshot_id, ObTenantSnapshot* tenant_snapshot)
{
int ret = OB_SUCCESS;
bool gc_all_tenant_snapshot = false;
bool gc_tenant_snapshot = false;
ObArray<ObLSID> gc_ls_id_arr;
common::ObCurTraceId::TraceId trace_id;
ObTenantSnapshotService *tenant_snapshot_service = MTL(ObTenantSnapshotService *);
if (OB_UNLIKELY(OB_ISNULL(tenant_snapshot_service))) {
if (OB_ISNULL(tenant_snapshot_service)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ObTenantSnapshotService is null", KR(ret), K(tenant_snapshot_service));
} else if (OB_UNLIKELY(OB_ISNULL(tenant_snapshot))) {
} else if (OB_ISNULL(tenant_snapshot)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tenant_snapshot is null", KR(ret), K(tenant_snapshot));
LOG_WARN("tenant_snapshot is null", KR(ret));
} else if (!tenant_snapshot_id.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("tenant_snapshot_id is not valid", KR(ret), K(tenant_snapshot_id));
} else if (OB_FAIL(tenant_snapshot->try_start_gc_tenant_snapshot_dag(gc_all_tenant_snapshot,
} else if (OB_FAIL(tenant_snapshot->try_start_gc_tenant_snapshot_dag(tenant_has_been_dropped_,
gc_tenant_snapshot,
gc_ls_id_arr,
trace_id))) {
if (OB_NO_NEED_UPDATE == ret || OB_EAGAIN == ret) {
LOG_INFO("fail to try_start_gc_tenant_snapshot_dag now, try later", KR(ret), K(tenant_snapshot_id));
LOG_INFO("fail to try_start_gc_tenant_snapshot_dag now, try later",
KR(ret), K(tenant_snapshot_id), K(tenant_has_been_dropped_));
ret = OB_SUCCESS;
} else {
LOG_WARN("fail to start try_start_gc_tenant_snapshot_dag", KR(ret), K(tenant_snapshot_id));
LOG_WARN("fail to start try_start_gc_tenant_snapshot_dag",
KR(ret), K(tenant_snapshot_id), K(tenant_has_been_dropped_));
}
} else {
ObTraceIDGuard trace_guard(trace_id);
if (OB_FAIL(tenant_snapshot_service->schedule_gc_tenant_snapshot_dag_(tenant_snapshot_id,
gc_ls_id_arr,
gc_all_tenant_snapshot,
gc_tenant_snapshot,
trace_id))) {
LOG_WARN("fail to schedule_gc_tenant_snapshot_dag_", KR(ret), KPC(tenant_snapshot));
LOG_WARN("fail to schedule_gc_tenant_snapshot_dag_",
KR(ret), KPC(tenant_snapshot), K(tenant_has_been_dropped_));
tenant_snapshot->finish_gc_tenant_snapshot_dag();
} else {
LOG_INFO("schedule_gc_tenant_snapshot success", KR(ret), KPC(tenant_snapshot));
LOG_INFO("schedule_gc_tenant_snapshot success",
KR(ret), KPC(tenant_snapshot), K(tenant_has_been_dropped_));
}
}
return true;
@ -660,6 +713,22 @@ int ObTenantSnapshotService::try_create_tenant_snapshot_in_meta_table_()
return ret;
}
// Loads the tenant snapshot meta via load_() if it has not been loaded yet.
//
// Does nothing (and succeeds) when meta_loaded_ is already set. On a successful
// load_() the meta_loaded_ flag is set so later calls skip the load.
//
// @return OB_SUCCESS, the error from common_env_check_(), or the error from load_().
int ObTenantSnapshotService::try_load_meta_()
{
  int ret = OB_SUCCESS;

  if (OB_FAIL(common_env_check_())) {
    LOG_INFO("failed to common_env_check_", KR(ret));
  } else if (meta_loaded_) {
    // already loaded, nothing to do
  } else if (OB_FAIL(load_())) {
    LOG_ERROR("fail to load ckpt meta", KR(ret), KPC(this));
  } else {
    meta_loaded_ = true;
    LOG_INFO("ObTenantSnapshotService load ckpt meta succ", KR(ret), KPC(this));
  }

  return ret;
}
void ObTenantSnapshotService::run1()
{
int ret = OB_SUCCESS;
@ -671,7 +740,7 @@ void ObTenantSnapshotService::run1()
LOG_INFO("failed to common_env_check_", KR(ret));
}
if (OB_SUCC(ret) && NORMAL != running_mode_) {
if (OB_SUCC(ret) && running_mode_ != GC) {
RUNNING_MODE tmp_running_mode = RUNNING_MODE::INVALID;
if (OB_FAIL(decide_running_mode_(tmp_running_mode))) {
LOG_INFO("fail to decide_running_mode_", KR(ret), KPC(this));
@ -680,6 +749,12 @@ void ObTenantSnapshotService::run1()
}
}
if (OB_SUCC(ret)) {
if (OB_FAIL(try_load_meta_())) {
LOG_INFO("fail to try_load_meta_", KR(ret), KPC(this));
}
}
if (OB_SUCC(ret) && CLONE == running_mode_) {
if (!clone_service_.is_started()) {
if (OB_FAIL(clone_service_.start())) {
@ -700,6 +775,10 @@ void ObTenantSnapshotService::run1()
run_in_normal_mode_();
}
if (OB_SUCC(ret) && GC == running_mode_) {
run_in_gc_mode_();
}
{
ObThreadCondGuard guard(cond_);
const uint64_t idle_time = calculate_idle_time_();
@ -751,7 +830,7 @@ bool ObTenantSnapshotService::GetAllLSSnapshotMapKeyFunctor::operator()(
int ret = OB_SUCCESS;
if (!ls_snap_map_key.is_valid()) {
LOG_DEBUG("invalid ObLSSnapshotMapKey, skip", K(ls_snap_map_key));
} else if (OB_UNLIKELY(OB_ISNULL(ls_snapshot_key_arr_))) {
} else if (OB_ISNULL(ls_snapshot_key_arr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls_snapshot_key_arr_ is null", KR(ret));
} else if (OB_FAIL(ls_snapshot_key_arr_->push_back(ls_snap_map_key))){
@ -785,7 +864,7 @@ int ObTenantSnapshotService::get_ls_snapshot_vt_info(const ObLSSnapshotMapKey &l
ls_snapshot_key.ls_id_,
ls_snapshot))){
LOG_WARN("fail to get ObLSSnapshot", KR(ret), K(ls_snapshot_key));
} else if (OB_UNLIKELY(OB_ISNULL(ls_snapshot))) {
} else if (OB_ISNULL(ls_snapshot)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls_snapshot is nullptr", KR(ret), K(ls_snapshot_key));
} else {
@ -807,7 +886,7 @@ int ObTenantSnapshotService::get_ls_snapshot_vt_info(const ObLSSnapshotMapKey &l
} else {
LOG_WARN("fail to get tenant snapshot", KR(ret), K(ls_snapshot_key));
}
} else if (OB_UNLIKELY(OB_ISNULL(tenant_snapshot))) {
} else if (OB_ISNULL(tenant_snapshot)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tenant_snapshot is nullptr", KR(ret), K(tenant_snapshot_id));
} else {
@ -822,5 +901,80 @@ int ObTenantSnapshotService::get_ls_snapshot_vt_info(const ObLSSnapshotMapKey &l
}
return ret;
}
// Tells the caller whether every tenant snapshot of this tenant has been released.
//
// is_released becomes true only when all of the following hold:
//   - the snapshot meta has been loaded (meta_loaded_),
//   - the service is running in GC mode,
//   - tenant_snapshot_mgr_ reports a snapshot count of zero.
// In every other case, including a failure to read the count, it stays false.
//
// @param[out] is_released  see above.
// @return OB_SUCCESS, or the error from get_tenant_snapshot_cnt().
int ObTenantSnapshotService::check_all_tenant_snapshot_released(bool& is_released)
{
  int ret = OB_SUCCESS;
  int64_t cnt = INT64_MAX;
  is_released = false;

  if (!ATOMIC_LOAD(&meta_loaded_)) {
    FLOG_INFO("cannot process before tenant snapshot meta loaded", KR(ret), KPC(this));
  } else if (GC != ATOMIC_LOAD(&running_mode_)) {
    FLOG_INFO("running_mode_ is not switch to GC", KR(ret), KPC(this));
  } else if (OB_FAIL(tenant_snapshot_mgr_.get_tenant_snapshot_cnt(cnt))) {
    FLOG_WARN("fail to get_tenant_snapshot_cnt", KR(ret));
  } else {
    is_released = (0 == cnt);
  }

  FLOG_INFO("check_all_tenant_snapshot_released finished", KR(ret), K(is_released), K(cnt), KPC(this));
  return ret;
}
void ObTenantSnapshotService::notify_unit_is_deleting()
{
int ret = OB_SUCCESS;
if (OB_FAIL(common_env_check_())) {
LOG_WARN("fail to common_env_check_", KR(ret));
} else if (FALSE_IT(unit_is_deleting_ = true)) {
} else {
ObThreadCondGuard guard(cond_);
cond_.signal();
}
LOG_INFO("try_set_running_mode_to_gc finished", KR(ret), KPC(this));
}
// Logs one tenant snapshot for debugging.
//
// A null snapshot pointer is reported with a warning instead of being dumped.
// Always returns true, whatever was logged.
bool ObTenantSnapshotService::DumpTenantSnapInfoFunctor::operator()(
    const ObTenantSnapshotID &tenant_snapshot_id,
    ObTenantSnapshot* tenant_snapshot)
{
  int ret = OB_SUCCESS;

  if (!OB_ISNULL(tenant_snapshot)) {
    LOG_INFO("dump tenant snapshot info", KPC(tenant_snapshot));
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tenant snapshot is unexpected null", KR(ret), K(tenant_snapshot_id));
  }

  return true;
}
void ObTenantSnapshotService::dump_all_tenant_snapshot_info()
{
int ret = OB_SUCCESS;
if (OB_FAIL(common_env_check_())) {
LOG_WARN("fail to common_env_check_", KR(ret));
} else if (!ATOMIC_LOAD(&meta_loaded_)) {
ret = OB_NOT_RUNNING;
LOG_WARN("tenant snapshot meta unloaded", KR(ret), KPC(this));
} else {
DumpTenantSnapInfoFunctor fn;
if (OB_FAIL(tenant_snapshot_mgr_.for_each(fn))) {
LOG_WARN("fail to dump tenant snapshot info", KR(ret));
}
}
LOG_INFO("dump tenant snapshot info finished", KR(ret), KPC(this));
}
} // storage
} // oceanbase

View File

@ -56,7 +56,13 @@ public:
const share::ObLSID &ls_id,
blocksstable::MacroBlockId &tablet_meta_entry);
int end_clone(const share::ObTenantSnapshotID &tenant_snapshot_id);
int end_clone(const ObTenantSnapshotID &tenant_snapshot_id);
int check_all_tenant_snapshot_released(bool& is_released);
void dump_all_tenant_snapshot_info();
void notify_unit_is_deleting();
TO_STRING_KV(K(is_inited_), K(is_running_), K(running_mode_), K(meta_loaded_), K(tg_id_));
private:
@ -66,6 +72,7 @@ private:
RESTORE = 1,
CLONE = 2,
NORMAL = 3,
GC = 4,
};
private:
int load_();
@ -78,6 +85,7 @@ private:
int decide_running_mode_(enum RUNNING_MODE& running_mode);
void run_in_normal_mode_();
void run_in_clone_mode_();
void run_in_gc_mode_();
int wait_();
int schedule_create_tenant_snapshot_dag_(const share::ObTenantSnapshotID& tenant_snapshot_id,
@ -85,17 +93,25 @@ private:
const common::ObCurTraceId::TraceId& trace_id);
int schedule_gc_tenant_snapshot_dag_(const share::ObTenantSnapshotID &tenant_snapshot_id,
const common::ObArray<share::ObLSID> &gc_ls_id_arr,
const bool gc_all_tenant_snapshot,
const bool gc_tenant_snapshot,
const common::ObCurTraceId::TraceId& trace_id);
int try_create_tenant_snapshot_(const share::ObTenantSnapshotID& tenant_snapshot_id);
int try_create_tenant_snapshot_in_meta_table_();
int try_gc_tenant_snapshot_();
uint64_t calculate_idle_time_();
int try_load_meta_();
int check_if_tenant_has_been_dropped_(bool &has_dropped);
private:
// Functor that try_gc_tenant_snapshot_() applies to each entry of the tenant
// snapshot map in order to start and schedule a GC dag for that snapshot.
class TryGcTenantSnapshotFunctor {
public:
  // @param tenant_has_been_dropped  forwarded to
  //        ObTenantSnapshot::try_start_gc_tenant_snapshot_dag() for every snapshot
  //        visited; try_gc_tenant_snapshot_() passes true when running in GC mode.
  // explicit: a bare bool must not silently convert into a functor.
  explicit TryGcTenantSnapshotFunctor(bool tenant_has_been_dropped)
    : tenant_has_been_dropped_(tenant_has_been_dropped) {}
  ~TryGcTenantSnapshotFunctor() {}
  bool operator()(const share::ObTenantSnapshotID &tenant_snapshot_id, ObTenantSnapshot* tenant_snapshot);
private:
  // Fixed at construction; the same value is used for every snapshot visited.
  const bool tenant_has_been_dropped_;
};
class GetAllLSSnapshotMapKeyFunctor {
@ -106,12 +122,18 @@ private:
private:
common::ObArray<ObLSSnapshotMapKey> *ls_snapshot_key_arr_;
};
// Stateless functor used by dump_all_tenant_snapshot_info() with
// tenant_snapshot_mgr_.for_each() to log each tenant snapshot.
class DumpTenantSnapInfoFunctor {
public:
// Logs the given snapshot (or a warning if the pointer is null); always returns true.
bool operator()(const share::ObTenantSnapshotID &tenant_snapshot_id, ObTenantSnapshot* tenant_snapshot);
};
private:
DISALLOW_COPY_AND_ASSIGN(ObTenantSnapshotService);
bool is_inited_;
bool is_running_;
bool meta_loaded_;
bool unit_is_deleting_;
ObTenantSnapshotMgr tenant_snapshot_mgr_;
ObLSSnapshotMgr ls_snapshot_mgr_;
ObTenantMetaSnapshotHandler meta_handler_;
@ -119,8 +141,7 @@ private:
common::ObThreadCond cond_;
int tg_id_;
// record running_mode_ information in the ObTenantSnapshotService class, because the service
// will not switch to the CLONE state after confirming that it has reached the normal state,
RUNNING_MODE running_mode_;
ObTenantCloneService clone_service_;
};

View File

@ -225,7 +225,7 @@ bool ObTenantSnapshotGCParam::is_valid() const
if (OB_UNLIKELY(!tenant_snapshot_id_.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("the tenant_snapshot_id_ is invalid", KR(ret), KPC(this));
} else if (!gc_all_tenant_snapshot_ && gc_ls_id_arr_.empty()) { // gc_all_tenant_snapshot_ == false means gc ls snap
} else if (!gc_tenant_snapshot_ && gc_ls_id_arr_.empty()) { // gc_tenant_snapshot_ == false means gc ls snap
ret = OB_INVALID_ARGUMENT; // therefore gc_ls_id_arr_ could not be empty
LOG_WARN("gc_ls_id_arr_ is empty", KR(ret), KPC(this));
} else if (!trace_id_.is_valid()) {
@ -262,7 +262,7 @@ int ObTenantSnapshotGCDag::init_by_param(const share::ObIDagInitParam *param)
} else {
tenant_snapshot_id_ = gc_param->tenant_snapshot_id_;
gc_ls_id_arr_ = gc_param->gc_ls_id_arr_;
gc_all_tenant_snapshot_ = gc_param->gc_all_tenant_snapshot_;
gc_tenant_snapshot_ = gc_param->gc_tenant_snapshot_;
tenant_snapshot_mgr_ = gc_param->tenant_snapshot_mgr_;
is_inited_ = true;
}
@ -281,7 +281,7 @@ int ObTenantSnapshotGCDag::create_first_task()
LOG_WARN("fail to create ObTenantSnapshotGCDag", KR(ret));
} else if (OB_FAIL(task->init(tenant_snapshot_id_,
&gc_ls_id_arr_,
gc_all_tenant_snapshot_,
gc_tenant_snapshot_,
tenant_snapshot_mgr_))) {
LOG_WARN("fail to init ObTenantSnapshotGCTask", KR(ret));
} else if(OB_FAIL(add_task(*task))) {
@ -347,7 +347,7 @@ int64_t ObTenantSnapshotGCDag::hash() const
//****** ObTenantSnapshotGCTask
int ObTenantSnapshotGCTask::init(const ObTenantSnapshotID tenant_snapshot_id,
const ObArray<ObLSID> *gc_ls_id_arr,
bool gc_all_tenant_snapshot,
bool gc_tenant_snapshot,
ObTenantSnapshotMgr* tenant_snapshot_mgr)
{
int ret = OB_SUCCESS;
@ -366,7 +366,7 @@ int ObTenantSnapshotGCTask::init(const ObTenantSnapshotID tenant_snapshot_id,
} else {
tenant_snapshot_id_ = tenant_snapshot_id;
gc_ls_id_arr_ = gc_ls_id_arr;
gc_all_tenant_snapshot_ = gc_all_tenant_snapshot;
gc_tenant_snapshot_ = gc_tenant_snapshot;
tenant_snapshot_mgr_ = tenant_snapshot_mgr;
is_inited_ = true;
}
@ -394,10 +394,10 @@ int ObTenantSnapshotGCTask::process()
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("tenant_snapshot has been deleted", KR(ret), K(tenant_snapshot_id_));
} else {
if (OB_FAIL(tenant_snapshot->execute_gc_tenant_snapshot_dag(gc_all_tenant_snapshot_, *gc_ls_id_arr_))) {
if (OB_FAIL(tenant_snapshot->execute_gc_tenant_snapshot_dag(gc_tenant_snapshot_, *gc_ls_id_arr_))) {
LOG_WARN("fail to execute gc tenant snapshot dag", KR(ret));
} else {
if (gc_all_tenant_snapshot_) {
if (gc_tenant_snapshot_) {
if (OB_FAIL(tenant_snapshot_mgr_->del_tenant_snapshot(tenant_snapshot->get_tenant_snapshot_id()))) {
LOG_WARN("fail to delete tenant snapshot in tenant_snapshot_mgr_",
KR(ret), KPC(tenant_snapshot_mgr_), KPC(tenant_snapshot));
@ -410,7 +410,7 @@ int ObTenantSnapshotGCTask::process()
}
LOG_INFO("ObTenantSnapshotGCTask finished",
KR(ret), K(gc_all_tenant_snapshot_), K(tenant_snapshot_id_), KPC(gc_ls_id_arr_));
KR(ret), K(gc_tenant_snapshot_), K(tenant_snapshot_id_), KPC(gc_ls_id_arr_));
return ret;
}

View File

@ -104,23 +104,23 @@ private:
// Initialization parameters for the tenant snapshot GC dag.
// ObTenantSnapshotGCDag::init_by_param() copies the snapshot id, the LS id array, the
// gc flag and the snapshot manager pointer out of an instance of this struct.
//
// NOTE(review): this listing shows the pre-rename `gc_all_tenant_snapshot[_]` lines directly
// followed by their post-rename `gc_tenant_snapshot[_]` replacements; only one line of each
// such pair is expected to exist in the real header -- confirm against the checked-in file.
struct ObTenantSnapshotGCParam : public share::ObIDagInitParam {
// Stores every argument as given; the constructor body is empty and performs no validation
// (validation is exposed separately through is_valid()).
ObTenantSnapshotGCParam(const share::ObTenantSnapshotID tenant_snapshot_id,
const common::ObArray<share::ObLSID> &gc_ls_id_arr,
const bool gc_all_tenant_snapshot,
const bool gc_tenant_snapshot,
const common::ObCurTraceId::TraceId& trace_id,
ObTenantSnapshotMgr *tenant_snapshot_mgr)
: tenant_snapshot_id_(tenant_snapshot_id),
gc_ls_id_arr_(gc_ls_id_arr),
gc_all_tenant_snapshot_(gc_all_tenant_snapshot),
gc_tenant_snapshot_(gc_tenant_snapshot),
trace_id_(trace_id),
tenant_snapshot_mgr_(tenant_snapshot_mgr){}
virtual ~ObTenantSnapshotGCParam(){}
// The implementation checks tenant_snapshot_id_.is_valid(), rejects an empty gc_ls_id_arr_
// when the gc flag is false, and checks trace_id_.is_valid().
virtual bool is_valid() const override;
TO_STRING_KV(K(tenant_snapshot_id_),
K(gc_ls_id_arr_), K(gc_all_tenant_snapshot_), K(trace_id_), KP(tenant_snapshot_mgr_));
K(gc_ls_id_arr_), K(gc_tenant_snapshot_), K(trace_id_), KP(tenant_snapshot_mgr_));
// Id of the tenant snapshot the GC run targets.
const share::ObTenantSnapshotID tenant_snapshot_id_;
// LS ids whose snapshots are to be collected; held as a copy of the constructor argument.
// Must be non-empty when the gc flag below is false (enforced by is_valid()).
const common::ObArray<share::ObLSID> gc_ls_id_arr_;
// true: gc the tenant snapshot; false: gc ls snapshots only.
const bool gc_all_tenant_snapshot_; // gc tenant snapshot or gc ls snapshot
const bool gc_tenant_snapshot_; // gc tenant snapshot or gc ls snapshot
const common::ObCurTraceId::TraceId trace_id_;
// Raw pointer to the snapshot manager; presumably not owned by this struct -- TODO confirm.
ObTenantSnapshotMgr *tenant_snapshot_mgr_;
};
@ -131,7 +131,7 @@ public:
is_inited_(false),
tenant_snapshot_id_(),
gc_ls_id_arr_(),
gc_all_tenant_snapshot_(false),
gc_tenant_snapshot_(false),
tenant_snapshot_mgr_() {}
virtual ~ObTenantSnapshotGCDag() {}
virtual int init_by_param(const share::ObIDagInitParam *param) override;
@ -157,7 +157,7 @@ private:
bool is_inited_;
share::ObTenantSnapshotID tenant_snapshot_id_;
common::ObArray<share::ObLSID> gc_ls_id_arr_;
bool gc_all_tenant_snapshot_;
bool gc_tenant_snapshot_;
ObTenantSnapshotMgr *tenant_snapshot_mgr_;
};
@ -167,12 +167,12 @@ public:
is_inited_(false),
tenant_snapshot_id_(),
gc_ls_id_arr_(),
gc_all_tenant_snapshot_(false),
gc_tenant_snapshot_(false),
tenant_snapshot_mgr_(nullptr) {}
virtual ~ObTenantSnapshotGCTask() {}
int init(const share::ObTenantSnapshotID tenant_snapshot_id,
const common::ObArray<share::ObLSID> *gc_ls_id_arr,
bool gc_all_tenant_snapshot,
bool gc_tenant_snapshot,
ObTenantSnapshotMgr *tenant_snapshot_mgr);
protected:
virtual int process() override;
@ -183,7 +183,7 @@ protected:
bool is_inited_;
share::ObTenantSnapshotID tenant_snapshot_id_;
const common::ObArray<share::ObLSID> *gc_ls_id_arr_;
bool gc_all_tenant_snapshot_;
bool gc_tenant_snapshot_;
ObTenantSnapshotMgr *tenant_snapshot_mgr_;
};