fix backup check tablet continuity performance issue

This commit is contained in:
oceanoverflow 2024-02-08 04:00:17 +00:00 committed by ob-robot
parent 8921cc28a8
commit e7a85da9e6
7 changed files with 359 additions and 190 deletions

View File

@ -825,7 +825,8 @@ ObLSBackupCtx::ObLSBackupCtx()
sql_proxy_(NULL),
rebuild_seq_(),
check_tablet_info_cost_time_(),
backup_tx_table_filled_tx_scn_(share::SCN::min_scn())
backup_tx_table_filled_tx_scn_(share::SCN::min_scn()),
tablet_checker_()
{}
ObLSBackupCtx::~ObLSBackupCtx()
@ -834,7 +835,8 @@ ObLSBackupCtx::~ObLSBackupCtx()
}
int ObLSBackupCtx::open(
const ObLSBackupParam &param, const share::ObBackupDataType &backup_data_type, common::ObMySQLProxy &sql_proxy)
const ObLSBackupParam &param, const share::ObBackupDataType &backup_data_type,
common::ObMySQLProxy &sql_proxy, ObBackupIndexKVCache &index_kv_cache)
{
int ret = OB_SUCCESS;
ObArray<common::ObTabletID> tablet_list;
@ -856,6 +858,8 @@ int ObLSBackupCtx::open(
LOG_WARN("failed to init stat", K(ret));
} else if (OB_FAIL(param_.assign(param))) {
LOG_WARN("failed to assign param", K(ret), K(param));
} else if (OB_FAIL(tablet_checker_.init(param, sql_proxy, index_kv_cache))) {
LOG_WARN("failed to init tablet checker", K(ret), K(param));
} else {
max_file_id_ = 0;
prefetch_task_id_ = 0;
@ -1035,16 +1039,6 @@ int ObLSBackupCtx::set_max_file_id(const int64_t file_id)
return ret;
}
// Hands out the current prefetch task id through `prefetch_task_id` and then
// advances the internal counter by one. Both steps run while mutex_ is held.
// Always returns OB_SUCCESS.
int ObLSBackupCtx::get_prefetch_task_id(int64_t &prefetch_task_id)
{
  int ret = OB_SUCCESS;
  ObMutexGuard guard(mutex_);
  // post-increment: the caller receives the value from before the bump
  prefetch_task_id = prefetch_task_id_++;
  LOG_INFO("get prefetch task id", K(prefetch_task_id));
  return ret;
}
int ObLSBackupCtx::wait_task(const int64_t file_id)
{
int ret = OB_SUCCESS;

View File

@ -229,8 +229,8 @@ struct ObLSBackupCtx {
public:
ObLSBackupCtx();
virtual ~ObLSBackupCtx();
int open(
const ObLSBackupParam &param, const share::ObBackupDataType &backup_data_type, common::ObMySQLProxy &sql_proxy);
int open(const ObLSBackupParam &param, const share::ObBackupDataType &backup_data_type,
common::ObMySQLProxy &sql_proxy, ObBackupIndexKVCache &index_kv_cache);
int next(common::ObTabletID &tablet_id);
void set_backup_data_type(const share::ObBackupDataType &backup_data_type);
int hold_tablet(const common::ObTabletID &tablet_id, storage::ObTabletHandle &tablet_handle);
@ -246,9 +246,12 @@ public:
int get_max_file_id(int64_t &max_file_id);
int set_max_file_id(const int64_t file_id);
int get_prefetch_task_id(int64_t &prefetch_task_id);
int wait_task(const int64_t file_id);
int finish_task(const int64_t file_id);
// Returns a prefetch task id and advances prefetch_task_id_ by one in a single
// atomic step, so no mutex is taken here.
// NOTE(review): ATOMIC_FAA is presumably fetch-and-add, i.e. the returned id is
// the value from before the increment - confirm against the macro definition.
int64_t get_prefetch_task_id()
{
return ATOMIC_FAA(&prefetch_task_id_, 1);
}
ObBackupTabletHolder &get_tablet_holder()
{
@ -312,6 +315,7 @@ public:
int64_t rebuild_seq_; // rebuild seq of backup ls meta
int64_t check_tablet_info_cost_time_;
share::SCN backup_tx_table_filled_tx_scn_;
ObBackupTabletChecker tablet_checker_;
DISALLOW_COPY_AND_ASSIGN(ObLSBackupCtx);
};

View File

@ -33,7 +33,7 @@ class ObLocationService;
namespace backup {
static const int64_t OB_DEFAULT_BACKUP_CONCURRENCY = 2;
static const int64_t OB_MAX_BACKUP_CONCURRENCY = 8;
static const int64_t OB_MAX_BACKUP_CONCURRENCY = 128;
static const int64_t OB_MAX_BACKUP_MEM_BUF_LEN = 8 * 1024 * 1024; // 8MB
static const int64_t OB_MAX_BACKUP_INDEX_BUF_SIZE = 16 * 1024; // 16KB;
static const int64_t OB_MAX_BACKUP_FILE_SIZE = 4 * 1024 * 1024;

View File

@ -564,7 +564,8 @@ int ObLSBackupDataDagNet::inner_init_before_run_()
int64_t batch_size = 0;
if (OB_FAIL(param_.convert_to(backup_param))) {
LOG_WARN("failed to convert param", K(param_));
} else if (OB_FAIL(ls_backup_ctx_.open(backup_param, backup_data_type_, *param_.report_ctx_.sql_proxy_))) {
} else if (OB_FAIL(ls_backup_ctx_.open(backup_param, backup_data_type_,
*param_.report_ctx_.sql_proxy_, OB_BACKUP_INDEX_CACHE))) {
LOG_WARN("failed to open log stream backup ctx", K(ret), K(backup_param));
} else if (OB_FAIL(prepare_backup_tablet_provider_(backup_param, backup_data_type_, ls_backup_ctx_,
OB_BACKUP_INDEX_CACHE, *param_.report_ctx_.sql_proxy_, provider_))) {
@ -1966,7 +1967,9 @@ ObPrefetchBackupInfoTask::ObPrefetchBackupInfoTask()
index_kv_cache_(NULL),
macro_index_store_for_inc_(),
macro_index_store_for_turn_(),
index_rebuild_dag_(NULL)
index_rebuild_dag_(NULL),
next_prefetch_task_id_(-1),
next_backup_task_id_(-1)
{}
ObPrefetchBackupInfoTask::~ObPrefetchBackupInfoTask()
@ -1987,6 +1990,7 @@ int ObPrefetchBackupInfoTask::init(const ObLSBackupDagInitParam &param, const sh
LOG_WARN("failed to assign param", K(ret), K(param));
} else {
report_ctx_ = report_ctx;
prefetch_task_id_ = ls_backup_ctx.get_prefetch_task_id();
backup_data_type_ = backup_data_type;
ls_backup_ctx_ = &ls_backup_ctx;
provider_ = &provider;
@ -2008,6 +2012,7 @@ int ObPrefetchBackupInfoTask::process()
{
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
const int64_t start_ts = ObTimeUtility::current_time();
bool need_report_error = false;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -2045,6 +2050,8 @@ int ObPrefetchBackupInfoTask::process()
if (OB_NOT_NULL(ls_backup_ctx_) && need_report_error) {
REPORT_TASK_RESULT(this->get_dag()->get_dag_id(), ls_backup_ctx_->get_result_code());
}
const int64_t cost_us = ObTimeUtility::current_time() - start_ts;
record_server_event_(cost_us);
return ret;
}
@ -2380,7 +2387,6 @@ int ObPrefetchBackupInfoTask::generate_next_prefetch_dag_()
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
ObPrefetchBackupInfoDag *child_dag = NULL;
int64_t prefetch_task_id = 0;
ObTenantDagScheduler *scheduler = MTL(ObTenantDagScheduler *);
ObIDagNet *dag_net = NULL;
if (OB_ISNULL(scheduler) || OB_ISNULL(ls_backup_ctx_)) {
@ -2391,9 +2397,8 @@ int ObPrefetchBackupInfoTask::generate_next_prefetch_dag_()
LOG_WARN("dag net should not be NULL", K(ret), K(*this));
} else if (OB_FAIL(scheduler->alloc_dag(child_dag))) {
LOG_WARN("failed to alloc child dag", K(ret));
} else if (OB_FAIL(ls_backup_ctx_->get_prefetch_task_id(prefetch_task_id))) {
LOG_WARN("failed to get prefetch task id", K(ret));
} else if (OB_FAIL(child_dag->init(prefetch_task_id,
} else if (FALSE_IT(next_prefetch_task_id_ = ls_backup_ctx_->get_prefetch_task_id())) {
} else if (OB_FAIL(child_dag->init(next_prefetch_task_id_,
param_,
backup_data_type_,
report_ctx_,
@ -2418,7 +2423,7 @@ int ObPrefetchBackupInfoTask::generate_next_prefetch_dag_()
LOG_WARN("may exist same dag", K(ret));
}
} else {
LOG_INFO("success to alloc next prefetch dag", K(ret), K(prefetch_task_id), K_(param));
LOG_INFO("success to alloc next prefetch dag", K(ret), K_(param));
}
if (OB_FAIL(ret) && OB_NOT_NULL(scheduler) && OB_NOT_NULL(child_dag)) {
scheduler->free_dag(*child_dag);
@ -2476,6 +2481,7 @@ int ObPrefetchBackupInfoTask::generate_backup_dag_(
LOG_WARN("may exist same dag", K(ret));
}
} else {
next_backup_task_id_ = task_id;
LOG_INFO("success to alloc backup dag", K(ret), K(task_id), K_(backup_data_type), K(items));
}
}
@ -2485,6 +2491,35 @@ int ObPrefetchBackupInfoTask::generate_backup_dag_(
return ret;
}
// Records one "backup" server event for this prefetch task.
//   cost_us: elapsed time of the task, reported as "cost_us".
// The trailing free-text column says which task this one handed over to: the
// next prefetch task when next_prefetch_task_id_ has been set (is not -1),
// otherwise the backup task id stored in next_backup_task_id_.
void ObPrefetchBackupInfoTask::record_server_event_(const int64_t cost_us)
{
  // Build the "who comes next" description first.
  const int64_t buf_len = MAX_ROOTSERVICE_EVENT_EXTRA_INFO_LENGTH;
  char buf[buf_len] = {};
  int64_t pos = 0;
  if (-1 == next_prefetch_task_id_) {
    (void)common::databuff_printf(buf, buf_len, pos, "prefetch_task_id:%ld -> backup_task_id:%ld",
        prefetch_task_id_, next_backup_task_id_);
  } else {
    (void)common::databuff_printf(buf, buf_len, pos, "prefetch_task_id:%ld -> next_prefetch_task_id:%ld",
        prefetch_task_id_, next_prefetch_task_id_);
  }

  // Event name depends on the backup data type; it stays NULL when the type is
  // none of sys / minor / major.
  const char *event_name = backup_data_type_.is_sys_backup() ? "prefetch_sys_data"
      : backup_data_type_.is_minor_backup() ? "prefetch_minor_data"
      : backup_data_type_.is_major_backup() ? "prefetch_major_data"
      : NULL;

  SERVER_EVENT_ADD("backup", event_name,
      "tenant_id", param_.tenant_id_,
      "backup_set_id", param_.backup_set_desc_.backup_set_id_,
      "ls_id", param_.ls_id_.id(),
      "turn_id", param_.turn_id_,
      "retry_id", param_.retry_id_,
      "cost_us", cost_us, buf);
}
/* ObLSBackupDataTask */
ObLSBackupDataTask::ObLSBackupDataTask()
@ -2504,7 +2539,8 @@ ObLSBackupDataTask::ObLSBackupDataTask()
allocator_(),
backup_items_(),
finished_tablet_list_(),
index_rebuild_dag_(NULL)
index_rebuild_dag_(NULL),
next_prefetch_task_id_(-1)
{}
ObLSBackupDataTask::~ObLSBackupDataTask()
@ -2593,6 +2629,8 @@ int ObLSBackupDataTask::process()
LOG_WARN("failed to check ls valid for backup", K(ret), K_(param));
} else if (OB_FAIL(do_write_file_header_())) {
LOG_WARN("failed to do write file header", K(ret));
} else if (OB_FAIL(do_check_tablet_valid_())) {
LOG_WARN("failed to check tablet valid", K(ret));
} else if (OB_FAIL(do_backup_macro_block_data_())) {
LOG_WARN("failed to do backup macro block data", K(ret));
} else if (OB_FAIL(do_backup_meta_data_())) {
@ -2694,6 +2732,51 @@ int ObLSBackupDataTask::do_write_file_header_()
return ret;
}
// Collects the tablet ids of every PROVIDER_ITEM_CHECK entry in backup_items_.
//   tablet_list: output; reset first, then filled in backup_items_ order.
// Returns OB_SUCCESS, or the error of the first failed push_back (iteration
// stops at that point).
int ObLSBackupDataTask::get_check_tablet_list_(common::ObIArray<ObTabletID> &tablet_list)
{
  int ret = OB_SUCCESS;
  tablet_list.reset();
  const int64_t item_cnt = backup_items_.count();
  for (int64_t i = 0; OB_SUCC(ret) && i < item_cnt; ++i) {
    const ObBackupProviderItem &cur_item = backup_items_.at(i);
    if (PROVIDER_ITEM_CHECK != cur_item.get_item_type()) {
      // not a check item, nothing to collect
    } else if (OB_FAIL(tablet_list.push_back(cur_item.get_tablet_id()))) {
      LOG_WARN("failed to push back", K(ret));
    }
  }
  return ret;
}
// Runs the tablet validity checks for every PROVIDER_ITEM_CHECK item of this
// task, using the checker owned by the log stream backup context.
// Only major backups are checked; other backup data types return OB_SUCCESS
// without doing any work.
// Returns OB_ERR_UNEXPECTED when ls_backup_ctx_ is NULL, otherwise the first
// error from collecting the list, fetching a tablet handle, or the check
// itself (the loop stops at the first failure).
// NOTE(review): the provider-side replica check used to add its elapsed time to
// ls_backup_ctx_->check_tablet_info_cost_time_; nothing on this path updates
// that counter - confirm whether the statistic is still needed.
int ObLSBackupDataTask::do_check_tablet_valid_()
{
  int ret = OB_SUCCESS;
  ObArray<ObTabletID> tablet_list;
  if (OB_ISNULL(ls_backup_ctx_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls backup ctx should not be null", K(ret));
  } else if (!backup_data_type_.is_major_backup()) {
    // do nothing
  } else if (OB_FAIL(get_check_tablet_list_(tablet_list))) {
    LOG_WARN("failed to get check tablet list", K(ret));
  } else {
    // The checker is a member of the (non-null) context, so it cannot be NULL;
    // bind it once instead of re-testing a pointer on every iteration.
    ObBackupTabletChecker &checker = ls_backup_ctx_->tablet_checker_;
    ARRAY_FOREACH_X(tablet_list, idx, cnt, OB_SUCC(ret)) {
      const ObTabletID &tablet_id = tablet_list.at(idx);
      ObTabletHandle tablet_handle;
      if (OB_FAIL(get_tablet_handle_(tablet_id, tablet_handle))) {
        LOG_WARN("failed to get tablet handle", K(ret), K(tablet_id));
      } else if (OB_FAIL(checker.check_tablet_valid(param_.tenant_id_, param_.ls_id_, tablet_id, tablet_handle))) {
        LOG_WARN("failed to check tablet valid", K(ret), K_(param), K(tablet_id));
      }
    }
  }
  return ret;
}
int ObLSBackupDataTask::do_backup_macro_block_data_()
{
int ret = OB_SUCCESS;
@ -3241,7 +3324,6 @@ int ObLSBackupDataTask::do_generate_next_backup_dag_()
ObLSBackupStage stage = LOG_STREAM_BACKUP_MAJOR;
ObTenantDagScheduler *scheduler = MTL(ObTenantDagScheduler *);
ObIDagNet *dag_net = NULL;
int64_t prefetch_task_id = 0;
if (OB_ISNULL(scheduler) || OB_ISNULL(ls_backup_ctx_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null MTL scheduler", K(ret), KP(scheduler), KP_(ls_backup_ctx));
@ -3252,9 +3334,8 @@ int ObLSBackupDataTask::do_generate_next_backup_dag_()
} else if (OB_ISNULL(dag_net = this->get_dag()->get_dag_net())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("dag net should not be NULL", K(ret), K(*this));
} else if (OB_FAIL(ls_backup_ctx_->get_prefetch_task_id(prefetch_task_id))) {
LOG_WARN("failed to get prefetch task id", K(ret));
} else if (OB_FAIL(next_dag->init(prefetch_task_id,
} else if (FALSE_IT(next_prefetch_task_id_ = ls_backup_ctx_->get_prefetch_task_id())) {
} else if (OB_FAIL(next_dag->init(next_prefetch_task_id_,
dag_param,
backup_data_type_,
report_ctx_,
@ -3315,20 +3396,18 @@ void ObLSBackupDataTask::record_server_event_(const int64_t cost_us) const
} else if (backup_data_type_.is_major_backup()) {
backup_data_event = "backup_major_data";
}
SERVER_EVENT_ADD("backup",
backup_data_event,
"tenant_id",
param_.tenant_id_,
"backup_set_id",
param_.backup_set_desc_.backup_set_id_,
"ls_id",
param_.ls_id_.id(),
"retry_id",
param_.retry_id_,
"file_id",
task_id_,
"cost_us",
cost_us);
int64_t pos = 0;
const int64_t buf_len = MAX_ROOTSERVICE_EVENT_EXTRA_INFO_LENGTH;
char buf[buf_len] = {};
(void)common::databuff_printf(buf, buf_len, pos, "backup_task_id:%ld -> next_prefetch_task_id:%ld",
task_id_, next_prefetch_task_id_);
SERVER_EVENT_ADD("backup", backup_data_event,
"tenant_id", param_.tenant_id_,
"backup_set_id", param_.backup_set_desc_.backup_set_id_,
"ls_id", param_.ls_id_.id(),
"turn_id", param_.turn_id_,
"retry_id", param_.retry_id_,
"cost_us", cost_us, buf);
}
int ObLSBackupDataTask::finish_backup_items_()
@ -3978,8 +4057,7 @@ int ObLSBackupPrepareTask::process()
} else if (OB_ISNULL(dag_net = this->get_dag()->get_dag_net())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("dag net should not be NULL", K(ret), K(*this));
} else if (OB_FAIL(ls_backup_ctx_->get_prefetch_task_id(prefetch_task_id))) {
LOG_WARN("failed to get prefetch task id", K(ret));
} else if (FALSE_IT(prefetch_task_id = ls_backup_ctx_->prefetch_task_id_)) {
} else if (OB_FAIL(child_dag->init(prefetch_task_id,
param_,
backup_data_type_,

View File

@ -496,9 +496,11 @@ private:
const ObBackupProviderItem &item, bool &need_copy, ObBackupMacroBlockIndex &macro_index);
int generate_next_prefetch_dag_();
int generate_backup_dag_(const int64_t task_id, const common::ObIArray<ObBackupProviderItem> &items);
void record_server_event_(const int64_t cost_us);
private:
bool is_inited_;
int64_t prefetch_task_id_;
ObLSBackupDagInitParam param_;
ObBackupReportCtx report_ctx_;
share::ObBackupDataType backup_data_type_;
@ -509,6 +511,8 @@ private:
ObBackupMacroBlockIndexStore macro_index_store_for_inc_;
ObBackupMacroBlockIndexStore macro_index_store_for_turn_;
share::ObIDag *index_rebuild_dag_;
int64_t next_prefetch_task_id_;
int64_t next_backup_task_id_;
DISALLOW_COPY_AND_ASSIGN(ObPrefetchBackupInfoTask);
};
@ -528,6 +532,8 @@ private:
private:
int build_backup_file_header_(ObBackupFileHeader &file_header);
int do_write_file_header_();
int get_check_tablet_list_(common::ObIArray<ObTabletID> &tablet_list);
int do_check_tablet_valid_();
int do_backup_macro_block_data_();
int do_backup_meta_data_();
int get_tablet_meta_info_(
@ -595,6 +601,7 @@ private:
common::ObArray<ObBackupProviderItem> backup_items_;
common::ObArray<common::ObTabletID> finished_tablet_list_;
share::ObIDag *index_rebuild_dag_;
int64_t next_prefetch_task_id_;
DISALLOW_COPY_AND_ASSIGN(ObLSBackupDataTask);
};

View File

@ -1733,15 +1733,11 @@ int ObBackupTabletProvider::prepare_tablet_(const uint64_t tenant_id, const shar
} else {
LOG_WARN("failed to get tablet handle", K(ret), K(tenant_id), K(ls_id), K(tablet_id));
}
} else if (OB_FAIL(check_tablet_continuity_(ls_id, tablet_id, tablet_handle))) {
LOG_WARN("failed to check tablet continuity", K(ret), K(ls_id), K(tablet_id), K(tablet_handle));
} else if (OB_FAIL(check_tx_data_can_explain_user_data_(tablet_handle, can_explain))) {
LOG_WARN("failed to check tx data can explain user data", K(ret), K(ls_id), K(tablet_id));
} else if (!can_explain) {
ret = OB_REPLICA_CANNOT_BACKUP;
LOG_WARN("can not backup replica", K(ret), K(tablet_id), K(ls_id));
} else if (OB_FAIL(check_tablet_replica_validity_(tenant_id, ls_id, tablet_id, backup_data_type))) {
LOG_WARN("failed to check tablet replica validity", K(ret), K(tenant_id), K(ls_id), K(tablet_id), K(backup_data_type));
} else if (OB_FAIL(hold_tablet_handle_(tablet_id, tablet_handle))) {
LOG_WARN("failed to hold tablet handle", K(ret), K(tablet_id), K(tablet_handle));
} else if (OB_FAIL(tablet_handle.get_obj()->fetch_table_store(table_store_wrapper))) {
@ -2158,6 +2154,23 @@ int ObBackupTabletProvider::add_macro_block_id_item_list_(const common::ObTablet
return ret;
}
// Queues a PROVIDER_ITEM_CHECK item for `tablet_id` into external_sort_.
// Steps: build the item via set_with_fake, verify it is valid, add it.
// Returns OB_INVALID_DATA when the built item is not valid, otherwise the
// error of the failing step, or OB_SUCCESS.
int ObBackupTabletProvider::add_check_tablet_item_(const common::ObTabletID &tablet_id)
{
  int ret = OB_SUCCESS;
  ObBackupProviderItem item;
  if (OB_FAIL(item.set_with_fake(PROVIDER_ITEM_CHECK, tablet_id))) {
    LOG_WARN("failed to set item", K(ret), K(tablet_id));
  }
  if (OB_SUCC(ret) && !item.is_valid()) {
    ret = OB_INVALID_DATA;
    LOG_WARN("backup item is not valid", K(ret), K(item));
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(external_sort_.add_item(item))) {
      LOG_WARN("failed to add item", KR(ret), K(item));
    } else {
      LOG_INFO("add check tablet item", K(tablet_id));
    }
  }
  return ret;
}
int ObBackupTabletProvider::add_sstable_item_(const common::ObTabletID &tablet_id)
{
int ret = OB_SUCCESS;
@ -2276,142 +2289,9 @@ int ObBackupTabletProvider::check_tablet_status_(const storage::ObTabletHandle &
return ret;
}
// Provider-side continuity check: compares the tablet's current
// merge_snapshot_version_ with the one stored in the tablet meta that was
// backed up earlier (located through the minor-data meta index).
//   ls_id:         only used for the ERRSIM log / server event below.
//   tablet_id:     tablet to look up in the meta index.
//   tablet_handle: supplies the current tablet meta; get_obj() is dereferenced
//                  without a null check.
// Does nothing unless the provider's backup_data_type_ is major.
// Returns OB_BACKUP_MAJOR_NOT_COVER_MINOR when the current snapshot version is
// older than the backed-up one, OB_ERR_UNEXPECTED when either version is <= 0
// although the corresponding table_store_flag_ reports a major sstable, or the
// error of any failing lookup/read step.
int ObBackupTabletProvider::check_tablet_continuity_(const share::ObLSID &ls_id, const common::ObTabletID &tablet_id,
const storage::ObTabletHandle &tablet_handle)
{
int ret = OB_SUCCESS;
const ObBackupMetaType meta_type = BACKUP_TABLET_META;
// The previous tablet meta is read from the minor data backup, regardless of
// the type currently being backed up.
ObBackupDataType backup_data_type;
backup_data_type.set_minor_data_backup();
ObBackupMetaIndex tablet_meta_index;
ObBackupTabletMeta prev_backup_tablet_meta;
share::ObBackupPath backup_path;
if (!backup_data_type_.is_major_backup()) {
// do nothing
} else if (OB_FAIL(build_tenant_meta_index_store_(backup_data_type))) {
LOG_WARN("failed to init meta index store", K(ret));
} else if (OB_FAIL(meta_index_store_.get_backup_meta_index(tablet_id, meta_type, tablet_meta_index))) {
LOG_WARN("failed to get backup meta index", K(ret), K(tablet_id));
} else if (OB_FAIL(ObBackupPathUtil::get_macro_block_backup_path(param_.backup_dest_,
param_.backup_set_desc_, tablet_meta_index.ls_id_, backup_data_type, tablet_meta_index.turn_id_,
tablet_meta_index.retry_id_, tablet_meta_index.file_id_, backup_path))) {
LOG_WARN("failed to get macro block backup path", K(ret), K_(param), K(backup_data_type), K(tablet_meta_index));
} else if (OB_FAIL(ObLSBackupRestoreUtil::read_tablet_meta(backup_path.get_obstr(),
param_.backup_dest_.get_storage_info(), backup_data_type, tablet_meta_index, prev_backup_tablet_meta))) {
LOG_WARN("failed to read tablet meta", K(ret), K(backup_path), K_(param));
} else {
const ObTabletMeta &cur_tablet_meta = tablet_handle.get_obj()->get_tablet_meta();
const int64_t cur_snapshot_version = cur_tablet_meta.report_status_.merge_snapshot_version_;
const int64_t prev_backup_snapshot_version = prev_backup_tablet_meta.tablet_meta_.report_status_.merge_snapshot_version_;
// A non-positive version is only treated as an error when the matching meta
// claims to have a major sstable.
if ((prev_backup_snapshot_version <= 0 && prev_backup_tablet_meta.tablet_meta_.table_store_flag_.with_major_sstable())
|| (cur_snapshot_version <= 0 && cur_tablet_meta.table_store_flag_.with_major_sstable())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("prev or current snapshot version should not be invalid", K(ret), K(cur_tablet_meta), K(prev_backup_tablet_meta));
} else if (cur_snapshot_version < prev_backup_snapshot_version) {
ret = OB_BACKUP_MAJOR_NOT_COVER_MINOR;
LOG_WARN("tablet is not valid", K(ret), K(cur_tablet_meta), K(prev_backup_tablet_meta));
} else {
LOG_DEBUG("tablet is valid", K(cur_tablet_meta), K(prev_backup_tablet_meta));
}
}
#ifdef ERRSIM
// Error-injection builds only: for the configured tablet id, on retry 0 of a
// major backup, a so-far successful result is replaced by whatever the
// EN_BACKUP_CHECK_TABLET_CONTINUITY_FAILED injection point yields.
if (OB_SUCC(ret)) {
const int64_t errsim_tablet_id = GCONF.errsim_backup_tablet_id;
if (errsim_tablet_id == tablet_id.id() && backup_data_type_.is_major_backup() && 0 == param_.retry_id_) {
ret = OB_E(EventTable::EN_BACKUP_CHECK_TABLET_CONTINUITY_FAILED) OB_SUCCESS;
FLOG_WARN("errsim backup check tablet continuity", K(ret), K(ls_id), K(tablet_id));
SERVER_EVENT_SYNC_ADD("backup_errsim", "check_tablet_continuity",
"ls_id", ls_id.id(), "tablet_id", tablet_id.id());
}
}
#endif
return ret;
}
// Lazily initialises meta_index_store_ as a tenant-level meta index store in
// BACKUP_MODE for the given backup data type. A store that already reports
// is_inited() is left untouched, so repeated calls are cheap.
// The turn id and retry id of the index are resolved through the two helper
// lookups before init is called.
// Returns the error of the first failing step, or OB_SUCCESS.
int ObBackupTabletProvider::build_tenant_meta_index_store_(const share::ObBackupDataType &backup_data_type)
{
int ret = OB_SUCCESS;
ObBackupRestoreMode mode = BACKUP_MODE;
ObBackupIndexLevel index_level = BACKUP_INDEX_LEVEL_TENANT;
ObBackupIndexStoreParam index_store_param;
index_store_param.index_level_ = index_level;
index_store_param.tenant_id_ = param_.tenant_id_;
index_store_param.backup_set_id_ = param_.backup_set_desc_.backup_set_id_;
index_store_param.ls_id_ = param_.ls_id_;
index_store_param.is_tenant_level_ = true;
index_store_param.backup_data_type_ = backup_data_type;
int64_t retry_id = 0;
if (meta_index_store_.is_inited()) {
// do nothing
} else if (OB_FAIL(get_tenant_meta_index_turn_id_(index_store_param.turn_id_))) {
LOG_WARN("failed to find meta index turn id", K(ret), K(backup_data_type));
} else if (OB_FAIL(get_tenant_meta_index_retry_id_(backup_data_type, index_store_param.turn_id_, retry_id))) {
LOG_WARN("failed to find meta index retry id", K(ret), K(backup_data_type));
} else if (FALSE_IT(index_store_param.retry_id_ = retry_id)) {
// assign
// index_kv_cache_ is dereferenced below without a null check.
} else if (OB_FAIL(meta_index_store_.init(mode,
index_store_param,
param_.backup_dest_,
param_.backup_set_desc_,
false/*is_sec_meta*/,
*index_kv_cache_))) {
// NOTE(review): the message says "macro index store" but a meta index store is
// being initialised here.
LOG_WARN("failed to init macro index store", K(ret), K_(param), K_(backup_data_type));
}
return ret;
}
// Looks up the backup set task of this job/tenant through GCTX.sql_proxy_ and
// reports its minor_turn_id_ as the turn id of the tenant meta index.
//   turn_id: output; only written when the lookup succeeds.
// Returns the lookup error, or OB_SUCCESS.
int ObBackupTabletProvider::get_tenant_meta_index_turn_id_(int64_t &turn_id)
{
  int ret = OB_SUCCESS;
  ObBackupSetTaskAttr task_attr;
  if (OB_SUCC(share::ObBackupTaskOperator::get_backup_task(
          *GCTX.sql_proxy_, param_.job_id_, param_.tenant_id_, false, task_attr))) {
    turn_id = task_attr.minor_turn_id_;
  } else {
    LOG_WARN("failed to get backup task", K(ret));
  }
  return ret;
}
// Resolves the largest retry id of the tenant meta index for the given backup
// data type and turn, using ObBackupTenantIndexRetryIDGetter configured for a
// backup-side (not restore), non-macro, non-sec-meta index.
//   retry_id: output; filled by get_max_retry_id.
// Returns the error of the failing getter step, or OB_SUCCESS.
int ObBackupTabletProvider::get_tenant_meta_index_retry_id_(
    const share::ObBackupDataType &backup_data_type, const int64_t turn_id, int64_t &retry_id)
{
  int ret = OB_SUCCESS;
  ObBackupTenantIndexRetryIDGetter getter;
  if (OB_FAIL(getter.init(param_.backup_dest_, param_.backup_set_desc_, backup_data_type, turn_id,
          false/*is_restore*/, false/*is_macro_index*/, false/*is_sec_meta*/))) {
    LOG_WARN("failed to init retry id getter", K(ret), K(turn_id), K_(param));
  }
  if (OB_SUCC(ret) && OB_FAIL(getter.get_max_retry_id(retry_id))) {
    LOG_WARN("failed to get max retry id", K(ret));
  }
  return ret;
}
// Provider-side replica check: for major backups, asks
// ObStorageHAUtils::check_tablet_replica_validity about this server's replica
// (GCTX.self_addr()) of the tablet. Other backup data types are skipped.
// On success the elapsed time of the call is added to
// ls_backup_ctx_->check_tablet_info_cost_time_; failures are not timed.
// Returns OB_ERR_UNEXPECTED when sql_proxy_ or ls_backup_ctx_ is NULL,
// OB_INVALID_ARGUMENT for an invalid tenant/ls/tablet id, or the error of the
// validity check.
int ObBackupTabletProvider::check_tablet_replica_validity_(const uint64_t tenant_id, const share::ObLSID &ls_id,
const common::ObTabletID &tablet_id, const share::ObBackupDataType &backup_data_type)
{
int ret = OB_SUCCESS;
// Taken before any validation, so the recorded cost covers the whole function.
int64_t start_ts = ObTimeUtility::current_time();
if (!backup_data_type.is_major_backup()) {
// do nothing
} else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(ls_backup_ctx_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("sql proxy should not be null", K(ret), KP_(sql_proxy), KP_(ls_backup_ctx));
} else if (OB_INVALID_ID == tenant_id || !ls_id.is_valid() || !tablet_id.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("get invalid args", K(tenant_id), K(ls_id), K(tablet_id));
} else {
const common::ObAddr &src_addr = GCTX.self_addr();
if (OB_FAIL(ObStorageHAUtils::check_tablet_replica_validity(tenant_id, ls_id, src_addr, tablet_id, *sql_proxy_))) {
LOG_WARN("failed to check tablet replica validity", K(ret), K(tenant_id), K(ls_id), K(src_addr), K(tablet_id));
} else {
// plain (non-atomic) accumulation into the shared context
ls_backup_ctx_->check_tablet_info_cost_time_ += ObTimeUtility::current_time() - start_ts;
}
}
return ret;
}
int ObBackupTabletProvider::compare_prev_item_(const ObBackupProviderItem &cur_item)
{
@ -2677,5 +2557,186 @@ int ObBackupMacroBlockTaskMgr::put_to_pending_list_(const common::ObIArray<ObBac
return ret;
}
/* ObBackupTabletChecker */
// Constructs an uninitialised checker: is_inited_ is false and both borrowed
// pointers are NULL until init() is called.
ObBackupTabletChecker::ObBackupTabletChecker()
: is_inited_(false),
param_(),
sql_proxy_(NULL),
index_kv_cache_(NULL),
meta_index_store_()
{
}
// Nothing to release explicitly; sql_proxy_ and index_kv_cache_ are only stored
// as pointers and are not freed here.
ObBackupTabletChecker::~ObBackupTabletChecker()
{
}
int ObBackupTabletChecker::init(const ObLSBackupParam &param, common::ObMySQLProxy &sql_proxy,
ObBackupIndexKVCache &index_kv_cache)
{
int ret = OB_SUCCESS;
if (IS_INIT) {
ret = OB_INIT_TWICE;
LOG_WARN("backup tablet checker init twice", K(ret));
} else if (!param.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("get invalid arg", K(ret), K(param));
} else if (OB_FAIL(param_.assign(param))) {
LOG_WARN("failed to assign param", K(ret), K(param));
} else {
sql_proxy_ = &sql_proxy;
index_kv_cache_ = &index_kv_cache;
is_inited_ = true;
}
return ret;
}
// Public entry point of the checker: runs the replica validity check first and,
// if that passes, the continuity check.
// Returns OB_NOT_INIT before init() has succeeded, otherwise the error of the
// first failing check, or OB_SUCCESS.
int ObBackupTabletChecker::check_tablet_valid(const uint64_t tenant_id, const share::ObLSID &ls_id,
    const common::ObTabletID &tablet_id, const storage::ObTabletHandle &tablet_handle)
{
  int ret = OB_SUCCESS;
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    LOG_WARN("backup tablet checker do not init", K(ret));
  }
  if (OB_SUCC(ret) && OB_FAIL(check_tablet_replica_validity_(tenant_id, ls_id, tablet_id))) {
    LOG_WARN("failed to check tablet replica validity", K(ret));
  }
  if (OB_SUCC(ret) && OB_FAIL(check_tablet_continuity_(ls_id, tablet_id, tablet_handle))) {
    LOG_WARN("failed to check tablet continuity", K(ret));
  }
  return ret;
}
// Asks ObStorageHAUtils::check_tablet_replica_validity whether this server's
// replica (GCTX.self_addr()) of the tablet can be used.
// Returns OB_ERR_UNEXPECTED when sql_proxy_ is NULL, OB_INVALID_ARGUMENT for an
// invalid tenant/ls/tablet id, or the error of the validity check.
int ObBackupTabletChecker::check_tablet_replica_validity_(const uint64_t tenant_id, const share::ObLSID &ls_id,
    const common::ObTabletID &tablet_id)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("sql proxy should not be null", K(ret), KP_(sql_proxy));
  } else if (OB_INVALID_ID == tenant_id || !ls_id.is_valid() || !tablet_id.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    // include ret so this line matches every other failure log in the checker
    LOG_WARN("get invalid args", K(ret), K(tenant_id), K(ls_id), K(tablet_id));
  } else {
    const common::ObAddr &src_addr = GCTX.self_addr();
    if (OB_FAIL(ObStorageHAUtils::check_tablet_replica_validity(tenant_id, ls_id, src_addr, tablet_id, *sql_proxy_))) {
      LOG_WARN("failed to check tablet replica validity", K(ret), K(tenant_id), K(ls_id), K(src_addr), K(tablet_id));
    }
  }
  return ret;
}
// Compares the tablet's current merge_snapshot_version_ with the one stored in
// the tablet meta that was backed up earlier (located through the tenant-level
// minor-data meta index).
//   ls_id:         only used for logging and the ERRSIM server event.
//   tablet_id:     tablet to look up in the meta index.
//   tablet_handle: must hold a tablet; supplies the current tablet meta.
// Returns OB_INVALID_ARGUMENT when the handle holds no tablet,
// OB_BACKUP_MAJOR_NOT_COVER_MINOR when the current snapshot version is older
// than the backed-up one, OB_ERR_UNEXPECTED when either version is <= 0
// although the corresponding table_store_flag_ reports a major sstable, or the
// error of any failing lookup/read step.
int ObBackupTabletChecker::check_tablet_continuity_(const share::ObLSID &ls_id, const common::ObTabletID &tablet_id,
    const storage::ObTabletHandle &tablet_handle)
{
  int ret = OB_SUCCESS;
  const ObBackupMetaType meta_type = BACKUP_TABLET_META;
  // The previous tablet meta is always read from the minor data backup.
  ObBackupDataType backup_data_type;
  backup_data_type.set_minor_data_backup();
  ObBackupMetaIndex tablet_meta_index;
  ObBackupTabletMeta prev_backup_tablet_meta;
  share::ObBackupPath backup_path;
  if (OB_ISNULL(tablet_handle.get_obj())) {
    // Fail fast instead of dereferencing a NULL tablet after the remote reads.
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("tablet handle should hold a tablet", K(ret), K(ls_id), K(tablet_id));
  } else if (OB_FAIL(build_tenant_meta_index_store_(backup_data_type))) {
    LOG_WARN("failed to init meta index store", K(ret));
  } else if (OB_FAIL(meta_index_store_.get_backup_meta_index(tablet_id, meta_type, tablet_meta_index))) {
    LOG_WARN("failed to get backup meta index", K(ret), K(tablet_id));
  } else if (OB_FAIL(ObBackupPathUtil::get_macro_block_backup_path(param_.backup_dest_,
      param_.backup_set_desc_, tablet_meta_index.ls_id_, backup_data_type, tablet_meta_index.turn_id_,
      tablet_meta_index.retry_id_, tablet_meta_index.file_id_, backup_path))) {
    LOG_WARN("failed to get macro block backup path", K(ret), K_(param), K(backup_data_type), K(tablet_meta_index));
  } else if (OB_FAIL(ObLSBackupRestoreUtil::read_tablet_meta(backup_path.get_obstr(),
      param_.backup_dest_.get_storage_info(), backup_data_type, tablet_meta_index, prev_backup_tablet_meta))) {
    LOG_WARN("failed to read tablet meta", K(ret), K(backup_path), K_(param));
  } else {
    const ObTabletMeta &cur_tablet_meta = tablet_handle.get_obj()->get_tablet_meta();
    const int64_t cur_snapshot_version = cur_tablet_meta.report_status_.merge_snapshot_version_;
    const int64_t prev_backup_snapshot_version = prev_backup_tablet_meta.tablet_meta_.report_status_.merge_snapshot_version_;
    // A non-positive version is only an error when the matching meta claims to
    // have a major sstable.
    if ((prev_backup_snapshot_version <= 0 && prev_backup_tablet_meta.tablet_meta_.table_store_flag_.with_major_sstable())
        || (cur_snapshot_version <= 0 && cur_tablet_meta.table_store_flag_.with_major_sstable())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("prev or current snapshot version should not be invalid", K(ret), K(cur_tablet_meta), K(prev_backup_tablet_meta));
    } else if (cur_snapshot_version < prev_backup_snapshot_version) {
      ret = OB_BACKUP_MAJOR_NOT_COVER_MINOR;
      LOG_WARN("tablet is not valid", K(ret), K(cur_tablet_meta), K(prev_backup_tablet_meta));
    } else {
      LOG_DEBUG("tablet is valid", K(cur_tablet_meta), K(prev_backup_tablet_meta));
    }
  }
#ifdef ERRSIM
  // Error-injection builds only: for the configured tablet id on retry 0, a
  // so-far successful result is replaced by whatever the
  // EN_BACKUP_CHECK_TABLET_CONTINUITY_FAILED injection point yields.
  if (OB_SUCC(ret)) {
    const int64_t errsim_tablet_id = GCONF.errsim_backup_tablet_id;
    if (errsim_tablet_id == tablet_id.id() && 0 == param_.retry_id_) {
      ret = OB_E(EventTable::EN_BACKUP_CHECK_TABLET_CONTINUITY_FAILED) OB_SUCCESS;
      FLOG_WARN("errsim backup check tablet continuity", K(ret), K(ls_id), K(tablet_id));
      SERVER_EVENT_SYNC_ADD("backup_errsim", "check_tablet_continuity",
                            "ls_id", ls_id.id(), "tablet_id", tablet_id.id());
    }
  }
#endif
  return ret;
}
// Lazily initialises meta_index_store_ as a tenant-level meta index store in
// BACKUP_MODE for the given backup data type. A store that already reports
// is_inited() is left untouched, so the turn/retry id lookups run only until
// the first successful init.
// Returns the error of the first failing step, or OB_SUCCESS.
// NOTE(review): the is_inited() test and the init() call are not guarded by a
// lock in this function. If several backup tasks can reach the checker owned
// by the shared backup context at the same time, this lazy init would race -
// confirm how callers are serialised.
int ObBackupTabletChecker::build_tenant_meta_index_store_(const share::ObBackupDataType &backup_data_type)
{
  int ret = OB_SUCCESS;
  ObBackupRestoreMode mode = BACKUP_MODE;
  ObBackupIndexLevel index_level = BACKUP_INDEX_LEVEL_TENANT;
  ObBackupIndexStoreParam index_store_param;
  index_store_param.index_level_ = index_level;
  index_store_param.tenant_id_ = param_.tenant_id_;
  index_store_param.backup_set_id_ = param_.backup_set_desc_.backup_set_id_;
  index_store_param.ls_id_ = param_.ls_id_;
  index_store_param.is_tenant_level_ = true;
  index_store_param.backup_data_type_ = backup_data_type;
  int64_t retry_id = 0;
  if (meta_index_store_.is_inited()) {
    // do nothing
  } else if (OB_FAIL(get_tenant_meta_index_turn_id_(index_store_param.turn_id_))) {
    LOG_WARN("failed to find meta index turn id", K(ret), K(backup_data_type));
  } else if (OB_FAIL(get_tenant_meta_index_retry_id_(backup_data_type, index_store_param.turn_id_, retry_id))) {
    LOG_WARN("failed to find meta index retry id", K(ret), K(backup_data_type));
  } else if (FALSE_IT(index_store_param.retry_id_ = retry_id)) {
    // assign
  } else if (OB_FAIL(meta_index_store_.init(mode,
                                            index_store_param,
                                            param_.backup_dest_,
                                            param_.backup_set_desc_,
                                            false/*is_sec_meta*/,
                                            *index_kv_cache_))) {
    // This is the meta index store; the message previously said "macro".
    LOG_WARN("failed to init meta index store", K(ret), K_(param));
  }
  return ret;
}
// Looks up the backup set task of this job/tenant through sql_proxy_ and
// reports its minor_turn_id_ as the turn id of the tenant meta index.
//   turn_id: output; only written when the lookup succeeds.
// Returns the lookup error, or OB_SUCCESS. sql_proxy_ is dereferenced without
// a null check here.
int ObBackupTabletChecker::get_tenant_meta_index_turn_id_(int64_t &turn_id)
{
  int ret = OB_SUCCESS;
  ObBackupSetTaskAttr task_attr;
  if (OB_SUCC(share::ObBackupTaskOperator::get_backup_task(
          *sql_proxy_, param_.job_id_, param_.tenant_id_, false, task_attr))) {
    turn_id = task_attr.minor_turn_id_;
  } else {
    LOG_WARN("failed to get backup task", K(ret));
  }
  return ret;
}
// Resolves the largest retry id of the tenant meta index for the given backup
// data type and turn, using ObBackupTenantIndexRetryIDGetter configured for a
// backup-side (not restore), non-macro, non-sec-meta index.
//   retry_id: output; filled by get_max_retry_id.
// Returns the error of the failing getter step, or OB_SUCCESS.
int ObBackupTabletChecker::get_tenant_meta_index_retry_id_(
    const share::ObBackupDataType &backup_data_type, const int64_t turn_id, int64_t &retry_id)
{
  int ret = OB_SUCCESS;
  ObBackupTenantIndexRetryIDGetter getter;
  if (OB_FAIL(getter.init(param_.backup_dest_, param_.backup_set_desc_, backup_data_type, turn_id,
          false/*is_restore*/, false/*is_macro_index*/, false/*is_sec_meta*/))) {
    LOG_WARN("failed to init retry id getter", K(ret), K(turn_id), K_(param));
  }
  if (OB_SUCC(ret) && OB_FAIL(getter.get_max_retry_id(retry_id))) {
    LOG_WARN("failed to get max retry id", K(ret));
  }
  return ret;
}
} // namespace backup
} // namespace oceanbase

View File

@ -181,9 +181,10 @@ private:
};
enum ObBackupProviderItemType {
PROVIDER_ITEM_MACRO_ID = 0,
PROVIDER_ITEM_SSTABLE_META = 1,
PROVIDER_ITEM_TABLET_META = 2,
PROVIDER_ITEM_CHECK = 0,
PROVIDER_ITEM_MACRO_ID = 1,
PROVIDER_ITEM_SSTABLE_META = 2,
PROVIDER_ITEM_TABLET_META = 3,
PROVIDER_ITEM_MAX,
};
@ -301,6 +302,7 @@ private:
const storage::ObITable::TableKey &table_key, const blocksstable::ObSSTable &sstable, int64_t &total_count);
int add_macro_block_id_item_list_(const common::ObTabletID &tablet_id, const storage::ObITable::TableKey &table_key,
const common::ObIArray<ObBackupMacroBlockId> &list, int64_t &added_count);
int add_check_tablet_item_(const common::ObTabletID &tablet_id);
int add_sstable_item_(const common::ObTabletID &tablet_id);
int add_tablet_item_(const common::ObTabletID &tablet_id);
int remove_duplicates_(common::ObIArray<ObBackupProviderItem> &array);
@ -311,12 +313,6 @@ private:
int check_tablet_continuity_(const share::ObLSID &ls_id, const common::ObTabletID &tablet_id,
const storage::ObTabletHandle &tablet_handle);
int check_tx_data_can_explain_user_data_(const storage::ObTabletHandle &tablet_handle, bool &can_explain);
int build_tenant_meta_index_store_(const share::ObBackupDataType &backup_data_type);
int get_tenant_meta_index_turn_id_(int64_t &turn_id);
int get_tenant_meta_index_retry_id_(const share::ObBackupDataType &backup_data_type,
const int64_t turn_id, int64_t &retry_id);
int check_tablet_replica_validity_(const uint64_t tenant_id, const share::ObLSID &ls_id,
const common::ObTabletID &tablet_id, const share::ObBackupDataType &backup_data_type);
int compare_prev_item_(const ObBackupProviderItem &item);
private:
@ -388,6 +384,35 @@ private:
DISALLOW_COPY_AND_ASSIGN(ObBackupMacroBlockTaskMgr);
};
// Validates a tablet before its data is backed up. Two checks are run by
// check_tablet_valid(): a replica validity check for this server's replica,
// and a continuity check that compares the tablet's current merge snapshot
// version with the one recorded in the previously backed-up tablet meta.
// Must be init()-ed before use; copying is disallowed.
class ObBackupTabletChecker final
{
public:
ObBackupTabletChecker();
~ObBackupTabletChecker();
// Copies `param` and remembers the addresses of `sql_proxy` and
// `index_kv_cache`; both must outlive the checker.
int init(const ObLSBackupParam &param, common::ObMySQLProxy &sql_proxy,
ObBackupIndexKVCache &index_kv_cache);
// Runs the replica validity check, then the continuity check; returns the
// first error encountered.
int check_tablet_valid(const uint64_t tenant_id, const share::ObLSID &ls_id,
const common::ObTabletID &tablet_id, const storage::ObTabletHandle &tablet_handle);
private:
// Checks this server's replica of the tablet via ObStorageHAUtils.
int check_tablet_replica_validity_(const uint64_t tenant_id, const share::ObLSID &ls_id,
const common::ObTabletID &tablet_id);
// Compares current vs. previously backed-up merge snapshot version.
int check_tablet_continuity_(const share::ObLSID &ls_id,
const common::ObTabletID &tablet_id, const storage::ObTabletHandle &tablet_handle);
// Initialises meta_index_store_ on first use; a no-op once it is inited.
int build_tenant_meta_index_store_(const share::ObBackupDataType &backup_data_type);
// Turn id of the tenant meta index (minor_turn_id_ of the backup set task).
int get_tenant_meta_index_turn_id_(int64_t &turn_id);
// Largest retry id of the tenant meta index for the given type and turn.
int get_tenant_meta_index_retry_id_(const share::ObBackupDataType &backup_data_type,
const int64_t turn_id, int64_t &retry_id);
private:
bool is_inited_;                          // set to true by a successful init()
ObLSBackupParam param_;                   // copy of the parameters given to init()
common::ObMySQLProxy *sql_proxy_;         // borrowed, not owned
ObBackupIndexKVCache *index_kv_cache_;    // borrowed, not owned
ObBackupMetaIndexStore meta_index_store_; // built on first continuity check
DISALLOW_COPY_AND_ASSIGN(ObBackupTabletChecker);
};
} // namespace backup
} // namespace oceanbase