Fix migration deleted local tablet when src tablet does not exist

This commit is contained in:
godyangfight
2023-01-28 18:47:40 +08:00
committed by ob-robot
parent ff1e2ec0f3
commit 1a949cec1d
9 changed files with 209 additions and 119 deletions

View File

@ -443,6 +443,7 @@ class ObString;
ACT(ARCHIVE_SENDER_HANDLE_TASK_DONE,)\
ACT(BEFORE_SET_LS_MEMBER_LIST,)\
ACT(BEFORE_MIGRATION_FETCH_TABLET_INFO,)\
ACT(BEFORE_BUILD_TABLET_GROUP_INFO,)\
ACT(MAX_DEBUG_SYNC_POINT,)
DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF);

View File

@ -946,9 +946,7 @@ int ObStartMigrationTask::process()
LOG_WARN("failed to choose src", K(ret), K(*ctx_));
} else if (OB_FAIL(update_ls_())) {
LOG_WARN("failed to update_ls_", K(ret), K(*ctx_));
} else if (OB_FAIL(try_remove_unneeded_tablets_())) {
LOG_WARN("failed to try remove unneeded tablets", K(ret), KPC(ctx_));
} else if (OB_FAIL(deal_local_restore_ls_(need_generate_dag))) {
} else if (OB_FAIL(deal_local_restore_ls_(need_generate_dag))) {
LOG_WARN("failed to deal local restore ls", K(ret), KPC(ctx_));
} else if (!need_generate_dag) {
//do nothing
@ -1407,84 +1405,6 @@ int ObStartMigrationTask::deal_local_restore_ls_(bool &need_generate_dag)
return ret;
}
// Removes from the local log stream every tablet whose id is listed in neither
// ctx_->sys_tablet_id_array_ nor ctx_->data_tablet_id_array_.
//
// Steps:
//   1. Fetch the log stream identified by ctx_->arg_.ls_id_.
//   2. Put all ids from the two ctx_ arrays into a hash set.
//   3. Walk the local tablet ids through an ObHALSTabletIDIterator and collect
//      every id that is missing from the set.
//   4. Hand the collected ids to ls->remove_tablets() if there are any.
//
// Returns OB_SUCCESS when nothing failed, OB_NOT_INIT when the task is not
// initialized, OB_ERR_UNEXPECTED when the log stream pointer is NULL or the
// hash lookup reports OB_SUCCESS, otherwise the first error code produced by
// one of the called helpers.
int ObStartMigrationTask::try_remove_unneeded_tablets_()
{
int ret = OB_SUCCESS;
ObLSHandle ls_handle;
ObLS *ls = nullptr;
// Ids that are listed in ctx_; a local tablet found in this set is kept.
hash::ObHashSet<ObTabletID> tablet_id_set;
int64_t bucket_num = 0;
// Local tablet ids that are absent from tablet_id_set and will be removed.
ObArray<ObTabletID> tablet_id_array;
// Despite the name this acts as a lower bound: bucket_num is the larger of
// this value and the number of ids listed in ctx_ (see std::max below).
const int64_t MAX_BUCKET_NUM = 1024;
const bool need_initial_state = true;
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("start migration task do not init", K(ret));
} else if (OB_FAIL(ObStorageHADagUtils::get_ls(ctx_->arg_.ls_id_, ls_handle))) {
LOG_WARN("failed to get ls", K(ret), KPC(ctx_));
} else if (OB_ISNULL(ls = ls_handle.get_ls())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls should not be NULL", K(ret), KPC(ctx_));
// FALSE_IT is used only to compute bucket_num inside the else-if chain;
// the branch body is intentionally empty.
} else if (FALSE_IT(bucket_num = std::max(MAX_BUCKET_NUM,
ctx_->sys_tablet_id_array_.count() + ctx_->data_tablet_id_array_.count()))) {
} else if (OB_FAIL(tablet_id_set.create(bucket_num))) {
LOG_WARN("failed to create tablet id set", K(ret), KPC(ctx_));
} else {
// Register the sys tablet ids listed in ctx_.
for (int64_t i = 0; OB_SUCC(ret) && i < ctx_->sys_tablet_id_array_.count(); ++i) {
const ObTabletID &tablet_id = ctx_->sys_tablet_id_array_.at(i);
if (OB_FAIL(tablet_id_set.set_refactored(tablet_id))) {
LOG_WARN("failed to set tablet into set", K(ret), K(tablet_id), KPC(ctx_));
}
}
// Register the data tablet ids listed in ctx_.
for (int64_t i = 0; OB_SUCC(ret) && i < ctx_->data_tablet_id_array_.count(); ++i) {
const ObTabletID &tablet_id = ctx_->data_tablet_id_array_.at(i);
if (OB_FAIL(tablet_id_set.set_refactored(tablet_id))) {
LOG_WARN("failed to set tablet into set", K(ret), K(tablet_id), KPC(ctx_));
}
}
if (OB_SUCC(ret)) {
ObHALSTabletIDIterator iter(ls->get_ls_id(), need_initial_state);
ObTabletID tablet_id;
if (OB_FAIL(ls->get_tablet_svr()->build_tablet_iter(iter))) {
LOG_WARN("failed to build tablet iter", K(ret), KPC(ctx_));
} else {
// Scan the local tablet ids until the iterator is exhausted or an error occurs.
while (OB_SUCC(ret)) {
if (OB_FAIL(iter.get_next_tablet_id(tablet_id))) {
if (OB_ITER_END == ret) {
// End of iteration is the normal exit, not an error.
ret = OB_SUCCESS;
break;
} else {
LOG_WARN("failed to get tablet id", K(ret));
}
} else {
int32_t hash_ret = tablet_id_set.exist_refactored(tablet_id);
if (OB_HASH_EXIST == hash_ret) {
//do nothing
} else if (OB_HASH_NOT_EXIST == hash_ret) {
// Local tablet is not listed in ctx_: schedule it for removal.
if (OB_FAIL(tablet_id_array.push_back(tablet_id))) {
LOG_WARN("failed to push tablet id into array", K(ret), K(tablet_id));
}
} else {
// Any other lookup result is an error; OB_SUCCESS is mapped to
// OB_ERR_UNEXPECTED so that the loop still terminates with a failure.
ret = hash_ret == OB_SUCCESS ? OB_ERR_UNEXPECTED : hash_ret;
}
}
}
if (OB_FAIL(ret)) {
} else if (tablet_id_array.empty()) {
//do nothing
} else if (OB_FAIL(ls->remove_tablets(tablet_id_array))) {
LOG_WARN("failed to remove tablets", K(ret), KPC(ls), K(tablet_id_array));
}
}
}
}
return ret;
}
int ObStartMigrationTask::record_server_event_()
{
int ret = OB_SUCCESS;
@ -2956,6 +2876,7 @@ int ObDataTabletsMigrationTask::process()
{
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
bool is_ls_online_success = false;
LOG_INFO("start do data tablets migration task", K(ret), KPC(ctx_));
if (!is_inited_) {
@ -2965,16 +2886,26 @@ int ObDataTabletsMigrationTask::process()
//do nothing
} else if (OB_FAIL(create_or_update_tablets_())) {
LOG_WARN("failed to create or update tablets", K(ret), K(*ctx_));
} else if (OB_FAIL(try_remove_unneeded_tablets_())) {
LOG_WARN("failed to try remove unneeded tablets", K(ret), KPC(ctx_));
} else if (OB_FAIL(ls_online_())) {
LOG_WARN("failed to start realy log", K(ret), K(*ctx_));
} else if (FALSE_IT(is_ls_online_success = true)) {
} else if (OB_FAIL(build_tablet_group_info_())) {
LOG_WARN("failed to build tablet group info", K(ret), KPC(ctx_));
} else {
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(EventTable::EN_DATA_TABLETS_MIGRATION_TASK_FAILED) OB_SUCCESS;
if (OB_FAIL(ret)) {
STORAGE_LOG(ERROR, "fake EN_DATA_TABLETS_MIGRATION_TASK_FAILED", K(ret));
int32_t retry_count = 0;
if (OB_SUCCESS != (tmp_ret = ctx_->get_retry_count(retry_count))) {
LOG_WARN("failed to get retry count", K(tmp_ret), K(retry_count));
} else if (retry_count > 0) {
ret = OB_SUCCESS;
} else {
ret = OB_E(EventTable::EN_DATA_TABLETS_MIGRATION_TASK_FAILED) OB_SUCCESS;
if (OB_FAIL(ret)) {
STORAGE_LOG(ERROR, "fake EN_DATA_TABLETS_MIGRATION_TASK_FAILED", K(ret));
}
}
}
#endif
@ -2990,7 +2921,8 @@ int ObDataTabletsMigrationTask::process()
if (OB_FAIL(ret)) {
int tmp_ret = OB_SUCCESS;
if (OB_SUCCESS != (tmp_ret = ObStorageHADagUtils::deal_with_fo(ret, this->get_dag()))) {
const bool allow_retry = !is_ls_online_success;
if (OB_SUCCESS != (tmp_ret = ObStorageHADagUtils::deal_with_fo(ret, this->get_dag(), allow_retry))) {
LOG_WARN("failed to deal with fo", K(ret), K(tmp_ret), K(*ctx_));
}
}
@ -3075,6 +3007,8 @@ int ObDataTabletsMigrationTask::build_tablet_group_info_()
ObArray<ObTabletID> tablet_id_array;
hash::ObHashSet<ObTabletID> remove_tablet_set;
DEBUG_SYNC(BEFORE_BUILD_TABLET_GROUP_INFO);
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("data tablets migration task do not init", K(ret));
@ -3240,6 +3174,89 @@ int ObDataTabletsMigrationTask::generate_tablet_group_dag_()
return ret;
}
// Removes local tablets that must not survive the migration.
//
// The removal list is built from two sources:
//   1. the tablet ids recorded by ha_tablets_builder_ as deleted on the source
//      (fetched through get_src_deleted_tablet_list()), and
//   2. every local tablet id that is listed in neither
//      ctx_->sys_tablet_id_array_ nor ctx_->data_tablet_id_array_.
// If the resulting list is not empty it is handed to ls->remove_tablets().
//
// Returns OB_SUCCESS when nothing failed, OB_NOT_INIT when the task is not
// initialized, OB_ERR_UNEXPECTED when the log stream pointer is NULL or the
// hash lookup reports OB_SUCCESS, otherwise the first error code produced by
// one of the called helpers.
int ObDataTabletsMigrationTask::try_remove_unneeded_tablets_()
{
  int ret = OB_SUCCESS;
  ObLSHandle ls_handle;
  ObLS *ls = nullptr;
  // Ids that are listed in ctx_; a local tablet found in this set is kept.
  hash::ObHashSet<ObTabletID> tablet_id_set;
  int64_t bucket_num = 0;
  // Tablet ids to remove: source-deleted ids plus local ids missing from the set.
  ObArray<ObTabletID> tablet_id_array;
  // Despite the name this acts as a lower bound: bucket_num is the larger of
  // this value and the number of ids listed in ctx_ (see std::max below).
  const int64_t MAX_BUCKET_NUM = 1024;
  const bool need_initial_state = true;

  if (!is_inited_) {
    ret = OB_NOT_INIT;
    // Message names this task, consistent with the other methods of the class.
    LOG_WARN("data tablets migration task do not init", K(ret));
  } else if (OB_FAIL(ObStorageHADagUtils::get_ls(ctx_->arg_.ls_id_, ls_handle))) {
    LOG_WARN("failed to get ls", K(ret), KPC(ctx_));
  } else if (OB_ISNULL(ls = ls_handle.get_ls())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls should not be NULL", K(ret), KPC(ctx_));
  } else if (FALSE_IT(bucket_num = std::max(MAX_BUCKET_NUM,
      ctx_->sys_tablet_id_array_.count() + ctx_->data_tablet_id_array_.count()))) {
    // FALSE_IT only computes bucket_num inside the else-if chain.
  } else if (OB_FAIL(tablet_id_set.create(bucket_num))) {
    LOG_WARN("failed to create tablet id set", K(ret), KPC(ctx_));
  } else if (OB_FAIL(ha_tablets_builder_.get_src_deleted_tablet_list(tablet_id_array))) {
    LOG_WARN("failed to get src deleted tablet list", K(ret), KPC(ctx_));
  } else {
    // Register the sys tablet ids listed in ctx_.
    for (int64_t i = 0; OB_SUCC(ret) && i < ctx_->sys_tablet_id_array_.count(); ++i) {
      const ObTabletID &tablet_id = ctx_->sys_tablet_id_array_.at(i);
      if (OB_FAIL(tablet_id_set.set_refactored(tablet_id))) {
        LOG_WARN("failed to set tablet into set", K(ret), K(tablet_id), KPC(ctx_));
      }
    }

    // Register the data tablet ids listed in ctx_.
    for (int64_t i = 0; OB_SUCC(ret) && i < ctx_->data_tablet_id_array_.count(); ++i) {
      const ObTabletID &tablet_id = ctx_->data_tablet_id_array_.at(i);
      if (OB_FAIL(tablet_id_set.set_refactored(tablet_id))) {
        LOG_WARN("failed to set tablet into set", K(ret), K(tablet_id), KPC(ctx_));
      }
    }

    if (OB_SUCC(ret)) {
      ObHALSTabletIDIterator iter(ls->get_ls_id(), need_initial_state);
      ObTabletID tablet_id;
      if (OB_FAIL(ls->get_tablet_svr()->build_tablet_iter(iter))) {
        LOG_WARN("failed to build tablet iter", K(ret), KPC(ctx_));
      } else {
        // Scan the local tablet ids until the iterator is exhausted or an error occurs.
        while (OB_SUCC(ret)) {
          if (OB_FAIL(iter.get_next_tablet_id(tablet_id))) {
            if (OB_ITER_END == ret) {
              // End of iteration is the normal exit, not an error.
              ret = OB_SUCCESS;
              break;
            } else {
              LOG_WARN("failed to get tablet id", K(ret));
            }
          } else {
            int32_t hash_ret = tablet_id_set.exist_refactored(tablet_id);
            if (OB_HASH_EXIST == hash_ret) {
              //do nothing
            } else if (OB_HASH_NOT_EXIST == hash_ret) {
              // Local tablet is not listed in ctx_: schedule it for removal.
              if (OB_FAIL(tablet_id_array.push_back(tablet_id))) {
                LOG_WARN("failed to push tablet id into array", K(ret), K(tablet_id));
              }
            } else {
              // Any other lookup result is an error; OB_SUCCESS is mapped to
              // OB_ERR_UNEXPECTED so that the loop still terminates with a failure.
              ret = hash_ret == OB_SUCCESS ? OB_ERR_UNEXPECTED : hash_ret;
              LOG_WARN("failed to check tablet id exist in set", K(ret), K(hash_ret), K(tablet_id));
            }
          }
        }

        if (OB_FAIL(ret)) {
        } else if (tablet_id_array.empty()) {
          //do nothing
        } else if (OB_FAIL(ls->remove_tablets(tablet_id_array))) {
          LOG_WARN("failed to remove tablets", K(ret), KPC(ls), K(tablet_id_array));
        } else {
          FLOG_INFO("succeed remove tablets", K(tablet_id_array));
        }
      }
    }
  }
  return ret;
}
int ObDataTabletsMigrationTask::record_server_event_()
{
int ret = OB_SUCCESS;

View File

@ -242,8 +242,6 @@ private:
int get_tablet_id_array_(common::ObIArray<common::ObTabletID> &tablet_id_array);
int check_ls_need_copy_data_(bool &need_copy);
int deal_local_restore_ls_(bool &need_generate_dag);
int try_remove_unneeded_tablets_();
int record_server_event_();
private:
@ -418,6 +416,7 @@ private:
common::ObIArray<ObTabletGroupMigrationDag *> &tablet_group_dag_array);
int build_tablet_group_info_();
int generate_tablet_group_dag_();
int try_remove_unneeded_tablets_();
int record_server_event_();
private:

View File

@ -327,7 +327,8 @@ int ObStorageHADag::check_is_in_retry(bool &is_in_retry)
/******************ObStorageHADagUtils*********************/
int ObStorageHADagUtils::deal_with_fo(
const int err,
share::ObIDag *dag)
share::ObIDag *dag,
const bool allow_retry)
{
int ret = OB_SUCCESS;
ObStorageHADag *ha_dag = nullptr;
@ -342,7 +343,7 @@ int ObStorageHADagUtils::deal_with_fo(
} else if (OB_ISNULL(ha_dag = static_cast<ObStorageHADag *>(dag))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ha dag should not be NULL", K(ret), KPC(ha_dag));
} else if (OB_FAIL(ha_dag->set_result(err))) {
} else if (OB_FAIL(ha_dag->set_result(err, allow_retry))) {
LOG_WARN("failed to set result", K(ret), K(err));
}
return ret;

View File

@ -162,7 +162,8 @@ class ObStorageHADagUtils
public:
static int deal_with_fo(
const int err,
share::ObIDag *dag);
share::ObIDag *dag,
const bool allow_retry = true);
static int get_ls(
const share::ObLSID &ls_id,
ObLSHandle &ls_handle);

View File

@ -966,38 +966,82 @@ int ObCopyTabletInfoObProducer::get_next_tablet_info(obrpc::ObCopyTabletInfo &ta
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("copy tablet info ob producer do not init", K(ret));
} else if (tablet_index_ == tablet_id_array_.count()) {
ret = OB_ITER_END;
} else {
while (OB_SUCC(ret)) {
if (tablet_index_ == tablet_id_array_.count()) {
ret = OB_ITER_END;
} else {
const ObTabletID &tablet_id = tablet_id_array_.at(tablet_index_);
if (OB_ISNULL(ls = ls_handle_.get_ls())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("log stream should not be NULL", K(ret), KP(ls));
} else if (OB_FAIL(ls->get_tablet(tablet_id, tablet_handle, timeout_us))) {
if (OB_TABLET_NOT_EXIST == ret) {
LOG_INFO("tablet in src not exist", K(ls->get_ls_id()), K(tablet_id));
tablet_index_++;
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id), K(tablet_handle));
}
} else if (OB_ISNULL(tablet = tablet_handle.get_obj())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet should not be NULL", K(ret), KP(tablet), K(tablet_id));
} else if (OB_FAIL(tablet->build_migration_tablet_param(tablet_info.param_))) {
LOG_WARN("failed to build migration tablet param", K(ret), K(tablet_id));
} else if (OB_FAIL(tablet->get_ha_sstable_size(tablet_info.data_size_))) {
LOG_WARN("failed to get sstable size", K(ret), K(tablet_id));
const ObTabletID &tablet_id = tablet_id_array_.at(tablet_index_);
if (OB_ISNULL(ls = ls_handle_.get_ls())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("log stream should not be NULL", K(ret), KP(ls));
} else if (OB_FAIL(ls->get_tablet(tablet_id, tablet_handle, timeout_us))) {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id), K(tablet_handle));
if (OB_TABLET_NOT_EXIST == ret) {
//overwrite ret
if (OB_FAIL(build_deleted_tablet_info_(ls->get_ls_id(), tablet_id, tablet_info))) {
LOG_WARN("failed to build delete tablet info", K(ret), KPC(ls), K(tablet_id));
} else {
tablet_info.tablet_id_ = tablet_id;
tablet_info.status_ = ObCopyTabletStatus::TABLET_EXIST;
tablet_index_++;
LOG_INFO("succeed get copy tablet info", K(tablet_info), K(tablet_index_));
break;
}
} else {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id), K(tablet_handle));
}
} else if (OB_ISNULL(tablet = tablet_handle.get_obj())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet should not be NULL", K(ret), KP(tablet), K(tablet_id));
} else if (OB_FAIL(tablet->build_migration_tablet_param(tablet_info.param_))) {
LOG_WARN("failed to build migration tablet param", K(ret), K(tablet_id));
} else if (OB_FAIL(tablet->get_ha_sstable_size(tablet_info.data_size_))) {
LOG_WARN("failed to get sstable size", K(ret), K(tablet_id));
} else {
tablet_info.tablet_id_ = tablet_id;
tablet_info.status_ = ObCopyTabletStatus::TABLET_EXIST;
tablet_index_++;
LOG_INFO("succeed get copy tablet info", K(tablet_info), K(tablet_index_));
}
}
return ret;
}
int ObCopyTabletInfoObProducer::build_deleted_tablet_info_(
const share::ObLSID &ls_id,
const ObTabletID &tablet_id,
obrpc::ObCopyTabletInfo &tablet_info)
{
int ret = OB_SUCCESS;
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("copy tablet info ob producer do not init", K(ret));
} else {
const ObTabletRestoreStatus::STATUS restore_status = ObTabletRestoreStatus::FULL;
const ObTabletDataStatus::STATUS data_status = ObTabletDataStatus::COMPLETE;
tablet_info.tablet_id_ = tablet_id;
tablet_info.status_ = ObCopyTabletStatus::TABLET_NOT_EXIST;
tablet_info.param_.ls_id_ = ls_id;
tablet_info.param_.tablet_id_ = tablet_id;
tablet_info.param_.data_tablet_id_ = tablet_id;
tablet_info.param_.create_scn_ = ObTabletMeta::INIT_CREATE_SCN;
tablet_info.param_.start_scn_ = ObTabletMeta::INIT_CLOG_CHECKPOINT_SCN;
tablet_info.param_.clog_checkpoint_scn_ = ObTabletMeta::INIT_CLOG_CHECKPOINT_SCN;
tablet_info.param_.compat_mode_ = lib::Worker::get_compatibility_mode();
tablet_info.param_.multi_version_start_ = 0;
tablet_info.param_.snapshot_version_ = 0;
tablet_info.param_.tx_data_.tablet_status_ = ObTabletStatus::DELETED;
tablet_info.param_.tx_data_.tx_id_ = ObTabletCommon::FINAL_TX_ID;
tablet_info.param_.tx_data_.tx_scn_ = SCN::min_scn();
if (OB_FAIL(tablet_info.param_.ha_status_.set_restore_status(restore_status))) {
LOG_WARN("failed to set restore status", K(ret), K(restore_status));
} else if (OB_FAIL(tablet_info.param_.ha_status_.set_data_status(data_status))) {
LOG_WARN("failed to set data status", K(ret), K(data_status));
} else if (OB_FAIL(ObMigrationTabletParam::construct_placeholder_storage_schema_and_medium(
tablet_info.param_.allocator_,
tablet_info.param_.storage_schema_,
tablet_info.param_.medium_info_list_))) {
LOG_WARN("failed to construct placeholder storage schema");
} else if (!tablet_info.param_.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("create tablet param is invalid", K(ret), K(tablet_info));
}
}
return ret;
@ -1829,6 +1873,7 @@ int ObCopySSTableInfoObProducer::get_tablet_meta_(ObMigrationTabletParam &tablet
return ret;
}
//TODO(muwei.ym) This code will not be needed in 4.2
int ObCopySSTableInfoObProducer::fake_deleted_tablet_meta_(
ObMigrationTabletParam &tablet_meta)
{

View File

@ -266,7 +266,11 @@ public:
const common::ObIArray<common::ObTabletID> &tablet_id_array);
int get_next_tablet_info(obrpc::ObCopyTabletInfo &tablet_info);
private:
int build_deleted_tablet_info_(
const share::ObLSID &ls_id,
const ObTabletID &tablet_id,
obrpc::ObCopyTabletInfo &tablet_info);
private:
bool is_inited_;
ObArray<common::ObTabletID> tablet_id_array_;

View File

@ -115,7 +115,8 @@ int ObStorageHATabletsBuilderParam::assign(const ObStorageHATabletsBuilderParam
ObStorageHATabletsBuilder::ObStorageHATabletsBuilder()
: is_inited_(false),
param_(),
tablet_simple_info_map_()
tablet_simple_info_map_(),
deleted_tablet_id_list_()
{
}
@ -174,6 +175,10 @@ int ObStorageHATabletsBuilder::create_or_update_tablets()
} else {
LOG_WARN("failed to fetch tablet info", K(ret));
}
} else if (ObCopyTabletStatus::TABLET_NOT_EXIST == tablet_info.status_) {
if (OB_FAIL(deleted_tablet_id_list_.push_back(tablet_info.tablet_id_))) {
LOG_WARN("failed to push deleted tablet id into array", K(ret), K(tablet_info));
}
} else if (OB_FAIL(create_or_update_tablet_(tablet_info, ls))) {
LOG_WARN("failed to create or update tablet", K(ret), K(tablet_info));
} else {
@ -1156,6 +1161,21 @@ int ObStorageHATabletsBuilder::create_tablet_with_major_sstables_(
return ret;
}
// Copies deleted_tablet_id_list_ into the caller-supplied array.
//
// tablet_id_list is reset first, so it is empty whenever the call fails before
// the copy starts.
//
// Returns OB_SUCCESS on success, OB_NOT_INIT when the builder is not
// initialized, otherwise the error reported by assign().
int ObStorageHATabletsBuilder::get_src_deleted_tablet_list(
    common::ObIArray<common::ObTabletID> &tablet_id_list)
{
  int ret = OB_SUCCESS;
  // Drop whatever the caller passed in before any early exit.
  tablet_id_list.reset();

  if (is_inited_) {
    if (OB_FAIL(tablet_id_list.assign(deleted_tablet_id_list_))) {
      LOG_WARN("failed to assign tablet id list", K(ret), K(deleted_tablet_id_list_));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("storage ha tablets builder do not init", K(ret));
  }

  return ret;
}
/******************ObStorageHATabletTableInfoMgr*********************/
ObStorageHATableInfoMgr::ObStorageHATabletTableInfoMgr::ObStorageHATabletTableInfoMgr()
: is_inited_(false),

View File

@ -73,6 +73,7 @@ public:
int build_tablets_sstable_info();
int update_local_tablets();
const CopyTabletSimpleInfoMap &get_tablets_simple_info_map() { return tablet_simple_info_map_ ; }
int get_src_deleted_tablet_list(common::ObIArray<common::ObTabletID> &tablet_id_list);
private:
int get_tablet_info_reader_(ObICopyTabletInfoReader *&reader);
@ -146,6 +147,7 @@ private:
bool is_inited_;
ObStorageHATabletsBuilderParam param_;
CopyTabletSimpleInfoMap tablet_simple_info_map_;
ObArray<common::ObTabletID> deleted_tablet_id_list_;
DISALLOW_COPY_AND_ASSIGN(ObStorageHATabletsBuilder);
};