Vector DDL tasks: use errsim to check for hang bugs

Co-authored-by: JLY2015 <1623359870@qq.com>
This commit is contained in:
wu-xingying
2024-09-02 06:27:57 +00:00
committed by ob-robot
parent 5b30583964
commit 72ccbd990f
4 changed files with 204 additions and 26 deletions

View File

@ -348,6 +348,18 @@ GLOBAL_ERRSIM_POINT_DEF(516, EN_DDL_REPORT_REPLICA_BUILD_STATUS_FAIL, "");
GLOBAL_ERRSIM_POINT_DEF(517, EN_DDL_DIRECT_LOAD_WAIT_TABLE_LOCK_FAIL, "");
GLOBAL_ERRSIM_POINT_DEF(518, EN_DDL_LOBID_CACHE_SIZE_INJECTED, "");
GLOBAL_ERRSIM_POINT_DEF(519, EN_DDL_EXECUTE_FAILED, "");
// vec index related 530-540
// 530-534 are injected by ObDropVecIndexTask:
//   530 check_and_wait_finish, 531 drop_aux_index_table, 532 wait_trans_end,
//   533 drop_lob_meta_row, 534 obtain_snapshot.
GLOBAL_ERRSIM_POINT_DEF(530, EN_VEC_INDEX_DROP_SHARE_TABLE_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(531, EN_VEC_INDEX_DROP_AUX_TABLE_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(532, EN_VEC_INDEX_WAIT_TRANS_END_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(533, EN_VEC_INDEX_DROP_LOB_META_ROW_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(534, EN_VEC_INDEX_OBTAIN_SNAPSHOT_ERR, "");
// 535-540 are injected by ObVecIndexBuildTask:
//   535 prepare, 536 prepare_rowkey_vid_table, 537 prepare_aux_index_tables,
//   538 prepare_vid_rowkey_table, 539 validate_checksum, 540 wait_aux_table_complement.
GLOBAL_ERRSIM_POINT_DEF(535, EN_POST_VEC_INDEX_PREPARE_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(536, EN_POST_VEC_INDEX_PREPARE_ROWKEY_VID_TBL_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(537, EN_POST_VEC_INDEX_PREPARE_DELTA_OR_INDEX_ID_TBL_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(538, EN_POST_VEC_INDEX_PREPARE_VID_ROWKEY_OR_SNAP_TBL_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(539, EN_POST_VEC_INDEX_CHECKSUM_ERR, "");
GLOBAL_ERRSIM_POINT_DEF(540, EN_POST_VEC_INDEX_WAIT_AUX_TBL_COMPLEMENT_ERR, "");
// SQL Optimizer related 551-599
GLOBAL_ERRSIM_POINT_DEF(551, EN_EXPLAIN_GENERATE_PLAN_WITH_OUTLINE, "Used to enable outline validity check for explain query");

View File

@ -157,27 +157,50 @@ int ObDropVecIndexTask::init(const ObDDLTaskRecord &task_record)
// Make sure a snapshot is held on the vec index snapshot-data table, then advance the task.
//
// @param next_task_status  status to move to once a snapshot is held.
// @return OB_SUCCESS on success; OB_NOT_INIT when the task is not initialised; otherwise the
//         error reported by switch_status / ObDDLUtil::obtain_snapshot, or the code injected
//         through EN_VEC_INDEX_OBTAIN_SNAPSHOT_ERR in ERRSIM builds.
//
// Three cases are distinguished, in this order:
//   1. snapshot-data table is not valid: neither a snapshot nor the lob meta row deletion is
//      needed, so the task jumps to DROP_AUX_INDEX_TABLE;
//   2. a snapshot is already held: switch to next_task_status;
//   3. otherwise: delegate to ObDDLUtil::obtain_snapshot.
// The former combined "already held || not valid -> do nothing" branch is intentionally gone:
// it shadowed cases 1 and 2, so state_finished was never set and the status never advanced.
int ObDropVecIndexTask::obtain_snapshot(const share::ObDDLTaskStatus next_task_status)
{
  int ret = OB_SUCCESS;
  bool state_finished = false;
  // Remember the status on entry so it can be restored if this step has to be retried.
  ObDDLTaskStatus old_status = task_status_;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("ObDropVecIndexTask has not been inited", K(ret));
  } else if (!vec_index_snapshot_data_.is_valid()) {
    // do not need snapshot and delete lob meta row(when snapshot table is not built)
    state_finished = true;
    if (OB_FAIL(switch_status(ObDDLTaskStatus::DROP_AUX_INDEX_TABLE, true, ret))) {
      LOG_WARN("fail to switch task status to ObDDLTaskStatus::DROP_AUX_INDEX_TABLE", K(ret));
    }
  } else if (snapshot_version_ > 0 && snapshot_held_) {
    // already hold snapshot, switch to next status
    state_finished = true;
    if (OB_FAIL(switch_status(next_task_status, true, ret))) {
      LOG_WARN("fail to switch task status", K(ret), K(next_task_status));
    }
  } else if (OB_FAIL(ObDDLUtil::obtain_snapshot(next_task_status, vec_index_snapshot_data_.table_id_,
                                                vec_index_snapshot_data_.table_id_, snapshot_version_,
                                                snapshot_held_, this))) {
    LOG_WARN("fail to obtain_snapshot", K(ret), K(snapshot_version_), K(snapshot_held_));
  } else {
    state_finished = true;
  }
#ifdef ERRSIM
  // Inject a failure only on the otherwise-successful path.
  if (OB_SUCC(ret)) {
    ret = OB_E(common::EventTable::EN_VEC_INDEX_OBTAIN_SNAPSHOT_ERR) OB_SUCCESS;
    if (OB_FAIL(ret)) {
      LOG_WARN("[ERRSIM] fail to obtain snapshot", K(ret));
    }
  }
#endif
  if (state_finished && OB_SUCC(ret)) {
    LOG_INFO("success to obtain_snapshot", K(ret));
  } else if (next_task_status == task_status_) { // resume old task status and retry
    // NOTE(review): only a move to next_task_status is rolled back here; a move to
    // DROP_AUX_INDEX_TABLE made above is left in place - confirm this is intended.
    if (OB_FAIL(switch_status(old_status, true, ret))) {
      LOG_WARN("fail to switch status", K(ret), K(old_status), K(task_status_));
    } else {
      LOG_INFO("resume obtain_snapshot success", K(ret), K(old_status), K(task_status_), K(task_id_));
    }
  }
  return ret;
}
// Return the value of delte_lob_meta_request_time_, read while a TCRLockGuard on lock_ is held.
int64_t ObDropVecIndexTask::get_build_replica_request_time()
{
  TCRLockGuard lock_guard(lock_);
  const int64_t request_time = delte_lob_meta_request_time_;
  return request_time;
}
int ObDropVecIndexTask::drop_lob_meta_row(const ObDDLTaskStatus next_task_status)
{
int ret = OB_SUCCESS;
@ -201,12 +224,58 @@ int ObDropVecIndexTask::drop_lob_meta_row(const ObDDLTaskStatus next_task_status
}
if (is_build_replica_end) {
ret = OB_SUCC(ret) ? delte_lob_meta_job_ret_code_ : ret;
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_VEC_INDEX_DROP_LOB_META_ROW_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] fail to drop lob meta row", K(ret));
}
}
#endif
if (OB_FAIL(ret)) {
LOG_WARN("fail in delete lob meta row", K(ret));
} else if (OB_FAIL(finish())) {
LOG_WARN("fail in release snapshot", K(ret));
} else if (OB_FAIL(switch_status(next_task_status, true/*enable_flt*/, ret))) {
LOG_WARN("fail to swith task status", K(ret));
LOG_WARN("fail to switch task status", K(ret), K(next_task_status));
} else {
LOG_INFO("drop_lob_meta_row success", K(ret));
}
}
return ret;
}
int ObDropVecIndexTask::wait_trans_end(ObDDLTaskStatus next_task_status)
{
int ret = OB_SUCCESS;
bool state_finished = false;
ObDDLTaskStatus old_status = task_status_;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", K(ret));
} else if (ObDDLTaskStatus::WAIT_TRANS_END != task_status_) {
ret = OB_STATE_NOT_MATCH;
LOG_WARN("task status not match", K(ret), K(task_status_));
} else if (OB_FAIL(ObDDLTask::wait_trans_end(wait_trans_ctx_, next_task_status))) {
LOG_WARN("fail to wait trans end", K(ret));
} else {
state_finished = true;
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_VEC_INDEX_WAIT_TRANS_END_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] fail to wait trans end", K(ret));
}
}
#endif
if (state_finished && OB_SUCC(ret)) {
LOG_INFO("success to wait trans end", K(ret));
} else if (next_task_status == task_status_) { // resume old task status and retry
if (OB_FAIL(switch_status(old_status, true, ret))) {
LOG_WARN("fail to switch status", K(ret), K(old_status), K(task_status_));
} else {
LOG_INFO("resume wait_trans_end old status success", K(ret), K(old_status), K(task_status_), K(task_id_));
}
}
return ret;
@ -232,7 +301,7 @@ int ObDropVecIndexTask::process()
}
break;
case ObDDLTaskStatus::WAIT_TRANS_END:
if (OB_FAIL(wait_trans_end(wait_trans_ctx_, ObDDLTaskStatus::OBTAIN_SNAPSHOT))) {
if (OB_FAIL(wait_trans_end(ObDDLTaskStatus::OBTAIN_SNAPSHOT))) {
LOG_WARN("fail to wait trans end", K(ret));
}
break;
@ -468,13 +537,22 @@ int ObDropVecIndexTask::check_switch_succ()
// First step of the drop-vec-index task: validate the task state and advance to new_status.
//
// @param new_status  status to switch to when the task is initialised and currently in PREPARE.
// @return OB_NOT_INIT when the task is not initialised, OB_STATE_NOT_MATCH when the current
//         status is not PREPARE (the status is left untouched in both cases); otherwise the
//         result of switch_status.
//
// The status switch is performed only after validation succeeded. The former unconditional
// switch_status call overwrote ret and advanced the task even when validation had failed.
int ObDropVecIndexTask::prepare(const share::ObDDLTaskStatus &new_status)
{
  int ret = OB_SUCCESS;
  bool state_finished = false;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", K(ret));
  } else if (ObDDLTaskStatus::PREPARE != task_status_) {
    ret = OB_STATE_NOT_MATCH;
    LOG_WARN("task status not match", K(ret), K(task_status_));
  } else {
    state_finished = true;
  }
  if (state_finished) {
    if (OB_FAIL(switch_status(new_status, true, ret))) {
      LOG_WARN("switch status failed", K(ret), K(new_status), K(task_status_));
    } else {
      LOG_INFO("prepare success", K(ret), K(parent_task_id_), K(task_id_), K(*this));
    }
  }
  return ret;
}
@ -506,12 +584,22 @@ int ObDropVecIndexTask::drop_aux_index_table(const share::ObDDLTaskStatus &new_s
} else if (OB_FAIL(wait_none_share_index_child_task_finish(has_finished))) {
LOG_WARN("fail to wait vec none share child task finish", K(ret));
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_VEC_INDEX_DROP_AUX_TABLE_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
has_finished = false;
LOG_WARN("[ERRSIM] fail to drop aux index table", K(ret));
}
}
#endif
if (has_finished) {
// overwrite return code
if (OB_FAIL(switch_status(new_status, true/*enable_flt*/, ret))) {
LOG_WARN("fail to switch status", K(ret), K(new_status));
} else {
vec_index_snapshot_data_.table_id_ = OB_INVALID_ID;
LOG_INFO("drop_aux_index_table success", K(ret));
}
}
return ret;
@ -530,10 +618,21 @@ int ObDropVecIndexTask::check_and_wait_finish(const share::ObDDLTaskStatus &new_
} else if (OB_FAIL(wait_share_index_child_task_finish(has_finished))) {
LOG_WARN("fail to wait share index child task finish", K(ret));
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_VEC_INDEX_DROP_SHARE_TABLE_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
has_finished = false;
LOG_WARN("[ERRSIM] fail to drop share index table", K(ret));
}
}
#endif
if (has_finished) {
// overwrite return code
if (OB_FAIL(switch_status(new_status, true/*enable_flt*/, ret))) {
LOG_WARN("fail to switch status", K(ret), K(new_status));
} else {
LOG_INFO("check_and_wait_finish success", K(ret));
}
}
return ret;
@ -843,7 +942,7 @@ int ObDropVecIndexTask::send_build_single_replica_request()
} else if (OB_FAIL(ObDDLUtil::get_tablets(dst_tenant_id_, vec_index_snapshot_data_.table_id_, param.dest_tablet_ids_))) {
LOG_WARN("fail to get tablets", K(ret), K(tenant_id_), K(target_object_id_));
} else if (OB_FAIL(replica_builder_.build(param))) {
LOG_WARN("fail to send build single replica", K(ret));
LOG_WARN("fail to send build single replica", K(ret), K(param));
} else {
del_lob_meta_row_task_submitted_ = true;
delte_lob_meta_request_time_ = ObTimeUtility::current_time();

View File

@ -53,8 +53,6 @@ public:
int64_t &pos) override;
virtual int64_t get_serialize_param_size() const override;
virtual int on_child_task_finish(const uint64_t child_task_key, const int ret_code) override { return OB_SUCCESS; }
int64_t get_build_replica_request_time();
int reap_old_replica_build_task(bool &need_exec_new_inner_sql);
int update_drop_lob_meta_row_job_status(const common::ObTabletID &tablet_id,
const int64_t snapshot_version,
const int64_t execution_id,
@ -72,6 +70,7 @@ private:
int prepare(const share::ObDDLTaskStatus &status);
int check_and_wait_finish(const share::ObDDLTaskStatus &status);
int release_snapshot(const int64_t snapshot_version);
int wait_trans_end(const ObDDLTaskStatus next_task_status);
int obtain_snapshot(const share::ObDDLTaskStatus next_task_status);
int drop_aux_index_table(const share::ObDDLTaskStatus &status);
int drop_lob_meta_row(const share::ObDDLTaskStatus next_task_status);

View File

@ -329,7 +329,7 @@ int ObVecIndexBuildTask::check_health()
LOG_WARN("refresh status failed", K(ret));
} else if (OB_FAIL(refresh_schema_version())) {
LOG_WARN("refresh schema version failed", K(ret));
} else if (status == ObDDLTaskStatus::FAIL && drop_index_task_submitted_) {
} else if (status == ObDDLTaskStatus::FAIL) {
/*already failed, and have submitted drop index task, do nothing*/
} else {
ObMultiVersionSchemaService &schema_service = root_service_->get_schema_service();
@ -367,7 +367,8 @@ int ObVecIndexBuildTask::check_health()
ret = check_errsim_error();
}
#endif
if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) {
if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)
&& static_cast<ObDDLTaskStatus>(task_status_) != ObDDLTaskStatus::FAIL) {
const ObDDLTaskStatus old_status = static_cast<ObDDLTaskStatus>(task_status_);
const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL;
(void)switch_status(new_status, false, ret);
@ -483,7 +484,7 @@ int ObVecIndexBuildTask::check_aux_table_schemas_exist(bool &is_all_exist)
K(index_id_exist), K(index_snapshot_data_exist), K(status),
K(rowkey_vid_aux_table_id_), K(vid_rowkey_aux_table_id_),
K(delta_buffer_table_id_), K(index_id_table_id_),
K(index_snapshot_data_table_id_));
K(index_snapshot_data_table_id_), K(drop_index_task_submitted_));
}
}
return ret;
@ -553,7 +554,14 @@ int ObVecIndexBuildTask::prepare()
} else {
state_finished = true;
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_POST_VEC_INDEX_PREPARE_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] build vec index fail to prepare", K(ret));
}
}
#endif
if (state_finished && OB_SUCC(ret)) {
ObDDLTaskStatus next_status;
if (OB_FAIL(get_next_status(next_status))) {
@ -562,6 +570,10 @@ int ObVecIndexBuildTask::prepare()
(void)switch_status(next_status, true, ret);
LOG_INFO("prepare finished", K(ret), K(parent_task_id_), K(task_id_), K(*this));
}
} else if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) {
(void)switch_status(ObDDLTaskStatus::FAIL, false, ret); // allow clean up
LOG_INFO("prepare failed", K(ret), K(parent_task_id_), K(task_id_), K(*this));
ret = OB_SUCCESS;
}
return ret;
}
@ -618,6 +630,14 @@ int ObVecIndexBuildTask::prepare_rowkey_vid_table()
if (OB_SUCC(ret) && (rowkey_vid_task_submitted_ || is_rebuild_index_)) {
state_finished = true;
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_POST_VEC_INDEX_PREPARE_ROWKEY_VID_TBL_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] build vec index fail to prepare_rowkey_vid_table", K(ret));
}
}
#endif
if (state_finished && OB_SUCC(ret)) {
ObDDLTaskStatus next_status;
if (OB_FAIL(get_next_status(next_status))) {
@ -627,6 +647,10 @@ int ObVecIndexBuildTask::prepare_rowkey_vid_table()
LOG_INFO("generate schema finished", K(ret), K(parent_task_id_), K(task_id_),
K(*this));
}
} else if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) {
(void)switch_status(ObDDLTaskStatus::FAIL, false, ret); // allow clean up
LOG_INFO("prepare failed", K(ret), K(parent_task_id_), K(task_id_), K(*this));
ret = OB_SUCCESS;
}
return ret;
}
@ -659,6 +683,14 @@ int ObVecIndexBuildTask::prepare_aux_index_tables()
if (OB_SUCC(ret) && delta_buffer_task_submitted_ && index_id_task_submitted_) {
state_finished = true;
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_POST_VEC_INDEX_PREPARE_DELTA_OR_INDEX_ID_TBL_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] build vec index fail to prepare_delta_or_index_id_table", K(ret));
}
}
#endif
if (state_finished && OB_SUCC(ret)) {
ObDDLTaskStatus next_status;
if (OB_FAIL(get_next_status(next_status))) {
@ -668,6 +700,10 @@ int ObVecIndexBuildTask::prepare_aux_index_tables()
LOG_INFO("generate schema finished", K(ret), K(parent_task_id_), K(task_id_),
K(*this));
}
} else if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) {
(void)switch_status(ObDDLTaskStatus::FAIL, false, ret); // allow clean up
LOG_INFO("prepare failed", K(ret), K(parent_task_id_), K(task_id_), K(*this));
ret = OB_SUCCESS;
}
return ret;
}
@ -734,6 +770,14 @@ int ObVecIndexBuildTask::prepare_vid_rowkey_table()
(vid_rowkey_task_submitted_ || is_rebuild_index_) && index_snapshot_data_task_submitted_) {
state_finished = true;
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_POST_VEC_INDEX_PREPARE_VID_ROWKEY_OR_SNAP_TBL_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] build vec index fail to prepare_vid_rowkey_or_snapshot_table", K(ret));
}
}
#endif
if (state_finished && OB_SUCC(ret)) {
ObDDLTaskStatus next_status;
if (OB_FAIL(get_next_status(next_status))) {
@ -743,6 +787,10 @@ int ObVecIndexBuildTask::prepare_vid_rowkey_table()
LOG_INFO("generate schema finished", K(ret), K(parent_task_id_), K(task_id_),
K(*this));
}
} else if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) {
(void)switch_status(ObDDLTaskStatus::FAIL, false, ret); // allow clean up
LOG_INFO("prepare failed", K(ret), K(parent_task_id_), K(task_id_), K(*this));
ret = OB_SUCCESS;
}
return ret;
}
@ -941,9 +989,17 @@ int ObVecIndexBuildTask::wait_aux_table_complement()
state_finished = true;
}
}
if (state_finished) {
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_POST_VEC_INDEX_WAIT_AUX_TBL_COMPLEMENT_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] build vec index fail to complement aux table data", K(ret));
}
}
#endif
if (state_finished || OB_FAIL(ret)) {
ObDDLTaskStatus next_status;
if (child_task_failed) {
if (child_task_failed || OB_FAIL(ret)) {
if (!ObIDDLTask::in_ddl_retry_white_list(ret)) {
const ObDDLTaskStatus old_status = static_cast<ObDDLTaskStatus>(task_status_);
const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL;
@ -1708,7 +1764,7 @@ int ObVecIndexBuildTask::submit_drop_vec_index_task()
LOG_WARN("check table exist failed", K(ret), K_(tenant_id), K(index_table_id));
} else if (!is_index_exist) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("vec index aux schema is nullptr, fail to roll back", K(ret));
LOG_WARN("vec index aux schema is nullptr, fail to roll back", K(ret), K(index_table_id), K(delta_buffer_table_id_), K(index_table_id_));
} else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, index_table_id, index_table_schema))) {
LOG_WARN("get index schema failed", K(ret), K(tenant_id_), K(index_table_id));
} else if (OB_ISNULL(index_table_schema)) {
@ -1824,6 +1880,14 @@ int ObVecIndexBuildTask::validate_checksum()
state_finished = true;
}
}
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = OB_E(common::EventTable::EN_POST_VEC_INDEX_CHECKSUM_ERR) OB_SUCCESS;
if (OB_FAIL(ret)) {
LOG_WARN("[ERRSIM] build vec index fail to checksum", K(ret));
}
}
#endif
if (state_finished && OB_SUCC(ret)) {
ObDDLTaskStatus next_status;
if (OB_FAIL(get_next_status(next_status))) {
@ -1833,6 +1897,10 @@ int ObVecIndexBuildTask::validate_checksum()
LOG_INFO("validate checksum finished", K(ret), K(parent_task_id_),
K(task_id_), K(*this));
}
} else if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) {
(void)switch_status(ObDDLTaskStatus::FAIL, false, ret); // allow clean up
LOG_INFO("prepare failed", K(ret), K(parent_task_id_), K(task_id_), K(*this));
ret = OB_SUCCESS;
}
return ret;
}