[CP] [DAS RETRY] check schema status to determine whether to retry

This commit is contained in:
pe-99y
2024-04-01 09:21:06 +00:00
committed by ob-robot
parent 8658717410
commit b17d28d52d
3 changed files with 38 additions and 51 deletions

View File

@ -1067,10 +1067,10 @@ int ObQueryRetryCtrl::init()
ERR_RETRY_FUNC("LOCATION", OB_LS_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
// OB_TABLET_NOT_EXIST may be caused by old version schema or incorrect location.
// Just use location_error_proc to retry sql and a new schema guard will be obtained during the retry process.
ERR_RETRY_FUNC("LOCATION", OB_TABLET_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_not_exist_retry_proc);
ERR_RETRY_FUNC("LOCATION", OB_TABLET_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
ERR_RETRY_FUNC("LOCATION", OB_LS_LOCATION_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
ERR_RETRY_FUNC("LOCATION", OB_PARTITION_IS_BLOCKED, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
ERR_RETRY_FUNC("LOCATION", OB_MAPPING_BETWEEN_TABLET_AND_LS_NOT_EXIST, location_error_proc,inner_location_error_proc, ObDASRetryCtrl::tablet_not_exist_retry_proc);
ERR_RETRY_FUNC("LOCATION", OB_MAPPING_BETWEEN_TABLET_AND_LS_NOT_EXIST, location_error_proc,inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
ERR_RETRY_FUNC("LOCATION", OB_GET_LOCATION_TIME_OUT, location_error_proc, inner_table_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);

View File

@ -21,6 +21,15 @@ using namespace share;
using namespace share::schema;
namespace sql {
/**
*
* DAS cannot unconditionally retry for the error of tablet_location or ls_location, like -4725, -4721,
* and needs to determine whether the real cause of the error is due to DDL operations or transfer.
* 1. When the table, partition or tenant was dropped, which is caused by DDL, das task cannot be retried.
* 2. When a partition was transfered and tablet location cache is not updated, tablet location cache should
* be updated and das task needs to be retried.
*
**/
void ObDASRetryCtrl::tablet_location_retry_proc(ObDASRef &das_ref,
ObIDASTaskOp &task_op,
bool &need_retry)
@ -30,13 +39,35 @@ void ObDASRetryCtrl::tablet_location_retry_proc(ObDASRef &das_ref,
ObTableID ref_table_id = task_op.get_ref_table_id();
ObDASLocationRouter &loc_router = DAS_CTX(das_ref.get_exec_ctx()).get_location_router();
const ObDASTabletLoc *tablet_loc = task_op.get_tablet_loc();
if (is_virtual_table(ref_table_id)) {
//the location of the virtual table can't be refreshed,
//so when a location exception occurs, virtual table is not retryable
need_retry = false;
} else if (OB_ISNULL(tablet_loc)) {
bool tablet_exist = false;
schema::ObSchemaGetterGuard schema_guard;
const schema::ObTableSchema *table_schema = nullptr;
if (OB_ISNULL(tablet_loc)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet loc is nullptr", K(ret));
} else if (OB_ISNULL(GCTX.schema_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid schema service", K(ret));
} else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(MTL_ID(), schema_guard))) {
// tenant could be dropped
task_op.set_errcode(ret);
LOG_WARN("get tenant schema guard fail", KR(ret), K(MTL_ID()));
} else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), ref_table_id, table_schema))) {
task_op.set_errcode(ret);
LOG_WARN("failed to get table schema", KR(ret), K(ref_table_id));
} else if (OB_ISNULL(table_schema)) {
// table could be dropped
task_op.set_errcode(OB_TABLE_NOT_EXIST);
LOG_WARN("table not exist, maybe dropped by DDL, stop das retry", K(ref_table_id));
} else if (table_schema->is_vir_table()) {
// the location of the virtual table can't be refreshed,
// so when a location exception occurs, virtual table is not retryable
} else if (OB_FAIL(table_schema->check_if_tablet_exists(tablet_loc->tablet_id_, tablet_exist))) {
LOG_WARN("failed to check if tablet exists", K(ret), K(tablet_loc), K(ref_table_id));
} else if (!tablet_exist) {
// partition could be dropped
task_op.set_errcode(OB_PARTITION_NOT_EXIST);
LOG_WARN("partition not exist, maybe dropped by DDL, stop das retry", K(tablet_loc), K(ref_table_id));
} else {
loc_router.force_refresh_location_cache(true, task_op.get_errcode());
need_retry = true;
@ -60,48 +91,5 @@ void ObDASRetryCtrl::task_network_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &n
need_retry = true;
}
/**
* The storage throws 4725 to the DAS in two cases:
* 1. When a table or partition is dropped, the tablet is recycled, which is caused by DDL and cannot be retried.
* 2. When a partition is transfered, but the tablet location cache is not updated,
* the TSC operation is sent to the old server, and the storage reports 4725, this case needs to be retried.
* The DAS cannot unconditionally retry 4725,
* and needs to determine whether the real cause of the 4725 error is a drop table or a transfer.
**/
void ObDASRetryCtrl::tablet_not_exist_retry_proc(ObDASRef &das_ref,
ObIDASTaskOp &task_op,
bool &need_retry)
{
int ret = OB_SUCCESS;
need_retry = false;
ObTableID ref_table_id = task_op.get_ref_table_id();
bool tablet_exist = false;
schema::ObSchemaGetterGuard schema_guard;
const schema::ObTableSchema *table_schema = nullptr;
const ObDASTabletLoc *tablet_loc = task_op.get_tablet_loc();
if (OB_ISNULL(GCTX.schema_service_) || OB_ISNULL(tablet_loc)) {
LOG_WARN("invalid schema service", KR(ret), K(GCTX.schema_service_), K(tablet_loc));
} else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(MTL_ID(), schema_guard))) {
// tenant could be deleted
task_op.set_errcode(ret);
LOG_WARN("get tenant schema guard fail", KR(ret), K(MTL_ID()));
} else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), ref_table_id, table_schema))) {
task_op.set_errcode(ret);
LOG_WARN("failed to get table schema", KR(ret));
} else if (OB_ISNULL(table_schema)) {
//table could be dropped
task_op.set_errcode(OB_TABLE_NOT_EXIST);
LOG_WARN("table not exist, fast fail das task", K(ref_table_id));
} else if (table_schema->is_vir_table()) {
need_retry = false;
} else if (OB_FAIL(table_schema->check_if_tablet_exists(tablet_loc->tablet_id_, tablet_exist))) {
LOG_WARN("check if tablet exists failed", K(ret), K(tablet_loc), K(ref_table_id));
} else if (!tablet_exist) {
task_op.set_errcode(OB_PARTITION_NOT_EXIST);
LOG_WARN("partition not exist, maybe dropped by DDL", K(ret), K(tablet_loc), K(ref_table_id));
} else {
tablet_location_retry_proc(das_ref, task_op, need_retry);
}
}
} // namespace sql
} // namespace oceanbase

View File

@ -33,7 +33,6 @@ public:
static void tablet_location_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
static void tablet_nothing_readable_proc(ObDASRef &, ObIDASTaskOp &, bool &);
static void task_network_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
static void tablet_not_exist_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
};
} // namespace sql