[CP] [DAS RETRY] check schema status to determine whether to retry
This commit is contained in:
@ -1067,10 +1067,10 @@ int ObQueryRetryCtrl::init()
|
|||||||
ERR_RETRY_FUNC("LOCATION", OB_LS_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
ERR_RETRY_FUNC("LOCATION", OB_LS_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
||||||
// OB_TABLET_NOT_EXIST may be caused by old version schema or incorrect location.
|
// OB_TABLET_NOT_EXIST may be caused by old version schema or incorrect location.
|
||||||
// Just use location_error_proc to retry sql and a new schema guard will be obtained during the retry process.
|
// Just use location_error_proc to retry sql and a new schema guard will be obtained during the retry process.
|
||||||
ERR_RETRY_FUNC("LOCATION", OB_TABLET_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_not_exist_retry_proc);
|
ERR_RETRY_FUNC("LOCATION", OB_TABLET_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
||||||
ERR_RETRY_FUNC("LOCATION", OB_LS_LOCATION_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
ERR_RETRY_FUNC("LOCATION", OB_LS_LOCATION_NOT_EXIST, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
||||||
ERR_RETRY_FUNC("LOCATION", OB_PARTITION_IS_BLOCKED, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
ERR_RETRY_FUNC("LOCATION", OB_PARTITION_IS_BLOCKED, location_error_proc, inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
||||||
ERR_RETRY_FUNC("LOCATION", OB_MAPPING_BETWEEN_TABLET_AND_LS_NOT_EXIST, location_error_proc,inner_location_error_proc, ObDASRetryCtrl::tablet_not_exist_retry_proc);
|
ERR_RETRY_FUNC("LOCATION", OB_MAPPING_BETWEEN_TABLET_AND_LS_NOT_EXIST, location_error_proc,inner_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
||||||
|
|
||||||
ERR_RETRY_FUNC("LOCATION", OB_GET_LOCATION_TIME_OUT, location_error_proc, inner_table_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
ERR_RETRY_FUNC("LOCATION", OB_GET_LOCATION_TIME_OUT, location_error_proc, inner_table_location_error_proc, ObDASRetryCtrl::tablet_location_retry_proc);
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,15 @@ using namespace share;
|
|||||||
using namespace share::schema;
|
using namespace share::schema;
|
||||||
namespace sql {
|
namespace sql {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* DAS cannot unconditionally retry on tablet_location or ls_location errors, such as -4725 and -4721,
|
||||||
|
* and needs to determine whether the real cause of the error is due to DDL operations or transfer.
|
||||||
|
* 1. When the table, partition or tenant was dropped, which is caused by DDL, das task cannot be retried.
|
||||||
|
* 2. When a partition was transferred and tablet location cache is not updated, tablet location cache should
|
||||||
|
* be updated and das task needs to be retried.
|
||||||
|
*
|
||||||
|
**/
|
||||||
void ObDASRetryCtrl::tablet_location_retry_proc(ObDASRef &das_ref,
|
void ObDASRetryCtrl::tablet_location_retry_proc(ObDASRef &das_ref,
|
||||||
ObIDASTaskOp &task_op,
|
ObIDASTaskOp &task_op,
|
||||||
bool &need_retry)
|
bool &need_retry)
|
||||||
@ -30,13 +39,35 @@ void ObDASRetryCtrl::tablet_location_retry_proc(ObDASRef &das_ref,
|
|||||||
ObTableID ref_table_id = task_op.get_ref_table_id();
|
ObTableID ref_table_id = task_op.get_ref_table_id();
|
||||||
ObDASLocationRouter &loc_router = DAS_CTX(das_ref.get_exec_ctx()).get_location_router();
|
ObDASLocationRouter &loc_router = DAS_CTX(das_ref.get_exec_ctx()).get_location_router();
|
||||||
const ObDASTabletLoc *tablet_loc = task_op.get_tablet_loc();
|
const ObDASTabletLoc *tablet_loc = task_op.get_tablet_loc();
|
||||||
if (is_virtual_table(ref_table_id)) {
|
bool tablet_exist = false;
|
||||||
//the location of the virtual table can't be refreshed,
|
schema::ObSchemaGetterGuard schema_guard;
|
||||||
//so when a location exception occurs, virtual table is not retryable
|
const schema::ObTableSchema *table_schema = nullptr;
|
||||||
need_retry = false;
|
if (OB_ISNULL(tablet_loc)) {
|
||||||
} else if (OB_ISNULL(tablet_loc)) {
|
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("tablet loc is nullptr", K(ret));
|
LOG_WARN("tablet loc is nullptr", K(ret));
|
||||||
|
} else if (OB_ISNULL(GCTX.schema_service_)) {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("invalid schema service", K(ret));
|
||||||
|
} else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(MTL_ID(), schema_guard))) {
|
||||||
|
// tenant could be dropped
|
||||||
|
task_op.set_errcode(ret);
|
||||||
|
LOG_WARN("get tenant schema guard fail", KR(ret), K(MTL_ID()));
|
||||||
|
} else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), ref_table_id, table_schema))) {
|
||||||
|
task_op.set_errcode(ret);
|
||||||
|
LOG_WARN("failed to get table schema", KR(ret), K(ref_table_id));
|
||||||
|
} else if (OB_ISNULL(table_schema)) {
|
||||||
|
// table could be dropped
|
||||||
|
task_op.set_errcode(OB_TABLE_NOT_EXIST);
|
||||||
|
LOG_WARN("table not exist, maybe dropped by DDL, stop das retry", K(ref_table_id));
|
||||||
|
} else if (table_schema->is_vir_table()) {
|
||||||
|
// the location of the virtual table can't be refreshed,
|
||||||
|
// so when a location exception occurs, virtual table is not retryable
|
||||||
|
} else if (OB_FAIL(table_schema->check_if_tablet_exists(tablet_loc->tablet_id_, tablet_exist))) {
|
||||||
|
LOG_WARN("failed to check if tablet exists", K(ret), K(tablet_loc), K(ref_table_id));
|
||||||
|
} else if (!tablet_exist) {
|
||||||
|
// partition could be dropped
|
||||||
|
task_op.set_errcode(OB_PARTITION_NOT_EXIST);
|
||||||
|
LOG_WARN("partition not exist, maybe dropped by DDL, stop das retry", K(tablet_loc), K(ref_table_id));
|
||||||
} else {
|
} else {
|
||||||
loc_router.force_refresh_location_cache(true, task_op.get_errcode());
|
loc_router.force_refresh_location_cache(true, task_op.get_errcode());
|
||||||
need_retry = true;
|
need_retry = true;
|
||||||
@ -60,48 +91,5 @@ void ObDASRetryCtrl::task_network_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &n
|
|||||||
need_retry = true;
|
need_retry = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* The storage throws 4725 to the DAS in two cases:
|
|
||||||
* 1. When a table or partition is dropped, the tablet is recycled, which is caused by DDL and cannot be retried.
|
|
||||||
* 2. When a partition is transferred, but the tablet location cache is not updated,
|
|
||||||
* the TSC operation is sent to the old server, and the storage reports 4725, this case needs to be retried.
|
|
||||||
* The DAS cannot unconditionally retry 4725,
|
|
||||||
* and needs to determine whether the real cause of the 4725 error is a drop table or a transfer.
|
|
||||||
**/
|
|
||||||
void ObDASRetryCtrl::tablet_not_exist_retry_proc(ObDASRef &das_ref,
|
|
||||||
ObIDASTaskOp &task_op,
|
|
||||||
bool &need_retry)
|
|
||||||
{
|
|
||||||
int ret = OB_SUCCESS;
|
|
||||||
need_retry = false;
|
|
||||||
ObTableID ref_table_id = task_op.get_ref_table_id();
|
|
||||||
bool tablet_exist = false;
|
|
||||||
schema::ObSchemaGetterGuard schema_guard;
|
|
||||||
const schema::ObTableSchema *table_schema = nullptr;
|
|
||||||
const ObDASTabletLoc *tablet_loc = task_op.get_tablet_loc();
|
|
||||||
if (OB_ISNULL(GCTX.schema_service_) || OB_ISNULL(tablet_loc)) {
|
|
||||||
LOG_WARN("invalid schema service", KR(ret), K(GCTX.schema_service_), K(tablet_loc));
|
|
||||||
} else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(MTL_ID(), schema_guard))) {
|
|
||||||
// tenant could be deleted
|
|
||||||
task_op.set_errcode(ret);
|
|
||||||
LOG_WARN("get tenant schema guard fail", KR(ret), K(MTL_ID()));
|
|
||||||
} else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), ref_table_id, table_schema))) {
|
|
||||||
task_op.set_errcode(ret);
|
|
||||||
LOG_WARN("failed to get table schema", KR(ret));
|
|
||||||
} else if (OB_ISNULL(table_schema)) {
|
|
||||||
//table could be dropped
|
|
||||||
task_op.set_errcode(OB_TABLE_NOT_EXIST);
|
|
||||||
LOG_WARN("table not exist, fast fail das task", K(ref_table_id));
|
|
||||||
} else if (table_schema->is_vir_table()) {
|
|
||||||
need_retry = false;
|
|
||||||
} else if (OB_FAIL(table_schema->check_if_tablet_exists(tablet_loc->tablet_id_, tablet_exist))) {
|
|
||||||
LOG_WARN("check if tablet exists failed", K(ret), K(tablet_loc), K(ref_table_id));
|
|
||||||
} else if (!tablet_exist) {
|
|
||||||
task_op.set_errcode(OB_PARTITION_NOT_EXIST);
|
|
||||||
LOG_WARN("partition not exist, maybe dropped by DDL", K(ret), K(tablet_loc), K(ref_table_id));
|
|
||||||
} else {
|
|
||||||
tablet_location_retry_proc(das_ref, task_op, need_retry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace sql
|
} // namespace sql
|
||||||
} // namespace oceanbase
|
} // namespace oceanbase
|
||||||
|
|||||||
@ -33,7 +33,6 @@ public:
|
|||||||
static void tablet_location_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
|
static void tablet_location_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
|
||||||
static void tablet_nothing_readable_proc(ObDASRef &, ObIDASTaskOp &, bool &);
|
static void tablet_nothing_readable_proc(ObDASRef &, ObIDASTaskOp &, bool &);
|
||||||
static void task_network_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
|
static void task_network_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
|
||||||
static void tablet_not_exist_retry_proc(ObDASRef &, ObIDASTaskOp &, bool &);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace sql
|
} // namespace sql
|
||||||
|
|||||||
Reference in New Issue
Block a user