Fix kill trans return 4023 bug
This commit is contained in:
File diff suppressed because one or more lines are too long
@ -1797,6 +1797,7 @@ DEFINE_ERROR(OB_TRANSFER_SRC_LS_NOT_EXIST, -7115, -1, "HY000", "transfer src ls
|
|||||||
DEFINE_ERROR(OB_TRANSFER_SRC_TABLET_NOT_EXIST, -7116, -1, "HY000", "transfer src tablet does not exist");
|
DEFINE_ERROR(OB_TRANSFER_SRC_TABLET_NOT_EXIST, -7116, -1, "HY000", "transfer src tablet does not exist");
|
||||||
DEFINE_ERROR(OB_LS_NEED_REBUILD, -7117, -1, "HY000", "ls need rebuild");
|
DEFINE_ERROR(OB_LS_NEED_REBUILD, -7117, -1, "HY000", "ls need rebuild");
|
||||||
DEFINE_ERROR(OB_OBSOLETE_CLOG_NEED_SKIP, -7118, -1, "HY000", "obsolete clog need skip");
|
DEFINE_ERROR(OB_OBSOLETE_CLOG_NEED_SKIP, -7118, -1, "HY000", "obsolete clog need skip");
|
||||||
|
DEFINE_ERROR(OB_TRANSFER_WAIT_TRANSACTION_END_TIMEOUT, -7119, -1, "HY000", "transfer wait transactions end timeout");
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
//error code for gis -7201 ---- -7300
|
//error code for gis -7201 ---- -7300
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -1406,6 +1406,7 @@ constexpr int OB_TRANSFER_SRC_LS_NOT_EXIST = -7115;
|
|||||||
constexpr int OB_TRANSFER_SRC_TABLET_NOT_EXIST = -7116;
|
constexpr int OB_TRANSFER_SRC_TABLET_NOT_EXIST = -7116;
|
||||||
constexpr int OB_LS_NEED_REBUILD = -7117;
|
constexpr int OB_LS_NEED_REBUILD = -7117;
|
||||||
constexpr int OB_OBSOLETE_CLOG_NEED_SKIP = -7118;
|
constexpr int OB_OBSOLETE_CLOG_NEED_SKIP = -7118;
|
||||||
|
constexpr int OB_TRANSFER_WAIT_TRANSACTION_END_TIMEOUT = -7119;
|
||||||
constexpr int OB_ERR_INVALID_XML_DATATYPE = -7402;
|
constexpr int OB_ERR_INVALID_XML_DATATYPE = -7402;
|
||||||
constexpr int OB_ERR_XML_MISSING_COMMA = -7403;
|
constexpr int OB_ERR_XML_MISSING_COMMA = -7403;
|
||||||
constexpr int OB_ERR_INVALID_XPATH_EXPRESSION = -7404;
|
constexpr int OB_ERR_INVALID_XPATH_EXPRESSION = -7404;
|
||||||
@ -3394,6 +3395,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
|
|||||||
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__USER_ERROR_MSG "transfer src tablet does not exist"
|
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__USER_ERROR_MSG "transfer src tablet does not exist"
|
||||||
#define OB_LS_NEED_REBUILD__USER_ERROR_MSG "ls need rebuild"
|
#define OB_LS_NEED_REBUILD__USER_ERROR_MSG "ls need rebuild"
|
||||||
#define OB_OBSOLETE_CLOG_NEED_SKIP__USER_ERROR_MSG "obsolete clog need skip"
|
#define OB_OBSOLETE_CLOG_NEED_SKIP__USER_ERROR_MSG "obsolete clog need skip"
|
||||||
|
#define OB_TRANSFER_WAIT_TRANSACTION_END_TIMEOUT__USER_ERROR_MSG "transfer wait transactions end timeout"
|
||||||
#define OB_ERR_GIS_DIFFERENT_SRIDS__USER_ERROR_MSG "Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
|
#define OB_ERR_GIS_DIFFERENT_SRIDS__USER_ERROR_MSG "Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
|
||||||
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__USER_ERROR_MSG "Calling geometry function %s with unsupported types of arguments."
|
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__USER_ERROR_MSG "Calling geometry function %s with unsupported types of arguments."
|
||||||
#define OB_ERR_GIS_UNKNOWN_ERROR__USER_ERROR_MSG "Unknown GIS error occurred in function %s."
|
#define OB_ERR_GIS_UNKNOWN_ERROR__USER_ERROR_MSG "Unknown GIS error occurred in function %s."
|
||||||
@ -5493,6 +5495,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
|
|||||||
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7116, transfer src tablet does not exist"
|
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7116, transfer src tablet does not exist"
|
||||||
#define OB_LS_NEED_REBUILD__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7117, ls need rebuild"
|
#define OB_LS_NEED_REBUILD__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7117, ls need rebuild"
|
||||||
#define OB_OBSOLETE_CLOG_NEED_SKIP__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7118, obsolete clog need skip"
|
#define OB_OBSOLETE_CLOG_NEED_SKIP__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7118, obsolete clog need skip"
|
||||||
|
#define OB_TRANSFER_WAIT_TRANSACTION_END_TIMEOUT__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7119, transfer wait transactions end timeout"
|
||||||
#define OB_ERR_GIS_DIFFERENT_SRIDS__ORA_USER_ERROR_MSG "ORA-00600: Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
|
#define OB_ERR_GIS_DIFFERENT_SRIDS__ORA_USER_ERROR_MSG "ORA-00600: Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
|
||||||
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__ORA_USER_ERROR_MSG "ORA-00600: Calling geometry function %s with unsupported types of arguments."
|
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__ORA_USER_ERROR_MSG "ORA-00600: Calling geometry function %s with unsupported types of arguments."
|
||||||
#define OB_ERR_GIS_UNKNOWN_ERROR__ORA_USER_ERROR_MSG "ORA-00600: Unknown GIS error occurred in function %s."
|
#define OB_ERR_GIS_UNKNOWN_ERROR__ORA_USER_ERROR_MSG "ORA-00600: Unknown GIS error occurred in function %s."
|
||||||
@ -5960,7 +5963,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
|
|||||||
#define OB_ERR_DATA_TOO_LONG_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-12899: value too large for column %.*s (actual: %ld, maximum: %ld)"
|
#define OB_ERR_DATA_TOO_LONG_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-12899: value too large for column %.*s (actual: %ld, maximum: %ld)"
|
||||||
#define OB_ERR_INVALID_DATE_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-01861: Incorrect datetime value for column '%.*s' at row %ld"
|
#define OB_ERR_INVALID_DATE_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-01861: Incorrect datetime value for column '%.*s' at row %ld"
|
||||||
|
|
||||||
extern int g_all_ob_errnos[2095];
|
extern int g_all_ob_errnos[2096];
|
||||||
|
|
||||||
const char *ob_error_name(const int oberr);
|
const char *ob_error_name(const int oberr);
|
||||||
const char* ob_error_cause(const int oberr);
|
const char* ob_error_cause(const int oberr);
|
||||||
|
@ -1486,6 +1486,10 @@ DEF_TIME(_balance_kill_transaction_threshold, OB_TENANT_PARAMETER, "100ms", "[1m
|
|||||||
"the time given to the transaction to execute when do balance"
|
"the time given to the transaction to execute when do balance"
|
||||||
"before it will be killed. Range: [1ms, 60s]",
|
"before it will be killed. Range: [1ms, 60s]",
|
||||||
ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||||
|
DEF_TIME(_balance_wait_killing_transaction_end_threshold, OB_TENANT_PARAMETER, "100ms", "[10ms, 60s]",
|
||||||
|
"the threshold for waiting time after killing transactions until they end."
|
||||||
|
"Range: [10ms, 60s]",
|
||||||
|
ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||||
DEF_BOOL(_enable_px_fast_reclaim, OB_CLUSTER_PARAMETER, "True",
|
DEF_BOOL(_enable_px_fast_reclaim, OB_CLUSTER_PARAMETER, "True",
|
||||||
"Enable the fast reclaim function through PX tasks deteting for survival by detect manager. The default value is True.",
|
"Enable the fast reclaim function through PX tasks deteting for survival by detect manager. The default value is True.",
|
||||||
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||||
|
@ -418,7 +418,6 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
|
|||||||
ObTimeoutCtx timeout_ctx;
|
ObTimeoutCtx timeout_ctx;
|
||||||
ObMySQLTransaction trans;
|
ObMySQLTransaction trans;
|
||||||
bool enable_kill_trx = false;
|
bool enable_kill_trx = false;
|
||||||
int64_t kill_trx_threshold = 0;
|
|
||||||
|
|
||||||
if (!is_inited_) {
|
if (!is_inited_) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
@ -450,14 +449,13 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
|
|||||||
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
|
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
|
||||||
if (tenant_config.is_valid()) {
|
if (tenant_config.is_valid()) {
|
||||||
enable_kill_trx = tenant_config->_enable_balance_kill_transaction;
|
enable_kill_trx = tenant_config->_enable_balance_kill_transaction;
|
||||||
kill_trx_threshold = tenant_config->_balance_kill_transaction_threshold;
|
|
||||||
}
|
}
|
||||||
if (OB_FAIL(ret)) {
|
if (OB_FAIL(ret)) {
|
||||||
} else if (OB_FAIL(lock_src_and_dest_ls_member_list_(task_info, task_info.src_ls_id_, task_info.dest_ls_id_))) {
|
} else if (OB_FAIL(lock_src_and_dest_ls_member_list_(task_info, task_info.src_ls_id_, task_info.dest_ls_id_))) {
|
||||||
LOG_WARN("failed to lock src and dest ls member list", K(ret), K(task_info));
|
LOG_WARN("failed to lock src and dest ls member list", K(ret), K(task_info));
|
||||||
} else if (!enable_kill_trx && OB_FAIL(check_src_ls_has_active_trans_(task_info.src_ls_id_))) {
|
} else if (!enable_kill_trx && OB_FAIL(check_src_ls_has_active_trans_(task_info.src_ls_id_))) {
|
||||||
LOG_WARN("failed to check src ls active trans", K(ret), K(task_info));
|
LOG_WARN("failed to check src ls active trans", K(ret), K(task_info));
|
||||||
} else if (OB_FAIL(block_and_kill_tx_(task_info, enable_kill_trx, kill_trx_threshold, timeout_ctx))) {
|
} else if (OB_FAIL(block_and_kill_tx_(task_info, enable_kill_trx, timeout_ctx))) {
|
||||||
LOG_WARN("failed to block and kill tx", K(ret), K(task_info));
|
LOG_WARN("failed to block and kill tx", K(ret), K(task_info));
|
||||||
} else if (OB_FAIL(check_start_status_transfer_tablets_(task_info))) {
|
} else if (OB_FAIL(check_start_status_transfer_tablets_(task_info))) {
|
||||||
LOG_WARN("failed to check start status transfer tablets", K(ret), K(task_info));
|
LOG_WARN("failed to check start status transfer tablets", K(ret), K(task_info));
|
||||||
@ -905,7 +903,11 @@ int ObTransferHandler::start_trans_(
|
|||||||
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id));
|
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id));
|
||||||
int64_t stmt_timeout = 10_s;
|
int64_t stmt_timeout = 10_s;
|
||||||
if (tenant_config.is_valid()) {
|
if (tenant_config.is_valid()) {
|
||||||
stmt_timeout = tenant_config->_transfer_start_trans_timeout + tenant_config->_balance_kill_transaction_threshold;
|
stmt_timeout = tenant_config->_transfer_start_trans_timeout;
|
||||||
|
if (tenant_config->_enable_balance_kill_transaction) {
|
||||||
|
stmt_timeout += tenant_config->_balance_kill_transaction_threshold;
|
||||||
|
stmt_timeout += tenant_config->_balance_wait_killing_transaction_end_threshold;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_inited_) {
|
if (!is_inited_) {
|
||||||
@ -1746,22 +1748,34 @@ int ObTransferHandler::do_worker_transfer_()
|
|||||||
int ObTransferHandler::block_and_kill_tx_(
|
int ObTransferHandler::block_and_kill_tx_(
|
||||||
const share::ObTransferTaskInfo &task_info,
|
const share::ObTransferTaskInfo &task_info,
|
||||||
const bool enable_kill_trx,
|
const bool enable_kill_trx,
|
||||||
const int64_t kill_trx_threshold,
|
|
||||||
ObTimeoutCtx &timeout_ctx)
|
ObTimeoutCtx &timeout_ctx)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
const uint64_t tenant_id = task_info.tenant_id_;
|
const uint64_t tenant_id = task_info.tenant_id_;
|
||||||
const share::ObLSID &src_ls_id = task_info.src_ls_id_;
|
const share::ObLSID &src_ls_id = task_info.src_ls_id_;
|
||||||
const int64_t start_ts = ObTimeUtil::current_time();
|
const int64_t start_ts = ObTimeUtil::current_time();
|
||||||
|
int64_t before_kill_trx_threshold = 0;
|
||||||
|
int64_t after_kill_trx_threshold = 0;
|
||||||
|
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
|
||||||
|
int64_t active_trans_count = 0;
|
||||||
|
if (tenant_config.is_valid()) {
|
||||||
|
before_kill_trx_threshold = tenant_config->_balance_kill_transaction_threshold;
|
||||||
|
after_kill_trx_threshold = tenant_config->_balance_wait_killing_transaction_end_threshold;
|
||||||
|
}
|
||||||
|
|
||||||
if (OB_FAIL(block_tx_(tenant_id, src_ls_id))) {
|
if (OB_FAIL(block_tx_(tenant_id, src_ls_id))) {
|
||||||
LOG_WARN("failed to block tx", K(ret), K(task_info));
|
LOG_WARN("failed to block tx", K(ret), K(task_info));
|
||||||
} else if (!enable_kill_trx) {
|
} else if (!enable_kill_trx) {
|
||||||
LOG_INFO("transfer no need kill tx", K(task_info));
|
if (OB_FAIL(get_ls_active_trans_count_(src_ls_id, active_trans_count))) {
|
||||||
} else if (OB_FAIL(check_for_kill_(tenant_id, src_ls_id, kill_trx_threshold, false/*is_after_kill*/, timeout_ctx))) {
|
LOG_WARN("failed to get src ls has active trans", K(ret));
|
||||||
LOG_WARN("failed to check before kill", K(ret));
|
} else if (0 != active_trans_count) {
|
||||||
} else if (OB_FAIL(kill_tx_(tenant_id, src_ls_id))) {
|
ret = OB_TRANSFER_WAIT_TRANSACTION_END_TIMEOUT;
|
||||||
LOG_WARN("failed to kill tx", K(ret));
|
LOG_WARN("transfer src ls still has active transactions, cannot do transfer", K(ret), K(src_ls_id),
|
||||||
} else if (OB_FAIL(check_for_kill_(tenant_id, src_ls_id, kill_trx_threshold, true/*is_after_kill*/, timeout_ctx))) {
|
K(active_trans_count));
|
||||||
|
}
|
||||||
|
} else if (OB_FAIL(check_and_kill_tx_(tenant_id, src_ls_id, before_kill_trx_threshold, false/*with_trans_kill*/, timeout_ctx))) {
|
||||||
|
LOG_WARN("failed to check after kill", K(ret));
|
||||||
|
} else if (OB_FAIL(check_and_kill_tx_(tenant_id, src_ls_id, after_kill_trx_threshold, true/*with_trans_kill*/, timeout_ctx))) {
|
||||||
LOG_WARN("failed to check after kill", K(ret));
|
LOG_WARN("failed to check after kill", K(ret));
|
||||||
} else {
|
} else {
|
||||||
LOG_INFO("[TRANSFER] success to block and kill tx", "cost", ObTimeUtil::current_time() - start_ts);
|
LOG_INFO("[TRANSFER] success to block and kill tx", "cost", ObTimeUtil::current_time() - start_ts);
|
||||||
@ -1773,11 +1787,11 @@ int ObTransferHandler::block_and_kill_tx_(
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ObTransferHandler::check_for_kill_(
|
int ObTransferHandler::check_and_kill_tx_(
|
||||||
const uint64_t tenant_id,
|
const uint64_t tenant_id,
|
||||||
const share::ObLSID &ls_id,
|
const share::ObLSID &ls_id,
|
||||||
const int64_t timeout,
|
const int64_t timeout,
|
||||||
const bool is_after_kill,
|
const bool with_trans_kill,
|
||||||
ObTimeoutCtx &timeout_ctx)
|
ObTimeoutCtx &timeout_ctx)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
@ -1790,9 +1804,9 @@ int ObTransferHandler::check_for_kill_(
|
|||||||
ret = OB_TIMEOUT;
|
ret = OB_TIMEOUT;
|
||||||
LOG_WARN("trans ctx already timeout", K(ret));
|
LOG_WARN("trans ctx already timeout", K(ret));
|
||||||
} else if (cur_ts - start_ts > timeout) {
|
} else if (cur_ts - start_ts > timeout) {
|
||||||
if (is_after_kill) {
|
if (with_trans_kill) {
|
||||||
ret = OB_TIMEOUT;
|
ret = OB_TRANSFER_WAIT_TRANSACTION_END_TIMEOUT;
|
||||||
LOG_WARN("check active trans after kill timeout", K(cur_ts), K(start_ts));
|
LOG_WARN("wait active trans finish timeout", K(ret), K(cur_ts), K(start_ts));
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1806,6 +1820,13 @@ int ObTransferHandler::check_for_kill_(
|
|||||||
LOG_WARN("failed to get src ls has active trans", K(ret));
|
LOG_WARN("failed to get src ls has active trans", K(ret));
|
||||||
} else if (0 != active_trans_count) {
|
} else if (0 != active_trans_count) {
|
||||||
LOG_INFO("still has active trans", K(tenant_id), K(ls_id), K(active_trans_count));
|
LOG_INFO("still has active trans", K(tenant_id), K(ls_id), K(active_trans_count));
|
||||||
|
if (with_trans_kill && OB_FAIL(kill_tx_(tenant_id, ls_id))) {
|
||||||
|
if (OB_EAGAIN == ret) {
|
||||||
|
ret = OB_SUCCESS;
|
||||||
|
} else {
|
||||||
|
LOG_WARN("failed to kill tx", K(ret), K(tenant_id), K(ls_id));
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -191,16 +191,15 @@ private:
|
|||||||
int block_and_kill_tx_(
|
int block_and_kill_tx_(
|
||||||
const share::ObTransferTaskInfo &task_info,
|
const share::ObTransferTaskInfo &task_info,
|
||||||
const bool enable_kill_trx,
|
const bool enable_kill_trx,
|
||||||
const int64_t kill_trx_threshold,
|
|
||||||
ObTimeoutCtx &timeout_ctx);
|
ObTimeoutCtx &timeout_ctx);
|
||||||
int block_tx_(
|
int block_tx_(
|
||||||
const uint64_t tenant_id,
|
const uint64_t tenant_id,
|
||||||
const share::ObLSID &ls_id);
|
const share::ObLSID &ls_id);
|
||||||
int check_for_kill_(
|
int check_and_kill_tx_(
|
||||||
const uint64_t tenant_id,
|
const uint64_t tenant_id,
|
||||||
const share::ObLSID &ls_id,
|
const share::ObLSID &ls_id,
|
||||||
const int64_t timeout,
|
const int64_t timeout,
|
||||||
const bool is_after_kill,
|
const bool with_trans_kill,
|
||||||
ObTimeoutCtx &timeout_ctx);
|
ObTimeoutCtx &timeout_ctx);
|
||||||
int kill_tx_(
|
int kill_tx_(
|
||||||
const uint64_t tenant_id,
|
const uint64_t tenant_id,
|
||||||
|
@ -243,6 +243,7 @@ _backup_idle_time
|
|||||||
_backup_task_keep_alive_interval
|
_backup_task_keep_alive_interval
|
||||||
_backup_task_keep_alive_timeout
|
_backup_task_keep_alive_timeout
|
||||||
_balance_kill_transaction_threshold
|
_balance_kill_transaction_threshold
|
||||||
|
_balance_wait_killing_transaction_end_threshold
|
||||||
_bloom_filter_enabled
|
_bloom_filter_enabled
|
||||||
_bloom_filter_ratio
|
_bloom_filter_ratio
|
||||||
_cache_wash_interval
|
_cache_wash_interval
|
||||||
|
Reference in New Issue
Block a user