[NetStandby] Add retry when querying the primary fails and adjust the log level for several specific error codes

This commit is contained in:
obdev
2023-07-13 06:42:20 +00:00
committed by ob-robot
parent 67fbfa127f
commit 075115e5dc
3 changed files with 249 additions and 222 deletions

View File

@ -32,6 +32,11 @@ namespace oceanbase
{ {
namespace logfetcher namespace logfetcher
{ {
// Opens an `if` statement whose condition is true when `arg` equals one of
// OB_TIMEOUT, OB_TENANT_NOT_EXIST, OB_TENANT_NOT_IN_SERVER or OB_IN_STOP_STATE.
// The macro supplies only the `if (...)` header; the call site provides the
// braced body and may attach an `else` branch, e.g.
//   IS_WARN_LOG_LEVEL(err) { LOG_WARN(...); } else { LOG_ERROR(...); }
// `arg` is expanded once per comparison (up to four evaluations), so pass a
// side-effect-free expression. It is parenthesized in every comparison so that
// compound expressions (ternaries, bitwise operations, ...) compare as a whole.
// The definition deliberately does not end with a line continuation, so the
// line following the macro can never be spliced into its body.
#define IS_WARN_LOG_LEVEL(arg) \
  if (OB_TIMEOUT == (arg) \
      || OB_TENANT_NOT_EXIST == (arg) \
      || OB_TENANT_NOT_IN_SERVER == (arg) \
      || OB_IN_STOP_STATE == (arg))
int64_t FetchStream::g_rpc_timeout = ObLogFetcherConfig::default_fetch_log_rpc_timeout_sec * _SEC_; int64_t FetchStream::g_rpc_timeout = ObLogFetcherConfig::default_fetch_log_rpc_timeout_sec * _SEC_;
int64_t FetchStream::g_dml_progress_limit = ObLogFetcherConfig::default_progress_limit_sec_for_dml * _SEC_; int64_t FetchStream::g_dml_progress_limit = ObLogFetcherConfig::default_progress_limit_sec_for_dml * _SEC_;
@ -1238,15 +1243,13 @@ int FetchStream::handle_fetch_log_error_(
need_kick_out = true; need_kick_out = true;
kick_out_reason = FETCH_LOG_FAIL_ON_RPC; kick_out_reason = FETCH_LOG_FAIL_ON_RPC;
if (OB_NOT_NULL(ls_fetch_ctx_)) { if (OB_NOT_NULL(ls_fetch_ctx_)) {
if (OB_IN_STOP_STATE != rcode.rcode_) {
ls_fetch_ctx_->handle_error(ls_fetch_ctx_->get_tls_id().get_ls_id(), ls_fetch_ctx_->handle_error(ls_fetch_ctx_->get_tls_id().get_ls_id(),
IObLogErrHandler::ErrType::FETCH_LOG, IObLogErrHandler::ErrType::FETCH_LOG,
trace_id, trace_id,
ls_fetch_ctx_->get_next_lsn(), ls_fetch_ctx_->get_next_lsn(),
rcode.rcode_, rcode.rcode_,
"%s"); "%s");
} IS_WARN_LOG_LEVEL(rcode.rcode_) {
if (OB_TIMEOUT == rcode.rcode_) {
LOG_WARN("fetch log fail on rpc, need_switch_server", K(svr_), K(rcode), "fetch_stream", this); LOG_WARN("fetch log fail on rpc, need_switch_server", K(svr_), K(rcode), "fetch_stream", this);
} else { } else {
LOG_ERROR("fetch log fail on rpc, need_switch_server", K(svr_), K(rcode), "fetch_stream", this); LOG_ERROR("fetch log fail on rpc, need_switch_server", K(svr_), K(rcode), "fetch_stream", this);
@ -1259,15 +1262,13 @@ int FetchStream::handle_fetch_log_error_(
need_kick_out = true; need_kick_out = true;
kick_out_reason = FETCH_LOG_FAIL_ON_SERVER; kick_out_reason = FETCH_LOG_FAIL_ON_SERVER;
if (OB_NOT_NULL(ls_fetch_ctx_)) { if (OB_NOT_NULL(ls_fetch_ctx_)) {
if (OB_IN_STOP_STATE != resp.get_err()) {
ls_fetch_ctx_->handle_error(ls_fetch_ctx_->get_tls_id().get_ls_id(), ls_fetch_ctx_->handle_error(ls_fetch_ctx_->get_tls_id().get_ls_id(),
IObLogErrHandler::ErrType::FETCH_LOG, IObLogErrHandler::ErrType::FETCH_LOG,
trace_id, trace_id,
ls_fetch_ctx_->get_next_lsn(), ls_fetch_ctx_->get_next_lsn(),
resp.get_err(), resp.get_err(),
"%s"); "%s");
} IS_WARN_LOG_LEVEL(resp.get_err()) {
if (OB_TIMEOUT == resp.get_err()) {
LOG_WARN("fetch log fail on server, need_switch_server", "fetch_stream", this, K(svr_), LOG_WARN("fetch log fail on server, need_switch_server", "fetch_stream", this, K(svr_),
"svr_err", resp.get_err(), "svr_debug_err", resp.get_debug_err(), "svr_err", resp.get_err(), "svr_debug_err", resp.get_debug_err(),
K(rcode), K(resp)); K(rcode), K(resp));

View File

@ -513,8 +513,11 @@ void ObLogRestoreHandler::mark_error(share::ObTaskId &trace_id,
context_.error_context_.ret_code_ = ret_code; context_.error_context_.ret_code_ = ret_code;
context_.error_context_.trace_id_.set(trace_id); context_.error_context_.trace_id_.set(trace_id);
context_.error_context_.err_lsn_ = lsn; context_.error_context_.err_lsn_ = lsn;
if (OB_TIMEOUT == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) { if ((OB_TIMEOUT == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
CLOG_LOG(WARN, "fetch log timeout in restore", KPC(parent_), KPC(this)); || (OB_TENANT_NOT_EXIST == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|| (OB_TENANT_NOT_IN_SERVER == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|| (OB_IN_STOP_STATE == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)) {
CLOG_LOG(WARN, "fetch log failed in restore", KPC(parent_), KPC(this));
} else if(OB_SUCCESS != ret_code) { } else if(OB_SUCCESS != ret_code) {
CLOG_LOG(ERROR, "fatal error occur in restore", KPC(parent_), KPC(this)); CLOG_LOG(ERROR, "fatal error occur in restore", KPC(parent_), KPC(this));
} }

View File

@ -48,7 +48,8 @@ namespace share
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args ", please check the privileges"); \ LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args ", please check the privileges"); \
break; \ break; \
case OB_ERR_NULL_VALUE: \ case OB_ERR_NULL_VALUE: \
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args ", primary may not be ready"); \ case OB_ERR_WAIT_REMOTE_SCHEMA_REFRESH: \
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args ", query primary failed"); \
break; \ break; \
case -ER_CONNECT_FAILED: \ case -ER_CONNECT_FAILED: \
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args ", please check the network"); \ LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args ", please check the network"); \
@ -57,6 +58,13 @@ namespace share
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args); \ LOG_USER_ERROR(OB_INVALID_ARGUMENT, "get primary " args); \
} \ } \
// Runs the statements passed to the macro, then re-runs them while they leave
// a failure code in `ret`. The body always executes at least once and at most
// server_prover_.get_server_count() times in total (one initial attempt plus
// up to get_server_count() - 1 retries).
//
// Requirements at the expansion site:
//   - `ret` and `server_prover_` must be in scope;
//   - the macro declares `retry_time` in the enclosing scope, so it can be
//     expanded at most once per scope;
//   - the expansion already ends with `;`, so call sites close with `)` only.
//
// The parameter list is variadic so that the statement block may contain
// commas outside parentheses (multi-variable declarations, brace initializers,
// template argument lists) without being split into several macro arguments.
#define RESTORE_RETRY(...) \
  int64_t retry_time = 0; \
  do { \
    __VA_ARGS__ \
  } while (OB_FAIL(ret) && retry_time++ < server_prover_.get_server_count() - 1);
ObLogRestoreMySQLProvider::ObLogRestoreMySQLProvider() : server_list_() {} ObLogRestoreMySQLProvider::ObLogRestoreMySQLProvider() : server_list_() {}
ObLogRestoreMySQLProvider::~ObLogRestoreMySQLProvider() ObLogRestoreMySQLProvider::~ObLogRestoreMySQLProvider()
@ -288,6 +296,7 @@ int ObLogRestoreProxyUtil::try_init(const uint64_t tenant_id,
const char *ORACLE_DB = "SYS"; const char *ORACLE_DB = "SYS";
if (OB_SUCC(init(tenant_id, server_list, user_name, user_password, MYSQL_DB))) { if (OB_SUCC(init(tenant_id, server_list, user_name, user_password, MYSQL_DB))) {
RESTORE_RETRY(
SMART_VAR(ObMySQLProxy::MySQLResult, result) { SMART_VAR(ObMySQLProxy::MySQLResult, result) {
ObSqlString sql; ObSqlString sql;
if (OB_FAIL(sql.assign_fmt("SELECT 1"))) { if (OB_FAIL(sql.assign_fmt("SELECT 1"))) {
@ -303,11 +312,13 @@ int ObLogRestoreProxyUtil::try_init(const uint64_t tenant_id,
LOG_INFO("proxy connect to primary oceanabse db success"); LOG_INFO("proxy connect to primary oceanabse db success");
} }
} }
)
} }
if (OB_FAIL(ret)) { if (OB_FAIL(ret)) {
LOG_WARN("proxy connect to primary oceanbase db failed, then try connect to sys db"); LOG_WARN("proxy connect to primary oceanbase db failed, then try connect to sys db");
(void)destroy(); (void)destroy();
if (OB_SUCC(init(tenant_id, server_list, user_name, user_password, ORACLE_DB))) { if (OB_SUCC(init(tenant_id, server_list, user_name, user_password, ORACLE_DB))) {
RESTORE_RETRY(
SMART_VAR(ObMySQLProxy::MySQLResult, result) { SMART_VAR(ObMySQLProxy::MySQLResult, result) {
ObSqlString sql; ObSqlString sql;
if (OB_FAIL(sql.assign_fmt("SELECT 1 FROM DUAL"))) { if (OB_FAIL(sql.assign_fmt("SELECT 1 FROM DUAL"))) {
@ -323,6 +334,7 @@ int ObLogRestoreProxyUtil::try_init(const uint64_t tenant_id,
LOG_INFO("proxy connect to sys db success"); LOG_INFO("proxy connect to sys db success");
} }
} }
)
} }
} }
if (OB_FAIL(ret)) { if (OB_FAIL(ret)) {
@ -336,6 +348,7 @@ int ObLogRestoreProxyUtil::try_init(const uint64_t tenant_id,
int ObLogRestoreProxyUtil::get_tenant_id(char *tenant_name, uint64_t &tenant_id) int ObLogRestoreProxyUtil::get_tenant_id(char *tenant_name, uint64_t &tenant_id)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
RESTORE_RETRY(
SMART_VAR(ObMySQLProxy::MySQLResult, result) { SMART_VAR(ObMySQLProxy::MySQLResult, result) {
ObSqlString sql; ObSqlString sql;
if (OB_FAIL(sql.assign_fmt("SELECT TENANT_ID FROM %s WHERE TENANT_NAME='%s'", if (OB_FAIL(sql.assign_fmt("SELECT TENANT_ID FROM %s WHERE TENANT_NAME='%s'",
@ -356,6 +369,7 @@ int ObLogRestoreProxyUtil::get_tenant_id(char *tenant_name, uint64_t &tenant_id)
} }
} }
} }
)
if (OB_FAIL(ret)) { if (OB_FAIL(ret)) {
LOG_WARN("failed to get tenant id result"); LOG_WARN("failed to get tenant id result");
RESTORE_PROXY_USER_ERROR("tenant id"); RESTORE_PROXY_USER_ERROR("tenant id");
@ -366,6 +380,7 @@ int ObLogRestoreProxyUtil::get_tenant_id(char *tenant_name, uint64_t &tenant_id)
int ObLogRestoreProxyUtil::get_cluster_id(uint64_t tenant_id, int64_t &cluster_id) int ObLogRestoreProxyUtil::get_cluster_id(uint64_t tenant_id, int64_t &cluster_id)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
RESTORE_RETRY(
SMART_VAR(ObMySQLProxy::MySQLResult, result) { SMART_VAR(ObMySQLProxy::MySQLResult, result) {
ObSqlString sql; ObSqlString sql;
if (OB_FAIL(sql.assign_fmt("SELECT VALUE FROM %s WHERE NAME='cluster_id'", OB_GV_OB_PARAMETERS_TNAME))) { if (OB_FAIL(sql.assign_fmt("SELECT VALUE FROM %s WHERE NAME='cluster_id'", OB_GV_OB_PARAMETERS_TNAME))) {
@ -376,7 +391,7 @@ int ObLogRestoreProxyUtil::get_cluster_id(uint64_t tenant_id, int64_t &cluster_i
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("query result is null", K(sql)); LOG_WARN("query result is null", K(sql));
} else if (OB_FAIL(result.get_result()->next())) { } else if (OB_FAIL(result.get_result()->next())) {
LOG_WARN("get result next failed", K(tenant_id), K(sql)); LOG_WARN("get result next failed", K(sql));
} else { } else {
EXTRACT_INT_FIELD_MYSQL(*result.get_result(), "VALUE", cluster_id, int64_t); EXTRACT_INT_FIELD_MYSQL(*result.get_result(), "VALUE", cluster_id, int64_t);
@ -385,6 +400,7 @@ int ObLogRestoreProxyUtil::get_cluster_id(uint64_t tenant_id, int64_t &cluster_i
} }
} }
} }
)
if (OB_FAIL(ret)) { if (OB_FAIL(ret)) {
LOG_WARN("fail to get cluster id result"); LOG_WARN("fail to get cluster id result");
RESTORE_PROXY_USER_ERROR("cluster id"); RESTORE_PROXY_USER_ERROR("cluster id");
@ -397,6 +413,7 @@ int ObLogRestoreProxyUtil::get_compatibility_mode(const uint64_t tenant_id, ObCo
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
const char *MYSQL_STR = "MYSQL"; const char *MYSQL_STR = "MYSQL";
const char *ORACLE_STR = "ORACLE"; const char *ORACLE_STR = "ORACLE";
RESTORE_RETRY(
SMART_VAR(ObMySQLProxy::MySQLResult, result) { SMART_VAR(ObMySQLProxy::MySQLResult, result) {
ObSqlString sql; ObSqlString sql;
if (OB_FAIL(sql.assign_fmt("SELECT COMPATIBILITY_MODE FROM %s WHERE TENANT_ID=%ld", if (OB_FAIL(sql.assign_fmt("SELECT COMPATIBILITY_MODE FROM %s WHERE TENANT_ID=%ld",
@ -408,7 +425,7 @@ int ObLogRestoreProxyUtil::get_compatibility_mode(const uint64_t tenant_id, ObCo
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("query result is null", K(tenant_id), K(sql)); LOG_WARN("query result is null", K(tenant_id), K(sql));
} else if (OB_FAIL(result.get_result()->next())) { } else if (OB_FAIL(result.get_result()->next())) {
LOG_WARN("get result next failed", K(tenant_id), K(sql)); LOG_WARN("get result next failed", K(sql));
} else { } else {
ObString tmp_compat_mode; ObString tmp_compat_mode;
char compact[OB_MAX_COMPAT_MODE_STR_LEN + 1] = { 0 }; char compact[OB_MAX_COMPAT_MODE_STR_LEN + 1] = { 0 };
@ -430,6 +447,7 @@ int ObLogRestoreProxyUtil::get_compatibility_mode(const uint64_t tenant_id, ObCo
} }
} }
} }
)
return ret; return ret;
} }
@ -441,6 +459,7 @@ int ObLogRestoreProxyUtil::check_begin_lsn(const uint64_t tenant_id)
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(tenant_id)); LOG_WARN("invalid argument", KR(ret), K(tenant_id));
} else { } else {
RESTORE_RETRY(
SMART_VAR(ObMySQLProxy::MySQLResult, result) { SMART_VAR(ObMySQLProxy::MySQLResult, result) {
ObSqlString sql; ObSqlString sql;
if (OB_FAIL(sql.assign_fmt("SELECT COUNT(*) AS CNT FROM %s OB_LS LEFT JOIN" if (OB_FAIL(sql.assign_fmt("SELECT COUNT(*) AS CNT FROM %s OB_LS LEFT JOIN"
@ -458,10 +477,7 @@ int ObLogRestoreProxyUtil::check_begin_lsn(const uint64_t tenant_id)
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("config result is null", KR(ret), K(tenant_id), K(sql)); LOG_WARN("config result is null", KR(ret), K(tenant_id), K(sql));
} else if (OB_FAIL(result.get_result()->next())) { } else if (OB_FAIL(result.get_result()->next())) {
LOG_WARN("get result next failed", KR(ret), K(tenant_id), K(sql)); LOG_WARN("get result next failed", K(sql));
} else if (OB_ISNULL(result.get_result())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("result is null", KR(ret), K(tenant_id), K(sql));
} else { } else {
uint64_t cnt = 0; uint64_t cnt = 0;
EXTRACT_INT_FIELD_MYSQL(*result.get_result(), "CNT", cnt, uint64_t); EXTRACT_INT_FIELD_MYSQL(*result.get_result(), "CNT", cnt, uint64_t);
@ -474,6 +490,7 @@ int ObLogRestoreProxyUtil::check_begin_lsn(const uint64_t tenant_id)
} }
} }
} }
)
} }
return ret; return ret;
} }
@ -481,6 +498,7 @@ int ObLogRestoreProxyUtil::check_begin_lsn(const uint64_t tenant_id)
int ObLogRestoreProxyUtil::get_server_ip_list(const uint64_t tenant_id, common::ObArray<common::ObAddr> &addrs) int ObLogRestoreProxyUtil::get_server_ip_list(const uint64_t tenant_id, common::ObArray<common::ObAddr> &addrs)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
RESTORE_RETRY(
SMART_VAR(ObMySQLProxy::MySQLResult ,result) { SMART_VAR(ObMySQLProxy::MySQLResult ,result) {
ObSqlString sql; ObSqlString sql;
ObMySQLResult *res = NULL; ObMySQLResult *res = NULL;
@ -520,6 +538,7 @@ int ObLogRestoreProxyUtil::get_server_ip_list(const uint64_t tenant_id, common::
} }
} }
} }
)
return ret; return ret;
} }
@ -539,6 +558,7 @@ int ObLogRestoreProxyUtil::get_tenant_info(ObTenantRole &role, schema::ObTenantS
if (OB_UNLIKELY(!inited_)) { if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT; ret = OB_NOT_INIT;
} else { } else {
RESTORE_RETRY(
SMART_VAR(common::ObMySQLProxy::MySQLResult, res) { SMART_VAR(common::ObMySQLProxy::MySQLResult, res) {
common::sqlclient::ObMySQLResult *result = NULL; common::sqlclient::ObMySQLResult *result = NULL;
common::ObSqlString sql; common::ObSqlString sql;
@ -566,6 +586,7 @@ int ObLogRestoreProxyUtil::get_tenant_info(ObTenantRole &role, schema::ObTenantS
} }
} }
} }
)
} }
return ret; return ret;
} }
@ -583,6 +604,7 @@ int ObLogRestoreProxyUtil::get_max_log_info(const ObLSID &id, palf::AccessMode &
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
LOG_WARN("invlaid argument", K(id)); LOG_WARN("invlaid argument", K(id));
} else { } else {
RESTORE_RETRY(
SMART_VAR(common::ObMySQLProxy::MySQLResult, res) { SMART_VAR(common::ObMySQLProxy::MySQLResult, res) {
common::sqlclient::ObMySQLResult *result = NULL; common::sqlclient::ObMySQLResult *result = NULL;
common::ObSqlString sql; common::ObSqlString sql;
@ -611,6 +633,7 @@ int ObLogRestoreProxyUtil::get_max_log_info(const ObLSID &id, palf::AccessMode &
} }
} }
} }
)
} }
return ret; return ret;
} }