Use NetKeepAlive for PX server-alive checks
This commit is contained in:
@ -30,8 +30,8 @@
|
|||||||
#include "sql/dtl/ob_dtl_interm_result_manager.h"
|
#include "sql/dtl/ob_dtl_interm_result_manager.h"
|
||||||
#include "sql/dtl/ob_dtl_channel_loop.h"
|
#include "sql/dtl/ob_dtl_channel_loop.h"
|
||||||
#include "sql/dtl/ob_dtl_channel_watcher.h"
|
#include "sql/dtl/ob_dtl_channel_watcher.h"
|
||||||
#include "share/ob_server_blacklist.h"
|
|
||||||
#include "observer/omt/ob_th_worker.h"
|
#include "observer/omt/ob_th_worker.h"
|
||||||
|
#include "sql/engine/px/ob_px_util.h"
|
||||||
#include "sql/session/ob_sql_session_info.h"
|
#include "sql/session/ob_sql_session_info.h"
|
||||||
|
|
||||||
using namespace oceanbase::common;
|
using namespace oceanbase::common;
|
||||||
@ -850,8 +850,7 @@ int ObDtlBasicChannel::wait_unblocking()
|
|||||||
LOG_WARN("worker interrupt", K(tmp_ret), K(ret));
|
LOG_WARN("worker interrupt", K(tmp_ret), K(ret));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
|
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(peer_,
|
||||||
share::ObCascadMember(peer_, GCONF.cluster_id), true,
|
|
||||||
channel_loop_->get_process_query_time()))) {
|
channel_loop_->get_process_query_time()))) {
|
||||||
ret = OB_RPC_CONNECT_ERROR;
|
ret = OB_RPC_CONNECT_ERROR;
|
||||||
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(peer_),
|
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(peer_),
|
||||||
|
|||||||
@ -15,9 +15,9 @@
|
|||||||
#include "share/interrupt/ob_global_interrupt_call.h"
|
#include "share/interrupt/ob_global_interrupt_call.h"
|
||||||
#include "observer/omt/ob_tenant_config_mgr.h"
|
#include "observer/omt/ob_tenant_config_mgr.h"
|
||||||
#include "share/diagnosis/ob_sql_monitor_statname.h"
|
#include "share/diagnosis/ob_sql_monitor_statname.h"
|
||||||
#include "share/ob_server_blacklist.h"
|
|
||||||
#include "observer/omt/ob_th_worker.h"
|
#include "observer/omt/ob_th_worker.h"
|
||||||
#include "share/ob_occam_time_guard.h"
|
#include "share/ob_occam_time_guard.h"
|
||||||
|
#include "sql/engine/px/ob_px_util.h"
|
||||||
|
|
||||||
using namespace oceanbase::common;
|
using namespace oceanbase::common;
|
||||||
|
|
||||||
@ -359,9 +359,7 @@ int ObDtlChannelLoop::process_channels(ObIDltChannelLoopPred *pred, int64_t &nth
|
|||||||
LOG_WARN("unexpect next idx", K(next_idx_), K(chan_cnt), K(ret));
|
LOG_WARN("unexpect next idx", K(next_idx_), K(chan_cnt), K(ret));
|
||||||
} else {
|
} else {
|
||||||
chan = chans_[next_idx_];
|
chan = chans_[next_idx_];
|
||||||
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
|
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(chan->get_peer(), get_process_query_time()))) {
|
||||||
share::ObCascadMember(chan->get_peer(), GCONF.cluster_id), true,
|
|
||||||
get_process_query_time()))) {
|
|
||||||
ret = OB_RPC_CONNECT_ERROR;
|
ret = OB_RPC_CONNECT_ERROR;
|
||||||
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(chan->get_peer()),
|
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(chan->get_peer()),
|
||||||
K(static_cast<int64_t>(GCONF.cluster_id)));
|
K(static_cast<int64_t>(GCONF.cluster_id)));
|
||||||
@ -415,8 +413,7 @@ int ObDtlChannelLoop::process_channel(int64_t &nth_channel)
|
|||||||
}
|
}
|
||||||
ObDtlChannel *ch = sentinel_node_.next_link_;
|
ObDtlChannel *ch = sentinel_node_.next_link_;
|
||||||
while (OB_EAGAIN == ret && ch != &sentinel_node_) {
|
while (OB_EAGAIN == ret && ch != &sentinel_node_) {
|
||||||
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
|
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(ch->get_peer(),
|
||||||
share::ObCascadMember(ch->get_peer(), GCONF.cluster_id), true,
|
|
||||||
get_process_query_time()))) {
|
get_process_query_time()))) {
|
||||||
ret = OB_RPC_CONNECT_ERROR;
|
ret = OB_RPC_CONNECT_ERROR;
|
||||||
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(ch->get_peer()),
|
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(ch->get_peer()),
|
||||||
|
|||||||
@ -21,7 +21,6 @@
|
|||||||
#include "sql/engine/px/ob_px_dtl_msg.h"
|
#include "sql/engine/px/ob_px_dtl_msg.h"
|
||||||
#include "sql/engine/px/ob_px_rpc_processor.h"
|
#include "sql/engine/px/ob_px_rpc_processor.h"
|
||||||
#include "sql/engine/px/ob_px_sqc_async_proxy.h"
|
#include "sql/engine/px/ob_px_sqc_async_proxy.h"
|
||||||
#include "share/ob_server_blacklist.h"
|
|
||||||
|
|
||||||
using namespace oceanbase::common;
|
using namespace oceanbase::common;
|
||||||
using namespace oceanbase::share;
|
using namespace oceanbase::share;
|
||||||
@ -489,9 +488,7 @@ int ObSerialDfoScheduler::dispatch_sqcs(ObExecContext &exec_ctx,
|
|||||||
ObPxSqcMeta &sqc = *sqcs.at(idx);
|
ObPxSqcMeta &sqc = *sqcs.at(idx);
|
||||||
const ObAddr &addr = sqc.get_exec_addr();
|
const ObAddr &addr = sqc.get_exec_addr();
|
||||||
auto proxy = coord_info_.rpc_proxy_.to(addr);
|
auto proxy = coord_info_.rpc_proxy_.to(addr);
|
||||||
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
|
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(addr, session->get_process_query_time()))) {
|
||||||
share::ObCascadMember(addr, cluster_id), true /* add_server */,
|
|
||||||
session->get_process_query_time()))) {
|
|
||||||
if (!ignore_vtable_error) {
|
if (!ignore_vtable_error) {
|
||||||
ret = OB_RPC_CONNECT_ERROR;
|
ret = OB_RPC_CONNECT_ERROR;
|
||||||
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(sqc), K(cluster_id),
|
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(sqc), K(cluster_id),
|
||||||
|
|||||||
@ -14,7 +14,6 @@
|
|||||||
|
|
||||||
#include "ob_px_data_ch_provider.h"
|
#include "ob_px_data_ch_provider.h"
|
||||||
#include "sql/engine/px/ob_px_util.h"
|
#include "sql/engine/px/ob_px_util.h"
|
||||||
#include "share/ob_server_blacklist.h"
|
|
||||||
|
|
||||||
|
|
||||||
using namespace oceanbase::common;
|
using namespace oceanbase::common;
|
||||||
@ -426,9 +425,7 @@ int ObPxChProviderUtil::check_status(int64_t timeout_ts, const ObAddr &qc_addr,
|
|||||||
} else if (timeout_ts <= ObTimeUtility::current_time()) {
|
} else if (timeout_ts <= ObTimeUtility::current_time()) {
|
||||||
ret = OB_TIMEOUT;
|
ret = OB_TIMEOUT;
|
||||||
LOG_WARN("timeout and abort", K(timeout_ts), K(ret));
|
LOG_WARN("timeout and abort", K(timeout_ts), K(ret));
|
||||||
} else if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
|
} else if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(qc_addr, query_start_time))) {
|
||||||
share::ObCascadMember(qc_addr, GCONF.cluster_id), true,
|
|
||||||
query_start_time))) {
|
|
||||||
ret = OB_RPC_CONNECT_ERROR;
|
ret = OB_RPC_CONNECT_ERROR;
|
||||||
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(qc_addr),
|
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(qc_addr),
|
||||||
K(static_cast<int64_t>(GCONF.cluster_id)));
|
K(static_cast<int64_t>(GCONF.cluster_id)));
|
||||||
|
|||||||
@ -13,7 +13,7 @@
|
|||||||
#define USING_LOG_PREFIX SQL_ENG
|
#define USING_LOG_PREFIX SQL_ENG
|
||||||
|
|
||||||
#include "sql/engine/px/ob_px_sqc_async_proxy.h"
|
#include "sql/engine/px/ob_px_sqc_async_proxy.h"
|
||||||
#include "share/ob_server_blacklist.h"
|
#include "sql/engine/px/ob_px_util.h"
|
||||||
|
|
||||||
namespace oceanbase {
|
namespace oceanbase {
|
||||||
using namespace common;
|
using namespace common;
|
||||||
@ -68,9 +68,7 @@ int ObPxSqcAsyncProxy::launch_all_rpc_request() {
|
|||||||
args.enable_serialize_cache();
|
args.enable_serialize_cache();
|
||||||
}
|
}
|
||||||
ARRAY_FOREACH_X(sqcs_, idx, count, OB_SUCC(ret)) {
|
ARRAY_FOREACH_X(sqcs_, idx, count, OB_SUCC(ret)) {
|
||||||
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
|
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(sqcs_.at(idx)->get_exec_addr(),
|
||||||
share::ObCascadMember(sqcs_.at(idx)->get_exec_addr(), cluster_id),
|
|
||||||
true /* add_server */,
|
|
||||||
session_->get_process_query_time()))) {
|
session_->get_process_query_time()))) {
|
||||||
ret = OB_RPC_CONNECT_ERROR;
|
ret = OB_RPC_CONNECT_ERROR;
|
||||||
LOG_WARN("peer no in communication, maybe crashed", K(ret),
|
LOG_WARN("peer no in communication, maybe crashed", K(ret),
|
||||||
|
|||||||
@ -28,9 +28,9 @@
|
|||||||
#include "share/schema/ob_part_mgr_util.h"
|
#include "share/schema/ob_part_mgr_util.h"
|
||||||
#include "sql/engine/dml/ob_table_insert_op.h"
|
#include "sql/engine/dml/ob_table_insert_op.h"
|
||||||
#include "sql/session/ob_sql_session_info.h"
|
#include "sql/session/ob_sql_session_info.h"
|
||||||
#include "share/ob_server_blacklist.h"
|
|
||||||
#include "common/ob_smart_call.h"
|
#include "common/ob_smart_call.h"
|
||||||
#include "storage/ob_locality_manager.h"
|
#include "storage/ob_locality_manager.h"
|
||||||
|
#include "rpc/obrpc/ob_net_keepalive.h"
|
||||||
|
|
||||||
using namespace oceanbase::common;
|
using namespace oceanbase::common;
|
||||||
using namespace oceanbase::sql;
|
using namespace oceanbase::sql;
|
||||||
@ -3480,14 +3480,12 @@ int ObExtraServerAliveCheck::do_check() const
|
|||||||
if (OB_FAIL(dfo_mgr_->get_running_dfos(dfos))) {
|
if (OB_FAIL(dfo_mgr_->get_running_dfos(dfos))) {
|
||||||
LOG_WARN("fail find dfo", K(ret));
|
LOG_WARN("fail find dfo", K(ret));
|
||||||
} else {
|
} else {
|
||||||
share::ObServerBlacklist &server_black_list = share::ObServerBlacklist::get_instance();
|
|
||||||
// need check all sqc because we set sqc need_report = false here and don't need wait sqc finish msg.
|
// need check all sqc because we set sqc need_report = false here and don't need wait sqc finish msg.
|
||||||
for (int64_t i = 0; i < dfos.count(); i++) {
|
for (int64_t i = 0; i < dfos.count(); i++) {
|
||||||
ObIArray<ObPxSqcMeta> &sqcs = dfos.at(i)->get_sqcs();
|
ObIArray<ObPxSqcMeta> &sqcs = dfos.at(i)->get_sqcs();
|
||||||
for (int64_t j = 0; j < sqcs.count(); j++) {
|
for (int64_t j = 0; j < sqcs.count(); j++) {
|
||||||
if (sqcs.at(j).need_report()) {
|
if (sqcs.at(j).need_report()) {
|
||||||
if (OB_UNLIKELY(server_black_list.is_in_blacklist(
|
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(sqcs.at(j).get_exec_addr(),
|
||||||
share::ObCascadMember(sqcs.at(j).get_exec_addr(), cluster_id_), true,
|
|
||||||
query_start_time_))) {
|
query_start_time_))) {
|
||||||
sqcs.at(j).set_need_report(false);
|
sqcs.at(j).set_need_report(false);
|
||||||
sqcs.at(j).set_thread_finish(true);
|
sqcs.at(j).set_thread_finish(true);
|
||||||
@ -3503,8 +3501,7 @@ int ObExtraServerAliveCheck::do_check() const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (OB_LIKELY(qc_addr_.is_valid())) {
|
} else if (OB_LIKELY(qc_addr_.is_valid())) {
|
||||||
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(share::ObCascadMember(
|
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(qc_addr_, query_start_time_))) {
|
||||||
qc_addr_, cluster_id_), true, query_start_time_))) {
|
|
||||||
ret = OB_RPC_CONNECT_ERROR;
|
ret = OB_RPC_CONNECT_ERROR;
|
||||||
LOG_WARN("qc not in communication, maybe crashed", K(ret), K(qc_addr_));
|
LOG_WARN("qc not in communication, maybe crashed", K(ret), K(qc_addr_));
|
||||||
}
|
}
|
||||||
@ -3535,3 +3532,19 @@ bool ObVirtualTableErrorWhitelist::should_ignore_vtable_error(int error_code)
|
|||||||
}
|
}
|
||||||
return should_ignore;
|
return should_ignore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a PX peer should be treated as unreachable.
 *
 * First asks ObNetKeepAlive whether `addr` is in its blacklist. If that lookup
 * succeeds, the peer is not blacklisted, and `server_start_time` is positive,
 * the peer is still reported as blacklisted when its keepalive
 * `start_service_time_` is at or after `server_start_time`.
 *
 * @param addr               address of the peer server to check
 * @param server_start_time  reference timestamp for the start-service-time
 *                           comparison; the callers visible in this change pass
 *                           the query start time. A value <= 0 skips the
 *                           comparison.
 * @return true when the peer is considered blacklisted. When the keepalive
 *         lookup fails the error is only logged and the flag is returned as
 *         in_black() left it (it is initialised to false).
 */
bool ObPxCheckAlive::is_in_blacklist(const common::ObAddr &addr, int64_t server_start_time)
{
  int ret = OB_SUCCESS;
  bool in_blacklist = false;
  obrpc::ObNetKeepAliveData alive_data;
  if (OB_FAIL(ObNetKeepAlive::get_instance().in_black(addr, in_blacklist, &alive_data))) {
    // The error is deliberately not propagated (the function returns bool);
    // log the peer address too so the failed lookup can be tied to a server.
    LOG_WARN("check in black failed", K(ret), K(addr));
  } else if (!in_blacklist && server_start_time > 0) {
    // NOTE(review): presumably a start-service time at or after the reference
    // timestamp means the server (re)started after the query began, so state
    // the query left on it is gone -- confirm against ObNetKeepAliveData.
    in_blacklist = alive_data.start_service_time_ >= server_start_time;
  }
  if (in_blacklist) {
    LOG_WARN("server in blacklist", K(addr), K(server_start_time), K(alive_data.start_service_time_));
  }
  return in_blacklist;
}
|
||||||
@ -548,6 +548,12 @@ public:
|
|||||||
static bool should_ignore_vtable_error(int error_code);
|
static bool should_ignore_vtable_error(int error_code);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Stateless helper exposing a single static liveness check for a peer server,
// used by the PX/DTL call sites shown in this change.
class ObPxCheckAlive
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Returns true when `addr` should be treated as blacklisted (unreachable).
// `server_start_time` is a reference timestamp; the implementation only uses
// it when it is > 0, comparing it with the peer's keepalive start-service
// time. Call sites in this change pass the query start time.
static bool is_in_blacklist(const common::ObAddr &addr, int64_t server_start_time);
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user