patch 4.0

This commit is contained in:
wangzelin.wzl
2022-10-24 10:34:53 +08:00
parent 4ad6e00ec3
commit 93a1074b0c
10533 changed files with 2588271 additions and 2299373 deletions

View File

@ -15,6 +15,8 @@
#include "share/interrupt/ob_global_interrupt_call.h"
#include "observer/omt/ob_tenant_config_mgr.h"
#include "share/diagnosis/ob_sql_monitor_statname.h"
#include "share/ob_server_blacklist.h"
#include "observer/omt/ob_th_worker.h"
using namespace oceanbase::common;
@ -29,7 +31,7 @@ ObDtlChannelLoop::ObDtlChannelLoop()
chans_(),
next_idx_(0),
last_msg_type_(static_cast<uint16_t>(ObDtlMsgType::MAX)),
cond_(),
cond_(ObWaitEventIds::PX_LOOP_COND_WAIT),
ignore_interrupt_(false),
tenant_id_(UINT64_MAX),
timeout_(INT64_MAX),
@ -38,25 +40,60 @@ ObDtlChannelLoop::ObDtlChannelLoop()
mock_addr_(),
sentinel_node_(1, 0, mock_addr_),
n_first_no_data_(0),
op_monitor_info_(nullptr),
op_monitor_info_(default_op_monitor_info_),
first_data_get_(false),
process_func_(last_msg_type_, proc_map_),
loop_times_(0),
use_interm_result_(false),
time_recorder_(0)
eof_channel_cnt_(0),
loop_times_(0),
begin_wait_time_(0),
process_query_time_(0)
{
op_monitor_info_.otherstat_5_id_ = ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS_AFTER_DATA;
op_monitor_info_.otherstat_6_id_ = ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS;
sentinel_node_.prev_link_ = &sentinel_node_;
sentinel_node_.next_link_ = &sentinel_node_;
}
void ObDtlChannelLoop::notify(ObDtlChannel& chan)
// Constructs a channel loop that reports its loop statistics into the
// monitor node supplied by the caller.
//
// @param op_monitor_info monitor node used to initialize op_monitor_info_;
//        its otherstat_5 / otherstat_6 ids are overwritten here.
//        NOTE(review): op_monitor_info_ is presumably a reference member, so
//        the node would have to outlive this loop - confirm against the header.
ObDtlChannelLoop::ObDtlChannelLoop(ObMonitorNode &op_monitor_info)
    : proc_map_()
    , interrupt_proc_(nullptr)
    , chans_()
    , next_idx_(0)
    , last_msg_type_(static_cast<uint16_t>(ObDtlMsgType::MAX))
    , cond_(ObWaitEventIds::PX_LOOP_COND_WAIT)
    , ignore_interrupt_(false)
    , tenant_id_(UINT64_MAX)
    , timeout_(INT64_MAX)
    , proxy_first_buffer_cache_(nullptr)
    , spin_lock_()
    , mock_addr_()
    , sentinel_node_(1, 0, mock_addr_)
    , n_first_no_data_(0)
    , op_monitor_info_(op_monitor_info)
    , first_data_get_(false)
    , process_func_(last_msg_type_, proc_map_)
    , use_interm_result_(false)
    , eof_channel_cnt_(0)
    , loop_times_(0)
    , begin_wait_time_(0)
    , process_query_time_(0)
{
  // The sentinel starts out as its own successor and predecessor, i.e. the
  // linked list of channels hanging off it contains no real channel yet.
  sentinel_node_.next_link_ = &sentinel_node_;
  sentinel_node_.prev_link_ = &sentinel_node_;

  // Label the two "other stat" slots that this loop increments on misses.
  op_monitor_info_.otherstat_6_id_ = ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS;
  op_monitor_info_.otherstat_5_id_ = ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS_AFTER_DATA;
}
// Notification hook for a channel attached to this loop.
//
// Hands the channel to add_last_data_list() and then signals cond_, the
// condition that process_base() waits on when no channel had data.
//
// @param chan channel that triggered the notification
void ObDtlChannelLoop::notify(ObDtlChannel &chan)
{
  // The former UNUSED(chan) marker was dropped: the parameter is used right
  // here, so the marker was dead and misleading.
  add_last_data_list(&chan);
  cond_.signal();
}
int ObDtlChannelLoop::has_first_buffer(uint64_t chan_id, bool& has_first_buffer)
int ObDtlChannelLoop::has_first_buffer(uint64_t chan_id, bool &has_first_buffer)
{
int ret = OB_SUCCESS;
has_first_buffer = false;
@ -64,12 +101,8 @@ int ObDtlChannelLoop::has_first_buffer(uint64_t chan_id, bool& has_first_buffer)
if (OB_FAIL(proxy_first_buffer_cache_->has_first_buffer(chan_id, has_first_buffer))) {
LOG_WARN("failed to get first buffer", K(ret));
} else {
LOG_DEBUG("trace has first buffer",
K(chan_id),
KP(chan_id),
K(has_first_buffer),
KP(proxy_first_buffer_cache_),
K(proxy_first_buffer_cache_->get_first_buffer_key()));
LOG_DEBUG("trace has first buffer", K(chan_id), KP(chan_id), K(has_first_buffer),
KP(proxy_first_buffer_cache_), K(proxy_first_buffer_cache_->get_first_buffer_key()));
}
}
return ret;
@ -86,7 +119,7 @@ int ObDtlChannelLoop::set_first_buffer(uint64_t chan_id)
return ret;
}
int ObDtlChannelLoop::unregister_channel(ObDtlChannel& chan)
int ObDtlChannelLoop::unregister_channel(ObDtlChannel &chan)
{
int ret = OB_SUCCESS;
int find_idx = -1;
@ -105,6 +138,7 @@ int ObDtlChannelLoop::unregister_channel(ObDtlChannel& chan)
return ret;
}
int ObDtlChannelLoop::unregister_all_channel()
{
int ret = OB_SUCCESS;
@ -112,7 +146,7 @@ int ObDtlChannelLoop::unregister_all_channel()
for (int64_t i = chans_.count() - 1; i >= 0; --i) {
if (OB_SUCCESS != (tmp_ret = chans_.remove(i))) {
if (OB_SUCCESS == ret) {
ret = tmp_ret; // record fisrt failure reason, but continue to remove
ret = tmp_ret; // record fisrt failure reason, but continue to remove
}
continue;
}
@ -120,12 +154,11 @@ int ObDtlChannelLoop::unregister_all_channel()
return ret;
}
int ObDtlChannelLoop::find(ObDtlChannel* ch, int64_t& out_idx)
int ObDtlChannelLoop::find(ObDtlChannel* ch, int64_t &out_idx)
{
int ret = OB_SUCCESS;
out_idx = OB_INVALID_ID;
ARRAY_FOREACH_X(chans_, idx, cnt, OB_INVALID_ID == out_idx)
{
ARRAY_FOREACH_X(chans_, idx, cnt, OB_INVALID_ID == out_idx) {
if (ch == chans_.at(idx)) {
out_idx = idx;
}
@ -137,7 +170,8 @@ int ObDtlChannelLoop::find(ObDtlChannel* ch, int64_t& out_idx)
return ret;
}
int ObDtlChannelLoop::ObDtlChannelLoopProc::process(const ObDtlLinkedBuffer& buffer, dtl::ObDtlMsgIterator* iter)
int ObDtlChannelLoop::ObDtlChannelLoopProc::process(
const ObDtlLinkedBuffer &buffer, bool &transferred)
{
int ret = OB_SUCCESS;
ObDtlMsgHeader header;
@ -146,10 +180,10 @@ int ObDtlChannelLoop::ObDtlChannelLoopProc::process(const ObDtlLinkedBuffer& buf
if (last_msg_type_ >= static_cast<int16_t>(ObDtlMsgType::MAX)) {
ret = OB_INVALID_ARGUMENT;
SQL_DTL_LOG(WARN, "channel has received message with unknown type", K(last_msg_type_));
} else if (proc_map_[last_msg_type_] == nullptr) {
} else if (proc_map_[last_msg_type_] == nullptr){
ret = OB_INVALID_ARGUMENT;
SQL_DTL_LOG(WARN, "channel has received message without processor", K(last_msg_type_), K(iter));
} else if (OB_FAIL(proc_map_[last_msg_type_]->process(buffer, iter))) {
SQL_DTL_LOG(WARN, "channel has received message without processor", K(last_msg_type_));
} else if (OB_FAIL(proc_map_[last_msg_type_]->process(buffer, transferred))) {
if (OB_ITER_END != ret) {
LOG_WARN("process message in channel fail", K(ret), K(last_msg_type_));
} else {
@ -157,31 +191,33 @@ int ObDtlChannelLoop::ObDtlChannelLoopProc::process(const ObDtlLinkedBuffer& buf
}
}
} else if (OB_FAIL(ObDtlLinkedBuffer::deserialize_msg_header(buffer, header))) {
// 这里可能是OB_ITER_END,不能打WARN日志.
LOG_TRACE("failed to deserialize msg", K(ret), K(&buffer), K(lbt()));
} else {
last_msg_type_ = header.type_;
if (proc_map_[header.type_] == nullptr) {
ret = OB_INVALID_ARGUMENT;
SQL_DTL_LOG(WARN, "channel has received message without processor", K(header), K(ret));
} else if (OB_FAIL(proc_map_[header.type_]->process(buffer))) {
LOG_WARN("process message in channel fail", K(header), K(ret));
SQL_DTL_LOG(WARN, "channel has received message without processor",
K(header), K(ret));
} else if (OB_FAIL(proc_map_[header.type_]->process(buffer, transferred))) {
LOG_WARN("process message in channel fail",
K(header), K(ret));
}
}
return ret;
}
int ObDtlChannelLoop::process_base(ObIDltChannelLoopPred* pred, int64_t& hinted_channel, int64_t timeout)
int ObDtlChannelLoop::process_base(ObIDltChannelLoopPred *pred, int64_t &hinted_channel, int64_t timeout)
{
int ret = OB_SUCCESS;
UNUSED(timeout);
if (chans_.count() == 0) {
ret = OB_INVALID_ARGUMENT;
LOG_ERROR("channel hasn't set", K(ret));
} else {
// The way it works:
// 1. loop all channels, if got amsg, succeed
// 2. if there is no message on any channel, sleep
// 3. when waken up, loop all channel again. If there is nothing to process, return EAGAIN
// 处理思路:
// 1. 轮询所有 channel,如果能收到一个数据,则成功
// 2. 如果没有任何消息,则等待消息唤醒
// 3. 被唤醒后,再轮询所有 channel 是否有数据可处理,如果没有则返回 EAGAIN
//
uint32_t wait_key = cond_.get_key();
if (OB_UNLIKELY(nullptr != pred)) {
@ -208,9 +244,12 @@ int ObDtlChannelLoop::process_base(ObIDltChannelLoopPred* pred, int64_t& hinted_
if (OB_SUCC(ret)) {
// succ process one channel
} else if (OB_EAGAIN == ret) {
// by testing TPCH 100G Q1 we found that different iteration gap time makes differen query RT.
// if gap time being too short, it wastes too much CPU cycles
if (OB_UNLIKELY(INT64_MAX == timeout_)) {
begin_wait_time_counting();
// 通过TPCH 100G Q1测试发现,轮询时间间隔会导致性能有差异,轮询时间间隔较短
// 会占用大量CPU,导致CPU利用率不高,从而影响性能
if (timeout > 0) {
cond_.wait(wait_key, timeout);
} else if (OB_UNLIKELY(INT64_MAX == timeout_)) {
ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id_));
if (tenant_config.is_valid()) {
timeout_ = tenant_config->_parallel_server_sleep_time * 1000;
@ -223,6 +262,7 @@ int ObDtlChannelLoop::process_base(ObIDltChannelLoopPred* pred, int64_t& hinted_
} else {
cond_.wait(wait_key, timeout_);
}
end_wait_time_counting();
} else {
LOG_WARN("fail process channel", K(ret));
}
@ -231,9 +271,9 @@ int ObDtlChannelLoop::process_base(ObIDltChannelLoopPred* pred, int64_t& hinted_
if (ignore_interrupt_) {
// do nothing.
} else if ((loop_times_ & (INTERRUPT_CHECK_TIMES - 1)) == 0 && OB_UNLIKELY(IS_INTERRUPTED())) {
// interrupt handling
// 中断错误处理
// overwrite ret
ObInterruptCode& code = GET_INTERRUPT_CODE();
ObInterruptCode &code = GET_INTERRUPT_CODE();
ret = code.code_;
LOG_WARN("message loop is interrupted", K(code), K(ret));
}
@ -241,13 +281,13 @@ int ObDtlChannelLoop::process_base(ObIDltChannelLoopPred* pred, int64_t& hinted_
return ret;
}
// there are 3 kinds of 'process':
// 1) fifo. any data is welcome
// 2) merge receive. sorted data first. if not data, fallback to fifo
// 1)+2) -> use process_one
// 3) merge sort coord. use pred to select channel
// 3) -> use process_one_if
int ObDtlChannelLoop::process_one(int64_t& hinted_channel, int64_t timeout)
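// process() is currently used in three main ways:
// 1) fifo: just take data, without caring about the order of the data
// 2) merge receive: take sorted data first; if there is none, take any data, similar to 1)
// 1)+2) -> use process_one
// 3) merge sort coord: pred restricts which data is taken, e.g. only control messages
//    or data from the channel that the sort requires
// 3) -> use process_one_if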
int ObDtlChannelLoop::process_one(int64_t &hinted_channel, int64_t timeout)
{
int ret = OB_SUCCESS;
ret = process_base(nullptr, hinted_channel, timeout);
@ -258,13 +298,13 @@ int ObDtlChannelLoop::process_one(int64_t& hinted_channel, int64_t timeout)
return ret;
}
int ObDtlChannelLoop::process_one(int64_t timeout)
// Runs one process_base() round without a predicate, for callers that do not
// need to know which channel was processed.
//
// @param timeout forwarded unchanged to process_base()
// @return whatever process_base() returns
int ObDtlChannelLoop::process_any(int64_t timeout)
{
  int ret = OB_SUCCESS;
  // process_base() requires a channel-index in/out argument; the value it
  // leaves behind is intentionally discarded.
  int64_t discarded_channel_idx = OB_INVALID_INDEX_INT64;
  ret = process_base(nullptr, discarded_channel_idx, timeout);
  return ret;
}
int ObDtlChannelLoop::process_one_if(ObIDltChannelLoopPred* pred, int64_t timeout, int64_t& ret_channel)
int ObDtlChannelLoop::process_one_if(ObIDltChannelLoopPred *pred, int64_t &ret_channel, int64_t timeout)
{
int ret = OB_SUCCESS;
ret_channel = OB_INVALID_INDEX_INT64;
@ -276,10 +316,10 @@ int ObDtlChannelLoop::process_one_if(ObIDltChannelLoopPred* pred, int64_t timeou
return ret;
}
int ObDtlChannelLoop::process_channels(ObIDltChannelLoopPred* pred, int64_t& nth_channel)
int ObDtlChannelLoop::process_channels(ObIDltChannelLoopPred *pred, int64_t &nth_channel)
{
int ret = OB_EAGAIN;
ObDtlChannel* chan = nullptr;
ObDtlChannel *chan = nullptr;
bool last_row_in_buffer = false;
int64_t chan_cnt = chans_.count();
for (int64_t i = 0; i != chan_cnt && ret == OB_EAGAIN; ++i) {
@ -291,10 +331,9 @@ int ObDtlChannelLoop::process_channels(ObIDltChannelLoopPred* pred, int64_t& nth
if (nullptr == pred || pred->pred_process(next_idx_, chan)) {
if (OB_SUCC(chan->process1(&process_func_, 0, last_row_in_buffer))) {
nth_channel = next_idx_;
if (nullptr != op_monitor_info_) {
first_data_get_ = true;
}
first_data_get_ = true;
if (last_row_in_buffer) {
// 每次接收一个channel的所有buffer
++next_idx_;
if (next_idx_ >= chan_cnt) {
next_idx_ %= chan_cnt;
@ -313,26 +352,39 @@ int ObDtlChannelLoop::process_channels(ObIDltChannelLoopPred* pred, int64_t& nth
next_idx_ %= chan_cnt;
}
}
if (nullptr != op_monitor_info_) {
if (first_data_get_) {
op_monitor_info_->otherstat_5_id_ = ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS_AFTER_DATA;
++op_monitor_info_->otherstat_5_value_;
}
op_monitor_info_->otherstat_6_id_ = ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS;
++op_monitor_info_->otherstat_6_value_;
if (first_data_get_) {
// 第一次命中后的miss计数
// ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS_AFTER_DATA;
++op_monitor_info_.otherstat_5_value_;
}
// ObSqlMonitorStatIds::DTL_LOOP_TOTAL_MISS;
++op_monitor_info_.otherstat_6_value_;
}
return ret;
}
int ObDtlChannelLoop::process_channel(int64_t& nth_channel)
int ObDtlChannelLoop::process_channel(int64_t &nth_channel)
{
int ret = OB_EAGAIN;
int64_t n_times = 0;
bool last_row_in_buffer = false;
ObDtlChannel* ch = sentinel_node_.next_link_;
if (ret == OB_EAGAIN && (OB_ISNULL(proxy_first_buffer_cache_) ||
use_interm_result_ ||
(0 < proxy_first_buffer_cache_->get_first_buffer_cnt() &&
n_first_no_data_ < chans_.count()))) {
// less then chan_cnt, then probe first buffer
ret = process_channels(nullptr, nth_channel);
}
ObDtlChannel *ch = sentinel_node_.next_link_;
while (OB_EAGAIN == ret && ch != &sentinel_node_) {
if (OB_SUCC(ch->process1(&process_func_, 0, last_row_in_buffer))) {
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
share::ObCascadMember(ch->get_peer(), GCONF.cluster_id), true,
get_process_query_time()))) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(ch->get_peer()),
K(static_cast<int64_t>(GCONF.cluster_id)));
break;
} else if (OB_SUCC(ch->process1(&process_func_, 0, last_row_in_buffer))) {
nth_channel = ch->get_loop_index();
break;
} else if (OB_EAGAIN == ret) {
@ -341,24 +393,24 @@ int ObDtlChannelLoop::process_channel(int64_t& nth_channel)
if (n_times > 100) {
// it's maybe unexpected !
LOG_WARN("loop times", K(n_times));
usleep(1);
int tmp_ret = THIS_WORKER.check_status();
if (OB_SUCCESS != tmp_ret) {
ret = tmp_ret;
LOG_WARN("worker interrupt", K(tmp_ret), K(ret));
break;
}
ob_usleep<ObWaitEventIds::DTL_PROCESS_CHANNEL_SLEEP>(1);
}
++loop_times_;
if (ignore_interrupt_) {
} else if ((loop_times_ & (INTERRUPT_CHECK_TIMES - 1)) == 0 && OB_UNLIKELY(IS_INTERRUPTED())) {
ObInterruptCode& code = GET_INTERRUPT_CODE();
ObInterruptCode &code = GET_INTERRUPT_CODE();
ret = code.code_;
LOG_WARN("message loop is interrupted", K(code), K(ret));
}
ch = sentinel_node_.next_link_;
++n_times;
}
if (ret == OB_EAGAIN &&
(OB_ISNULL(proxy_first_buffer_cache_) || use_interm_result_ ||
(0 < proxy_first_buffer_cache_->get_first_buffer_cnt() && n_first_no_data_ < chans_.count()))) {
// less then chan_cnt, then probe first buffer
ret = process_channels(nullptr, nth_channel);
}
return ret;
}
@ -371,10 +423,11 @@ int ObDtlChannelLoop::unblock_channels(int64_t data_channel_idx)
int ret = OB_SUCCESS;
if (data_channel_idx >= chans_.count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected status: data channel idx is invalid", K(ret), K(data_channel_idx), K(chans_.count()));
LOG_WARN("unexpected status: data channel idx is invalid", K(ret),
K(data_channel_idx), K(chans_.count()));
} else {
ObDfcServer& dfc_server = DTL.get_dfc_server();
ObDtlChannel* ch = chans_.at(data_channel_idx);
ObDfcServer &dfc_server = DTL.get_dfc_server();
ObDtlChannel *ch = chans_.at(data_channel_idx);
if (nullptr == ch->get_dfc()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid data channel, dfc is null", K(ret), K(data_channel_idx), K(chans_.count()));
@ -387,6 +440,34 @@ int ObDtlChannelLoop::unblock_channels(int64_t data_channel_idx)
return ret;
}
} // namespace dtl
} // namespace sql
} // namespace oceanbase
// NOTE: not used at the moment.
//
// Unblocks a single channel, intended for the merge sort coordinator: when the
// coordinator cannot get a row for the current channel, the blocked channel
// has to be unblocked, otherwise execution may hang.
//
// @param start_data_channel_idx index into chans_ of the data channel whose
//        dfc is handed to the dfc server
// @param dfc_channel_idx        forwarded unchanged to
//        ObDfcServer::unblock_channel()
// @return OB_SUCCESS on success; OB_ERR_UNEXPECTED when the index is out of
//         range, the channel slot is null, or the channel has no dfc;
//         otherwise the error returned by the dfc server
int ObDtlChannelLoop::unblock_channel(int64_t start_data_channel_idx, int64_t dfc_channel_idx)
{
  int ret = OB_SUCCESS;
  // Reject negative indexes as well: they previously slipped past the upper
  // bound check and were passed straight to chans_.at().
  if (start_data_channel_idx < 0 || start_data_channel_idx >= chans_.count()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected status: data channel idx is invalid", K(ret),
             K(start_data_channel_idx), K(chans_.count()));
  } else {
    ObDfcServer &dfc_server = DTL.get_dfc_server();
    ObDtlChannel *ch = chans_.at(start_data_channel_idx);
    if (OB_ISNULL(ch)) {
      // Guard the dereference below instead of crashing on an empty slot.
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("invalid data channel, channel is null", K(ret),
               K(start_data_channel_idx), K(chans_.count()));
    } else if (nullptr == ch->get_dfc()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("invalid data channel, dfc is null", K(ret), K(start_data_channel_idx), K(chans_.count()));
    } else if (OB_FAIL(dfc_server.unblock_channel(ch->get_dfc(), dfc_channel_idx))) {
      LOG_WARN("failed to unblock channels", K(ret));
    }
  }
  return ret;
}
} // dtl
} // sql
} // oceanbase