patch 4.0

wangzelin.wzl
2022-10-24 10:34:53 +08:00
parent 4ad6e00ec3
commit 93a1074b0c
10533 changed files with 2588271 additions and 2299373 deletions


@@ -33,43 +33,52 @@
#include "share/config/ob_server_config.h"
#include "sql/engine/px/ob_px_sqc_async_proxy.h"
#include "sql/engine/px/datahub/ob_dh_dtl_proc.h"
#include "sql/engine/px/datahub/components/ob_dh_rollup_key.h"
#include "sql/engine/px/datahub/components/ob_dh_winbuf.h"
#include "sql/engine/px/datahub/components/ob_dh_sample.h"
#include "sql/engine/px/ob_px_sqc_proxy.h"
#include "storage/tx/ob_trans_service.h"
namespace oceanbase {
namespace oceanbase
{
using namespace common;
using namespace share;
using namespace omt;
using namespace share::schema;
using namespace sql;
using namespace sql::dtl;
namespace sql {
namespace sql
{
// Used only in this .cpp file, so it can be placed here.
// Dedicated to the logic for handling datahub piece messages.
template <typename PieceMsg>
class ObDhPieceMsgProc {
class ObDhPieceMsgProc
{
public:
ObDhPieceMsgProc() = default;
~ObDhPieceMsgProc() = default;
int on_piece_msg(ObPxCoordInfo& coord_info, ObExecContext& ctx, const PieceMsg& pkt)
int on_piece_msg(ObPxCoordInfo &coord_info, ObExecContext &ctx, const PieceMsg &pkt)
{
int ret = OB_SUCCESS;
UNUSED(ctx);
ObArray<ObPxSqcMeta*> sqcs;
ObDfo* dfo = nullptr;
ObPieceMsgCtx* piece_ctx = nullptr;
// FIXME (TODO): the dfo id is not necessary, a mapping from op_id to dfo id can be maintained locally
ObArray<ObPxSqcMeta *> sqcs;
ObDfo *dfo = nullptr;
ObPieceMsgCtx *piece_ctx = nullptr;
// FIXME (TODO xiaochu): the dfo id is not strictly necessary; a local mapping from op_id to dfo id could be maintained instead
if (OB_FAIL(coord_info.dfo_mgr_.find_dfo_edge(pkt.dfo_id_, dfo))) {
LOG_WARN("fail find dfo", K(pkt), K(ret));
} else if (OB_ISNULL(dfo)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL ptr or null session ptr", KP(dfo), K(pkt), K(ret));
} else if (OB_FAIL(coord_info.piece_msg_ctx_mgr_.find_piece_ctx(pkt.op_id_, piece_ctx))) {
// create a ctx if not found
// NOTE: The method of creating a piece_ctx here will not cause concurrency problems.
// because QC is a single-threaded message loop, the messages sent by SQC are processed one by one
// create a ctx if none is found
// NOTE: creating a new piece_ctx here does not introduce concurrency issues,
// because the QC runs a single-threaded message loop and processes SQC messages one by one
if (OB_ENTRY_NOT_EXIST != ret) {
LOG_WARN("fail get ctx", K(pkt), K(ret));
} else if (OB_FAIL(
PieceMsg::PieceMsgCtx::alloc_piece_msg_ctx(pkt, ctx, dfo->get_total_task_count(), piece_ctx))) {
} else if (OB_FAIL(PieceMsg::PieceMsgCtx::alloc_piece_msg_ctx(pkt, coord_info, ctx,
dfo->get_total_task_count(), piece_ctx))) {
LOG_WARN("fail to alloc piece msg", K(ret));
} else if (nullptr != piece_ctx) {
if (OB_FAIL(coord_info.piece_msg_ctx_mgr_.add_piece_ctx(piece_ctx))) {
@@ -79,7 +88,7 @@ public:
}
if (OB_SUCC(ret)) {
typename PieceMsg::PieceMsgCtx* ctx = static_cast<typename PieceMsg::PieceMsgCtx*>(piece_ctx);
typename PieceMsg::PieceMsgCtx *ctx = static_cast<typename PieceMsg::PieceMsgCtx *>(piece_ctx);
if (OB_FAIL(dfo->get_sqcs(sqcs))) {
LOG_WARN("fail get qc-sqc channel for QC", K(ret));
} else if (OB_FAIL(PieceMsg::PieceMsgListener::on_message(*ctx, sqcs, pkt))) {
@@ -90,18 +99,24 @@ public:
}
};
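// The template above relies on a compile-time "piece message" interface: each PieceMsg
// type is expected to expose dfo_id_/op_id_ fields, a nested PieceMsgCtx with a static
// alloc_piece_msg_ctx(), and a nested PieceMsgListener with a static on_message(), as
// inferred from the calls in on_piece_msg(). A minimal standalone sketch of the same
// pattern, using toy types rather than the real OceanBase classes, might look like this:
#include <iostream>
struct ToyPieceMsg {
  int dfo_id_ = 0;
  int op_id_ = 0;
  struct PieceMsgCtx {
    int task_cnt_ = 0;
    // allocate a shared ctx for this op; returning 0 plays the role of OB_SUCCESS
    static int alloc_piece_msg_ctx(const ToyPieceMsg &pkt, int task_cnt, PieceMsgCtx *&out)
    {
      (void)pkt;
      out = new PieceMsgCtx();
      out->task_cnt_ = task_cnt;
      return 0;
    }
  };
  struct PieceMsgListener {
    // consume one piece; the real listener also receives the dfo's sqcs (omitted in this toy)
    static int on_message(PieceMsgCtx &ctx, const ToyPieceMsg &pkt)
    {
      std::cout << "piece for op " << pkt.op_id_ << ", tasks " << ctx.task_cnt_ << "\n";
      return 0;
    }
  };
};
// mirrors the control flow of ObDhPieceMsgProc::on_piece_msg in simplified form
template <typename PieceMsg>
int handle_piece(const PieceMsg &pkt, int task_cnt)
{
  typename PieceMsg::PieceMsgCtx *piece_ctx = nullptr;
  int ret = PieceMsg::PieceMsgCtx::alloc_piece_msg_ctx(pkt, task_cnt, piece_ctx);
  if (0 == ret) {
    ret = PieceMsg::PieceMsgListener::on_message(*piece_ctx, pkt);
  }
  delete piece_ctx;
  return ret;
}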
int ObPxMsgProc::on_process_end(ObExecContext& ctx)
int ObPxMsgProc::on_process_end(ObExecContext &ctx)
{
UNUSED(ctx);
int ret = OB_SUCCESS;
// handle cleanup work
return ret;
}
// entry function
int ObPxMsgProc::startup_msg_loop(ObExecContext& ctx)
// scheduling entry function
int ObPxMsgProc::startup_msg_loop(ObExecContext &ctx)
{
int ret = OB_SUCCESS;
LOG_TRACE("TIMERECORD ", "reserve:=-1 name:=QC dfoid:=-1 sqcid:=-1 taskid:=-1 start:", ObTimeUtility::current_time());
LOG_TRACE("TIMERECORD ",
"reserve:=-1 name:=QC dfoid:=-1 sqcid:=-1 taskid:=-1 start:",
ObTimeUtility::current_time());
if (OB_FAIL(scheduler_->init_all_dfo_channel(ctx))) {
LOG_WARN("fail to init all dfo channel", K(ret));
} else if (OB_FAIL(scheduler_->try_schedule_next_dfo(ctx))) {
@@ -110,25 +125,24 @@ int ObPxMsgProc::startup_msg_loop(ObExecContext& ctx)
return ret;
}
// 1. Find the corresponding dfo, sqc according to the pkt information, and mark the current sqc thread allocation is
// completed
// 2. Determine whether all sqc under the dfo have been allocated threads
// if it is completed, mark dfo as thread_inited, and go to step 3, otherwise end processing
// 3. Check whether the parent of the current dfo is in thread_inited state:
// if true:
// - call on_dfo_pair_thread_inited to trigger channel pairing and distribution of two dfos
// else:
// - check whether any child of the current dfo is in thread_inited state
// - if true, call on_dfo_pair_thread_inited to trigger channel pairing and distribution of two dfos
// - else nop
int ObPxMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcResultMsg& pkt)
// 1. Find the dfo and sqc matching the pkt info, and mark thread allocation for the current sqc as complete
// 2. Check whether every sqc under this dfo has finished thread allocation;
// if so, mark the dfo as thread_inited and go to step 3, otherwise stop here
// 3. Check whether the parent of the current dfo is in thread_inited state:
// if yes:
// - call on_dfo_pair_thread_inited to trigger channel pairing and distribution for the two dfos
// else:
// - check whether **any** child of the current dfo is in thread_inited state
// - if yes, call on_dfo_pair_thread_inited to trigger channel pairing and distribution for the two dfos
// - else nop
int ObPxMsgProc::on_sqc_init_msg(ObExecContext &ctx, const ObPxInitSqcResultMsg &pkt)
{
int ret = OB_SUCCESS;
LOG_TRACE("on_sqc_init_msg", K(pkt));
ObDfo* edge = NULL;
ObPxSqcMeta* sqc = NULL;
ObDfo *edge = NULL;
ObPxSqcMeta *sqc = NULL;
if (OB_FAIL(coord_info_.dfo_mgr_.find_dfo_edge(pkt.dfo_id_, edge))) {
LOG_WARN("fail find dfo", K(pkt), K(ret));
} else if (OB_ISNULL(edge)) {
@@ -144,14 +158,11 @@ int ObPxMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcResultMsg&
ret = pkt.rc_;
update_error_code(coord_info_.first_error_code_, pkt.rc_);
LOG_WARN("fail init sqc, please check remote server log for details",
"remote_server",
sqc->get_exec_addr(),
K(pkt),
KP(ret));
"remote_server", sqc->get_exec_addr(), K(pkt), KP(ret));
} else if (pkt.task_count_ <= 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("task count returned by sqc invalid. expect 1 or more", K(pkt), K(ret));
} else if (OB_FAIL(sqc->get_partitions_info().assign(pkt.partitions_info_))) {
} else if (OB_FAIL(sqc->get_partitions_info().assign(pkt.tablets_info_))) {
LOG_WARN("Failed to assign partitions info", K(ret));
} else {
sqc->set_task_count(pkt.task_count_);
@@ -160,14 +171,13 @@ int ObPxMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcResultMsg&
}
if (OB_SUCC(ret)) {
ObArray<ObPxSqcMeta*> sqcs;
ObArray<ObPxSqcMeta *> sqcs;
if (OB_FAIL(edge->get_sqcs(sqcs))) {
LOG_WARN("fail get qc-sqc channel for QC", K(ret));
} else {
bool sqc_threads_inited = true;
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret))
{
ObPxSqcMeta* sqc = sqcs.at(idx);
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret)) {
ObPxSqcMeta *sqc = sqcs.at(idx);
if (OB_ISNULL(sqc)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL unexpected sqc", K(ret));
@@ -177,7 +187,8 @@ int ObPxMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcResultMsg&
}
}
if (OB_SUCC(ret) && sqc_threads_inited) {
LOG_TRACE("on_sqc_init_msg: all sqc returned task count. ready to do on_sqc_threads_inited", K(*edge));
LOG_TRACE("on_sqc_init_msg: all sqc returned task count. ready to do on_sqc_threads_inited",
K(*edge));
edge->set_thread_inited(true);
ret = scheduler_->on_sqc_threads_inited(ctx, *edge);
}
@@ -186,17 +197,34 @@ int ObPxMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcResultMsg&
if (OB_SUCC(ret)) {
if (edge->is_thread_inited()) {
// Try to notify parent and child at the same time
/* We must try to notify both the parent and the child. Consider this case:
*
* parent (thread inited)
* |
* self
* |
* child (thread inited)
*
* Here, before all of self's threads have been scheduled,
* neither the child's nor the parent's thread-inited message
* can trigger the on_dfo_pair_thread_inited event.
* Once self is thread-inited, the on_dfo_pair_thread_inited
* events with both the parent and the child must therefore be
* triggered together. (A condensed sketch of this pairing rule
* follows the function body.)
*/
// try to schedule the self-parent pair
if (edge->has_parent() && edge->parent()->is_thread_inited()) {
if (OB_FAIL(on_dfo_pair_thread_inited(ctx, *edge, *edge->parent()))) {
LOG_WARN("fail co-schedule parent-edge", K(ret));
}
}
// try to schedule the self-child pair
if (OB_SUCC(ret)) {
int64_t cnt = edge->get_child_count();
for (int64_t idx = 0; idx < cnt && OB_SUCC(ret); ++idx) {
ObDfo* child = NULL;
ObDfo *child = NULL;
if (OB_FAIL(edge->get_child_dfo(idx, child))) {
LOG_WARN("fail get child dfo", K(idx), K(cnt), K(ret));
} else if (OB_ISNULL(child)) {
@@ -215,39 +243,41 @@ int ObPxMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcResultMsg&
return ret;
}
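// Condensed, standalone sketch of the pairing rule implemented above (simplified
// stand-in types, not the real ObDfo interface): once a dfo becomes thread_inited,
// it forms a schedulable (child, parent) pair with its parent and with each of its
// children that are already thread_inited.
#include <utility>
#include <vector>
struct DfoNode {
  bool thread_inited = false;
  DfoNode *parent = nullptr;
  std::vector<DfoNode *> children;
};
// Returns the (child, parent) pairs that become ready when `self` turns thread_inited.
static std::vector<std::pair<DfoNode *, DfoNode *>> ready_pairs(DfoNode &self)
{
  std::vector<std::pair<DfoNode *, DfoNode *>> pairs;
  self.thread_inited = true;
  if (self.parent != nullptr && self.parent->thread_inited) {
    pairs.emplace_back(&self, self.parent);   // the self-parent pair
  }
  for (DfoNode *child : self.children) {
    if (child != nullptr && child->thread_inited) {
      pairs.emplace_back(child, &self);       // a self-child pair
    }
  }
  return pairs;
}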
// 1. Find dfo, sqc according to pkt information, mark the current sqc has been executed
// 2. Determine whether all sqc under the dfo have been executed
// if completed, mark dfo as thread_finish
// 3. Determine whether the current dfo is marked as thread_finish state
// if true:
// - send release thread message to dfo
// - schedule the next dfo
// - if all dfos have been scheduled to complete (regardless of Coord), nop
// else:
// 1. Find the dfo and sqc matching the pkt info, and mark the current sqc as finished
// 2. Check whether every sqc under this dfo has finished executing
// if so, mark the dfo thread_finish
// 3. Check whether the current dfo is marked as thread_finish
// if yes:
// - send a release-threads message to the dfo
// - schedule the next dfo
// - if all dfos have been scheduled (not counting the Coord), nop
// else:
// - nop
int ObPxMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinishSqcResultMsg& pkt)
int ObPxMsgProc::on_sqc_finish_msg(ObExecContext &ctx,
const ObPxFinishSqcResultMsg &pkt)
{
int ret = OB_SUCCESS;
int sqc_ret = OB_SUCCESS;
ObDfo* edge = NULL;
ObPxSqcMeta* sqc = NULL;
ObSQLSessionInfo* session = NULL;
ObDfo *edge = NULL;
ObPxSqcMeta *sqc = NULL;
ObSQLSessionInfo *session = NULL;
ObPhysicalPlanCtx *phy_plan_ctx = NULL;
if (OB_ISNULL(session = ctx.get_my_session())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL ptr session", K(ret));
} else if (OB_FAIL(session->get_trans_result().merge_result(pkt.get_trans_result()))) {
LOG_WARN("fail merge result",
K(ret),
"session_trans_result",
session->get_trans_result(),
"packet_trans_result",
pkt.get_trans_result());
} else if (OB_ISNULL(phy_plan_ctx = GET_PHY_PLAN_CTX(ctx))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("phy plan ctx NULL", K(ret));
} else if (OB_ISNULL(session->get_tx_desc())) {
} else if (OB_FAIL(MTL(transaction::ObTransService*)
->add_tx_exec_result(*session->get_tx_desc(),
pkt.get_trans_result()))) {
LOG_WARN("fail merge result", K(ret),
"packet_trans_result", pkt.get_trans_result(),
"tx_desc", *session->get_tx_desc());
} else {
LOG_DEBUG("on_sqc_finish_msg trans_result",
"session_trans_result",
session->get_trans_result(),
"packet_trans_result",
pkt.get_trans_result());
LOG_TRACE("on_sqc_finish_msg trans_result",
"packet_trans_result", pkt.get_trans_result(),
"tx_desc", *session->get_tx_desc());
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(coord_info_.dfo_mgr_.find_dfo_edge(pkt.dfo_id_, edge))) {
@@ -258,33 +288,40 @@ int ObPxMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinishSqcResult
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL ptr", KP(edge), KP(sqc), K(ret));
} else if (FALSE_IT(sqc->set_need_report(false))) {
} else if (OB_FAIL(sqc->set_task_monitor_info_array(pkt.task_monitor_info_array_))) {
LOG_WARN("fail to copy task monitor info array", K(ret));
} else if (common::OB_INVALID_ID != pkt.temp_table_id_ && pkt.interm_result_ids_.count() > 0) {
if (OB_FAIL(ctx.add_temp_table_interm_result_ids(pkt.temp_table_id_, pkt.sqc_id_, pkt.interm_result_ids_))) {
} else if (common::OB_INVALID_ID != pkt.temp_table_id_) {
if (OB_FAIL(ctx.add_temp_table_interm_result_ids(pkt.temp_table_id_,
sqc->get_exec_addr(),
pkt.interm_result_ids_))) {
LOG_WARN("failed to add temp table interm resuld ids.", K(ret));
} else { /*do nothing.*/
}
} else { /*do nothing.*/
}
} else { /*do nothing.*/ }
} else { /*do nothing.*/ }
if (OB_SUCC(ret)) {
sqc->set_thread_finish(true);
NG_TRACE_EXT(sqc_finish, OB_ID(dfo_id), sqc->get_dfo_id(), OB_ID(sqc_id), sqc->get_sqc_id());
if (sqc->is_ignore_vtable_error() && OB_SUCCESS != pkt.rc_) {
// If an sqc finish message arrives and that sqc touches virtual tables, all error codes must be ignored.
// If this dfo is a child_dfo of the root_dfo, then to let px break out of the data-channel message loop
// we need to mock an eof dtl buffer and deliver it to px locally (no rpc involved; attach is enough)
const_cast<ObPxFinishSqcResultMsg &>(pkt).rc_ = OB_SUCCESS;
OZ(root_dfo_action_.notify_peers_mock_eof(edge,
phy_plan_ctx->get_timeout_timestamp(),
sqc->get_exec_addr()));
}
NG_TRACE_EXT(sqc_finish,
OB_ID(dfo_id), sqc->get_dfo_id(),
OB_ID(sqc_id), sqc->get_sqc_id());
LOG_TRACE("[MSG] sqc finish", K(*edge), K(*sqc));
}
if (OB_SUCC(ret)) {
ObArray<ObPxSqcMeta*> sqcs;
ObArray<ObPxSqcMeta *> sqcs;
if (OB_FAIL(edge->get_sqcs(sqcs))) {
LOG_WARN("fail get qc-sqc channel for QC", K(ret));
} else {
bool sqc_threads_finish = true;
int64_t dfo_used_worker_count = 0;
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret))
{
ObPxSqcMeta* sqc = sqcs.at(idx);
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret)) {
ObPxSqcMeta *sqc = sqcs.at(idx);
if (OB_ISNULL(sqc)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL unexpected sqc", K(ret));
@@ -305,21 +342,21 @@ int ObPxMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinishSqcResult
}
/**
* The reason for judging the error code here is that the status of the sqc and dfo that
* has the error needs to be updated. the sqc (including its workers) that sent the finish
* message has actually ended. but because of an error, the subsequent scheduling process
* does not need to continue, and the subsequent process will perform error handling
* Why check the error code here? Because the state of the failed sqc and its dfo still needs to be updated:
* the sqc (including its workers) that sent this finish message has in fact already finished and must be marked as such,
* but since an error occurred, the subsequent scheduling flow need not continue; later steps will handle the error.
*/
update_error_code(coord_info_.first_error_code_, pkt.rc_);
if (OB_SUCC(ret)) {
if (OB_FAIL(pkt.rc_)) {
LOG_WARN("sqc fail, abort qc", K(pkt), K(ret));
LOG_WARN("sqc fail, abort qc", K(pkt), K(ret), "sqc_addr", sqc->get_exec_addr());
} else {
// pkt rc_ == OB_SUCCESS
// handle affected rows under the dml + px framework
if (OB_ISNULL(ctx.get_physical_plan_ctx())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("phy plan ctx is null", K(ret));
} else {
} else {
ctx.get_physical_plan_ctx()->add_affected_rows(pkt.sqc_affected_rows_);
ctx.get_physical_plan_ctx()->add_px_dml_row_info(pkt.dml_row_info_);
}
@@ -331,9 +368,10 @@ int ObPxMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinishSqcResult
ret = scheduler_->try_schedule_next_dfo(ctx);
if (OB_ITER_END == ret) {
coord_info_.all_threads_finish_ = true;
LOG_TRACE(
"TIMERECORD ", "reserve:=-1 name:=QC dfoid:=-1 sqcid:=-1 taskid:=-1 end:", ObTimeUtility::current_time());
ret = OB_SUCCESS; // ignore error
LOG_TRACE("TIMERECORD ",
"reserve:=-1 name:=QC dfoid:=-1 sqcid:=-1 taskid:=-1 end:",
ObTimeUtility::current_time());
ret = OB_SUCCESS; // must be overwritten, otherwise we cannot break out of the loop
}
}
}
@@ -341,19 +379,47 @@ int ObPxMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinishSqcResult
return ret;
}
int ObPxMsgProc::on_piece_msg(ObExecContext& ctx, const ObBarrierPieceMsg& pkt)
int ObPxMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObBarrierPieceMsg &pkt)
{
ObDhPieceMsgProc<ObBarrierPieceMsg> proc;
return proc.on_piece_msg(coord_info_, ctx, pkt);
}
int ObPxMsgProc::on_piece_msg(ObExecContext& ctx, const ObWinbufPieceMsg& pkt)
int ObPxMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObWinbufPieceMsg &pkt)
{
ObDhPieceMsgProc<ObWinbufPieceMsg> proc;
return proc.on_piece_msg(coord_info_, ctx, pkt);
}
int ObPxMsgProc::on_eof_row(ObExecContext& ctx)
int ObPxMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObDynamicSamplePieceMsg &pkt)
{
ObDhPieceMsgProc<ObDynamicSamplePieceMsg> proc;
return proc.on_piece_msg(coord_info_, ctx, pkt);
}
int ObPxMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObRollupKeyPieceMsg &pkt)
{
ObDhPieceMsgProc<ObRollupKeyPieceMsg> proc;
return proc.on_piece_msg(coord_info_, ctx, pkt);
}
int ObPxMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObRDWFPieceMsg &pkt)
{
ObDhPieceMsgProc<ObRDWFPieceMsg> proc;
return proc.on_piece_msg(coord_info_, ctx, pkt);
}
int ObPxMsgProc::on_eof_row(ObExecContext &ctx)
{
int ret = OB_SUCCESS;
UNUSED(ctx);
@@ -362,18 +428,23 @@ int ObPxMsgProc::on_eof_row(ObExecContext& ctx)
return ret;
}
int ObPxMsgProc::on_dfo_pair_thread_inited(ObExecContext& ctx, ObDfo& child, ObDfo& parent)
int ObPxMsgProc::on_dfo_pair_thread_inited(ObExecContext &ctx, ObDfo &child, ObDfo &parent)
{
int ret = OB_SUCCESS;
// Placement has been decided; now build the DTL mapping
//
// NOTE: the implementation is simplified for now: if thread allocation for the
// child/parent fails, we just fail and exit. A more refined implementation could
// adjust the number of threads allocated on each server and retry the allocation.
// In that case the DTL map would have to change accordingly.
//
if (OB_SUCC(ret)) {
if (OB_FAIL(scheduler_->build_data_mn_xchg_ch(ctx, child, parent))) {
if (OB_FAIL(scheduler_->build_data_xchg_ch(ctx, child, parent))) {
LOG_WARN("fail init dtl data channel", K(ret));
} else {
LOG_TRACE("build data xchange channel for dfo pair ok", K(parent), K(child));
}
}
// Distribute the dtl channel information to the parent and child DFOs so that
// they can start sending and receiving data
// distribute the dtl channel info to both the parent and child DFOs so they can start sending and receiving data
if (OB_SUCC(ret)) {
if (OB_FAIL(scheduler_->dispatch_dtl_data_channel_info(ctx, child, parent))) {
LOG_WARN("fail setup dtl data channel for child-parent pair", K(ret));
@@ -381,20 +452,120 @@ int ObPxMsgProc::on_dfo_pair_thread_inited(ObExecContext& ctx, ObDfo& child, ObD
LOG_TRACE("dispatch dtl data channel for pair ok", K(parent), K(child));
}
}
// If the execution plan contains a px bloom filter, check whether the child dfo contains a use_filter operator;
// if it does, build channel info for them and dispatch it to the parent dfo
if (OB_SUCC(ret) && child.is_px_use_bloom_filter()) {
if (parent.is_root_dfo()
&& OB_FAIL(scheduler_->set_bloom_filter_ch_for_root_dfo(ctx, parent))) {
LOG_WARN("fail to set bloom filter ch for root dfo", K(ret));
} else if (OB_FAIL(scheduler_->build_bloom_filter_ch(ctx, child, parent))) {
LOG_WARN("fail to setup bloom filter channel", K(ret));
} else if (OB_FAIL(scheduler_->dispatch_bf_channel_info(ctx, child, parent))) {
LOG_WARN("fail setup bloom filter data channel for child-parent pair", K(ret));
} else {
LOG_TRACE("dispatch px bloom filter channel for pair ok", K(parent), K(child));
}
}
return ret;
}
int ObPxMsgProc::on_interrupted(ObExecContext& ctx, const ObInterruptCode& ic)
int ObPxMsgProc::mark_rpc_filter(ObExecContext &ctx)
{
int ret = OB_SUCCESS;
ObJoinFilterDataCtx &bf_ctx = ctx.get_bf_ctx();
ObPhysicalPlanCtx *phy_plan_ctx = GET_PHY_PLAN_CTX(ctx);
bf_ctx.filter_ready_ = false;
common::ObArray<dtl::ObDtlChannel *> channels;
uint64_t each_group_size = 0;
if (OB_ISNULL(bf_ctx.filter_data_) ||
OB_ISNULL(phy_plan_ctx) ||
OB_ISNULL(ctx.get_my_session())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("the filter data or phy plan ctx is null", K(ret));
} else if (0 == bf_ctx.ch_provider_ptr_) {
// root dfo
if (bf_ctx.ch_set_.count() <= 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("the count of bloom filter chsets is unexpected",
K(bf_ctx.ch_set_.count()), K(ret));
} else if (OB_FAIL(ObDtlChannelUtil::link_ch_set(bf_ctx.ch_set_, channels,
[&](ObDtlChannel *ch) {
ch->set_join_filter_owner();
ch->set_thread_id(GETTID());
}))) {
LOG_WARN("fail to link ch channel", K(ret));
} else {
bf_ctx.filter_data_->bloom_filter_count_ = 1;
}
} else {
int64_t sqc_count = 0;
int64_t tenant_id = ctx.get_my_session()->get_effective_tenant_id();
ObPxSQCProxy *ch_provider = reinterpret_cast<ObPxSQCProxy *>(bf_ctx.ch_provider_ptr_);
if (OB_FAIL(ch_provider->get_bloom_filter_ch(bf_ctx.ch_set_,
sqc_count, phy_plan_ctx->get_timeout_timestamp(), false))) {
LOG_WARN("fail get data ch sets from provider", K(ret));
} else if (OB_FAIL(ObDtlChannelUtil::link_ch_set(bf_ctx.ch_set_, channels,
[&](ObDtlChannel *ch) {
ch->set_join_filter_owner();
ch->set_thread_id(GETTID());
}))) {
LOG_WARN("fail to link ch channel", K(ret));
} else if (channels.count() <= 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("channels count is unexpected", K(ret));
} else {
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id));
if (OB_LIKELY(tenant_config.is_valid())) {
const char *ptr = NULL;
if (OB_ISNULL(ptr = tenant_config->_px_bloom_filter_group_size.get_value())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("each group size ptr is null", K(ret));
} else if (0 == ObString::make_string("auto").case_compare(ptr)) {
each_group_size = sqrt(channels.count());
} else {
char *end_ptr = nullptr;
each_group_size = strtoull(ptr, &end_ptr, 10);
if (*end_ptr != '\0') {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("each group size ptr is unexpected", K(ret));
}
}
}
each_group_size = (each_group_size <= 0 ? 1 : each_group_size);
int64_t send_size = GCONF._send_bloom_filter_size * 125;
int64_t send_count = ceil(bf_ctx.filter_data_->filter_.get_bits_array_length() / (double)send_size);
bf_ctx.filter_data_->bloom_filter_count_ = sqc_count * send_count;
ch_provider->set_filter_data(bf_ctx.filter_data_);
OZ(ch_provider->assign_bloom_filter_channels(channels));
OZ(ch_provider->assign_bf_ch_set(bf_ctx.ch_set_));
OZ(ch_provider->generate_filter_indexes(each_group_size, channels.count()));
if (OB_SUCC(ret)) {
ch_provider->set_bf_compress_type(ctx.get_bf_ctx().compressor_type_);
ch_provider->set_per_channel_bf_count(send_count);
ch_provider->set_bloom_filter_ready(true);
}
}
if (OB_FAIL(ret)) {
// if this failed, the ch_set needs to be unlinked
IGNORE_RETURN ObPxChannelUtil::unlink_ch_set(bf_ctx.ch_set_, nullptr, false);
}
}
return ret;
}
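// Worked example of the sizing arithmetic above (illustrative numbers, not defaults):
// channels.count() = 16 with "auto"          -> each_group_size = sqrt(16) = 4
// _send_bloom_filter_size = 1024             -> send_size = 1024 * 125 = 128000
// filter_.get_bits_array_length() = 300000   -> send_count = ceil(300000 / 128000.0) = 3
// sqc_count = 2                              -> bloom_filter_count_ = 2 * 3 = 6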
int ObPxMsgProc::on_interrupted(ObExecContext &ctx, const ObInterruptCode &ic)
{
int ret = OB_SUCCESS;
UNUSED(ctx);
// override ret code
// throw the error code out to the main processing flow
ret = ic.code_;
LOG_TRACE("qc received a interrupt and throw out of msg proc", K(ic), K(ret));
return ret;
}
int ObPxMsgProc::on_sqc_init_fail(ObDfo& dfo, ObPxSqcMeta& sqc)
// TODO
int ObPxMsgProc::on_sqc_init_fail(ObDfo &dfo, ObPxSqcMeta &sqc)
{
int ret = OB_SUCCESS;
UNUSED(dfo);
@@ -402,14 +573,15 @@ int ObPxMsgProc::on_sqc_init_fail(ObDfo& dfo, ObPxSqcMeta& sqc)
return ret;
}
//////////// END /////////
int ObPxTerminateMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcResultMsg& pkt)
int ObPxTerminateMsgProc::on_sqc_init_msg(ObExecContext &ctx, const ObPxInitSqcResultMsg &pkt)
{
int ret = OB_SUCCESS;
UNUSED(ctx);
ObDfo* edge = NULL;
ObPxSqcMeta* sqc = NULL;
ObDfo *edge = NULL;
ObPxSqcMeta *sqc = NULL;
/**
* Mark the sqc and dfo as already started.
*/
LOG_TRACE("terminate msg proc on sqc init msg", K(pkt.rc_));
if (pkt.task_count_ <= 0) {
ret = OB_ERR_UNEXPECTED;
@@ -426,6 +598,7 @@ int ObPxTerminateMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcR
LOG_WARN("NULL ptr", KP(sqc), K(ret));
} else {
sqc->set_task_count(pkt.task_count_);
// mark that the sqc has started completely
sqc->set_thread_inited(true);
if (pkt.rc_ != OB_SUCCESS) {
@@ -435,14 +608,13 @@ int ObPxTerminateMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcR
}
if (OB_SUCC(ret)) {
ObArray<ObPxSqcMeta*> sqcs;
ObArray<ObPxSqcMeta *> sqcs;
if (OB_FAIL(edge->get_sqcs(sqcs))) {
LOG_WARN("fail get qc-sqc channel for QC", K(ret));
} else {
bool sqc_threads_inited = true;
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret))
{
ObPxSqcMeta* sqc = sqcs.at(idx);
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret)) {
ObPxSqcMeta *sqc = sqcs.at(idx);
if (OB_ISNULL(sqc)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL unexpected sqc", K(ret));
@@ -452,7 +624,9 @@ int ObPxTerminateMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcR
}
}
if (OB_SUCC(ret) && sqc_threads_inited) {
LOG_TRACE("sqc terminate msg: all sqc returned task count. ready to do on_sqc_threads_inited", K(*edge));
LOG_TRACE("sqc terminate msg: all sqc returned task count. ready to do on_sqc_threads_inited",
K(*edge));
// mark that the dfo has started completely
edge->set_thread_inited(true);
}
}
@@ -461,29 +635,27 @@ int ObPxTerminateMsgProc::on_sqc_init_msg(ObExecContext& ctx, const ObPxInitSqcR
return ret;
}
int ObPxTerminateMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinishSqcResultMsg& pkt)
int ObPxTerminateMsgProc::on_sqc_finish_msg(ObExecContext &ctx, const ObPxFinishSqcResultMsg &pkt)
{
int ret = OB_SUCCESS;
LOG_TRACE("terminate msg : proc on sqc finish msg", K(pkt.rc_));
ObDfo* edge = NULL;
ObPxSqcMeta* sqc = NULL;
ObSQLSessionInfo* session = NULL;
ObDfo *edge = NULL;
ObPxSqcMeta *sqc = NULL;
ObSQLSessionInfo *session = NULL;
if (OB_ISNULL(session = ctx.get_my_session())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL ptr session", K(ret));
} else if (OB_FAIL(session->get_trans_result().merge_result(pkt.get_trans_result()))) {
LOG_WARN("fail merge result",
K(ret),
"session_trans_result",
session->get_trans_result(),
"packet_trans_result",
pkt.get_trans_result());
} else if (OB_ISNULL(session->get_tx_desc())) {
} else if (OB_FAIL(MTL(transaction::ObTransService*)
->add_tx_exec_result(*session->get_tx_desc(),
pkt.get_trans_result()))) {
LOG_WARN("fail report tx result", K(ret),
"packet_trans_result", pkt.get_trans_result(),
"tx_desc", *session->get_tx_desc());
} else {
LOG_DEBUG("on_sqc_finish_msg trans_result",
"session_trans_result",
session->get_trans_result(),
"packet_trans_result",
pkt.get_trans_result());
LOG_TRACE("on_sqc_finish_msg trans_result",
"packet_trans_result", pkt.get_trans_result(),
"tx_desc", *session->get_tx_desc());
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(coord_info_.dfo_mgr_.find_dfo_edge(pkt.dfo_id_, edge))) {
@@ -494,8 +666,6 @@ int ObPxTerminateMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinish
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL ptr", KP(edge), KP(sqc), K(ret));
} else if (FALSE_IT(sqc->set_need_report(false))) {
} else if (OB_FAIL(sqc->set_task_monitor_info_array(pkt.task_monitor_info_array_))) {
LOG_WARN("fail to copy task monitor info array", K(ret));
} else {
sqc->set_thread_finish(true);
if (pkt.rc_ != OB_SUCCESS) {
@@ -503,21 +673,22 @@ int ObPxTerminateMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinish
}
update_error_code(coord_info_.first_error_code_, pkt.rc_);
NG_TRACE_EXT(sqc_finish, OB_ID(dfo_id), sqc->get_dfo_id(), OB_ID(sqc_id), sqc->get_sqc_id());
NG_TRACE_EXT(sqc_finish,
OB_ID(dfo_id), sqc->get_dfo_id(),
OB_ID(sqc_id), sqc->get_sqc_id());
LOG_TRACE("terminate msg : sqc finish", K(*edge), K(*sqc));
}
if (OB_SUCC(ret)) {
ObArray<ObPxSqcMeta*> sqcs;
ObArray<ObPxSqcMeta *> sqcs;
if (OB_FAIL(edge->get_sqcs(sqcs))) {
LOG_WARN("fail get qc-sqc channel for QC", K(ret));
} else {
bool sqc_threads_finish = true;
int64_t dfo_used_worker_count = 0;
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret))
{
ObPxSqcMeta* sqc = sqcs.at(idx);
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret)) {
ObPxSqcMeta *sqc = sqcs.at(idx);
if (OB_ISNULL(sqc)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("NULL unexpected sqc", K(ret));
@@ -540,7 +711,7 @@ int ObPxTerminateMsgProc::on_sqc_finish_msg(ObExecContext& ctx, const ObPxFinish
return ret;
}
int ObPxTerminateMsgProc::on_eof_row(ObExecContext& ctx)
int ObPxTerminateMsgProc::on_eof_row(ObExecContext &ctx)
{
int ret = OB_SUCCESS;
UNUSED(ctx);
@@ -548,7 +719,7 @@ int ObPxTerminateMsgProc::on_eof_row(ObExecContext& ctx)
return ret;
}
int ObPxTerminateMsgProc::on_sqc_init_fail(ObDfo& dfo, ObPxSqcMeta& sqc)
int ObPxTerminateMsgProc::on_sqc_init_fail(ObDfo &dfo, ObPxSqcMeta &sqc)
{
int ret = OB_SUCCESS;
UNUSED(dfo);
@@ -557,24 +728,28 @@ int ObPxTerminateMsgProc::on_sqc_init_fail(ObDfo& dfo, ObPxSqcMeta& sqc)
return ret;
}
int ObPxTerminateMsgProc::on_interrupted(ObExecContext& ctx, const common::ObInterruptCode& pkt)
int ObPxTerminateMsgProc::on_interrupted(ObExecContext &ctx, const common::ObInterruptCode &pkt)
{
int ret = OB_SUCCESS;
UNUSED(pkt);
UNUSED(ctx);
// we are already in the teardown flow, so interrupts are no longer acted upon.
LOG_WARN("terminate msg proc on sqc interrupted", K(ret));
return ret;
}
int ObPxTerminateMsgProc::startup_msg_loop(ObExecContext& ctx)
int ObPxTerminateMsgProc::startup_msg_loop(ObExecContext &ctx)
{
// If not even one dfo has been scheduled, the upper layer returns directly; we should never get here.
int ret = OB_ERR_UNEXPECTED;
UNUSED(ctx);
LOG_WARN("terminate msg proc on sqc startup loop", K(ret));
return ret;
}
int ObPxTerminateMsgProc::on_piece_msg(ObExecContext& ctx, const ObBarrierPieceMsg& pkt)
int ObPxTerminateMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObBarrierPieceMsg &pkt)
{
int ret = common::OB_SUCCESS;
UNUSED(ctx);
@@ -582,7 +757,9 @@ int ObPxTerminateMsgProc::on_piece_msg(ObExecContext& ctx, const ObBarrierPieceM
return ret;
}
int ObPxTerminateMsgProc::on_piece_msg(ObExecContext& ctx, const ObWinbufPieceMsg& pkt)
int ObPxTerminateMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObWinbufPieceMsg &pkt)
{
int ret = common::OB_SUCCESS;
UNUSED(ctx);
@@ -590,5 +767,32 @@ int ObPxTerminateMsgProc::on_piece_msg(ObExecContext& ctx, const ObWinbufPieceMs
return ret;
}
} // end namespace sql
} // end namespace oceanbase
int ObPxTerminateMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObDynamicSamplePieceMsg &pkt)
{
int ret = common::OB_SUCCESS;
UNUSED(ctx);
UNUSED(pkt);
return ret;
}
int ObPxTerminateMsgProc::on_piece_msg(
ObExecContext &ctx,
const ObRollupKeyPieceMsg &pkt)
{
int ret = common::OB_SUCCESS;
UNUSED(ctx);
UNUSED(pkt);
return ret;
}
int ObPxTerminateMsgProc::on_piece_msg(
ObExecContext &,
const ObRDWFPieceMsg &)
{
return common::OB_SUCCESS;
}
} // end namespace sql
} // end namespace oceanbase