Files
openGauss-server/src/gausskernel/runtime/executor/execProcnode.cpp
2021-08-05 17:00:58 +08:00

1398 lines
50 KiB
C++

/* -------------------------------------------------------------------------
*
* execProcnode.cpp
* contains dispatch functions which call the appropriate "initialize",
* "get a tuple", and "cleanup" routines for the given node type.
* If the node has children, then it will presumably call ExecInitNode,
* ExecProcNode, or ExecEndNode on its subnodes and do the appropriate
* processing.
*
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/gausskernel/runtime/executor/execProcnode.cpp
*
* -------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* ExecInitNode - initialize a plan node and its subplans
* ExecProcNode - get a tuple by executing the plan node
* ExecEndNode - shut down a plan node and its subplans
*
* NOTES
* This used to be three files. It is now all combined into
* one file so that it is easier to keep ExecInitNode, ExecProcNode,
* and ExecEndNode in sync when new nodes are added.
*
* EXAMPLE
* Suppose we want the age of the manager of the shoe department and
* the number of employees in that department. So we have the query:
*
* select DEPT.no_emps, EMP.age
* where EMP.name = DEPT.mgr and
* DEPT.name = "shoe"
*
* Suppose the planner gives us the following plan:
*
* Nest Loop (DEPT.mgr = EMP.name)
* / \
* / \
* Seq Scan Seq Scan
* DEPT EMP
* (name = "shoe")
*
* ExecutorStart() is called first.
* It calls InitPlan() which calls ExecInitNode() on
* the root of the plan -- the nest loop node.
*
* * ExecInitNode() notices that it is looking at a nest loop and
* as the code below demonstrates, it calls ExecInitNestLoop().
* Eventually this calls ExecInitNode() on the right and left subplans
* and so forth until the entire plan is initialized. The result
* of ExecInitNode() is a plan state tree built with the same structure
* as the underlying plan tree.
*
* * Then when ExecutorRun() is called, it calls ExecutePlan() which calls
* ExecProcNode() repeatedly on the top node of the plan state tree.
* Each time this happens, ExecProcNode() will end up calling
* ExecNestLoop(), which calls ExecProcNode() on its subplans.
* Each of these subplans is a sequential scan so ExecSeqScan() is
* called. The slots returned by ExecSeqScan() may contain
* tuples which contain the attributes ExecNestLoop() uses to
* form the tuples it returns.
*
* * Eventually ExecSeqScan() stops returning tuples and the nest
* loop join ends. Lastly, ExecutorEnd() calls ExecEndNode() which
* calls ExecEndNestLoop() which in turn calls ExecEndNode() on
* its subplans which result in ExecEndSeqScan().
*
* This should show how the executor works by having
* ExecInitNode(), ExecProcNode() and ExecEndNode() dispatch
* their work to the appropriate node support routines which may
* in turn call these routines themselves on their subplans.
*/
#include "postgres.h"
#include "knl/knl_variable.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "executor/nodeAppend.h"
#include "executor/nodeBitmapAnd.h"
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeBitmapIndexscan.h"
#include "executor/nodeBitmapOr.h"
#include "executor/nodeCtescan.h"
#include "executor/nodeExtensible.h"
#include "executor/nodeForeignscan.h"
#include "executor/nodeFunctionscan.h"
#include "executor/nodeGroup.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "executor/nodeLimit.h"
#include "executor/nodeLockRows.h"
#include "executor/nodeMaterial.h"
#include "executor/nodeMergeAppend.h"
#include "executor/nodeMergejoin.h"
#include "executor/nodeModifyTable.h"
#include "executor/nodeNestloop.h"
#include "executor/nodePartIterator.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
#include "executor/nodeStub.h"
#include "executor/nodeSubplan.h"
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
#include "executor/nodeWindowAgg.h"
#include "executor/nodeWorktablescan.h"
#include "executor/execStream.h"
#include "optimizer/clauses.h"
#include "optimizer/encoding.h"
#include "optimizer/ml_model.h"
#include "vecexecutor/vecstream.h"
#include "miscadmin.h"
#include "vecexecutor/vecnodecstorescan.h"
#include "vecexecutor/vecnodecstoreindexscan.h"
#include "vecexecutor/vecnodedfsindexscan.h"
#include "vecexecutor/vecnodevectorow.h"
#include "vecexecutor/vecnodecstoreindexctidscan.h"
#include "vecexecutor/vecnodecstoreindexheapscan.h"
#include "vecexecutor/vecnodecstoreindexand.h"
#include "vecexecutor/vecnodecstoreindexor.h"
#include "vecexecutor/vecnoderowtovector.h"
#include "vecexecutor/vecnodeforeignscan.h"
#include "vecexecutor/vecremotequery.h"
#include "vecexecutor/vecnoderesult.h"
#include "vecexecutor/vecsubqueryscan.h"
#include "vecexecutor/vecnodesort.h"
#include "vecexecutor/vecmodifytable.h"
#include "vecexecutor/vechashjoin.h"
#include "vecexecutor/vechashagg.h"
#include "vecexecutor/vecpartiterator.h"
#include "vecexecutor/vecappend.h"
#include "vecexecutor/veclimit.h"
#include "vecexecutor/vecsetop.h"
#ifdef ENABLE_MULTIPLE_NODES
#include "vecexecutor/vectsstorescan.h"
#include "tsdb/cache/tags_cachemgr.h"
#endif /* ENABLE_MULTIPLE_NODES */
#include "vecexecutor/vecgroup.h"
#include "vecexecutor/vecunique.h"
#include "vecexecutor/vecnestloop.h"
#include "vecexecutor/vecmaterial.h"
#include "vecexecutor/vecmergejoin.h"
#include "vecexecutor/vecwindowagg.h"
#include "utils/aes.h"
#include "utils/builtins.h"
#ifdef PGXC
#include "pgxc/execRemote.h"
#endif
#include "distributelayer/streamMain.h"
#include "pgxc/pgxc.h"
#include "securec.h"
#include "gstrace/gstrace_infra.h"
#include "gstrace/executer_gstrace.h"
#include "executor/nodeGD.h"
#include "executor/nodeKMeans.h"
#define NODENAMELEN 64
/*
* Determines whether a plan node should be processed by the stub routine because
* its exec_nodes do not match the current DN.
*
* "Processed in stub" means that ExecInitNode() bypasses the actual initialization
* work (such as opening the scan relation) but still allows node initialization to
* continue on the node's lefttree/righttree.
*/
bool NeedStubExecution(Plan* plan)
{
#ifndef ENABLE_MULTIPLE_NODES
    /* Builds without ENABLE_MULTIPLE_NODES never use stub execution. */
    return false;
#endif

    /*
     * A plan node under a recursive union is never considered for stub
     * execution, and neither is a step that has to be executed on this DN.
     */
    if (EXEC_IN_RECURSIVE_MODE(plan) || NeedExecute(plan)) {
        return false;
    }

    /* Only the node types listed below are given stub processing. */
    bool stub_wanted = false;
    switch (nodeTag(plan)) {
        case T_ModifyTable:
        case T_VecModifyTable:
        case T_Scan:
        case T_SeqScan:
        case T_IndexScan:
        case T_IndexOnlyScan:
        case T_BitmapIndexScan:
        case T_BitmapHeapScan:
        case T_TidScan:
        case T_CStoreScan:
#ifdef ENABLE_MULTIPLE_NODES
        case T_TsStoreScan:
#endif
        case T_CStoreIndexScan:
        case T_CStoreIndexCtidScan:
        case T_CStoreIndexHeapScan:
        case T_SubqueryScan:
        case T_FunctionScan:
            stub_wanted = true;
            break;
        default:
            break;
    }
    return stub_wanted;
}
/*
* Active SQL does not need to be executed for a plan node that the current
* datanode does not run (e.g. the datanode is outside the node group executing it).
*/
static bool NeedExecuteActiveSql(Plan* plan)
{
    /*
     * Coordinators and single-node deployments always qualify; anywhere else
     * the answer is whatever NeedExecute() reports for this plan node.
     */
    return IS_PGXC_COORDINATOR || IS_SINGLE_NODE || NeedExecute(plan);
}
/* True when the sequential scan state has no current scan descriptor (ss_currentScanDesc is NULL). */
static inline bool SeqScanNodeIsStub(SeqScanState* seq_scan)
{
    const bool has_scan_desc = (seq_scan->ss_currentScanDesc != NULL);
    return !has_scan_desc;
}
/* True when the index scan state has no scan descriptor (iss_ScanDesc is NULL). */
static inline bool IdxScanNodeIsStub(IndexScanState* index_scan)
{
    const bool has_scan_desc = (index_scan->iss_ScanDesc != NULL);
    return !has_scan_desc;
}
/* True when the index-only scan state has no scan descriptor (ioss_ScanDesc is NULL). */
static inline bool IdxOnlyScanNodeIsStub(IndexOnlyScanState* index_only_scan)
{
    const bool has_scan_desc = (index_only_scan->ioss_ScanDesc != NULL);
    return !has_scan_desc;
}
/* True when the bitmap index scan state has no scan descriptor (biss_ScanDesc is NULL). */
static inline bool BmIdxOnlyScanNodeIsStub(BitmapIndexScanState* bm_index_scan)
{
    const bool has_scan_desc = (bm_index_scan->biss_ScanDesc != NULL);
    return !has_scan_desc;
}
/* True when the bitmap heap scan state has no current scan descriptor (ss.ss_currentScanDesc is NULL). */
static inline bool BmHeapScanNodeIsStub(BitmapHeapScanState* bm_heap_scan)
{
    const bool has_scan_desc = (bm_heap_scan->ss.ss_currentScanDesc != NULL);
    return !has_scan_desc;
}
/*
 * ExecInitNodeByType
 *
 * Dispatches on the tag of 'node' to the matching ExecInit* routine and
 * returns the state it builds, cast to PlanState*.
 *
 * An unknown tag is reported through ereport(ERROR) with
 * ERRCODE_UNRECOGNIZED_NODE_TYPE.
 */
PlanState* ExecInitNodeByType(Plan* node, EState* estate, int eflags)
{
    PlanState* state = NULL;

    switch (nodeTag(node)) {
        case T_BaseResult:
            state = (PlanState*)ExecInitResult((BaseResult*)node, estate, eflags);
            break;
        case T_ModifyTable:
            state = (PlanState*)ExecInitModifyTable((ModifyTable*)node, estate, eflags);
            break;
        case T_Append:
            state = (PlanState*)ExecInitAppend((Append*)node, estate, eflags);
            break;
        case T_MergeAppend:
            state = (PlanState*)ExecInitMergeAppend((MergeAppend*)node, estate, eflags);
            break;
        case T_RecursiveUnion:
            state = (PlanState*)ExecInitRecursiveUnion((RecursiveUnion*)node, estate, eflags);
            break;
        case T_BitmapAnd:
            state = (PlanState*)ExecInitBitmapAnd((BitmapAnd*)node, estate, eflags);
            break;
        case T_BitmapOr:
            state = (PlanState*)ExecInitBitmapOr((BitmapOr*)node, estate, eflags);
            break;
        case T_SeqScan:
            state = (PlanState*)ExecInitSeqScan((SeqScan*)node, estate, eflags);
            break;
        case T_IndexScan:
            state = (PlanState*)ExecInitIndexScan((IndexScan*)node, estate, eflags);
            break;
        case T_IndexOnlyScan:
            state = (PlanState*)ExecInitIndexOnlyScan((IndexOnlyScan*)node, estate, eflags);
            break;
        case T_BitmapIndexScan:
            state = (PlanState*)ExecInitBitmapIndexScan((BitmapIndexScan*)node, estate, eflags);
            break;
        case T_BitmapHeapScan:
            state = (PlanState*)ExecInitBitmapHeapScan((BitmapHeapScan*)node, estate, eflags);
            break;
        case T_TidScan:
            state = (PlanState*)ExecInitTidScan((TidScan*)node, estate, eflags);
            break;
        case T_SubqueryScan:
            state = (PlanState*)ExecInitSubqueryScan((SubqueryScan*)node, estate, eflags);
            break;
        case T_FunctionScan:
            state = (PlanState*)ExecInitFunctionScan((FunctionScan*)node, estate, eflags);
            break;
        case T_ValuesScan:
            state = (PlanState*)ExecInitValuesScan((ValuesScan*)node, estate, eflags);
            break;
        case T_CteScan:
            state = (PlanState*)ExecInitCteScan((CteScan*)node, estate, eflags);
            break;
        case T_WorkTableScan:
            state = (PlanState*)ExecInitWorkTableScan((WorkTableScan*)node, estate, eflags);
            break;
        case T_ForeignScan:
            state = (PlanState*)ExecInitForeignScan((ForeignScan*)node, estate, eflags);
            break;
        case T_ExtensiblePlan:
            state = (PlanState*)ExecInitExtensiblePlan((ExtensiblePlan*)node, estate, eflags);
            break;
        case T_NestLoop:
            state = (PlanState*)ExecInitNestLoop((NestLoop*)node, estate, eflags);
            break;
        case T_MergeJoin:
            state = (PlanState*)ExecInitMergeJoin((MergeJoin*)node, estate, eflags);
            break;
        case T_HashJoin:
            state = (PlanState*)ExecInitHashJoin((HashJoin*)node, estate, eflags);
            break;
        case T_Material:
            state = (PlanState*)ExecInitMaterial((Material*)node, estate, eflags);
            break;
        case T_Sort:
            state = (PlanState*)ExecInitSort((Sort*)node, estate, eflags);
            break;
        case T_Group:
            state = (PlanState*)ExecInitGroup((Group*)node, estate, eflags);
            break;
        case T_Agg:
            state = (PlanState*)ExecInitAgg((Agg*)node, estate, eflags);
            break;
        case T_WindowAgg:
            state = (PlanState*)ExecInitWindowAgg((WindowAgg*)node, estate, eflags);
            break;
        case T_Unique:
            state = (PlanState*)ExecInitUnique((Unique*)node, estate, eflags);
            break;
        case T_Hash:
            state = (PlanState*)ExecInitHash((Hash*)node, estate, eflags);
            break;
        case T_SetOp:
            state = (PlanState*)ExecInitSetOp((SetOp*)node, estate, eflags);
            break;
        case T_LockRows:
            state = (PlanState*)ExecInitLockRows((LockRows*)node, estate, eflags);
            break;
        case T_Limit:
            state = (PlanState*)ExecInitLimit((Limit*)node, estate, eflags);
            break;
        case T_PartIterator:
            state = (PlanState*)ExecInitPartIterator((PartIterator*)node, estate, eflags);
            break;
        case T_Stream:
            state = (PlanState*)ExecInitStream((Stream*)node, estate, eflags);
            break;
#ifdef PGXC
        case T_RemoteQuery:
            state = (PlanState*)ExecInitRemoteQuery((RemoteQuery*)node, estate, eflags);
            break;
#endif
        case T_VecToRow:
            state = (PlanState*)ExecInitVecToRow((VecToRow*)node, estate, eflags);
            break;
        case T_RowToVec:
            state = (PlanState*)ExecInitRowToVec((RowToVec*)node, estate, eflags);
            break;
        case T_VecRemoteQuery:
            state = (PlanState*)ExecInitVecRemoteQuery((VecRemoteQuery*)node, estate, eflags);
            break;
        case T_VecStream:
            state = (PlanState*)ExecInitVecStream((Stream*)node, estate, eflags);
            break;
        case T_CStoreScan:
            state = (PlanState*)ExecInitCStoreScan((CStoreScan*)node, NULL, estate, eflags);
            break;
        case T_DfsScan:
            state = (PlanState*)ExecInitDfsScan((DfsScan*)node, NULL, estate, eflags);
            break;
#ifdef ENABLE_MULTIPLE_NODES
        case T_TsStoreScan:
            state = (PlanState*)ExecInitTsStoreScan((TsStoreScan *)node, NULL, estate, eflags);
            break;
#endif /* ENABLE_MULTIPLE_NODES */
        case T_VecHashJoin:
            state = (PlanState*)ExecInitVecHashJoin((VecHashJoin*)node, estate, eflags);
            break;
        case T_VecAgg:
            state = (PlanState*)ExecInitVecAggregation((VecAgg*)node, estate, eflags);
            break;
        case T_DfsIndexScan:
            state = (PlanState*)ExecInitDfsIndexScan((DfsIndexScan*)node, estate, eflags);
            break;
        case T_CStoreIndexScan:
            state = (PlanState*)ExecInitCstoreIndexScan((CStoreIndexScan*)node, estate, eflags);
            break;
        case T_CStoreIndexCtidScan:
            state = (PlanState*)ExecInitCstoreIndexCtidScan((CStoreIndexCtidScan*)node, estate, eflags);
            break;
        case T_CStoreIndexHeapScan:
            state = (PlanState*)ExecInitCstoreIndexHeapScan((CStoreIndexHeapScan*)node, estate, eflags);
            break;
        case T_CStoreIndexAnd:
            state = (PlanState*)ExecInitCstoreIndexAnd((CStoreIndexAnd*)node, estate, eflags);
            break;
        case T_CStoreIndexOr:
            state = (PlanState*)ExecInitCstoreIndexOr((CStoreIndexOr*)node, estate, eflags);
            break;
        case T_VecSort:
            state = (PlanState*)ExecInitVecSort((Sort*)node, estate, eflags);
            break;
        case T_VecMaterial:
            state = (PlanState*)ExecInitVecMaterial((VecMaterial*)node, estate, eflags);
            break;
        case T_VecResult:
            state = (PlanState*)ExecInitVecResult((VecResult*)node, estate, eflags);
            break;
        case T_VecSubqueryScan:
            state = (PlanState*)ExecInitVecSubqueryScan((VecSubqueryScan*)node, estate, eflags);
            break;
        case T_VecForeignScan:
            state = (PlanState*)ExecInitVecForeignScan((VecForeignScan*)node, estate, eflags);
            break;
        case T_VecModifyTable:
            state = (PlanState*)ExecInitVecModifyTable((VecModifyTable*)node, estate, eflags);
            break;
        case T_VecPartIterator:
            state = (PlanState*)ExecInitVecPartIterator((VecPartIterator*)node, estate, eflags);
            break;
        case T_VecAppend:
            state = (PlanState*)ExecInitVecAppend((VecAppend*)node, estate, eflags);
            break;
        case T_VecGroup:
            state = (PlanState*)ExecInitVecGroup((VecGroup*)node, estate, eflags);
            break;
        case T_VecLimit:
            state = (PlanState*)ExecInitVecLimit((VecLimit*)node, estate, eflags);
            break;
        case T_VecUnique:
            state = (PlanState*)ExecInitVecUnique((VecUnique*)node, estate, eflags);
            break;
        case T_VecSetOp:
            state = (PlanState*)ExecInitVecSetOp((VecSetOp*)node, estate, eflags);
            break;
        case T_VecNestLoop:
            state = (PlanState*)ExecInitVecNestLoop((VecNestLoop*)node, estate, eflags);
            break;
        case T_VecMergeJoin:
            state = (PlanState*)ExecInitVecMergeJoin((VecMergeJoin*)node, estate, eflags);
            break;
        case T_VecWindowAgg:
            state = (PlanState*)ExecInitVecWindowAgg((VecWindowAgg*)node, estate, eflags);
            break;
        case T_GradientDescent:
            state = (PlanState*)ExecInitGradientDescent((GradientDescent*)node, estate, eflags);
            break;
        case T_KMeans:
            state = (PlanState*)ExecInitKMeans((KMeans*)node, estate, eflags);
            break;
        default:
            ereport(ERROR,
                (errmodule(MOD_EXECUTOR),
                    errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
                    errmsg("unrecognized node type: %d when initializing executor.", (int)nodeTag(node))));
            break; /* state stays NULL; keeps the compiler quiet */
    }

    return state;
}
/*
 * ExecInitNodeSubPlan
 *
 * Initializes the initPlans attached to 'node' and stores the resulting
 * SubPlanStates (only those that carry a planstate) in result->initPlan.
 *
 * An initPlan is initialized when this is the coordinator (multi-node build)
 * or the top consumer (single-node build), when estate->es_subplan_ids is
 * empty, or when es_subplan_ids maps the subplan to this plan node.
 */
void ExecInitNodeSubPlan(Plan* node, EState* estate, PlanState* result)
{
    List* initialized = NIL;
    ListCell* cell = NULL;

    foreach (cell, node->initPlan) {
        SubPlan* sub_plan = (SubPlan*)lfirst(cell);

        if (sub_plan == NULL) {
            continue;
        }
        Assert(IsA(sub_plan, SubPlan));

#ifdef ENABLE_MULTIPLE_NODES
        const bool always_init = IS_PGXC_COORDINATOR;
#else
        const bool always_init = StreamTopConsumerAmI();
#endif
        /* Skip subplans that es_subplan_ids assigns to some other plan node. */
        if (!always_init && estate->es_subplan_ids != NIL &&
            node->plan_node_id != list_nth_int(estate->es_subplan_ids, sub_plan->plan_id - 1)) {
            continue;
        }

        SubPlanState* sub_state = ExecInitSubPlan(sub_plan, result);
        if (sub_state->planstate) {
            initialized = lappend(initialized, sub_state);
        }
    }

    result->initPlan = initialized;
}
/* ------------------------------------------------------------------------
* ExecInitNode
*
* Recursively initializes all the nodes in the plan tree rooted
* at 'node'.
*
* Inputs:
* 'node' is the current node of the plan produced by the query planner
* 'estate' is the shared execution state for the plan tree
* 'eflags' is a bitwise OR of flag bits described in executor.h
*
* Returns a PlanState node corresponding to the given Plan node.
* ------------------------------------------------------------------------
*/
PlanState* ExecInitNode(Plan* node, EState* estate, int e_flags)
{
PlanState* result = NULL;
MemoryContext old_context;
MemoryContext node_context;
MemoryContext query_context;
char context_name[NODENAMELEN];
int rc = 0;
/*
* Do nothing when we get to the end of a leaf of the tree.
*/
if (node == NULL) {
return NULL;
}
gstrace_entry(GS_TRC_ID_ExecInitNode);
/*
* Build the name of this node's memory context. When this thread is not
* the top consumer the name is "<node tag>_<MyProcPid>"; otherwise it is
* "<node tag>_<queryId>_<plan_node_id>".
*/
if (!StreamTopConsumerAmI())
rc = snprintf_s(context_name,
NODENAMELEN,
NODENAMELEN - 1,
"%s_%lu",
nodeTagToString(nodeTag(node)),
t_thrd.proc_cxt.MyProcPid);
else
rc = snprintf_s(context_name,
NODENAMELEN,
NODENAMELEN - 1,
"%s_%lu_%d",
nodeTagToString(nodeTag(node)),
estate->es_plannedstmt->queryId,
node->plan_node_id);
securec_check_ss(rc, "", "");
/*
* Create the node-level memory context (under estate->es_const_query_cxt)
* that serves as working memory for this node.
*/
node_context = AllocSetContextCreate(estate->es_const_query_cxt,
context_name,
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
/* Remember the per-query context so it can be restored before returning. */
query_context = estate->es_query_cxt;
/*
* Temporarily make the node context the estate's query context, because
* the initialization below must run under this context.
*/
estate->es_query_cxt = node_context;
/* Switch to Node Level Memory Context */
old_context = MemoryContextSwitchTo(node_context);
/*
* Check whether this plan node needs to be processed on the current DN
* according to its exec_nodes; skip the real initialization if it does not.
*
* Note: this kind of special processing is only needed for some plan nodes
* (see NeedStubExecution).
*/
if (unlikely(IS_PGXC_DATANODE && NeedStubExecution(node))) {
result = (PlanState*)ExecInitNodeStubNorm(node, estate, e_flags);
} else {
result = ExecInitNodeByType(node, estate, e_flags);
}
/* Set the nodeContext */
result->nodeContext = node_context;
/*
* Initialize any initPlans present in this node. The planner put them in
* a separate list for us.
*/
/*
* We initialize subplan node on coordinator (for explain) or one dn thread
* that executes the subplan
*/
ExecInitNodeSubPlan(node, estate, result);
/* Set up instrumentation for this node if requested */
if (estate->es_instrument != INSTRUMENT_NONE) {
#ifdef ENABLE_MULTIPLE_NODES
/*
* "plan_node_id == 0" is a special case: "with recursive + hdfs foreign table"
* leads to the plan_node_id of every plan node in the subplan being zero.
* u_sess->instr_cxt.thread_instr->allocInstrSlot only returns the
* instrArray->instr->instrPlanData that has been allocated in threadinstrumentation.
*/
if (u_sess->instr_cxt.global_instr != NULL && u_sess->instr_cxt.thread_instr && node->plan_node_id > 0 &&
IS_PGXC_COORDINATOR && StreamTopConsumerAmI()) {
/* on compute pool */
result->instrument = u_sess->instr_cxt.thread_instr->allocInstrSlot(
node->plan_node_id, node->parent_node_id, result->plan, estate);
} else if (u_sess->instr_cxt.global_instr != NULL && u_sess->instr_cxt.thread_instr && node->plan_node_id > 0 &&
(IS_PGXC_DATANODE || (IS_PGXC_COORDINATOR && node->exec_type == EXEC_ON_COORDS))) {
/* plan node executed on the CN, or any plan node on a DN */
result->instrument = u_sess->instr_cxt.thread_instr->allocInstrSlot(
node->plan_node_id, node->parent_node_id, result->plan, estate);
} else {
/* on MPPDB CN */
result->instrument = InstrAlloc(1, estate->es_instrument);
}
#else
/*
* Use a slot from the thread instrumentation when global and thread
* instrumentation exist, the node has a positive plan_node_id, and either
* this is not the top consumer or the node's plan-id offset is 0;
* otherwise allocate a standalone Instrumentation.
*/
if (u_sess->instr_cxt.global_instr != NULL && u_sess->instr_cxt.thread_instr && node->plan_node_id > 0 &&
(!StreamTopConsumerAmI() ||
u_sess->instr_cxt.global_instr->get_planIdOffsetArray()[node->plan_node_id - 1] == 0)) {
result->instrument = u_sess->instr_cxt.thread_instr->allocInstrSlot(
node->plan_node_id, node->parent_node_id, result->plan, estate);
} else {
result->instrument = InstrAlloc(1, estate->es_instrument);
}
#endif
if (result->instrument) {
/* Let the instrumentation know which memory context belongs to this node. */
result->instrument->memoryinfo.nodeContext = node_context;
/*
* With the workload manager on and operator-level resource tracking
* enabled, register this operator (keyed by plan node id, proc id and
* query id) via ExplainCreateDNodeInfoOnDN.
*/
if (u_sess->attr.attr_resource.use_workload_manager &&
u_sess->attr.attr_resource.resource_track_level == RESOURCE_TRACK_OPERATOR &&
estate->es_can_realtime_statistics && u_sess->exec_cxt.need_track_resource &&
NeedExecuteActiveSql(node)) {
Qpid qid;
qid.plannodeid = node->plan_node_id;
qid.procId = u_sess->instr_cxt.gs_query_id->procId;
qid.queryId = u_sess->instr_cxt.gs_query_id->queryId;
/* The node's dop is query_dop when parallelism is enabled for it, else 1. */
int plan_dop = node->parallel_enabled ? u_sess->opt_cxt.query_dop : 1;
result->instrument->dop = plan_dop;
int64 plan_rows = e_rows_convert_to_int64(node->plan_rows);
/* Sonic hash agg/join are reported under dedicated operator names. */
if (nodeTag(node) == T_VecAgg &&
((Agg*)node)->aggstrategy == AGG_HASHED && ((VecAgg*)node)->is_sonichash) {
ExplainCreateDNodeInfoOnDN(&qid,
result->instrument,
node->exec_type == EXEC_ON_DATANODES,
"VectorSonicHashAgg",
plan_dop,
plan_rows);
} else if (nodeTag(node) == T_VecHashJoin && ((HashJoin*)node)->isSonicHash) {
ExplainCreateDNodeInfoOnDN(&qid,
result->instrument,
node->exec_type == EXEC_ON_DATANODES,
"VectorSonicHashJoin",
plan_dop,
plan_rows);
} else {
ExplainCreateDNodeInfoOnDN(&qid,
result->instrument,
node->exec_type == EXEC_ON_DATANODES,
nodeTagToString(nodeTag(node)),
plan_dop,
plan_rows);
}
}
}
}
/* Switch to OldContext */
MemoryContextSwitchTo(old_context);
/* restore the per query context */
estate->es_query_cxt = query_context;
/* No rows have been produced by this node yet. */
result->ps_rownum = 0;
gstrace_exit(GS_TRC_ID_ExecInitNode);
return result;
}
/*
 * ExecProcNodeByType
 *
 * Dispatches on the tag of the plan state 'node' to the matching Exec*
 * routine and returns the slot it produces.
 *
 * For remote-query and stream nodes, t_thrd.pgxc_cxt.GlobalNetInstr points
 * at the node's instrumentation for the duration of the call and is reset
 * to NULL afterwards. An unknown tag is reported through ereport(ERROR).
 */
TupleTableSlot* ExecProcNodeByType(PlanState* node)
{
    TupleTableSlot* slot = NULL;

    switch (nodeTag(node)) {
        case T_ResultState:
            slot = ExecResult((ResultState*)node);
            break;
        case T_ModifyTableState:
        case T_DistInsertSelectState:
            slot = ExecModifyTable((ModifyTableState*)node);
            break;
        case T_AppendState:
            slot = ExecAppend((AppendState*)node);
            break;
        case T_MergeAppendState:
            slot = ExecMergeAppend((MergeAppendState*)node);
            break;
        case T_RecursiveUnionState:
            slot = ExecRecursiveUnion((RecursiveUnionState*)node);
            break;
        case T_SeqScanState:
            slot = ExecSeqScan((SeqScanState*)node);
            break;
        case T_IndexScanState:
            slot = ExecIndexScan((IndexScanState*)node);
            break;
        case T_IndexOnlyScanState:
            slot = ExecIndexOnlyScan((IndexOnlyScanState*)node);
            break;
        case T_BitmapHeapScanState:
            slot = ExecBitmapHeapScan((BitmapHeapScanState*)node);
            break;
        case T_TidScanState:
            slot = ExecTidScan((TidScanState*)node);
            break;
        case T_SubqueryScanState:
            slot = ExecSubqueryScan((SubqueryScanState*)node);
            break;
        case T_FunctionScanState:
            slot = ExecFunctionScan((FunctionScanState*)node);
            break;
        case T_ValuesScanState:
            slot = ExecValuesScan((ValuesScanState*)node);
            break;
        case T_CteScanState:
            slot = ExecCteScan((CteScanState*)node);
            break;
        case T_WorkTableScanState:
            slot = ExecWorkTableScan((WorkTableScanState*)node);
            break;
        case T_ForeignScanState:
            slot = ExecForeignScan((ForeignScanState*)node);
            break;
        case T_ExtensiblePlanState:
            slot = ExecExtensiblePlan((ExtensiblePlanState*)node);
            break;

        /* join nodes */
        case T_NestLoopState:
            slot = ExecNestLoop((NestLoopState*)node);
            break;
        case T_MergeJoinState:
            slot = ExecMergeJoin((MergeJoinState*)node);
            break;
        case T_HashJoinState:
            slot = ExecHashJoin((HashJoinState*)node);
            break;

        /* partition iterator node */
        case T_PartIteratorState:
            slot = ExecPartIterator((PartIteratorState*)node);
            break;

        /* materialization nodes */
        case T_MaterialState:
            slot = ExecMaterial((MaterialState*)node);
            break;
        case T_SortState:
            slot = ExecSort((SortState*)node);
            break;
        case T_GroupState:
            slot = ExecGroup((GroupState*)node);
            break;
        case T_AggState:
            slot = ExecAgg((AggState*)node);
            break;
        case T_WindowAggState:
            slot = ExecWindowAgg((WindowAggState*)node);
            break;
        case T_UniqueState:
            slot = ExecUnique((UniqueState*)node);
            break;
        case T_HashState:
            slot = ExecHash();
            break;
        case T_SetOpState:
            slot = ExecSetOp((SetOpState*)node);
            break;
        case T_LockRowsState:
            slot = ExecLockRows((LockRowsState*)node);
            break;
        case T_LimitState:
            slot = ExecLimit((LimitState*)node);
            break;
        case T_VecToRowState:
            slot = ExecVecToRow((VecToRowState*)node);
            break;
#ifdef PGXC
        case T_RemoteQueryState:
            t_thrd.pgxc_cxt.GlobalNetInstr = node->instrument;
            slot = ExecRemoteQuery((RemoteQueryState*)node);
            t_thrd.pgxc_cxt.GlobalNetInstr = NULL;
            break;
#endif
        case T_StreamState:
            t_thrd.pgxc_cxt.GlobalNetInstr = node->instrument;
            slot = ExecStream((StreamState*)node);
            t_thrd.pgxc_cxt.GlobalNetInstr = NULL;
            break;
        case T_GradientDescentState:
            slot = ExecGradientDescent((GradientDescentState*)node);
            break;
        case T_KMeansState:
            slot = ExecKMeans((KMeansState*)node);
            break;
        default:
            ereport(ERROR,
                (errmodule(MOD_EXECUTOR),
                    errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
                    errmsg("unrecognized node type: %d when executing executor node.", (int)nodeTag(node))));
            slot = NULL;
            break;
    }

    return slot;
}
/*
 * ExecProcNodeInstr
 *
 * Stops the instrumentation of 'node' after one execution step and records
 * the tuple count, the operator memory, and whether the node is finished.
 * Callers must only invoke this when node->instrument is not NULL.
 *
 * 'result' is the slot the node has just returned (may be NULL or empty).
 */
void ExecProcNodeInstr(PlanState* node, TupleTableSlot* result)
{
    const NodeTag tag = nodeTag(node);
    const bool no_tuple = TupIsNull(result);

    if (tag == T_ModifyTableState || tag == T_DistInsertSelectState) {
        EState* estate = node->state;
        instr_time first_tuple;

        INSTR_TIME_SET_ZERO(first_tuple);
        INSTR_TIME_ACCUM_DIFF(
            first_tuple, ((ModifyTableState*)node)->first_tuple_modified, node->instrument->starttime);

        /*
         * A zero es_last_processed means es_processed comes from the current
         * operator alone. Otherwise es_processed also contains rows of other
         * operators, and the difference between the two is the number of
         * tuples processed by the current operator. This matters when an hdfs
         * table is modified: both the main table and the delta table may be
         * modified, es_processed is then set twice, and without the
         * subtraction explain would show a wrong row count for the modify
         * operator.
         */
        if (estate->es_last_processed != 0) {
            InstrStopNode(node->instrument, estate->es_processed - estate->es_last_processed);
        } else {
            InstrStopNode(node->instrument, estate->es_processed);
        }
        estate->es_last_processed = estate->es_processed;
        node->instrument->firsttuple = INSTR_TIME_GET_DOUBLE(first_tuple);
    } else {
        /* Every other node accounts for at most one tuple per call. */
        InstrStopNode(node->instrument, no_tuple ? 0.0 : 1.0);
    }

    node->instrument->memoryinfo.operatorMemory = SET_NODEMEM(node->plan->operatorMemKB[0], node->plan->dop);

    /* A NULL/empty result marks the node's status flag as true. */
    if (no_tuple) {
        node->instrument->status = true;
    }
}
/* ----------------------------------------------------------------
* ExecProcNode
*
* Execute the given node to return a(nother) tuple.
* ----------------------------------------------------------------
*/
TupleTableSlot* ExecProcNode(PlanState* node)
{
    TupleTableSlot* slot = NULL;

    CHECK_FOR_INTERRUPTS();

    /* React to a stop or cancel signal before doing any work. */
    if (unlikely(executorEarlyStop())) {
        return NULL;
    }

    /* Everything below runs in the node-level memory context. */
    MemoryContext caller_context = MemoryContextSwitchTo(node->nodeContext);

    /* Changed parameters are handled by ReScan. */
    if (node->chgParam != NULL) {
        ExecReScan(node);
    }

    if (node->instrument != NULL) {
        InstrStartNode(node->instrument);
    }

    /* Stub nodes take the stub routine; all others are dispatched by type. */
    slot = unlikely(planstate_need_stub(node)) ? ExecProcNodeStub(node) : ExecProcNodeByType(node);

    if (node->instrument != NULL) {
        ExecProcNodeInstr(node, slot);
    }

    MemoryContextSwitchTo(caller_context);

    /* Count this call in the node's row number. */
    node->ps_rownum++;
    return slot;
}
/* ----------------------------------------------------------------
* MultiExecProcNode
*
* Execute a node that doesn't return individual tuples
* (it might return a hashtable, bitmap, etc). Caller should
* check it got back the expected kind of Node.
*
* This has essentially the same responsibilities as ExecProcNode,
* but it does not do InstrStartNode/InstrStopNode (mainly because
* it can't tell how many returned tuples to count). Each per-node
* function must provide its own instrumentation support.
* ----------------------------------------------------------------
*/
Node* MultiExecProcNode(PlanState* node)
{
    Node* multi_result = NULL;

    CHECK_FOR_INTERRUPTS();

    /* Everything below runs in the node-level memory context. */
    MemoryContext caller_context = MemoryContextSwitchTo(node->nodeContext);

    /* Changed parameters are handled by ReScan. */
    if (node->chgParam != NULL) {
        ExecReScan(node);
    }

    /* Only node types that actually support multiexec are handled here. */
    const NodeTag tag = nodeTag(node);
    if (tag == T_HashState) {
        multi_result = MultiExecHash((HashState*)node);
    } else if (tag == T_BitmapIndexScanState) {
        multi_result = MultiExecBitmapIndexScan((BitmapIndexScanState*)node);
    } else if (tag == T_BitmapAndState) {
        multi_result = MultiExecBitmapAnd((BitmapAndState*)node);
    } else if (tag == T_BitmapOrState) {
        multi_result = MultiExecBitmapOr((BitmapOrState*)node);
    } else {
        ereport(ERROR,
            (errmodule(MOD_EXECUTOR),
                errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
                errmsg("unrecognized node type: %d when executing multi executor node.", (int)tag)));
        multi_result = NULL;
    }

    /* Record the operator memory (e.g. for the Hash operator) when instrumented. */
    if (node->instrument) {
        node->instrument->memoryinfo.operatorMemory = node->plan->operatorMemKB[0];
    }

    MemoryContextSwitchTo(caller_context);
    return multi_result;
}
/*
 * ExplainNodePending
 *
 * Enters a record for the plan node behind 'result_plan' into
 * g_operator_table.collected_info_hashtbl with status Operator_Pending.
 *
 * Does nothing unless the workload manager is on and the resource track
 * level is RESOURCE_TRACK_OPERATOR, and only on a single node or on a
 * coordinator whose connection does not come from another coordinator.
 */
void ExplainNodePending(PlanState* result_plan)
{
    const bool tracking_operators = u_sess->attr.attr_resource.use_workload_manager &&
        u_sess->attr.attr_resource.resource_track_level == RESOURCE_TRACK_OPERATOR;
    if (!tracking_operators || result_plan == NULL) {
        return;
    }

    const bool dn_or_remote_conn = !IS_PGXC_COORDINATOR || IsConnFromCoord();
    if (dn_or_remote_conn && !IS_SINGLE_NODE) {
        return;
    }

    /* The hash key: (proc id, query id, plan node id). */
    Qpid qid;
    qid.procId = u_sess->instr_cxt.gs_query_id->procId;
    qid.queryId = u_sess->instr_cxt.gs_query_id->queryId;
    qid.plannodeid = result_plan->plan->plan_node_id;
    if (IsQpidInvalid(&qid)) {
        return;
    }

    bool has_found = false;
    uint32 hash_code = GetHashPlanCode(&qid, sizeof(Qpid));

    LockOperHistHashPartition(hash_code, LW_EXCLUSIVE);

    ExplainDNodeInfo* p_detail =
        (ExplainDNodeInfo*)hash_search(g_operator_table.collected_info_hashtbl, &qid, HASH_ENTER, &has_found);
    if (p_detail == NULL) {
        ereport(LOG, (errmsg("History Trace Error: Cannot alloc memory, out of memory!")));
    } else {
        /* An already existing entry is unexpected; log it, then overwrite it. */
        if (has_found) {
            ereport(LOG,
                (errmsg("History Trace Error: The new information has the same hash key as the existed record in "
                        "the hash table, which is not expected.")));
        }
        int rc = memset_s(p_detail, sizeof(ExplainDNodeInfo), 0, sizeof(ExplainDNodeInfo));
        securec_check(rc, "\0", "\0");
        p_detail->qid = qid;
        p_detail->status = Operator_Pending;
    }

    UnLockOperHistHashPartition(hash_code);
}
/*
 * ExplainNodeFinish
 *
 * Walks the plan state tree rooted at 'result_plan' and, for each
 * instrumented node whose estate allows history statistics, either marks
 * the operator as pending (is_pending) or hands its session information to
 * ExplainSetSessionInfo. Children, initPlans and subPlans are visited
 * recursively.
 *
 * Does nothing unless the workload manager is on, the resource track level
 * is RESOURCE_TRACK_OPERATOR, and NeedExecuteActiveSql() accepts the node.
 */
void ExplainNodeFinish(PlanState* result_plan, PlannedStmt *pstmt, TimestampTz current_time, bool is_pending)
{
    const bool tracking_operators = u_sess->attr.attr_resource.use_workload_manager &&
        u_sess->attr.attr_resource.resource_track_level == RESOURCE_TRACK_OPERATOR;
    if (!tracking_operators || result_plan == NULL || !NeedExecuteActiveSql(result_plan->plan)) {
        return;
    }

    /* Step 1: report this node itself. */
    if (result_plan->instrument != NULL && result_plan->state->es_can_history_statistics) {
        int plan_dop = result_plan->instrument->dop;

        if (!is_pending) {
            Plan* node = result_plan->plan;
            int64 plan_rows = e_rows_convert_to_int64(node->plan_rows);
            char *plan_name = NULL;

            /* Sonic hash agg/join are reported under dedicated operator names. */
            if (nodeTag(node) == T_VecAgg && ((Agg*)node)->aggstrategy == AGG_HASHED && ((VecAgg*)node)->is_sonichash) {
                plan_name = "VectorSonicHashAgg";
            } else if (nodeTag(node) == T_VecHashJoin && ((HashJoin*)node)->isSonicHash) {
                plan_name = "VectorSonicHashJoin";
            } else {
                plan_name = nodeTagToString(nodeTag(node));
            }

            OperatorPlanInfo* opt_plan_info = NULL;
#ifndef ENABLE_MULTIPLE_NODES
            if (pstmt != NULL) {
                opt_plan_info = ExtractOperatorPlanInfo(result_plan, pstmt);
            }
#endif /* ENABLE_MULTIPLE_NODES */
            ExplainSetSessionInfo(node->plan_node_id,
                result_plan->instrument,
                node->exec_type == EXEC_ON_DATANODES,
                plan_name,
                plan_dop,
                plan_rows,
                current_time,
                opt_plan_info);
        } else {
            ExplainNodePending(result_plan);
        }
    }

    /*
     * Step 2: collect the children of this node. Nodes with a child array
     * expose that array directly; every other node contributes the entries
     * of the local two-element array.
     */
    PlanState* own_children[2] = {NULL, NULL};
    PlanState** children = own_children;
    int child_count = 0;

    switch (nodeTag(result_plan->plan)) {
        case T_MergeAppend:
        case T_VecMergeAppend: {
            MergeAppendState* merge_append = (MergeAppendState*)result_plan;
            children = merge_append->mergeplans;
            child_count = merge_append->ms_nplans;
            break;
        }
        case T_Append:
        case T_VecAppend: {
            AppendState* append = (AppendState*)result_plan;
            children = append->appendplans;
            child_count = append->as_nplans;
            break;
        }
        case T_ModifyTable:
        case T_VecModifyTable: {
            ModifyTableState* modify_table = (ModifyTableState*)result_plan;
            children = modify_table->mt_plans;
            child_count = modify_table->mt_nplans;
            break;
        }
        case T_SubqueryScan:
        case T_VecSubqueryScan:
            own_children[0] = ((SubqueryScanState*)result_plan)->subplan;
            child_count = 1;
            break;
        case T_BitmapAnd:
        case T_CStoreIndexAnd: {
            BitmapAndState* bitmap_and = (BitmapAndState*)result_plan;
            children = bitmap_and->bitmapplans;
            child_count = bitmap_and->nplans;
            break;
        }
        case T_BitmapOr:
        case T_CStoreIndexOr: {
            BitmapOrState* bitmap_or = (BitmapOrState*)result_plan;
            children = bitmap_or->bitmapplans;
            child_count = bitmap_or->nplans;
            break;
        }
        default:
            own_children[0] = result_plan->lefttree;
            own_children[1] = result_plan->righttree;
            child_count = 2;
            break;
    }

    /* A NULL child would make the recursive call return immediately, so skip it here. */
    for (int i = 0; i < child_count; i++) {
        if (children[i] != NULL) {
            ExplainNodeFinish(children[i], pstmt, current_time, is_pending);
        }
    }

    /* Step 3: descend into initPlans and subPlans that own a plan state. */
    ListCell* cell = NULL;
    foreach (cell, result_plan->initPlan) {
        SubPlanState* init_state = (SubPlanState*)lfirst(cell);
        if (init_state->planstate != NULL) {
            ExplainNodeFinish(init_state->planstate, pstmt, current_time, is_pending);
        }
    }
    foreach (cell, result_plan->subPlan) {
        SubPlanState* sub_state = (SubPlanState*)lfirst(cell);
        if (sub_state->planstate != NULL) {
            ExplainNodeFinish(sub_state->planstate, pstmt, current_time, is_pending);
        }
    }
}
/*
 * cleanup_sensitive_information
 *
 * Target : clean up the sensitive information (derive vectors, mac vectors
 *          and user input) kept by the encryption and decryption functions.
 *          A group of buffers is only passed to CleanupBuffer() when the
 *          matching *_function_call flag is set; the flag is reset to false
 *          once the group has been processed.
 * Input  : NA
 * Output : NA
 */
void cleanup_sensitive_information()
{
    /* state kept by the encryption path: one saved copy of each buffer */
    extern THR_LOCAL bool encryption_function_call;
    extern THR_LOCAL unsigned char derive_vector_saved[RANDOM_LEN];
    extern THR_LOCAL unsigned char mac_vector_saved[RANDOM_LEN];
    extern THR_LOCAL unsigned char input_saved[RANDOM_LEN];

    /* state kept by the decryption path: NUMBER_OF_SAVED_DERIVEKEYS slots of each buffer */
    extern THR_LOCAL bool decryption_function_call;
    extern THR_LOCAL unsigned char derive_vector_used[NUMBER_OF_SAVED_DERIVEKEYS][RANDOM_LEN];
    extern THR_LOCAL unsigned char mac_vector_used[NUMBER_OF_SAVED_DERIVEKEYS][RANDOM_LEN];
    extern THR_LOCAL unsigned char user_input_used[NUMBER_OF_SAVED_DERIVEKEYS][RANDOM_LEN];

    if (encryption_function_call) {
        CleanupBuffer(derive_vector_saved, RANDOM_LEN);
        CleanupBuffer(input_saved, RANDOM_LEN);
        CleanupBuffer(mac_vector_saved, RANDOM_LEN);
        encryption_function_call = false;
    }

    if (decryption_function_call) {
        /* every saved slot is cleaned, whether or not it was actually filled */
        for (int slot = 0; slot < NUMBER_OF_SAVED_DERIVEKEYS; slot++) {
            CleanupBuffer(derive_vector_used[slot], RANDOM_LEN);
            CleanupBuffer(user_input_used[slot], RANDOM_LEN);
            CleanupBuffer(mac_vector_used[slot], RANDOM_LEN);
        }
        decryption_function_call = false;
    }
}
/* ----------------------------------------------------------------
 * ExecEndNodeByType
 *
 * Dispatches on the node tag of 'node' and calls the ExecEnd* routine
 * that belongs to that plan-state type. This is the per-type half of
 * ExecEndNode, which is its caller in this file.
 *
 * Recursively cleans up all the nodes in the plan rooted
 * at 'node'. The descent into child nodes is not done in this
 * function itself; it is presumably done by the per-type ExecEnd*
 * routines calling ExecEndNode on their subnodes.
 *
 * After this operation, the query plan will not be able to be
 * processed any further. This should be called only after
 * the query plan has been fully executed.
 *
 * 'node' is not checked for NULL here; ExecEndNode returns early on a
 * NULL node before it reaches this function.
 *
 * A node tag without a matching case is reported through
 * ereport(ERROR, ...) with ERRCODE_UNRECOGNIZED_NODE_TYPE.
 * ----------------------------------------------------------------
 */
static void ExecEndNodeByType(PlanState* node)
{
/*
 * The NULL-node check, the cleanup of sensitive encryption/decryption
 * information and the instrumentation wrap-up are all performed by
 * ExecEndNode before it dispatches here; nothing of that kind is
 * repeated in this function.
 */
switch (nodeTag(node)) {
/*
 * control nodes
 */
case T_ResultState:
ExecEndResult((ResultState*)node);
break;
/* both tags are ended through the ModifyTable routine */
case T_ModifyTableState:
case T_DistInsertSelectState:
ExecEndModifyTable((ModifyTableState*)node);
break;
case T_AppendState:
ExecEndAppend((AppendState*)node);
break;
case T_MergeAppendState:
ExecEndMergeAppend((MergeAppendState*)node);
break;
case T_RecursiveUnionState:
ExecEndRecursiveUnion((RecursiveUnionState*)node);
break;
case T_BitmapAndState:
ExecEndBitmapAnd((BitmapAndState*)node);
break;
case T_BitmapOrState:
ExecEndBitmapOr((BitmapOrState*)node);
break;
/*
 * scan nodes
 */
case T_SeqScanState:
ExecEndSeqScan((SeqScanState*)node);
break;
case T_CStoreScanState:
ExecEndCStoreScan((CStoreScanState*)node, false);
break;
case T_DfsScanState:
ExecEndDfsScan((DfsScanState*)node);
break;
/* this case is only compiled when ENABLE_MULTIPLE_NODES is defined */
#ifdef ENABLE_MULTIPLE_NODES
case T_TsStoreScanState:
ExecEndTsStoreScan((TsStoreScanState *)node, false);
break;
#endif /* ENABLE_MULTIPLE_NODES */
case T_IndexScanState:
ExecEndIndexScan((IndexScanState*)node);
break;
case T_DfsIndexScanState:
ExecEndDfsIndexScan((DfsIndexScanState*)node);
break;
case T_CStoreIndexScanState:
ExecEndCstoreIndexScan((CStoreIndexScanState*)node);
break;
case T_IndexOnlyScanState:
ExecEndIndexOnlyScan((IndexOnlyScanState*)node);
break;
case T_BitmapIndexScanState:
ExecEndBitmapIndexScan((BitmapIndexScanState*)node);
break;
case T_BitmapHeapScanState:
ExecEndBitmapHeapScan((BitmapHeapScanState*)node);
break;
case T_TidScanState:
ExecEndTidScan((TidScanState*)node);
break;
case T_SubqueryScanState:
ExecEndSubqueryScan((SubqueryScanState*)node);
break;
case T_FunctionScanState:
ExecEndFunctionScan((FunctionScanState*)node);
break;
case T_ValuesScanState:
ExecEndValuesScan((ValuesScanState*)node);
break;
case T_CteScanState:
ExecEndCteScan((CteScanState*)node);
break;
case T_WorkTableScanState:
ExecEndWorkTableScan((WorkTableScanState*)node);
break;
case T_ForeignScanState:
ExecEndForeignScan((ForeignScanState*)node);
break;
case T_ExtensiblePlanState:
ExecEndExtensiblePlan((ExtensiblePlanState*)node);
break;
case T_PartIteratorState:
ExecEndPartIterator((PartIteratorState*)node);
break;
/*
 * join nodes
 */
case T_NestLoopState:
ExecEndNestLoop((NestLoopState*)node);
break;
case T_MergeJoinState:
ExecEndMergeJoin((MergeJoinState*)node);
break;
case T_HashJoinState:
ExecEndHashJoin((HashJoinState*)node);
break;
/*
 * materialization nodes
 */
case T_MaterialState:
ExecEndMaterial((MaterialState*)node);
break;
case T_SortState:
ExecEndSort((SortState*)node);
break;
case T_GroupState:
ExecEndGroup((GroupState*)node);
break;
case T_AggState:
ExecEndAgg((AggState*)node);
break;
case T_WindowAggState:
ExecEndWindowAgg((WindowAggState*)node);
break;
case T_UniqueState:
ExecEndUnique((UniqueState*)node);
break;
case T_HashState:
ExecEndHash((HashState*)node);
break;
case T_SetOpState:
ExecEndSetOp((SetOpState*)node);
break;
case T_LockRowsState:
ExecEndLockRows((LockRowsState*)node);
break;
case T_LimitState:
ExecEndLimit((LimitState*)node);
break;
/* the remote query case is only compiled when PGXC is defined */
#ifdef PGXC
case T_RemoteQueryState:
ExecEndRemoteQuery((RemoteQueryState*)node);
break;
#endif
/*
 * stream nodes, row/vector conversion nodes, and the Vec* and CStore*
 * plan-state types
 */
case T_StreamState:
ExecEndStream((StreamState*)node);
break;
case T_VecStreamState:
ExecEndVecStream((VecStreamState*)node);
break;
case T_VecToRowState:
ExecEndVecToRow((VecToRowState*)node);
break;
case T_RowToVecState:
ExecEndRowToVec((RowToVecState*)node);
break;
case T_VecHashJoinState:
ExecEndVecHashJoin((VecHashJoinState*)node);
break;
case T_VecAggState:
ExecEndVecAggregation((VecAggState*)node);
break;
case T_VecRemoteQueryState:
ExecEndVecRemoteQuery((VecRemoteQueryState*)node);
break;
case T_VecSortState:
ExecEndVecSort((VecSortState*)node);
break;
case T_VecMaterialState:
ExecEndVecMaterial((VecMaterialState*)node);
break;
case T_VecResultState:
ExecEndVecResult((VecResultState*)node);
break;
case T_CStoreIndexCtidScanState:
ExecEndCstoreIndexCtidScan((CStoreIndexCtidScanState*)node);
break;
case T_CStoreIndexHeapScanState:
ExecEndCstoreIndexHeapScan((CStoreIndexHeapScanState*)node);
break;
case T_CStoreIndexAndState:
ExecEndCstoreIndexAnd((CStoreIndexAndState*)node);
break;
case T_CStoreIndexOrState:
ExecEndCstoreIndexOr((CStoreIndexOrState*)node);
break;
case T_VecSubqueryScanState:
ExecEndVecSubqueryScan((VecSubqueryScanState*)node);
break;
case T_VecModifyTableState:
ExecEndVecModifyTable((VecModifyTableState*)node);
break;
case T_VecPartIteratorState:
ExecEndVecPartIterator((VecPartIteratorState*)node);
break;
case T_VecAppendState:
ExecEndVecAppend((VecAppendState*)node);
break;
case T_VecForeignScanState:
ExecEndVecForeignScan((VecForeignScanState*)node);
break;
case T_VecGroupState:
ExecEndVecGroup((VecGroupState*)node);
break;
case T_VecUniqueState:
ExecEndVecUnique((VecUniqueState*)node);
break;
case T_VecLimitState:
ExecEndVecLimit((VecLimitState*)node);
break;
case T_VecSetOpState:
ExecEndVecSetOp((VecSetOpState*)node);
break;
case T_VecNestLoopState:
ExecEndVecNestLoop((VecNestLoopState*)node);
break;
case T_VecMergeJoinState:
ExecEndVecMergeJoin((VecMergeJoinState*)node);
break;
case T_VecWindowAggState:
ExecEndVecWindowAgg((VecWindowAggState*)node);
break;
/*
 * GradientDescent and KMeans nodes
 */
case T_GradientDescentState:
ExecEndGradientDescent((GradientDescentState*)node);
break;
case T_KMeansState:
ExecEndKMeans((KMeansState*)node);
break;
/*
 * Any tag without a case above is reported as an error; the numeric
 * tag value is included in the message.
 */
default:
ereport(ERROR,
(errmodule(MOD_EXECUTOR),
errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmsg("unrecognized node type: %d when ending executor.", (int)nodeTag(node))));
/* NOTE(review): presumably not reached, ereport(ERROR, ...) is not expected to return -- confirm */
break;
}
}
/*
 * ExecEndNode
 *
 * Shut down the plan-state node 'node'. A NULL node is silently ignored.
 *
 * Before the node is handed to its end routine this function
 *   - calls cleanup_sensitive_information();
 *   - frees node->chgParam, if it is set, and resets it to NULL;
 *   - if the node carries instrumentation, ends the instrumentation loop
 *     (on a datanode, or on a coordinator for which StreamTopConsumerAmI()
 *     holds) and removes the explain info of the plan node when
 *     NeedExecuteActiveSql() returns true for its plan.
 *
 * A node for which planstate_need_stub() is true is finished through
 * ExecEndNodeStub(); every other node is dispatched by ExecEndNodeByType().
 */
void ExecEndNode(PlanState* node)
{
    /* nothing to do for an absent node */
    if (node == NULL) {
        return;
    }

    cleanup_sensitive_information();

    if (node->chgParam != NULL) {
        bms_free_ext(node->chgParam);
        node->chgParam = NULL;
    }

    if (node->instrument != NULL) {
        /* on a datanode the instrumentation loop is ended here */
        if (IS_PGXC_DATANODE) {
            InstrEndLoop(node->instrument);
        }

        if (NeedExecuteActiveSql(node->plan)) {
            removeExplainInfo(node->plan->plan_node_id);
        }
    }

    /* node->instrument is deliberately re-read, as the original code does */
    if (node->instrument != NULL) {
        if (IS_PGXC_COORDINATOR && StreamTopConsumerAmI()) {
            InstrEndLoop(node->instrument);
        }
    }

    if (planstate_need_stub(node)) {
        ExecEndNodeStub(node);
    } else {
        ExecEndNodeByType(node);
    }
}