/* -------------------------------------------------------------------------
 *
 * nodeStartWithOp.cpp
 *    routines to handle StartWithOp operator a.k.a. START WITH ... CONNECT BY
 *
 * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 * Portions Copyright (c) 2021, openGauss Contributors
 *
 *
 * IDENTIFICATION
 *    src/gausskernel/runtime/executor/nodeStartWithOp.cpp
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"
#include "executor/exec/execdebug.h"
#include "executor/node/nodeCtescan.h"
#include "optimizer/planner.h"
#include "optimizer/var.h"
#include "utils/builtins.h"
#include "utils/int8.h"
#include "utils/cash.h"
#include "miscadmin.h"

/* Text used by sys_connect_by_path() when its first argument datum is 0 */
#define SWCB_DEFAULT_NULLSTR "null"

/*
 * Execution status of a StartWithOp node (stored in swop_status):
 *   SWOP_UNKNOWN - set by ExecEndStartWithOp()
 *   SWOP_BUILD   - set by ExecInitStartWithOp()/ExecReScanStartWithOp();
 *                  ExecStartWithOp() materializes the outer plan in this state
 *   SWOP_EXECUTE - ExecStartWithOp() returns tuples from the result tuplestore
 *   SWOP_FINISH  - set once the result tuplestore returns an empty slot
 *   SWOP_ESCAPE  - set by GetStartWithSlot() on the depth-first path when the
 *                  ROWNUM limit is reached; makes depth_first_connect() return
 */
typedef enum StartWithOpExecStatus {
    SWOP_UNKNOWN = 0,
    SWOP_BUILD = 1,
    SWOP_EXECUTE,
    SWOP_FINISH,
    SWOP_ESCAPE
} StartWithOpExecStatus;

#define KEY_START_TAG "{"
#define KEY_SPLIT_TAG "/"
#define KEY_CLOSE_TAG "}"

#define ITRCYCLE_CHECK_LIMIT 50
#define DEPTH_CRITERION 100000
#define ITR_MAX_TUPLES 80000
#define MAX_SIBLINGS_TUPLE 1000000000

char* get_typename(Oid typid);
static void ProcessPseudoReturnColumns(StartWithOpState *state);
static AttrNumber FetchRUItrTargetEntryResno(StartWithOpState *state);

/* functions to check and set pseudo column value */
static void CheckIsCycle(StartWithOpState *state, bool *connect_by_iscycle);
static void CheckIsLeaf(StartWithOpState *state, bool *connect_by_isleaf);
static void UpdatePseudoReturnColumn(StartWithOpState *state, TupleTableSlot *slot, bool isleaf, bool iscycle);

/* functions to get/fetch array_key content */
static List *TupleSlotGetKeyArrayList(TupleTableSlot *slot);
static List *GetCurrentArrayColArray(const FunctionCallInfo fcinfo, const char *value, char **raw_array_str,
    bool *isConstArrayList = NULL);
static
AttrNumber GetInternalArrayAttnum(TupleDesc tupDesc, AttrNumber origVarAttno);
static const char *GetPseudoArrayTargetValueString(TupleTableSlot *slot, AttrNumber attnum);
static List *CovertInternalArrayStringToList(char *raw_array_str);

/* Local functions used in tuple-conversion from RecursiveUnion -> StartWith */
static Datum ConvertRuScanArrayAttr(RecursiveUnionState *rustate, TargetEntry *te, TupleTableSlot *srcSlot,
    bool inrecursing);
static const char *GetCurrentValue(TupleTableSlot *slot, AttrNumber attnum);
static const char *GetCurrentWorkTableScanPath(TupleTableSlot *srcSlot, AttrNumber pseudo_attnum);
static bytea *GetCurrentSiblingsArrayPath(TupleTableSlot *srcSlot, AttrNumber pseudo_attnum);
static const char *GetKeyEntryArrayStr(RecursiveUnionState *state, TupleTableSlot *scanSlot);
static bool PRCCanSkip(StartWithOpState *state, int prcType); /* PRC optimization */
static bytea *GetSiblingsKeyEntry(RecursiveUnionState *state, bytea *path);

/*
 * Parse the number that follows the pseudo-column prefix of a resname.
 *
 * The text after the first strlen("array_col_") characters is converted with
 * atoi(); "array_key_" has the same length, so both prefixes are skipped the
 * same way. No validation of the prefix or of the digits is performed here.
 */
static inline AttrNumber PseudoResnameGetOriginResno(const char *resname)
{
    /* resname is in attr_key/col_..
format */ AttrNumber attno = atoi((char *)resname + strlen("array_col_")); return attno; } static inline void ResetResultSlotAttValueArray(StartWithOpState *state, Datum *values, bool *isnull) { errno_t rc = EOK; /* confirm the same *values* and *isnull* pointer to avoid mis-use */ Assert (state->sw_values != NULL && state->sw_values == values); Assert (state->sw_isnull != NULL && state->sw_isnull == isnull); int natts = list_length(state->ps.plan->targetlist); size_t valuesArraySize = natts * sizeof(Datum); size_t isnullArraySize = natts * sizeof(bool); rc = memset_s(state->sw_values, valuesArraySize, 0, valuesArraySize); securec_check(rc, "\0", "\0"); rc = memset_s(state->sw_isnull, isnullArraySize, (bool)false, isnullArraySize); securec_check(rc, "\0", "\0"); } extern char* pg_strrstr(char* string, const char* subStr); /* * Compare function for any siblings key entry * Offset represent the start offset index in array_siblings pseudo column */ static int SibglingsKeyEntryCmp(bytea* arg1, bytea* arg2, int offset) { int entryIdx = 4; int cmp = 0; char *c1 = VARDATA_ANY(arg1); char *c2 = VARDATA_ANY(arg2); while (entryIdx > 0) { cmp = c1[offset + entryIdx - 1] - c2[offset + entryIdx - 1]; if (cmp != 0) { break; } entryIdx--; } return cmp; } /* * Dummy compare function for array_siblings pseudo column */ extern int SibglingsKeyCmpFast(Datum x, Datum y, SortSupport ssup) { /* * Let SibglingsKey compare function looks like other sort type function. * Now just return 0 to fast array_siblings compare function, and we * do not have any ways to compare them fast because array_siblings type is customized. 
*/ return 0; } /* * Compare function for array_siblings pseudo column */ extern int SibglingsKeyCmp(Datum x, Datum y, SortSupport ssup) { int i = 0; int cmp = 0; const int bucketSize = 4; bytea* arg1 = DatumGetByteaP(x); bytea* arg2 = DatumGetByteaP(y); if (arg1 == NULL || arg2 == NULL) { return cmp; } int len1 = VARSIZE_ANY_EXHDR(arg1); int len2 = VARSIZE_ANY_EXHDR(arg2); int round = Min(len1, len2); round /= bucketSize; while (i < round) { int entryIdx = i * bucketSize; cmp = SibglingsKeyEntryCmp(arg1, arg2, entryIdx); if (cmp != 0) { break; } i++; } if (cmp == 0) { cmp = (len1 < len2) ? -1 : 1; } /* We can't afford to leak memory here. */ if (PointerGetDatum(arg1) != x) pfree_ext(arg1); if (PointerGetDatum(arg2) != y) pfree_ext(arg2); return cmp; } static bool unsupported_filter_walker(Node *node, Node *context_node) { if (node == NULL) { return false; } if (!IsA(node, SubPlan)) { return expression_tree_walker(node, (bool (*)()) unsupported_filter_walker, node); } /* * Currently we do not support subqueries * pushed down to connect-by clauses. */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Unsupported subquery found in connect by clause."))); return true; } /* * -------------------------------------------------------------------------------------- * - EXPORT FUNCTIONS.. * (1). Standard SQL engine exeuctor functions * (2). Helper fucntions that StartWith execution * (3). builtin functions that support StartWith execution * -------------------------------------------------------------------------------------- */ /* * Executor functions * - ExecInitStartWithOp() * - ExecStartWithOp() * - ExecEndStartWithOp() * - ExecReScanStartWithOp() */ StartWithOpState* ExecInitStartWithOp(StartWithOp* node, EState* estate, int eflags) { /* * Error-out unsupported cases before they * actually cause any harm. 
*/
    expression_tree_walker((Node*)node->plan.qual, (bool (*)())unsupported_filter_walker, NULL);

    /*
     * create state structure
     */
    StartWithOpState *state = makeNode(StartWithOpState);
    state->ps.plan = (Plan *)node;
    state->ps.state = estate;

    /* private context, child of the context that is current at init time */
    state->sw_context = AllocSetContextCreate(CurrentMemoryContext,
                                              "StartWith Inner",
                                              ALLOCSET_DEFAULT_MINSIZE,
                                              ALLOCSET_DEFAULT_INITSIZE,
                                              ALLOCSET_DEFAULT_MAXSIZE);

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &state->ps);
    state->ps.ps_TupFromTlist = false;

    /*
     * tuple table initialization
     */
    ExecInitResultTupleSlot(estate, &state->ps);

    /*
     * initialize child expressions
     */
    state->ps.targetlist = (List*)ExecInitExpr((Expr*)node->plan.targetlist, (PlanState*)state);
    state->ps.qual = (List*)ExecInitExpr((Expr*)node->plan.qual, (PlanState*)state);

    /*
     * initialize child nodes
     */
    outerPlanState(state) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * we don't use inner plan
     */
    Assert(innerPlan(node) == NULL);

    /*
     * initialize tuple type and projection info
     * no relations are involved in nodeResult, set the default
     * tableAm type to HEAP
     */
    ExecAssignResultTypeFromTL(&state->ps, TAM_HEAP);
    ExecAssignProjectionInfo(&state->ps, NULL);

    /* Setup pseudo return column calculation related variables */
    /* 1. assign pseudo column targetEntry, indexed by its pseudo column type */
    List *targetlist = node->plan.targetlist;
    ListCell *lc = NULL;
    TargetEntry *tle = NULL;
    foreach (lc, targetlist) {
        tle = (TargetEntry *)lfirst(lc);

        if (!IsPseudoReturnTargetEntry(tle)) {
            continue;
        }

        StartWithOpColumnType type = GetPseudoColumnType(tle);
        state->sw_pseudoCols[type] = tle;
    }

    /*
     * 2. setup keyAttnum
     *
     * Note: We expect keyEntryList to be non-NIL.
     */
    if (node->keyEntryList != NIL) {
        /* the key entry is taken from position 1 (second element) of internalEntryList */
        TargetEntry *keyTEntry = (TargetEntry *)list_nth(node->internalEntryList, 1);
        Assert (pg_strncasecmp(keyTEntry->resname, "array_key_", strlen("array_key_")) == 0);
        state->sw_keyAttnum = keyTEntry->resno;
    }

    /*
     * 3.
setup tuplestore, tupleslot and status controlling variables,
     *
     * Note: tuple store of backupTable and resultTable is not setup here,
     * instead we do their intialization in on demand
     */
    /*
     * NOTE(review): contrary to the note above, all three tuplestores
     * (working, backup and result) are created right here.
     */
    state->swop_status = SWOP_BUILD;
    state->sw_workingTable = tuplestore_begin_heap(false, false, u_sess->attr.attr_memory.work_mem);

    /* create a temp tuplestore to help isleaf/iscycle calculation */
    state->sw_backupTable = tuplestore_begin_heap(
        false, false, u_sess->attr.attr_memory.work_mem);

    state->sw_resultTable = tuplestore_begin_heap(
        false, false, u_sess->attr.attr_memory.work_mem);

    /* create the working TupleTableslot */
    state->sw_workingSlot = ExecAllocTableSlot(&estate->es_tupleTable, TAM_HEAP);
    ExecSetSlotDescriptor(state->sw_workingSlot, ExecTypeFromTL(targetlist, false));

    /* one values/isnull entry per targetlist element */
    int natts = list_length(node->plan.targetlist);
    state->sw_values = (Datum *)palloc0(natts * sizeof(Datum));
    state->sw_isnull= (bool *)palloc0(natts * sizeof(bool));
    ResetResultSlotAttValueArray(state, state->sw_values, state->sw_isnull);

    /* Setup other variables to NULL as they will be initialized later */
    state->sw_curKeyArrayStr = NULL;
    state->sw_cycle_rowmarks = NULL;

    /* Initialize nocycle controlling flag */
    state->sw_nocycleStopOrderSiblings = false;

    /*
     * Let the underlying RecursiveUnionState point back to this node. With an
     * ORDER SIBLINGS BY clause the outer child is treated as a SortState and
     * the RecursiveUnionState is taken from below it.
     */
    if (node->swoptions->siblings_orderby_clause != NULL) {
        SortState *sstate = (SortState *)outerPlanState(state);
        RecursiveUnionState *rustate = (RecursiveUnionState *)outerPlanState(sstate);
        rustate->swstate = state;
    } else {
        RecursiveUnionState *rustate = (RecursiveUnionState *)outerPlanState(state);
        rustate->swstate = state;
    }

    /* init other elements */
    state->sw_level = 0;
    state->sw_numtuples = 0;

    return state;
}

/*
 * Run the cycle check for one tuple.
 *
 * Returns true when the tuple is found to be in a cycle (only reachable when
 * NOCYCLE is set, otherwise an error is raised), false otherwise. No check is
 * done for CONNECT BY LEVEL plans or when sw_keyAttnum is 0.
 */
bool CheckCycleExeception(StartWithOpState *node, TupleTableSlot *slot)
{
    RecursiveUnionState *rustate = NULL;
    StartWithOp *swplan = (StartWithOp *)node->ps.plan;
    bool incycle = false;

    if (swplan->swoptions->siblings_orderby_clause != NULL) {
        SortState *sstate = (SortState
*)node->ps.lefttree;
        rustate = (RecursiveUnionState *)sstate->ss.ps.lefttree;
    } else {
        rustate = (RecursiveUnionState *)node->ps.lefttree;
    }

    /* rustate is only used for this sanity check */
    Assert (IsA(rustate, RecursiveUnionState));

    if (IsConnectByLevelStartWithPlan(swplan) || node->sw_keyAttnum == 0) {
        /* for connect by level case, we do not do cycle check */
        return false;
    }

    /*
     * Fetch the key array string of this tuple and run the cycle check on it.
     *
     * NOTE(review): an earlier version of this comment described an iteration
     * threshold before the real check starts; no such threshold is applied
     * here, the check runs for every tuple that reaches this point.
     */
    node->sw_curKeyArrayStr = GetPseudoArrayTargetValueString(slot, node->sw_keyAttnum);
    CheckIsCycle(node, &incycle);

    /* compatible with ORA behavior, if NOCYCLE is not set, we report error */
    if (!swplan->swoptions->nocycle && incycle) {
        ereport(ERROR,
                (errmodule(MOD_EXECUTOR),
                errmsg("START WITH .. CONNECT BY statement runs into cycle exception")));
    }

    /*
     * Free per-tuple keyArrayStr in nocycle case, normally we need use sw_curKeyArrayStr
     * to output the cycle information at caller function and let executor stop to free it
     * eventually
     */
    if (!incycle) {
        pfree_ext(node->sw_curKeyArrayStr);
    }

    return incycle;
}

/*
 * Peeking a tuple's connectable descendants for exactly one level.
 * It seems to be incurring no significant time / memory overhead compared to
 * BFS CONNECT BY so far.
 *
 * This peek is still done by Recursive Union. But its scale is much smaller
 * for at each iteration we pushes only one tuple into the outer node
 * so that RU only returns one tuple's connectables.
 *
 * Does not support order siblings by yet.
*/
static List* peekNextLevel(TupleTableSlot* startSlot, PlanState* outerNode, int level)
{
    RecursiveUnionState* rus = (RecursiveUnionState*) outerNode;
    StartWithOpState *swnode = rus->swstate;
    List* children = NULL;

    /* start from a clean RU working table ... */
    ExecReScan(outerNode);

    /* ... that holds nothing but the tuple whose children we want */
    rus->recursing = true;
    tuplestore_puttupleslot(rus->working_table, startSlot);

    /* remember where we started: iteration number and row counter */
    rus->iteration = level;
    int startIteration = rus->iteration;
    int startRowCount = swnode->sw_rownum;

    while (true) {
        /* every pull starts from the same row number */
        swnode->sw_rownum = startRowCount;

        TupleTableSlot* childSlot = ExecProcNode(outerNode);

        /* stop when RU is exhausted or has moved on to another iteration */
        if (TupIsNull(childSlot) || (rus->iteration != startIteration)) {
            break;
        }

        /* keep a private copy, the RU slot is overwritten by the next pull */
        TupleTableSlot* copySlot = MakeSingleTupleTableSlot(childSlot->tts_tupleDescriptor);
        copySlot = ExecCopySlot(copySlot, childSlot);
        children = lappend(children, copySlot);
    }

    return children;
}

/*
 * Construct CONNECT BY result set by depth-first order.
 *
 * Does not support order siblings by yet.
*/ static bool depth_first_connect(int currentLevel, StartWithOpState *node, List* queue, int* dfsRowCount) { CHECK_FOR_INTERRUPTS(); bool isCycle = false; if (queue == NULL) { return isCycle; } Tuplestorestate* outputStore = node->sw_workingTable; PlanState* outerNode = outerPlanState(node); /* loop until all siblings' DFS are done */ for (;;) { if (queue->head == NULL || node->swop_status == SWOP_ESCAPE) { return isCycle; } TupleTableSlot* leader = (TupleTableSlot*) lfirst(queue->head); if (leader == NULL || TupIsNull(leader)) { return isCycle; } /* DFS: output the depth-first tuple to result table */ TupleTableSlot* dstSlot = leader; queue->head = queue->head->next; if (CheckCycleExeception(node, dstSlot)) { // cycled slots are not kept isCycle = true; continue; } tuplestore_puttupleslot(outputStore, dstSlot); (*dfsRowCount)++; int rowCountBefore = *dfsRowCount; /* Go into the depth NOW: sibling tuples won't get processed * until all children are done */ node->sw_rownum = rowCountBefore; bool expectCycle = depth_first_connect(currentLevel + 1, node, peekNextLevel(leader, outerNode, currentLevel), dfsRowCount); if (expectCycle) { node->sw_cycle_rowmarks = lappend_int(node->sw_cycle_rowmarks, rowCountBefore); } } return isCycle; } static List* makeStartTuples(StartWithOpState *node) { PlanState* outerNode = outerPlanState(node); List* startWithQueue = NULL; RecursiveUnionState* rus = (RecursiveUnionState*) outerNode; TupleTableSlot* dstSlot = ExecProcNode(outerNode); int begin_iteration = rus->iteration; for (;;) { if (TupIsNull(dstSlot) || rus->iteration != begin_iteration) { break; } TupleTableSlot* newSlot = MakeSingleTupleTableSlot(dstSlot->tts_tupleDescriptor); newSlot = ExecCopySlot(newSlot, dstSlot); startWithQueue = lappend(startWithQueue, newSlot); dstSlot = ExecProcNode(outerNode); } return startWithQueue; } void markSWLevelBegin(StartWithOpState *node) { if (node->ps.instrument == NULL) { return; } gettimeofday(&(node->iterStats.currentStartTime), 
NULL); } void markSWLevelEnd(StartWithOpState *node, int64 rowCount) { if (node->ps.instrument == NULL) { return; } node->iterStats.totalIters = node->iterStats.totalIters + 1; int bufIndex = node->iterStats.totalIters - 1; if (bufIndex >= SW_LOG_ROWS_FULL) { bufIndex = SW_LOG_ROWS_HALF + (bufIndex % SW_LOG_ROWS_HALF); } node->iterStats.levelBuf[bufIndex] = node->iterStats.totalIters; node->iterStats.rowCountBuf[bufIndex] = rowCount; node->iterStats.startTimeBuf[bufIndex] = node->iterStats.currentStartTime; gettimeofday(&(node->iterStats.endTimeBuf[bufIndex]), NULL); } /* * @Function: ExecStartWithRowLevelQual() * * @Brief: * Check if LEVEL/ROWNUM conditions still hold for the recursion to * continue. Level and rownum should have been made available * in dstSlot by ConvertStartWithOpOutputSlot() already. * * @Input node: The current recursive union node. * @Input slot: The next slot to be scanned into start with operator. * * @Return: Boolean flag to tell if the iteration should continue. */ bool ExecStartWithRowLevelQual(RecursiveUnionState* node, TupleTableSlot* dstSlot) { StartWithOp* swplan = (StartWithOp*)node->swstate->ps.plan; if (!IsConnectByLevelStartWithPlan(swplan)) { return true; } ExprContext* expr = node->swstate->ps.ps_ExprContext; /* * Level and rownum pseudo attributes are extracted from StartWithOpPlan * node so we set filtering tuple as ecxt_scantuple */ expr->ecxt_scantuple = dstSlot; if (!ExecQual(node->swstate->ps.qual, expr, false)) { return false; } return true; } static bool isStoppedByRowNum(RecursiveUnionState* node, TupleTableSlot* slot) { TupleTableSlot* dstSlot = node->swstate->ps.ps_ResultTupleSlot; bool ret = false; /* 1. roll back to the converted result row */ node->swstate->sw_rownum--; /* 2. 
roll back to one row before execution to check rownum stop condition */ node->swstate->sw_rownum--; dstSlot = ConvertStartWithOpOutputSlot(node->swstate, slot, dstSlot); if (ExecStartWithRowLevelQual(node, dstSlot)) { ret = true; } /* undo the rollback after conversion (yes, just one line) */ node->swstate->sw_rownum++; return ret; } TupleTableSlot* GetStartWithSlot(RecursiveUnionState* node, TupleTableSlot* slot) { TupleTableSlot* dstSlot = node->swstate->ps.ps_ResultTupleSlot; dstSlot = ConvertStartWithOpOutputSlot(node->swstate, slot, dstSlot); if (!ExecStartWithRowLevelQual(node, dstSlot)) { StartWithOpState *swnode = node->swstate; StartWithOp *swplan = (StartWithOp *)swnode->ps.plan; PlanState *outerNode = outerPlanState(swnode); bool isDfsEnabled = swplan->swoptions->nocycle && !IsA(outerNode, SortState); /* * ROWNUM/LEVEL limit reached: * Tell ExecRecursiveUnion to terminate the recursion by returning NULL * * Specifically for ROWNUM limit reached: * Tell DFS routine to stop immediately by setting SW status to ESCAPE. */ node->swstate->swop_status = isDfsEnabled && isStoppedByRowNum(node, slot) ? SWOP_ESCAPE : node->swstate->swop_status; return NULL; } return dstSlot; } TupleTableSlot* ExecStartWithOp(StartWithOpState *node) { TupleTableSlot *dstSlot = node->ps.ps_ResultTupleSlot; PlanState *outerNode = outerPlanState(node); StartWithOp *swplan = (StartWithOp *)node->ps.plan; /* initialize row num count */ node->sw_rownum = 0; Assert (node->sw_workingTable != NULL); /* start processing */ switch (node->swop_status) { case SWOP_BUILD: { Assert (node->sw_workingTable != NULL); markSWLevelBegin(node); bool isDfsEnabled = swplan->swoptions->nocycle && !IsA(outerNode, SortState); if (isDfsEnabled) { /* For nocycle and non-order-siblings cases we use * depth-first connect by to achieve result consistency. 
*/ /* Kick off dfs with StartWith tuples */ int dfsRowCount = 0; depth_first_connect(1, node, makeStartTuples(node), &dfsRowCount); } /* Otherwise, use breadth-first style connect-by routine */ /* Materialize all content to make internal part ready */ for (; !isDfsEnabled;) { /* * We check for interrupts here because infinite loop might happen. */ CHECK_FOR_INTERRUPTS(); /* * The actual executions and conversions are done * in the underlying recursve union node. */ dstSlot = ExecProcNode(outerNode); if (TupIsNull(dstSlot)) { break; } tuplestore_puttupleslot(node->sw_workingTable, dstSlot); /* * check we need stop infinit recursive iteration if NOCYCLE is specified in * ConnectByExpr, then return. Also, in case of cycle-report-error the ereport * is processed inside of CheckCycleException() */ if (CheckCycleExeception(node, dstSlot)) { break; } } /* report we have done material step for current StartWithOp node */ ereport(DEBUG1, (errmodule(MOD_EXECUTOR), errmsg("[SWCB DEBUG] StartWithOp[%d] finish material-step and forward to PRC-step with level:%d rownum_total:%lu", node->ps.plan->plan_node_id, node->sw_level, node->sw_rownum))); /* calculate pseudo output column */ ProcessPseudoReturnColumns(node); /* * After all tuples is stored, we rewind the tuplestore and mark current * StartWithOp node status into EXECUTE */ tuplestore_rescan(node->sw_workingTable); node->swop_status = SWOP_EXECUTE; } /* fall-thru for first time */ case SWOP_EXECUTE: { dstSlot = node->ps.ps_ResultTupleSlot; (void)tuplestore_gettupleslot(node->sw_resultTable, true, false, dstSlot); if (TupIsNull(dstSlot)) { node->swop_status = SWOP_FINISH; } break; } default: { elog(ERROR, "known status status in %s(), with context:%s", __FUNCTION__, nodeToString(node->ps.plan)); } } return (TupleTableSlot *)dstSlot; } void ExecEndStartWithOp(StartWithOpState *node) { tuplestore_clear(node->sw_workingTable); tuplestore_clear(node->sw_backupTable); tuplestore_clear(node->sw_resultTable); node->swop_status = 
SWOP_UNKNOWN; pfree_ext(node->sw_values); pfree_ext(node->sw_isnull); /* * clean out the upper tuple table */ (void)ExecClearTuple(node->ps.ps_ResultTupleSlot); if (node->sw_context) { MemoryContextDelete(node->sw_context); } ExecEndNode(outerPlanState(node)); return; } void ExecReScanStartWithOp(StartWithOpState *state) { PlanState* outer_plan = outerPlanState(state); (void)ExecClearTuple(state->ps.ps_ResultTupleSlot); if (outer_plan->chgParam == NULL) { ExecReScan(outer_plan); } state->swop_status = SWOP_BUILD; ResetResultSlotAttValueArray(state, state->sw_values, state->sw_isnull); tuplestore_clear(state->sw_workingTable); tuplestore_clear(state->sw_backupTable); tuplestore_clear(state->sw_resultTable); state->sw_nocycleStopOrderSiblings = false; /* init other elements */ state->sw_level = 0; state->sw_numtuples = 0; return; } /* * Helper functions for StartWith Operation */ StartWithOpColumnType GetPseudoColumnType(const char *resname) { StartWithOpColumnType result = SWCOL_UNKNOWN; /* * some plan node's resname is not assign explicit value, so just return and * consider them as regular column */ if (resname == NULL) { return SWCOL_REGULAR; } if (pg_strcasecmp(resname, "level") == 0) { result = SWCOL_LEVEL; } else if (pg_strcasecmp(resname, "connect_by_isleaf") == 0) { result = SWCOL_ISLEAF; } else if (pg_strcasecmp(resname, "connect_by_iscycle") == 0) { result = SWCOL_ISCYCLE; } else if (pg_strcasecmp(resname, "ruitr") == 0) { result = SWCOL_RUITR; } else if (pg_strcasecmp(resname, "array_siblings") == 0) { result = SWCOL_ARRAY_SIBLINGS; } else if (pg_strncasecmp(resname, "array_key_", strlen("array_key_")) == 0) { result = SWCOL_ARRAY_KEY; } else if (pg_strncasecmp(resname, "array_col_", strlen("array_col_")) == 0) { result = SWCOL_ARRAY_COL; } else if (pg_strcasecmp(resname, "rownum") == 0) { result = SWCOL_ROWNUM; } else { result = SWCOL_REGULAR; } return result; } StartWithOpColumnType GetPseudoColumnType(const TargetEntry *entry) { StartWithOpColumnType 
result = GetPseudoColumnType(entry->resname);

    return result;
}

/*
 * Tell whether a column name is one of the pseudo return columns
 * (level, connect_by_isleaf, connect_by_iscycle, rownum).
 * colname must not be NULL (checked by assertion only).
 */
bool IsPseudoReturnColumn(const char *colname)
{
    Assert (colname != NULL);

    StartWithOpColumnType type = GetPseudoColumnType(colname);

    return (type == SWCOL_LEVEL || type == SWCOL_ISLEAF ||
            type == SWCOL_ISCYCLE || type == SWCOL_ROWNUM);
}

/*
 * - srcSlot: the slot returned from RecursiveUnion (table column + internal TE)
 * - dstSlot: the slot will return to CteScan (table column + pseudo return column + internal TE)
 */
TupleTableSlot *ConvertStartWithOpOutputSlot(StartWithOpState *node,
                                             TupleTableSlot *srcSlot,
                                             TupleTableSlot *dstSlot)
{
    /*
     * Process start-with pseudo return columns from pseudo targetentry
     *     CteScan: col[1......n], level, is_leaf, is_cycle,  RUITR, array_name[1,2,...... m]
     *                            | pseudo return column|    | <<- pseudo internal column ->>|
     */
    HeapTuple tup = ExecMaterializeSlot(srcSlot);
    HeapTuple tup_new = NULL;
    TupleDesc srcTupDesc = srcSlot->tts_tupleDescriptor;
    TupleDesc dstTupDesc = dstSlot->tts_tupleDescriptor;

    /* Reset internal value/isnull array */
    ResetResultSlotAttValueArray(node, node->sw_values, node->sw_isnull);
    Datum *values = node->sw_values;
    bool  *isnull = node->sw_isnull;

    /*
     * Reset the node's private memory context and make it current; it is
     * reset at the start of every call of this function.
     */
    (void)MemoryContextReset(node->sw_context);
    MemoryContext oldcxt = MemoryContextSwitchTo(node->sw_context);

    /*
     * Step 1, fill StartWithOp's pseudo internal column
     *
     * Note: As the conversion share the same values/isnull array, we do tail->front
     *       element-copy to avoid overlapping
     */
    heap_deform_tuple(tup, srcTupDesc, values, isnull);
    StartWithOp *swplan = (StartWithOp *)node->ps.plan;
    RecursiveUnion *ruplan = (RecursiveUnion *)swplan->ruplan;
    int plist_len = list_length(swplan->internalEntryList);

    TargetEntry *srcEntry = NULL;
    TargetEntry *dstEntry = NULL;
    Assert (list_length(swplan->internalEntryList) == list_length(ruplan->internalEntryList));
    /* walk both internal entry lists from the last element to the first */
    for (int i = 0; i < plist_len; i++) {
        srcEntry = (TargetEntry *)list_nth(ruplan->internalEntryList, plist_len - i - 1);
        dstEntry =
(TargetEntry *)list_nth(swplan->internalEntryList, plist_len - i - 1);
        /* move the internal column from its RU position to its StartWith position */
        values[dstEntry->resno - 1] = values[srcEntry->resno - 1];
        isnull[dstEntry->resno - 1] = isnull[srcEntry->resno - 1];
    }

    /* Step2, fill StartWithOp's pseudo return columns */
    ListCell *lc = NULL;
    foreach (lc, swplan->plan.targetlist) {
        TargetEntry *te = (TargetEntry *)lfirst(lc);

        if (!IsPseudoReturnTargetEntry(te)) {
            continue;
        }

        if (pg_strcasecmp(te->resname, "level") == 0) {
            AttrNumber att = FetchRUItrTargetEntryResno(node);

            int level = DatumGetInt32(values[att - 1]);

            /* Set level/ruitr value; the per-level tuple counter restarts on a level change */
            if (node->sw_level != level) {
                node->sw_level = level;
                node->sw_numtuples = 0;
            }

            /* LEVEL is the RU iteration value plus one */
            values[te->resno - 1] = values[att - 1] + 1;
            isnull[te->resno - 1] = false;
        } else if (pg_strcasecmp(te->resname, "connect_by_isleaf") == 0) {
            /* left NULL at conversion time */
            values[te->resno - 1] = 0;
            isnull[te->resno - 1] = true;
        } else if (pg_strcasecmp(te->resname, "connect_by_iscycle") == 0) {
            /* left NULL at conversion time */
            values[te->resno - 1] = 0;
            isnull[te->resno - 1] = true;
        } else if (pg_strcasecmp(te->resname, "rownum") == 0) {
            /* ROWNUM is one-based: current counter plus one */
            values[te->resno - 1] = node->sw_rownum + 1;
            isnull[te->resno - 1] = false;
        }
    }

    /* the new tuple is formed while the node's private context is current */
    tup_new = heap_form_tuple(dstTupDesc, values, isnull);
    dstSlot = ExecStoreTuple(tup_new, dstSlot, InvalidBuffer, false);

    MemoryContextSwitchTo(oldcxt);

    /* update row num count before returning */
    node->sw_rownum++;
    node->sw_numtuples++;

    return dstSlot;
}

/*
 * --------------------------------------------------------------------------------------
 * -Internal functions
 * --------------------------------------------------------------------------------------
 */

/*
 * Build in functions that support start with
 */

/*
 * sys_connect_by_path(value, split)
 *
 * Builds a text made of every token of the current row's path array, each
 * one preceded by the separator. An argument-1 datum of 0 raises an error;
 * an argument-0 datum of 0 is replaced by SWCB_DEFAULT_NULLSTR. Unless the
 * first argument is a constant expression, the value must appear in the path.
 */
Datum sys_connect_by_path(PG_FUNCTION_ARGS)
{
    Datum resultDatum = (Datum)0;
    const char *value = NULL;
    const char *split = NULL;
    bool constArrayList = false;

    /* check invalid separator */
    if (PG_GETARG_DATUM(1) == (Datum)0) {
        elog(ERROR, "illegal parameter in SYS_CONNECT_BY_PATH function");
    }

    if (PG_GETARG_DATUM(0) == (Datum)0) {
        value = SWCB_DEFAULT_NULLSTR;
    } else {
        value
= TextDatumGetCString(PG_GETARG_TEXT_PP(0));
    }
    split = TextDatumGetCString(PG_GETARG_TEXT_PP(1));

    /*
     * step[1]:
     * Fetch current processed ScanTuple and its internal array_col, then convert
     * it into token_list and do proper calculation for sys_connect_by_path()
     */
    char *raw_array_str = NULL;
    List *token_list = GetCurrentArrayColArray(fcinfo, value, &raw_array_str, &constArrayList);
    ListCell *token = NULL;

    /* for a column argument the current value must be one of the path tokens */
    if (!constArrayList) {
        bool valid = false;
        foreach (token, token_list) {
            const char *curValue = (const char *)lfirst(token);
            if (strcmp(TrimStr(value), TrimStr(curValue)) == 0) {
                valid = true;
                break;
            }
        }

        if (!valid) {
            elog(ERROR, "node value is not in path (value:%s path:%s)", value, raw_array_str);
        }
    }

    /*
     * step[2]:
     * start to build result array string
     */
    StringInfoData si;
    initStringInfo(&si);
    foreach (token, token_list) {
        const char *node = (const char *)lfirst(token);
        /* every token is preceded by the separator, including the first one */
        appendStringInfo(&si, "%s%s", split, node);
    }

    resultDatum = CStringGetTextDatum(si.data);
    pfree_ext(si.data);

    PG_RETURN_DATUM(resultDatum);
}

/*
 * connect_by_root -- return the root value of the current row's hierarchy
 *
 * parameter1: the value of the column (or constant) for the current row [type: text]
 *
 * The path array of the current row is fetched through
 * GetCurrentArrayColArray(); the value must be found among its tokens,
 * otherwise an error is raised. The first token of the path is returned
 * as text.
 **/
Datum connect_by_root(PG_FUNCTION_ARGS)
{
    Datum datum = (Datum)0;
    const char *value = TextDatumGetCString(PG_GETARG_TEXT_PP(0));
    bool constArrayList = false;

    /*
     * step[1]:
     * Fetch current processed ScanTuple and its internal array_col, then convert
     * it into token_list and do proper calculation for connect_by_root()
     */
    char *raw_array_str = NULL;
    List *token_list = GetCurrentArrayColArray(fcinfo, value, &raw_array_str, &constArrayList);
    ListCell *lc = NULL;

    bool valid = false;
    foreach (lc, token_list) {
        const char *curValue = (const char *)lfirst(lc);
        if (strcmp(TrimStr(value), TrimStr(curValue)) == 0) {
            valid = true;
            break;
        }
    }

    if (!valid) {
        elog(ERROR, "node value is not in path (value:%s path:%s)", value, raw_array_str);
    }

    /*
     * step[2]:
     * start to build result string
value */
    datum = CStringGetTextDatum((char *)linitial(token_list));

    PG_RETURN_DATUM(datum);
}

/*
 * -brief: array_key content in List format
 *
 * Looks up the first attribute of the slot whose name starts with
 * "array_key" (case-insensitive), fetches its text value and splits it into
 * tokens with CovertInternalArrayStringToList(). Raises an error when no
 * such attribute exists. slot must not be NULL (checked by assertion only).
 */
static List *TupleSlotGetKeyArrayList(TupleTableSlot *slot)
{
    Assert (slot != NULL);

    /*
     * The prefix and the compared length come from one string, so they cannot
     * drift apart (the length used to be taken from the misspelled literal
     * "arra_key", which compared one character too few).
     */
    const char *keyPrefix = "array_key";

    /* 1. find key attnum */
    AttrNumber arrayKeyAttno = InvalidAttrNumber;
    Datum arrayKeyDatum = (Datum)0;
    TupleDesc tupDesc = slot->tts_tupleDescriptor;
    HeapTuple tup = ExecFetchSlotTuple(slot);
    bool isnull = true;
    for (int n = 0; n < tupDesc->natts; n++) {
        Form_pg_attribute attr = TupleDescAttr(tupDesc, n);
        if (pg_strncasecmp(attr->attname.data, keyPrefix, strlen(keyPrefix)) == 0) {
            /* attribute numbers are one-based */
            arrayKeyAttno = n + 1;
            break;
        }
    }

    if (arrayKeyAttno == InvalidAttrNumber) {
        elog(ERROR, "Internal array_key is not found for sys_connect_by_path()");
    }

    /* 2. fetch key str */
    arrayKeyDatum = heap_getattr(tup, arrayKeyAttno, tupDesc, &isnull);
    Assert (!isnull && arrayKeyDatum != 0);

    char *rawKeyArrayStr = TextDatumGetCString(arrayKeyDatum);
    List *tokenList = CovertInternalArrayStringToList(rawKeyArrayStr);

    return tokenList;
}

/*
 * --------------------------------------------------------------------------------------
 * @Brief: Get current array_col array from current function invoke context "fcinfo",
 *         value returned in List
 *
 * @Note: ScanTuple currently in processing is held under fcinfo pointer, so we don't pass
 *        the attnum/varattno of which array_col_no to fetch, sounds a little bit
 *        "implicit" but deserving so for more convenience
 *
 *        isConstArrayList is an output parameter, indicate current we are handling a CONST
 *        expression set in sys_connect_by_path()'s first parameter. It may be NULL
 *        (the prototype's default), in which case the flag is simply not reported.
 * --------------------------------------------------------------------------------------
 */
static List *GetCurrentArrayColArray(const FunctionCallInfo fcinfo,
            const char *value, char **raw_array_str, bool *isConstArrayList)
{
    List *token_list = NIL;
    TupleTableSlot *slot = NULL;
    Var  *variable = NULL;

    /*
     * The prototype defaults isConstArrayList to NULL; redirect it to a local
     * flag in that case so every later write through the pointer is safe.
     */
    bool ignoredConstFlag = false;
    if (isConstArrayList == NULL) {
        isConstArrayList = &ignoredConstFlag;
    }
    *isConstArrayList = false;

    ExprContext *econtext = (ExprContext
*)fcinfo->swinfo.sw_econtext;
    ExprState *exprstate = (ExprState *)fcinfo->swinfo.sw_exprstate;

    /* context check */
    Assert (econtext != NULL && exprstate != NULL);

    /*
     * Oracle "START WITH ... CONNECT BY" only allow single plaint column to be
     * specified, so the eval-context's argument only have one argument with *Var*
     * node ported
     */
    List *vars = pull_var_clause((Node*)exprstate->expr,
                    PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS);

    /* handle the case where the expression contains no Var at all (constant argument) */
    if (vars == NIL) {
        /* note: this declaration shadows the function-level "slot" */
        TupleTableSlot *slot = econtext->ecxt_scantuple;
        List *keyTokenList = NIL;

        if (fcinfo->flinfo->fn_oid == CONNECT_BY_ROOT_FUNCOID && TupIsNull(slot)) {
            /*
             * By semantic, connect_by_root() aiming to return the root value of current
             * recursive tree, if there is a const value set as parameter, its hierarchy
             * is considered as /{v}/{v}/{v}..., so simply return const value itself
             *
             * Note: we can do so only for connect_by_root(), sys_connect_by_path is still
             *       processed as /{v}/{v}/{v}
             */
            token_list = list_make1(pstrdup(value));
        } else {
            /* repeat the constant once per element of the key path */
            keyTokenList = TupleSlotGetKeyArrayList(slot);
            for (int n = 0; n < list_length(keyTokenList); n++) {
                token_list = lappend(token_list, pstrdup(value));
            }
        }

        /* mark current list is const array; *raw_array_str is left untouched on this path */
        *isConstArrayList = true;

        return token_list;
    }

    /* regular case 1st parm is column name; only the first Var found is used */
    variable = (Var *)linitial(vars);
    if (!IsA(variable, Var)) {
        elog(ERROR, "connect_by_root pass int invalid argument type:%d",
             nodeTag(exprstate->expr));
    }

    /*
     * Get the input slot and attribute number we want
     */
    switch (variable->varno) {
        case INNER_VAR: /* get the tuple from the inner node */
            slot = econtext->ecxt_innertuple;
            break;

        case OUTER_VAR: /* get the tuple from the outer node */
            slot = econtext->ecxt_outertuple;
            break;

        /* INDEX_VAR is handled by default case */
        default: /* get the tuple from the relation being scanned */
            slot = econtext->ecxt_scantuple;
            break;
    }
    Assert(slot != NULL);

    /* error out invalid case: attribute numbers <= 0 are rejected */
    if (variable->varattno <= 0) {
        ereport(ERROR,
(errcode(ERRCODE_INVALID_ATTRIBUTE), errmodule(MOD_EXECUTOR), errmsg("attribute number %d exceeds number of columns %d", variable->varattno, slot->tts_tupleDescriptor->natts))); } /* Start do tuple content fetching work */ HeapTuple tup = ExecFetchSlotTuple(slot); TupleDesc tupDesc = slot->tts_tupleDescriptor; bool isnull = true; /* context check */ Assert (econtext != NULL && exprstate != NULL && !TupIsNull(slot)); /* step 1. find proper internal array to support */ AttrNumber origAttnum = variable->varattno; AttrNumber arrayColAttnum = GetInternalArrayAttnum(tupDesc , origAttnum); Datum arrayColDatum = heap_getattr(tup, arrayColAttnum, slot->tts_tupleDescriptor, &isnull); Assert (!isnull && origAttnum != InvalidAttrNumber && arrayColAttnum != InvalidAttrNumber); Assert (arrayColAttnum > origAttnum); /* step 2. process the internal array and get result */ *raw_array_str = TextDatumGetCString(arrayColDatum); token_list = CovertInternalArrayStringToList(pstrdup(*raw_array_str)); return token_list; } static List *CovertInternalArrayStringToList(char *raw_array_str) { List *token_list = NIL; char *token_tmp = NULL; char *token = strtok_s(raw_array_str, "/", &token_tmp); while (token != NULL) { token_list = lappend(token_list, token); token = strtok_s(NULL, "/", &token_tmp); } return token_list; } static AttrNumber GetInternalArrayAttnum(TupleDesc tupDesc, AttrNumber origVarAttno) { NameData arrayAttName; AttrNumber arrayAttNum = InvalidAttrNumber; errno_t rc = 0; rc = memset_s(arrayAttName.data, NAMEDATALEN, 0, NAMEDATALEN); securec_check(rc, "\0", "\0"); rc = sprintf_s(arrayAttName.data, NAMEDATALEN, "array_col_%d", origVarAttno); securec_check_ss(rc, "\0", "\0"); /* find proper internal array to support */ for (int n = 0; n < tupDesc->natts; n++) { Form_pg_attribute attr = TupleDescAttr(tupDesc, n); if (pg_strcasecmp(attr->attname.data, arrayAttName.data) == 0) { arrayAttNum = n + 1; break; } } if (arrayAttNum == InvalidAttrNumber) { elog(ERROR, "Internal array_col 
for origVarAttno:%d related internal array is not found", origVarAttno); } return arrayAttNum; } /* * fill start with return columns is_leaf, is_cycle */ static const char *GetPseudoArrayTargetValueString(TupleTableSlot *slot, AttrNumber attnum) { const char *raw_array_string = NULL; HeapTuple tup = NULL; TupleDesc tupDesc = slot->tts_tupleDescriptor; Datum datum = 0; bool isnull = true; /* fetch physical tuple */ tup = ExecFetchSlotTuple(slot); datum = heap_getattr(tup, attnum, tupDesc, &isnull); raw_array_string = TextDatumGetCString(datum); Assert (!isnull && raw_array_string != NULL); return raw_array_string; } /* * - brief: A helper function to update tupleSlot with given isleaf/iscycle value */ static void UpdatePseudoReturnColumn(StartWithOpState *state, TupleTableSlot *slot, bool isleaf, bool iscycle) { HeapTuple tup = NULL; TupleDesc tupDesc = slot->tts_tupleDescriptor; ResetResultSlotAttValueArray(state, state->sw_values, state->sw_isnull); Datum *values = state->sw_values; bool *isnull = state->sw_isnull; /* fetch physical tuple */ tup = ExecFetchSlotTuple(slot); heap_deform_tuple(tup, tupDesc, values, isnull); /* check the unset value isleaf, iscycle is not set yet */ AttrNumber attnum2 = state->sw_pseudoCols[SWCOL_ISLEAF]->resno; AttrNumber attnum3 = state->sw_pseudoCols[SWCOL_ISCYCLE]->resno; Assert (values[attnum2 - 1] == 0 && isnull[attnum2 - 1]); Assert (values[attnum3 - 1] == 0 && isnull[attnum3 - 1]); /* set proper value and mark isnull to false */ values[attnum2 - 1] = BoolGetDatum(isleaf); isnull[attnum2 - 1] = false; values[attnum3 - 1] = BoolGetDatum(iscycle); isnull[attnum3 - 1] = false; /* create a local copy tuple and store it to tuplestore, mark shouldFree as 'true ' */ tup = heap_form_tuple(tupDesc, values, isnull); slot = ExecStoreTuple(tup, slot, InvalidBuffer, true); } /* * - brief: check if curernt processing tupleSlot is in cycle * * - return: none, iscycle flag is returned by param "bool *connect_by_iscycle" */ static void 
CheckIsCycle(StartWithOpState *state, bool *connect_by_iscycle) { List *entry_list = NIL; ListCell *entry = NULL; char *keyArrayStrCopy = NULL; Assert (state != NULL && state->sw_curKeyArrayStr != NULL && connect_by_iscycle != NULL); /* The underlying strok_r() will modify the splited string itself so make a copy */ keyArrayStrCopy = pstrdup(state->sw_curKeyArrayStr); entry_list = CovertInternalArrayStringToList(keyArrayStrCopy); foreach (entry, entry_list) { const char *node1 = (const char *)lfirst(entry); ListCell *rest = lnext(entry); while (rest != NULL) { const char *node2 = (const char *)lfirst(rest); ListCell *next = lnext(rest); if (pg_strcasecmp(node1, node2) == 0) { *connect_by_iscycle = true; return; } rest = next; } } /* Free the tokenized list as we pass int the copy of sw_curKeyArrayStr */ list_free_ext(entry_list); pfree_ext(keyArrayStrCopy); *connect_by_iscycle = false; } /* * - brief: check if curernt processing tupleSlot is leaf * * - return: none, iscycle flag is returned by param "bool *connect_by_isleaf" */ static void CheckIsLeaf(StartWithOpState *state, bool *connect_by_isleaf) { /* PRC optimization is applied */ if (PRCCanSkip(state, SWCOL_ISLEAF)) { return; } Assert (state != NULL && connect_by_isleaf != NULL); const char *curRow = state->sw_curKeyArrayStr; int curRowLen = strlen(curRow); TupleTableSlot *slot = state->sw_workingSlot; const char *scanRow = NULL; tuplestore_rescan(state->sw_backupTable); for (;;) { (void)tuplestore_gettupleslot(state->sw_backupTable, true, false, slot); if (TupIsNull(slot)) { /* * Finish scan all tuples, but we are not found "curRow is not a prefix" * plus "a prefix and string is longer " */ *connect_by_isleaf = true; break; } /* Free the scanRow memory allocate in last loop */ if (scanRow != NULL) { pfree_ext(scanRow); } scanRow = GetPseudoArrayTargetValueString(slot, state->sw_keyAttnum); int scanRowLen = strlen(scanRow); if (strcmp(curRow, scanRow) == 0) { /* continue if curernt row */ continue; } /* * if 
curRow is the forward prefix of scanRow then set flag connect_by_isleaf false * -- forward prefix * abc is forward prefix of abcde * -- backward predix * cde is backward prefix of abcde * * Attention, we only accept forward prefix in this case. * */ char *pos = pg_strrstr((char *)scanRow, curRow); if (pos != NULL && pos == scanRow && curRowLen < scanRowLen) { *connect_by_isleaf = false; break; } } /* Free text_to_cstring() palloc()-ed string */ if (scanRow != NULL) { pfree_ext(scanRow); } return; } static void CheckIsCycleByRowmarks(StartWithOpState *state, bool* connect_by_iscycle, int row) { if (state->sw_cycle_rowmarks == NULL) { return; } ListCell* mark = NULL; foreach (mark, state->sw_cycle_rowmarks) { int markIndex = lfirst_int(mark); if (row == markIndex) { *connect_by_iscycle = true; } } } /* * - brief: process the recursive-union returned tuples with isleaf/iscycle * pseudo value filled * * - return: none */ static void ProcessPseudoReturnColumns(StartWithOpState *state) { TupleTableSlot *dstSlot = state->ps.ps_ResultTupleSlot; StartWithOp *swplan = (StartWithOp *)state->ps.plan; /* step1. put all tuples itno tmp table */ tuplestore_rescan(state->sw_workingTable); for (;;) { (void)tuplestore_gettupleslot(state->sw_workingTable, true, false, dstSlot); if (TupIsNull(dstSlot)) { break; } tuplestore_puttupleslot(state->sw_backupTable, dstSlot); } /* seek tuplestore offset to begining */ tuplestore_rescan(state->sw_workingTable); tuplestore_rescan(state->sw_backupTable); int rowCount = 0; /* step2. 
start processing */ for (;;) { /* fetch one tuple from workingTable */ (void)tuplestore_gettupleslot(state->sw_workingTable, true, false, dstSlot); if (TupIsNull(dstSlot)) { state->swop_status = SWOP_FINISH; break; } rowCount++; bool connect_by_isleaf = true; bool connect_by_iscycle = true; if (swplan->keyEntryList != NIL) { /* fetch current key array string for later isleaf/iscycle determination */ state->sw_curKeyArrayStr = GetPseudoArrayTargetValueString(dstSlot, state->sw_keyAttnum); /* Calculate connect_by_iscycle */ CheckIsCycle(state, &connect_by_iscycle); CheckIsCycleByRowmarks(state, &connect_by_iscycle, rowCount); /* Calculate connect_by_isleaf */ CheckIsLeaf(state, &connect_by_isleaf); /* Free per-tuple keyArrayStr */ pfree_ext(state->sw_curKeyArrayStr); } else { /* * !!Note: It is a rare case when gets here because keyEntryList is not * created for current START WITH clause, simplely it is an abnormal ConnectByExpr * specified in "CONNECT BY" clause e.g. "CONNECT BY col1 = abs(x)", we set * isleaf/iscycle both as FALSE value for more failure-tolerance and reasonble */ connect_by_isleaf = false; connect_by_iscycle = false; } /* update result, fill the result tuple */ UpdatePseudoReturnColumn(state, dstSlot, connect_by_isleaf, connect_by_iscycle); tuplestore_puttupleslot(state->sw_resultTable, dstSlot, false); ExecClearTuple(dstSlot); } /* rewind resultTable to let later EXECUTE stage can be processed */ tuplestore_rescan(state->sw_resultTable); } static AttrNumber FetchRUItrTargetEntryResno(StartWithOpState *state) { AttrNumber attnum = InvalidAttrNumber; StartWithOp *plan = (StartWithOp *)state->ps.plan; List *plist = plan->internalEntryList; ListCell *lc = NULL; foreach (lc, plist) { TargetEntry *te = (TargetEntry *)lfirst(lc); if (pg_strcasecmp(te->resname, "RUITR") == 0) { attnum = te->resno; break; } } Assert (attnum != InvalidAttrNumber && attnum <= list_length(plan->plan.targetlist)); return attnum; } /* * functinos to process in RuScan * * Note: 
these function are mainly invoked in recursive union operator, but we put them in * startwith.cpp as they are special logic for start-with converted RecursiveUnion */ static const char *GetCurrentWorkTableScanPath(TupleTableSlot *srcSlot, AttrNumber pseudo_attnum) { TupleDesc tupDesc = srcSlot->tts_tupleDescriptor; HeapTuple tup = ExecFetchSlotTuple(srcSlot); bool isnull = true; /* if srcSlot is from non-recursive(no pseudo column), then just return */ if (pseudo_attnum > tupDesc->natts) { return NULL; } if (tup != NULL) { return TextDatumGetCString(heap_getattr(tup, pseudo_attnum, tupDesc, &isnull)); } return NULL; } static bytea *GetCurrentSiblingsArrayPath(TupleTableSlot *srcSlot, AttrNumber pseudo_attnum) { TupleDesc tupDesc = srcSlot->tts_tupleDescriptor; HeapTuple tup = ExecFetchSlotTuple(srcSlot); bool isnull = true; /* if srcSlot is from non-recursive(no pseudo column), then just return */ if (pseudo_attnum > tupDesc->natts) { return NULL; } if (tup != NULL) { return DatumGetByteaP(heap_getattr(tup, pseudo_attnum, tupDesc, &isnull)); } return NULL; } static Datum ConvertRuScanArrayAttr(RecursiveUnionState *rustate, TargetEntry *te, TupleTableSlot *srcSlot, bool inrecursing) { /* * !!Reminding: * In the future, we need improve he array_key and array_col into native array * format rather then use bare literal type */ Datum datum = (Datum)0; const char *spliter = "/"; const char *node_path = NULL; const char *value = NULL; if (!IsA(te->expr, Var)) { elog(ERROR, "Invalid TargetEntry expr type %s", nodeToString(te->expr)); } node_path = GetCurrentWorkTableScanPath(srcSlot, te->resno); StartWithOpColumnType type = GetPseudoColumnType(te); switch (type) { case SWCOL_ARRAY_KEY: { value = GetKeyEntryArrayStr(rustate, srcSlot); break; } case SWCOL_ARRAY_SIBLINGS: { bytea *path = GetCurrentSiblingsArrayPath(srcSlot, te->resno); bytea *res = GetSiblingsKeyEntry(rustate, path); return PointerGetDatum(res); break; } case SWCOL_ARRAY_COL: { TupleDesc tupDesc = 
srcSlot->tts_tupleDescriptor; AttrNumber attno = PseudoResnameGetOriginResno(te->resname); if (!inrecursing && tupDesc->natts < attno) { /* * None-recursive branch * * Handle a case where we need generate arry_col for PRC, we need handle * it properly as in none-recursing stage, PRC is not attached in inner * plan tree's targetlist, thus there is no way to invoke heap_getattr(), * instead we need handle it manually, * * e.g. select sys_connect_by_path(level, 'xx') * * PRC: abbrev for "pseudo return column" level/isleaf/iscycle/rownum */ Datum valueDatum = rustate->swstate->sw_values[attno]; value = pstrdup(DatumGetCString(DirectFunctionCall1(int8out, valueDatum))); } else { /* * Recursive Branch * * for recursive case, PRC it attached to RecursiveUnion's inner plan * targetlist, this we can hanle normally */ value = GetCurrentValue(srcSlot, attno); } break; } default: { /* fall-thru */ } } StringInfo si = makeStringInfo(); if (GetPseudoColumnType(te) == SWCOL_ARRAY_SIBLINGS) { if (node_path == NULL) { appendStringInfo(si, "%s", value); } else { appendStringInfo(si, "%s%s", node_path, value); } } else { if (node_path == NULL) { appendStringInfo(si, "%s%s", spliter, value); } else { appendStringInfo(si, "%s%s%s", node_path, spliter, value); } } /* Drop memory allocation created in current function, free as early as possible */ datum = CStringGetTextDatum(si->data); pfree_ext(si->data); pfree_ext(node_path); pfree_ext(value); return datum; } /* * -------------------------------------------------------------------------------------- * @Brief: Invoked at return point of RecursiveUnion, add the key/array information store * in worktable, so that next round of recursive iteration can append new node * value to previous nodepath * -------------------------------------------------------------------------------------- */ TupleTableSlot *ConvertRuScanOutputSlot(RecursiveUnionState *rustate, TupleTableSlot *scanSlot, bool inrecursing) { if (TupIsNull(scanSlot)) { return 
scanSlot; } /* Origin RuScan output tuple */ HeapTuple scanTup = ExecFetchSlotTuple(scanSlot); TupleDesc scanTupDesc = scanSlot->tts_tupleDescriptor; /* * New RuScan output tuple will containd RUITR, array_key/col_nn, .... * * RecursiveUnion plan node's "result slot" tupDesc contains RUITR, array_key/col_nn * as they were generated from RecursiveUnion's targetlist */ HeapTuple outTup = NULL; TupleDesc outTupDesc = rustate->ps.ps_ResultTupleSlot->tts_tupleDescriptor; TupleTableSlot *outSlot = rustate->ps.ps_ResultTupleSlot; StartWithOpState *swstate = rustate->swstate; ResetResultSlotAttValueArray(swstate, swstate->sw_values, swstate->sw_isnull); Datum *values = swstate->sw_values; bool *isnull = swstate->sw_isnull; /* Reset scanslot and memorycontext */ (void)MemoryContextReset(rustate->convertContext); MemoryContext oldcxt = MemoryContextSwitchTo(rustate->convertContext); /* * scanTuple does not contain RUITR, array_name, deformed by old desc and use new * values/isbull to accept deformed content */ heap_deform_tuple(scanTup, scanTupDesc, values, isnull); ListCell *lc = NULL; RecursiveUnion *ruplan = (RecursiveUnion *)rustate->ps.plan; foreach (lc, ruplan->internalEntryList) { TargetEntry *te = (TargetEntry *)lfirst(lc); AttrNumber attnum = te->resno; if (pg_strcasecmp(te->resname, "RUITR") == 0) { values[attnum - 1] = rustate->iteration; } else if (pg_strncasecmp(te->resname, "array_key_", strlen("array_key_")) == 0) { values[attnum - 1] = ConvertRuScanArrayAttr(rustate, te, scanSlot, inrecursing); } else { values[attnum - 1] = ConvertRuScanArrayAttr(rustate, te, scanSlot, inrecursing); } /* for converted slot is always filled with valid value(none-empty) */ isnull[attnum - 1] = false; } /* build converted tuple */ outTup = heap_form_tuple(outTupDesc, values, isnull); outSlot = ExecStoreTuple(outTup, outSlot, InvalidBuffer, false); MemoryContextSwitchTo(oldcxt); /* * Start With dfx * * - print key information for each iteration in RuSacn **/ if 
(u_sess->attr.attr_sql.enable_startwith_debug) { bool _isnull = true; Datum d = heap_getattr(outTup, rustate->swstate->sw_keyAttnum, outTupDesc, &_isnull); elog(LOG, "StartWithDebug: LEVEL:%d array_key_1:%s", rustate->iteration + 1, TextDatumGetCString(d)); } /* sw_tuple_idx++ after one tuple finished. */ rustate->sw_tuple_idx++; return outSlot; } static bytea* bytea_catenate(bytea* t1, bytea* t2) { bytea* result = NULL; int len1, len2, len; char* ptr = NULL; int rc = 0; len1 = VARSIZE_ANY_EXHDR(t1); len2 = VARSIZE_ANY_EXHDR(t2); if (len1 < 0) { len1 = 0; } if (len2 < 0) { len2 = 0; } len = len1 + len2 + VARHDRSZ; result = (bytea*)palloc(len); /* Set size of result string... */ SET_VARSIZE(result, len); /* Fill data field of result string... */ ptr = VARDATA(result); if (len1 > 0) { rc = memcpy_s(ptr, len1, VARDATA_ANY(t1), len1); securec_check(rc, "\0", "\0"); } if (len2 > 0) { rc = memcpy_s(ptr + len1, len2, VARDATA_ANY(t2), len2); securec_check(rc, "\0", "\0"); } return result; } /* * -------------------------------------------------------------------------------------- * @Brief: return order pseudo order siblings key for further order * pseudo column array like this: * [00001][00001][00001][00002] --> every buckets use byea type * [00001] -> tuple ordered position in current level, array index represent level * -------------------------------------------------------------------------------------- */ static bytea *GetSiblingsKeyEntry(RecursiveUnionState *rustate, bytea *old_path) { bytea *result = NULL; bytea *newKey = NULL; const int bucketSize = 4; const int maxEntrySize = 127; const uint64 maxSize = pow(maxEntrySize, 4); /* * We use four bytea size for one bucket sibings key * so we could support max 127^4 tuple to order */ newKey = (bytea *)palloc0(bucketSize + VARHDRSZ); SET_VARSIZE(newKey, bucketSize + VARHDRSZ); char *keyVar = VARDATA_ANY(newKey); if (rustate->sw_tuple_idx >= maxSize) { ereport(ERROR, (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), 
errmsg("Exceed max numbers of siblings tuples"))); } /* Fill correct order position into buckets */ uint64 tupleIdx = rustate->sw_tuple_idx; const uint64 lastMod = (const uint64)pow(maxEntrySize, 3); keyVar[3] = tupleIdx / lastMod; tupleIdx = tupleIdx % lastMod; const uint64 thirdMod = (const uint64)pow(maxEntrySize, 2); keyVar[2] = tupleIdx / thirdMod; tupleIdx = tupleIdx % thirdMod; keyVar[1] = tupleIdx / maxEntrySize; tupleIdx = tupleIdx % maxEntrySize; keyVar[0] = tupleIdx; /* Reach level 0 if path is NULL then return */ if (old_path == NULL) { elog(LOG, "siblings key is %s current level is %d tuple position is %lu.", DatumGetCString(DirectFunctionCall1(byteaout, PointerGetDatum(newKey))), rustate->iteration + 1, rustate->sw_tuple_idx); return newKey; } /* Otherwise we catenate new siblings key to old key path */ result = bytea_catenate(old_path, newKey); if (u_sess->attr.attr_sql.enable_startwith_debug) { elog(LOG, "siblings key is %s current level is %d tuple position is %lu", DatumGetCString(DirectFunctionCall1(byteaout, PointerGetDatum(result))), rustate->iteration + 1, rustate->sw_tuple_idx); } return result; } /* * -------------------------------------------------------------------------------------- * @Brief: return a key/keys in string format, e.g {1,2}, {1}, the next step is * to construct path like /{1,A}/{2,B}/{3,3} * -------------------------------------------------------------------------------------- */ static const char *GetKeyEntryArrayStr(RecursiveUnionState *state, TupleTableSlot *scanSlot) { const char *result = NULL; List *keyEntryList = ((StartWithOp *)state->swstate->ps.plan)->keyEntryList; /* build key string {key,key,key} */ StringInfoData si; initStringInfo(&si); appendStringInfo(&si, "%s", KEY_START_TAG); { for (int i = 0; i < list_length(keyEntryList); i++) { TargetEntry *te = (TargetEntry *)list_nth(keyEntryList, i); const char *curKeyValue = GetCurrentValue(scanSlot, te->resno); if (curKeyValue == NULL) { /* * ArrayKeyValue could 
return NULL, for example, base table's column * contains NULL and startWithExpr is set condition "TRUE" */ elog(WARNING, "The internal key column[%d]:%s with NULL value.", te->resno, scanSlot->tts_tupleDescriptor->attrs[i]->attname.data); } if (i == 0) { appendStringInfo(&si, "%s", curKeyValue); } else { appendStringInfo(&si, ",%s", curKeyValue); } } } appendStringInfo(&si, "%s", KEY_CLOSE_TAG); result = pstrdup(si.data); pfree_ext(si.data); return result; } /* * -------------------------------------------------------------------------------------- * @Brief: Helper function to output column value in string format, caller need to free * the return string * -------------------------------------------------------------------------------------- */ static const char *GetCurrentValue(TupleTableSlot *slot, AttrNumber attnum) { TupleDesc tupDesc = slot->tts_tupleDescriptor; HeapTuple tup = ExecFetchSlotTuple(slot); bool isnull = true; Oid atttypid = tupDesc->attrs[attnum - 1]->atttypid; Datum d = heap_getattr(tup, attnum, tupDesc, &isnull); char *value_str = NULL; /* * Return string SWCB_DEFAULT_NULLSTR to represent NULL in internal array_key/col's element, * * Note, slot value int array_key/array_col list is to represent iteration level's rather * than its real value */ if (isnull) { return pstrdup(SWCB_DEFAULT_NULLSTR); } switch (atttypid) { case INT8OID: value_str = DatumGetCString(DirectFunctionCall1(int8out, d)); break; case INT1OID: value_str = DatumGetCString(DirectFunctionCall1(int1out, d)); break; case INT2OID: value_str = DatumGetCString(DirectFunctionCall1(int2out, d)); break; case OIDOID: value_str = DatumGetCString(DirectFunctionCall1(oidout, d)); break; case INT4OID: value_str = DatumGetCString(DirectFunctionCall1(int4out, d)); break; case BOOLOID: value_str = DatumGetCString(DirectFunctionCall1(boolout, d)); break; case CHAROID: value_str = DatumGetCString(DirectFunctionCall1(charout, d)); break; case NAMEOID: value_str = 
DatumGetCString(DirectFunctionCall1(nameout, d)); break; case FLOAT4OID: value_str = DatumGetCString(DirectFunctionCall1(float4out, d)); break; case FLOAT8OID: value_str = DatumGetCString(DirectFunctionCall1(float8out, d)); break; case ABSTIMEOID: value_str = DatumGetCString(DirectFunctionCall1(abstimeout, d)); break; case RELTIMEOID: value_str = DatumGetCString(DirectFunctionCall1(reltimeout, d)); break; case DATEOID: value_str = DatumGetCString(DirectFunctionCall1(date_out, d)); break; case CASHOID: value_str = DatumGetCString(DirectFunctionCall1(cash_out, d)); break; case TIMEOID: value_str = DatumGetCString(DirectFunctionCall1(time_out, d)); break; case TIMESTAMPOID: value_str = DatumGetCString(DirectFunctionCall1(timestamp_out, d)); break; case TIMESTAMPTZOID: value_str = DatumGetCString(DirectFunctionCall1(timestamptz_out, d)); break; case SMALLDATETIMEOID: value_str = DatumGetCString(DirectFunctionCall1(smalldatetime_out, d)); break; case UUIDOID: value_str = DatumGetCString(DirectFunctionCall1(uuid_out, d)); break; case INTERVALOID: value_str = DatumGetCString(DirectFunctionCall1(interval_out, d)); break; case TIMETZOID: value_str = DatumGetCString(DirectFunctionCall1(timetz_out, d)); break; case INT2VECTOROID: value_str = DatumGetCString(DirectFunctionCall1(int2vectorout, d)); break; case CLOBOID: value_str = DatumGetCString(DirectFunctionCall1(textout, d)); break; case NVARCHAR2OID: value_str = DatumGetCString(DirectFunctionCall1(nvarchar2out, d)); break; case VARCHAROID: value_str = DatumGetCString(DirectFunctionCall1(varcharout, d)); break; case TEXTOID: value_str = DatumGetCString(DirectFunctionCall1(textout, d)); break; case OIDVECTOROID: value_str = DatumGetCString(DirectFunctionCall1(oidvectorout, d)); break; case BPCHAROID: value_str = DatumGetCString(DirectFunctionCall1(bpcharout, d)); break; case RAWOID: value_str = DatumGetCString(DirectFunctionCall1(rawout, d)); break; case BYTEAOID: value_str = DatumGetCString(DirectFunctionCall1(byteaout, 
d)); break; case NUMERICOID: value_str = DatumGetCString(DirectFunctionCall1(numeric_out, d)); break; default: { elog(ERROR, "unspported type for attname:%s (typid:%u typname:%s)", tupDesc->attrs[attnum - 1]->attname.data, atttypid, get_typename(atttypid)); } } return pstrdup(value_str); } static bool PRCCanSkip(StartWithOpState *state, int prcType) { StartWithOp *swplan = (StartWithOp *)state->ps.plan; Assert (IsA(swplan, StartWithOp)); bool result = false; switch (prcType) { case SWCOL_ISCYCLE: { result = (IsSkipIsCycle(swplan->swExecOptions) != 0); break; } case SWCOL_ISLEAF: { result = (IsSkipIsLeaf(swplan->swExecOptions) != 0); break; } case SWCOL_LEVEL: case SWCOL_ROWNUM: /* * fall-thru level/rownum is light weight, we do not apply PRC-skip * optimization on them */ default: { /* other PRC is always not-skipable */ result = false; } } return result; }