/* -------------------------------------------------------------------------
*
* planner.cpp
* The query optimizer external interface.
*
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/gausskernel/optimizer/plan/planner.cpp
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "knl/knl_variable.h"
#include <limits.h>
#include <math.h>
#include "access/transam.h"
#include "catalog/indexing.h"
#include "catalog/pg_cast.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_constraint.h"
#include "catalog/pgxc_group.h"
#include "catalog/pgxc_node.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "executor/nodeRecursiveunion.h"
#include "gaussdb_version.h"
#include "knl/knl_instance.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/primnodes.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/dynsmp.h"
#include "optimizer/nodegroups.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
#include "optimizer/planmem_walker.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
#include "optimizer/tlist.h"
#include "parser/analyze.h"
#include "optimizer/gtmfree.h"
#include "parser/parsetree.h"
#include "parser/parse_agg.h"
#include "parser/parse_oper.h"
#include "parser/parse_hint.h"
#include "parser/parse_type.h"
#include "rewrite/rewriteManip.h"
#include "securec.h"
#include "utils/rel.h"
#include "utils/rel_gs.h"
#ifdef PGXC
#include "commands/prepare.h"
#include "pgxc/pgxc.h"
#include "optimizer/pgxcplan.h"
#include "optimizer/streamplan.h"
#include "workload/cpwlm.h"
#include "workload/workload.h"
#endif
#include "optimizer/streamplan.h"
#include "utils/relcache.h"
#include "utils/selfuncs.h"
#include "utils/fmgroids.h"
#include "utils/syscache.h"
#include "utils/snapmgr.h"
#include "vecexecutor/vecfunc.h"
#include "optimizer/randomplan.h"
#include "optimizer/optimizerdebug.h"
#include "optimizer/dataskew.h"
#ifdef ENABLE_MULTIPLE_NODES
#include "tsdb/optimizer/planner.h"
#endif
#include "optimizer/stream_remove.h"
#ifndef MIN
#define MIN(A, B) ((B) < (A) ? (B) : (A))
#endif
#ifdef ENABLE_UT
bool estimate_acceleration_cost_for_HDFS(Plan* plan, const char* relname);
#else
static bool estimate_acceleration_cost_for_HDFS(Plan* plan, const char* relname);
#endif
static int g_agglist[] = {AGG_HASHED, AGG_SORTED};
#define TWOLEVELWINFUNSELECTIVITY (1.0 / 3.0)
const char* ESTIMATION_ITEM = "EstimationItem";
/* From experiments, we assume that 2.5 times the DN count of distinct values gives every DN work to do */
#define DN_MULTIPLIER_FOR_SATURATION 2.5
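/*
 * Worked example (illustrative numbers): with 4 DNs the saturation point is
 * 4 * 2.5 = 10, i.e. roughly 10 distinct values on the distribute key are
 * assumed to be enough to keep every DN busy.
 */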
#define PLAN_HAS_DELTA(plan) \
((IsA((plan), CStoreScan) && HDFS_STORE == ((CStoreScan*)(plan))->relStoreLocation) \
||(IsA((plan), CStoreIndexScan) && HDFS_STORE == ((CStoreIndexScan*)(plan))->relStoreLocation) \
|| IsA((plan), DfsScan) || IsA((plan), DfsIndexScan))
/* For performance reasons, memory context will be dropped only when the totalSpace larger than 1MB. */
#define MEMORY_CONTEXT_DELETE_THRESHOLD (1024 * 1024)
#define IS_NEED_FREE_MEMORY_CONTEXT(MemContext) \
((MemContext) != NULL && ((AllocSetContext*)(MemContext))->totalSpace > MEMORY_CONTEXT_DELETE_THRESHOLD)
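/*
 * Illustrative note: this guards the deep copies near the end of
 * standard_planner(). Only when the planner memory context has grown past the
 * 1MB threshold is it worth copying the surviving plan structures out so that
 * the whole context can be dropped.
 */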
const static Oid VectorEngineUnsupportType[] = {
POINTOID,
LSEGOID,
BOXOID,
LINEOID,
CIRCLEOID,
POLYGONOID,
PATHOID
};
extern PGXCNodeAllHandles* connect_compute_pool(int srvtype);
extern uint64 get_datasize(Plan* plan, int srvtype, int* filenum);
extern RangeTblEntry* make_dummy_remote_rte(char* relname, Alias* alias);
extern ForeignOptions* setForeignOptions(Oid relid);
extern List* reassign_nodelist(RangeTblEntry* rte, List* ori_node_list);
extern Node* preprocess_expression(PlannerInfo* root, Node* expr, int kind);
static Plan* inheritance_planner(PlannerInfo* root);
static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction);
static void preprocess_rowmarks(PlannerInfo* root);
static void estimate_limit_offset_count(PlannerInfo* root, int64* offset_est, int64* count_est);
static double preprocess_limit(PlannerInfo* root, double tuple_fraction, int64* offset_est, int64* count_est);
static bool grouping_is_can_hash(Query* parse, AggClauseCosts* agg_costs);
static Size compute_hash_entry_size(bool vectorized, Path* cheapest_path, int path_width, AggClauseCosts* agg_costs);
static bool choose_hashed_grouping(PlannerInfo* root, double tuple_fraction, double limit_tuples, int path_width,
Path* cheapest_path, Path* sorted_path, const double* dNumGroups, AggClauseCosts* agg_costs, Size* hash_entry_size);
static void compute_distinct_sorted_path_cost(Path* sorted_p, List* sorted_pathkeys, Query* parse, PlannerInfo* root,
int numDistinctCols, Cost sorted_startup_cost, Cost sorted_total_cost, double path_rows,
Distribution* sorted_distribution, int path_width, double dNumDistinctRows, double limit_tuples);
static bool choose_hashed_distinct(PlannerInfo* root, double tuple_fraction, double limit_tuples, double path_rows,
int path_width, Cost cheapest_startup_cost, Cost cheapest_total_cost, Distribution* cheapest_distribution,
Cost sorted_startup_cost, Cost sorted_total_cost, Distribution* sorted_distribution, List* sorted_pathkeys,
double dNumDistinctRows, Size hashentrysize);
static List* make_subplanTargetList(PlannerInfo* root, List* tlist, AttrNumber** groupColIdx, bool* need_tlist_eval);
static void locate_grouping_columns(PlannerInfo* root, List* tlist, List* sub_tlist, AttrNumber* groupColIdx);
static List* postprocess_setop_tlist(List* new_tlist, List* orig_tlist);
static List* make_windowInputTargetList(PlannerInfo* root, List* tlist, List* activeWindows);
static void get_column_info_for_window(PlannerInfo* root, WindowClause* wc, List* tlist, int numSortCols,
AttrNumber* sortColIdx, int* partNumCols, AttrNumber** partColIdx, Oid** partOperators, int* ordNumCols,
AttrNumber** ordColIdx, Oid** ordOperators);
static List* add_groupingIdExpr_to_tlist(List* tlist);
static List* get_group_expr(List* sortrefList, List* tlist);
static void build_grouping_itst_keys(PlannerInfo* root, List* active_windows);
static Plan* build_grouping_chain(PlannerInfo* root, Query* parse, List** tlist, bool need_sort_for_grouping,
List* rollup_groupclauses, List* rollup_lists, AttrNumber* groupColIdx, AggClauseCosts* agg_costs, long numGroups,
Plan* result_plan, WindowLists* wflists, bool need_stream);
static bool group_member(List* list, Expr* node);
static Plan* build_groupingsets_plan(PlannerInfo* root, Query* parse, List** tlist, bool need_sort_for_grouping,
List* rollup_groupclauses, List* rollup_lists, AttrNumber** groupColIdx, AggClauseCosts* agg_costs, long numGroups,
Plan* result_plan, WindowLists* wflists, bool* need_hash, List* collectiveGroupExpr);
static bool vector_engine_preprocess_walker(Node* node, void* rtables);
static void init_optimizer_context(PlannerGlobal* glob);
static void deinit_optimizer_context(PlannerGlobal* glob);
static void check_index_column();
static bool check_sort_for_upsert(PlannerInfo* root);
#ifdef PGXC
static void separate_rowmarks(PlannerInfo* root);
#endif
#ifdef STREAMPLAN
typedef struct {
Node* expr;
double multiple;
} ExprMultipleData;
/* Passthrough data for standard_qp_callback */
typedef struct {
List* tlist; /* preprocessed query targetlist */
List* activeWindows; /* active windows, if any */
List* groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
typedef enum path_key { windows_func_pathkey = 0, distinct_pathkey, sort_pathkey } path_key;
typedef struct {
List* queries;
List* vars;
} ImplicitCastVarContext;
THR_LOCAL List* g_index_vars;
static SAggMethod g_hashagg_option_list[] = {DN_REDISTRIBUTE_AGG, DN_AGG_REDISTRIBUTE_AGG, DN_AGG_CN_AGG};
#define ALL_HASHAGG_OPTION 3
#define HASHAGG_OPTION_WITH_STREAM 2
typedef struct {
List* rqs;
bool include_all_plans;
bool has_modify_table;
bool under_mergeinto;
int elevel;
} FindRQContext;
/*
 * This struct is used to find:
 * 1) How many RemoteQuery (VecRemoteQuery) nodes are in a plan?
 * 2) Does this plan contain write operations?
 * 3) How many DNs will be involved?
 *
 * This struct is only used to check whether we should allow a query to be executed in GTM-Free mode.
 * In GTM-Free mode:
 * if 1) the query needs to be split into multiple queries and 2) the query needs to write to the database:
 * Report an error
 * if the query needs more than one DN to be involved:
 * Without the multinode hint:
 * if application_type is:
 * not_perfect_sharding_type (default): allow the query to be executed, no warning/error
 * perfect_sharding_type: report ERROR
 * With the multinode hint:
 * allow the query to continue, report no warnings or errors.
 */
typedef struct
{
int remote_query_count;
bool has_modify_table;
/*
* All nodes involved in the query. It stores IDs(int type) of DNs, such as 1,2,3...
* The same name exists in ExecNodes.
*/
List *nodeList;
} FindNodesContext;
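// Illustrative SQL (an assumption for exposition, not taken from this file):
// in GTM-Free mode with application_type = perfect_sharding_type, a query
// whose execution spans two DNs, such as
//     SELECT * FROM t1, t2 WHERE t1.a = t2.b;
// reports an ERROR, while adding the multinode hint,
//     SELECT /*+ multinode */ * FROM t1, t2 WHERE t1.a = t2.b;
// lets it continue without warnings, per the rules described above.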
static bool needs_two_level_groupagg(PlannerInfo* root, Plan* plan, Node* distinct_node, List* distributed_key,
bool* need_redistribute, bool* need_local_redistribute);
static Plan* mark_agg_stream(PlannerInfo* root, List* tlist, Plan* plan, List* group_or_distinct_cls,
AggOrientation agg_orientation, bool* has_second_agg_sort);
static Plan* mark_top_agg(
PlannerInfo* root, List* tlist, Plan* agg_plan, Plan* sub_plan, AggOrientation agg_orientation);
static Plan* mark_group_stream(PlannerInfo* root, List* tlist, Plan* result_plan);
static Plan* mark_distinct_stream(
PlannerInfo* root, List* tlist, Plan* plan, List* groupcls, Index query_level, List* current_pathkeys);
static List* get_optimal_distribute_key(PlannerInfo* root, List* groupClause, Plan* plan, double* multiple);
static bool vector_engine_expression_walker(Node* node, DenseRank_context* context);
static bool vector_engine_walker(Plan* result_plan, bool check_rescan);
static Plan* fallback_plan(Plan* result_plan);
static Plan* vectorize_plan(Plan* result_plan, bool ignore_remotequery);
static Plan* build_vector_plan(Plan* plan);
static Plan* mark_windowagg_stream(
PlannerInfo* root, Plan* plan, List* tlist, WindowClause* wc, List* pathkeys, WindowLists* wflists);
static uint32 get_hashagg_skew(AggSkewInfo* skew_info, List* distribute_keys);
static SAggMethod get_optimal_hashagg(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, List* distributed_key, List* target_list, double final_groups, double multiple,
List* distribute_key_less_skew, double multiple_less_skew, AggOrientation agg_orientation, Cost* final_cost,
Distribution** final_distribution, bool need_stream, AggSkewInfo* skew_info,
uint32 aggmethod_filter = ALLOW_ALL_AGG);
static Plan* generate_hashagg_plan(PlannerInfo* root, Plan* plan, List* final_list, AggClauseCosts* agg_costs,
int numGroupCols, const double* numGroups, WindowLists* wflists, AttrNumber* groupColIdx, Oid* groupColOps,
bool* needs_stream, Size hash_entry_size, AggOrientation agg_orientation, RelOptInfo* rel_info);
static Plan* get_count_distinct_partial_plan(PlannerInfo* root, Plan* result_plan, List** final_tlist,
Node* distinct_node, AggClauseCosts agg_costs, const double* numGroups, WindowLists* wflists,
AttrNumber* groupColIdx, bool* needs_stream, Size hash_entry_size, RelOptInfo* rel_info);
static Node* get_multiple_from_expr(
PlannerInfo* root, Node* expr, double rows, double* skew_multiple, double* bias_multiple);
static void set_root_matching_key(PlannerInfo* root, List* targetlist);
static List* add_groupId_to_groupExpr(List* query_group, List* tlist);
static Path* cost_agg_convert_to_path(Plan* plan);
static StreamPath* cost_agg_do_redistribute(Path* subpath, List* distributed_key, double multiple,
Distribution* target_distribution, int width, bool vec_output, int dop, bool needs_stream);
static StreamPath* cost_agg_do_gather(Path* subpath, int width, bool vec_output);
static Path* cost_agg_do_agg(Path* subpath, PlannerInfo* root, AggStrategy agg_strategy, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, Size hashentrysize, QualCost total_cost, int width, bool vec_output, int dop);
static void get_hashagg_gather_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, QualCost total_cost, Size hashentrysize,
AggStrategy agg_strategy, bool needs_stream, Path* result_path);
static void get_redist_hashagg_gather_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, List* distributed_key_less_skew, double multiple_less_skew,
Distribution* target_distribution, QualCost total_cost, Size hashentrysize, AggStrategy agg_strategy,
bool needs_stream, Path* result_path);
static void get_redist_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts, int numGroupCols,
double numGroups, double final_groups, List* distributed_key, double multiple, Distribution* target_distribution,
QualCost total_cost, Size hashentrysize, bool needs_stream, Path* result_path);
static void get_hashagg_redist_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, List* distributed_key, double multiple,
Distribution* target_distribution, QualCost total_cost, Size hashentrysize, bool needs_stream, Path* result_path);
static void get_redist_hashagg_redist_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, List* distributed_key_less_skew, double multiple_less_skew,
Distribution* target_distribution, List* distributed_key, double multiple, QualCost total_cost, Size hashentrysize,
bool needs_stream, Path* result_path);
static void get_count_distinct_param(PlannerInfo* root, Plan** result_plan, List* tlist, Node* distinct_node,
int* numGrpColsNew, AttrNumber* groupColIdx, AttrNumber** groupColIdx_new, Oid** groupingOps_new, List** orig_tlist,
List** duplicate_tlist, List** newtlist);
static List* get_count_distinct_newtlist(PlannerInfo* root, List* tlist, Node* distinct_node, List** orig_tlist,
List** duplicate_tlist, Oid* distinct_eq_op);
static void make_dummy_targetlist(Plan* plan);
static void passdown_itst_keys_to_subroot(PlannerInfo* root, ItstDisKey* diskeys);
static List* add_itst_node_to_list(List* result_list, List* target_list, Expr* node, bool is_matching_key);
static void copy_path_costsize(Path* dest, Path* src);
static bool walk_plan(Plan* plantree, PlannerInfo* root);
static bool walk_normal_plan(Plan* plantree, PlannerInfo* root);
static void walk_set_plan(Plan* plantree, PlannerInfo* root);
static Plan* insert_gather_node(Plan* child, PlannerInfo* root);
static bool has_dfs_node(Plan* plantree, PlannerGlobal* glob);
static Plan* try_accelerate_plan(Plan* plantree, PlannerInfo* root, PlannerGlobal* glob);
static Plan* try_deparse_agg(Plan* plan, PlannerInfo* root, PlannerGlobal* glob);
static bool dfs_node_exists(Plan* plan);
static bool is_dfs_node(Plan* plan);
static void add_metadata(Plan* plan, PlannerInfo* root);
static bool precheck_before_accelerate();
static bool is_pushdown_node(Plan *plan);
static bool estimate_acceleration_cost(Plan *plan);
#ifdef ENABLE_MULTIPLE_NODES
static bool walk_plan_for_coop_analyze(Plan *plan, PlannerInfo *root);
static void walk_set_plan_for_coop_analyze(Plan *plan, PlannerInfo *root);
static bool walk_normal_plan_for_coop_analyze(Plan *plan, PlannerInfo *root);
static bool find_right_agg(Plan *plan);
static bool has_pgfdw_rel(PlannerInfo* root);
extern Plan *deparse_agg_node(Plan *agg, PlannerInfo *root);
#endif
static void find_remotequery(Plan *plan, PlannerInfo *root);
static void gtm_process_top_node(Plan *plan, void *context);
void GetRemoteQuery(PlannedStmt *plan, const char *queryString);
void GetRemoteQueryWalker(Plan* plan, void* context, const char *queryString);
void PlanTreeWalker(Plan* plan, void (*walker)(Plan*, void*, const char*), void*, const char *queryString);
static void find_implicit_cast_var(Query *query);
static bool implicit_cast_var_walker(Node *node, void *context);
static void save_implicit_cast_var(Node *node, void *context);
#endif
void preprocess_const_params(PlannerInfo* root, Node* jtnode);
static Node* preprocess_const_params_worker(PlannerInfo* root, Node* expr, int kind);
/*****************************************************************************
*
* Query optimizer entry point
*
* To support loadable plugins that monitor or modify planner behavior,
* we provide a hook variable that lets a plugin get control before and
* after the standard planning process. The plugin would normally
* call standard_planner().
*
* Note to plugin authors: standard_planner() scribbles on its Query input,
* so you'd better copy that data structure if you want to plan more than once.
*
*****************************************************************************/
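/*
 * A minimal sketch of such a plugin (hypothetical function name, shown for
 * illustration only):
 *
 *   static PlannedStmt* my_planner(Query* parse, int cursorOptions, ParamListInfo boundParams)
 *   {
 *       Query* copied = (Query*)copyObject(parse);  // survive planner scribbling
 *       // ... inspect or rewrite "copied" here ...
 *       return standard_planner(copied, cursorOptions, boundParams);
 *   }
 */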
PlannedStmt* planner(Query* parse, int cursorOptions, ParamListInfo boundParams)
{
PlannedStmt* result = NULL;
instr_time starttime;
double totaltime = 0;
INSTR_TIME_SET_CURRENT(starttime);
#ifdef PGXC
/*
* streaming engine hook for agg rewrite.
*/
if (t_thrd.streaming_cxt.streaming_planner_hook)
(*(planner_hook_type) t_thrd.streaming_cxt.streaming_planner_hook)\
(parse, cursorOptions, boundParams);
/*
* A Coordinator receiving a query from another Coordinator
* is not allowed to go into PGXC planner.
*/
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && !IsConnFromCoord())
result = pgxc_planner(parse, cursorOptions, boundParams);
else
#endif
result = standard_planner(parse, cursorOptions, boundParams);
totaltime += elapsed_time(&starttime);
result->plannertime = totaltime;
if (u_sess->attr.attr_common.max_datanode_for_plan > 0 && IS_PGXC_COORDINATOR && !IsConnFromCoord()) {
GetRemoteQuery(result, NULL);
}
return result;
}
static bool queryIsReadOnly(Query* query)
{
if (IsA(query, Query)) {
switch (query->commandType) {
case CMD_SELECT: {
/* SELECT FOR [KEY] UPDATE/SHARE */
if (query->rowMarks != NIL)
return false;
/* data-modifying CTE */
if (query->hasModifyingCTE)
return false;
}
return true;
case CMD_UTILITY:
case CMD_UPDATE:
case CMD_INSERT:
case CMD_DELETE:
case CMD_MERGE:
return false;
default: {
ereport(ERROR,
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmsg("unrecognized commandType: %d", (int)query->commandType)));
} break;
}
}
return false;
}
/*
* @Description: fill bucketmap info into planstmt.
*
* @param[IN] result: plan info
* @param[IN] node_group_info_context: bucketmap info
* @return: void
*/
static void FillPlanBucketmap(PlannedStmt *result,
NodeGroupInfoContext *nodeGroupInfoContext)
{
#ifdef ENABLE_MULTIPLE_NODES
/* bucketmap is not needed, just return. */
if (!IsBucketmapNeeded(result)) {
result->num_bucketmaps = 0;
return;
}
#endif
result->num_bucketmaps = nodeGroupInfoContext->num_bucketmaps;
for (int i = 0; i < MAX_SPECIAL_BUCKETMAP_NUM; i++) {
result->bucketMap[i] = nodeGroupInfoContext->bucketMap[i];
}
pfree_ext(nodeGroupInfoContext);
if (IS_PGXC_COORDINATOR && result->num_bucketmaps == 0) {
result->bucketMap[0] = GetGlobalStreamBucketMap(result);
if (result->bucketMap[0] != NULL) {
result->num_bucketmaps = 1;
}
}
}
PlannedStmt* standard_planner(Query* parse, int cursorOptions, ParamListInfo boundParams)
{
PlannedStmt* result = NULL;
PlannerGlobal* glob = NULL;
double tuple_fraction;
PlannerInfo* root = NULL;
Plan* top_plan = NULL;
ListCell* lp = NULL;
ListCell* lr = NULL;
int max_mem = 0;
int available_mem = 0;
int esti_op_mem = 0;
bool use_query_mem = false;
bool use_tenant = false;
List* parse_hint_warning = NIL;
/*
* Dynamic smp
*/
if (IsDynamicSmpEnabled()) {
InitDynamicSmp();
int hashTableCount = 0;
if (isIntergratedMachine) {
GetHashTableCount(parse, parse->cteList, &hashTableCount);
}
ChooseStartQueryDop(hashTableCount);
}
if (enable_check_implicit_cast())
find_implicit_cast_var(parse);
#ifndef ENABLE_MULTIPLE_NODES
if (IS_STREAM) {
shipping_context context;
stream_walker_context_init(&context);
(void)stream_walker((Node*)parse, (void*)(&context));
disable_unshipped_log(parse, &context);
}
#endif
/* Initializing the work mem used by the optimizer */
if (IS_STREAM_PLAN) {
dywlm_client_get_memory_info(&max_mem, &available_mem, &use_tenant);
if (max_mem != 0) {
use_query_mem = true;
esti_op_mem = (double)available_mem / 2.0;
u_sess->opt_cxt.op_work_mem = Min(esti_op_mem, OPT_MAX_OP_MEM);
AssertEreport(u_sess->opt_cxt.op_work_mem > 0,
MOD_OPT,
"invalid operator work memory when initilizing the work memory used by optimizer");
} else {
u_sess->opt_cxt.op_work_mem = u_sess->attr.attr_memory.work_mem;
esti_op_mem = u_sess->opt_cxt.op_work_mem;
}
} else {
u_sess->opt_cxt.op_work_mem = u_sess->attr.attr_memory.work_mem;
esti_op_mem = u_sess->opt_cxt.op_work_mem;
}
/* Cursor options may come from caller or from DECLARE CURSOR stmt */
if (parse->utilityStmt && IsA(parse->utilityStmt, DeclareCursorStmt))
cursorOptions = (uint32)cursorOptions | (uint32)(((DeclareCursorStmt*)parse->utilityStmt)->options);
/*
* Set up global state for this planner invocation. This data is needed
* across all levels of sub-Query that might exist in the given command,
* so we keep it in a separate struct that's linked to by each per-Query
* PlannerInfo.
*/
glob = makeNode(PlannerGlobal);
glob->boundParams = boundParams;
glob->subplans = NIL;
glob->subroots = NIL;
glob->rewindPlanIDs = NULL;
glob->finalrtable = NIL;
glob->finalrowmarks = NIL;
glob->resultRelations = NIL;
glob->relationOids = NIL;
glob->invalItems = NIL;
glob->nParamExec = 0;
glob->lastPHId = 0;
glob->lastRowMarkId = 0;
glob->transientPlan = false;
glob->dependsOnRole = false;
glob->insideRecursion = false;
glob->bloomfilter.bloomfilter_index = -1;
glob->bloomfilter.add_index = true;
glob->estiopmem = esti_op_mem;
if (IS_STREAM_PLAN)
glob->vectorized = !vector_engine_preprocess_walker((Node*)parse, parse->rtable);
else
glob->vectorized = false;
/* Assume work mem is at least 1/4 of query mem */
glob->minopmem = Min(available_mem / 4, OPT_MAX_OP_MEM);
parse_hint_warning = retrieve_query_hint_warning((Node*)parse);
/*
* Set up default exec_nodes: we first recursively iterate parse->rtable
* to check whether we are referring to base relations from different node groups (error out),
* then fetch the first RTE entry to build the default ExecNodes and put the reference into
* the top-most PlannerInfo->glob
*/
bool ngbk_is_multiple_nodegroup_scenario = false;
int ngbk_different_nodegroup_count = 1;
Distribution* ngbk_in_redistribution_group_distribution = NULL;
Distribution* ngbk_compute_permission_group_distribution = NULL;
Distribution* ngbk_query_union_set_group_distribution = NULL;
Distribution* ngbk_single_node_distribution = NULL;
ng_backup_nodegroup_options(&ngbk_is_multiple_nodegroup_scenario,
&ngbk_different_nodegroup_count,
&ngbk_in_redistribution_group_distribution,
&ngbk_compute_permission_group_distribution,
&ngbk_query_union_set_group_distribution,
&ngbk_single_node_distribution);
ng_init_nodegroup_optimizer(parse);
/* Must assign the value after calling ng_init_nodegroup_optimizer(). */
u_sess->opt_cxt.is_dngather_support =
u_sess->opt_cxt.is_dngather_support && ng_get_single_node_distribution() != NULL;
/* Determine what fraction of the plan is likely to be scanned */
if ((uint32)cursorOptions & CURSOR_OPT_FAST_PLAN) {
/*
* We have no real idea how many tuples the user will ultimately FETCH
* from a cursor, but it is often the case that he doesn't want 'em
* all, or would prefer a fast-start plan anyway so that he can
* process some of the tuples sooner. Use a GUC parameter to decide
* what fraction to optimize for.
*/
tuple_fraction = u_sess->attr.attr_sql.cursor_tuple_fraction;
/*
* We document cursor_tuple_fraction as simply being a fraction, which
* means the edge cases 0 and 1 have to be treated specially here. We
* convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
*/
if (tuple_fraction >= 1.0) {
tuple_fraction = 0.0;
} else if (tuple_fraction <= 0.0) {
tuple_fraction = 1e-10;
}
} else {
/* Default assumption is we need all the tuples */
tuple_fraction = 0.0;
}
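/*
 * Worked example (illustrative): with cursor_tuple_fraction = 0.1 the planner
 * optimizes for fetching the first 10% of the result, while the edge cases
 * map 1.0 -> 0.0 ("optimize for all tuples") and 0.0 -> 1e-10 (a pure
 * fast-start plan).
 */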
/* reset u_sess->analyze_cxt.need_autoanalyze */
u_sess->analyze_cxt.need_autoanalyze = false;
MemoryContext old_context = CurrentMemoryContext;
init_optimizer_context(glob);
old_context = MemoryContextSwitchTo(glob->plannerContext->plannerMemContext);
/* primary planning entry point (may recurse for subqueries) */
top_plan = subquery_planner(glob, parse, NULL, false, tuple_fraction, &root);
MemoryContextSwitchTo(old_context);
/* Are there OBS/HDFS ForeignScan node(s) in the plan tree? */
u_sess->opt_cxt.srvtype = T_INVALID;
u_sess->opt_cxt.has_obsrel = has_dfs_node(top_plan, glob);
/*
* try to accelerate the query for HDFS/OBS foreign table by pushing
* scan/agg node down to the compute pool.
*/
if (u_sess->opt_cxt.has_obsrel) {
AssertEreport(u_sess->opt_cxt.srvtype != T_INVALID,
MOD_OPT,
"invalid server type when push scan/agg node down to the compute pool to the accelerate the query.");
top_plan = try_accelerate_plan(top_plan, root, glob);
}
/*
* If creating a plan for a scrollable cursor, make sure it can run
* backwards on demand. Add a Material node at the top at need.
*/
if ((unsigned int)cursorOptions & CURSOR_OPT_SCROLL) {
if (!ExecSupportsBackwardScan(top_plan))
top_plan = materialize_finished_plan(top_plan);
}
/* final cleanup of the plan */
AssertEreport(glob->finalrtable == NIL,
MOD_OPT,
"finalrtable is not empty when starting final cleanup of the plan");
AssertEreport(glob->finalrowmarks == NIL,
MOD_OPT,
"finalrowmarks is not empty when starting final cleanup of the plan");
AssertEreport(glob->resultRelations == NIL,
MOD_OPT,
"resultRelations is not empty when starting final cleanup of the plan");
#ifdef STREAMPLAN
if ((IS_STREAM_PLAN || (IS_PGXC_DATANODE && (!IS_STREAM || IS_STREAM_DATANODE))) && root->query_level == 1) {
/* remote query and windowagg do not support vectorized rescan, so fall back to a row plan */
top_plan = try_vectorize_plan(top_plan, parse, cursorOptions & CURSOR_OPT_HOLD);
}
#endif
top_plan = set_plan_references(root, top_plan);
delete_redundant_streams_of_remotequery((RemoteQuery *)top_plan);
/*
* just for cooperation analysis on the client cluster,
* try to deparse the agg node to remote SQL in the ForeignScan node.
* NOTE: try_deparse_agg() must be called after set_plan_references().
*/
top_plan = try_deparse_agg(top_plan, root, glob);
/*
* just for cooperation analysis on the source data cluster,
* reassign the DN list scanned by the RemoteQuery node for requests from the client cluster.
*/
find_remotequery(top_plan, root);
if (IS_PGXC_COORDINATOR && root->query_level == 1) {
bool materialize = false;
bool sort_to_store = false;
/*
* if this is a WITH HOLD cursor, the remotequery tuplestore should be used;
* and because we do not rescan the sortstore in ExecRemoteQueryReScan,
* tuples in the sortstore should be stored into the tuplestore, to avoid missing tuples.
*/
if (cursorOptions & CURSOR_OPT_HOLD) {
materialize = true;
sort_to_store = true;
}
materialize_remote_query(top_plan, &materialize, sort_to_store);
}
/*
* Handle the subplan situation.
* We have to put this after the set_plan_references() call,
* otherwise we would mis-identify the subplans.
*/
if (u_sess->opt_cxt.query_dop > 1) {
List* subplan_list = NIL;
(void)has_subplan(top_plan, NULL, NULL, true, &subplan_list, true);
}
confirm_parallel_info(top_plan, 1);
#ifdef STREAMPLAN
/*
* Mark plan node id and parent node id for all the plan nodes.
*/
int parent_node_id = INITIAL_PARENT_NODE_ID; /* beginning with INITIAL_PARENT_NODE_ID */
int plan_node_id = INITIAL_PLAN_NODE_ID; /* beginning with INITIAL_PLAN_NODE_ID */
int num_streams = 0;
int num_plannodes = 0;
int total_num_streams = 0;
/* mark gather count for query */
int gather_count = 0;
/*
* When enable_stream_operator = off:
* The current mechanism has the problem that a CN will split complex SQL into multiple simple SQL statements and
* send them to the DNs for execution. To ensure data consistency, the DNs need to use the same Snapshot for
* visibility judgment of such SQL, so the CN side sends an identifier for such SQL to the DNs.
* PlannedStmt->max_push_sql_num records the maximum number of SQL statements a single SQL statement is split into.
*/
int max_push_sql_num = 0;
/*
* the array storing the parent plan node id of each subplan, starting from 1.
* The first item holds the parent id while traversing the tree.
*/
int* subplan_ids = (int*)palloc0(sizeof(int) * (list_length(glob->subplans) + 1));
List* init_plan = NIL;
int i = 1;
NodeGroupInfoContext* node_group_info_context = (NodeGroupInfoContext*)palloc0(sizeof(NodeGroupInfoContext));
/*
* MPP with-recursive support
*
* Vectorize each plan node under RecursiveUnion
*/
Assert(list_length(glob->subplans) == list_length(glob->subroots));
forboth(lp, glob->subplans, lr, glob->subroots)
{
Plan* subplan = (Plan*)lfirst(lp);
PlannerInfo* subroot = (PlannerInfo*)lfirst(lr);
/* Vectorize the subplan with RecursiveUnion plan node */
if (STREAM_RECURSIVECTE_SUPPORTED && IsA(subplan, RecursiveUnion)) {
subplan = try_vectorize_plan(subplan, subroot->parse, true);
lfirst(lp) = subplan;
}
}
/* Assign plan node id for each plan node */
#ifdef ENABLE_MULTIPLE_NODES
if (IS_PGXC_COORDINATOR && root->query_level == 1) {
#else
if (root->query_level == 1) {
#endif
finalize_node_id(top_plan,
&plan_node_id,
&parent_node_id,
&num_streams,
&num_plannodes,
&total_num_streams,
&max_push_sql_num,
&gather_count,
glob->subplans,
glob->subroots,
&init_plan,
subplan_ids,
true,
false,
false,
queryIsReadOnly(parse),
node_group_info_context);
}
#endif
/* ... and the subplans (both regular subplans and initplans) */
AssertEreport(list_length(glob->subplans) == list_length(glob->subroots),
MOD_OPT,
"The length of subplans is not equal to that of subroots when standardize planner");
forboth(lp, glob->subplans, lr, glob->subroots)
{
Plan* subplan = (Plan*)lfirst(lp);
PlannerInfo* subroot = (PlannerInfo*)lfirst(lr);
#ifdef STREAMPLAN
/* We set references for some plans in finalize_node_id. For plans not yet done, set plan references here */
if (subplan_ids[i] == 0 || IsA(subplan, RecursiveUnion)) {
if (STREAM_RECURSIVECTE_SUPPORTED && IsA(subplan, RecursiveUnion)) {
RecursiveRefContext context;
errno_t rc = EOK;
rc = memset_s(&context, sizeof(RecursiveRefContext), 0, sizeof(RecursiveRefContext));
securec_check(rc, "\0", "\0");
context.join_type = T_Invalid;
context.ru_plan = (RecursiveUnion*)subplan;
context.control_plan = subplan;
context.nested_stream_depth = 0;
context.set_control_plan_nodeid = false;
context.initplans = init_plan;
context.subplans = glob->subplans;
/* EntryPoint for iterating the underlying plan node */
set_recursive_cteplan_ref(subplan, &context);
} else {
subplan = try_vectorize_plan(subplan, subroot->parse, true);
lfirst(lp) = set_plan_references(subroot, subplan);
}
/*
* When enable_stream_operator = off, subquery SQL is not processed by finalize_node_id.
* In this case we default each subquery to a SQL statement pushed down to the DN.
* Here we may misjudge a subquery executed only on the CN,
* but in order to maintain the independence of the set_plan_references function,
* there is no further judgment on such subqueries, and the subquery is considered to be issued to the
* DN. This does not affect correctness.
*/
max_push_sql_num++;
}
#endif
i++;
}
/* Only copy these fields when the memory context's total size meets the dropping condition. */
if (IS_NEED_FREE_MEMORY_CONTEXT(glob->plannerContext->plannerMemContext)) {
top_plan = (Plan*)copyObject(top_plan);
glob->finalrtable = (List*)copyObject(glob->finalrtable);
glob->resultRelations = (List*)copyObject(glob->resultRelations);
glob->subplans = (List*)copyObject(glob->subplans);
glob->rewindPlanIDs = bms_copy(glob->rewindPlanIDs);
glob->finalrowmarks = (List*)copyObject(glob->finalrowmarks);
glob->relationOids = (List*)copyObject(glob->relationOids);
glob->invalItems = (List*)copyObject(glob->invalItems);
init_plan = (List*)copyObject(init_plan);
}
glob->hint_warning = list_concat(parse_hint_warning, (List*)copyObject(glob->hint_warning));
/* build the PlannedStmt result */
result = makeNode(PlannedStmt);
result->commandType = parse->commandType;
result->queryId = parse->queryId;
result->uniqueSQLId = parse->uniqueSQLId;
result->hasReturning = (parse->returningList != NIL);
result->hasModifyingCTE = parse->hasModifyingCTE;
result->canSetTag = parse->canSetTag;
result->transientPlan = glob->transientPlan;
result->dependsOnRole = glob->dependsOnRole;
result->planTree = top_plan;
result->rtable = glob->finalrtable;
result->resultRelations = glob->resultRelations;
result->utilityStmt = parse->utilityStmt;
result->subplans = glob->subplans;
result->rewindPlanIDs = glob->rewindPlanIDs;
result->rowMarks = glob->finalrowmarks;
result->relationOids = glob->relationOids;
result->invalItems = glob->invalItems;
result->nParamExec = glob->nParamExec;
result->noanalyze_rellist = (List*)copyObject(t_thrd.postgres_cxt.g_NoAnalyzeRelNameList);
if (IS_PGXC_COORDINATOR &&
(t_thrd.proc->workingVersionNum < 92097 || total_num_streams > 0)) {
result->nodesDefinition = get_all_datanodes_def();
}
result->num_nodes = u_sess->pgxc_cxt.NumDataNodes;
result->num_streams = total_num_streams;
result->max_push_sql_num = max_push_sql_num;
result->gather_count = gather_count;
result->num_plannodes = num_plannodes;
FillPlanBucketmap(result, node_group_info_context);
result->query_string = NULL;
result->MaxBloomFilterNum = root->glob->bloomfilter.bloomfilter_index + 1;
/* record which subplan belongs to which thread */
#ifdef ENABLE_MULTIPLE_NODES
if (IS_STREAM_PLAN) {
#else
if (result->num_streams > 0) {
#endif
for (i = 1; i <= list_length(result->subplans); i++)
result->subplan_ids = lappend_int(result->subplan_ids, subplan_ids[i]);
result->initPlan = init_plan;
}
pfree_ext(subplan_ids);
/* dynamic query dop main entry */
if (IsDynamicSmpEnabled()) {
/* the main plan */
OptimizePlanDop(result);
}
/* Query mem calculation and control main entry */
if (IS_STREAM_PLAN && use_query_mem) {
result->assigned_query_mem[1] = max_mem;
result->assigned_query_mem[0] = available_mem;
ereport(DEBUG2,
(errmodule(MOD_MEM),
errmsg("[standard_planner]Passing in max mem %d and available mem %d", max_mem, available_mem)));
CalculateQueryMemMain(result, use_tenant, false);
ereport(DEBUG2,
(errmodule(MOD_MEM),
errmsg("[standard_planner]Calucated query max %d and min mem %d",
result->query_mem[0],
result->query_mem[1])));
}
/* data redistribution for DFS table. */
if (u_sess->attr.attr_sql.enable_cluster_resize && root->query_level == 1 &&
root->parse->commandType == CMD_INSERT) {
result->dataDestRelIndex = root->dataDestRelIndex;
} else {
result->dataDestRelIndex = 0;
}
result->query_dop = u_sess->opt_cxt.query_dop;
if (u_sess->opt_cxt.has_obsrel) {
result->has_obsrel = true;
}
result->plan_hint_warning = glob->hint_warning;
ng_restore_nodegroup_options(ngbk_is_multiple_nodegroup_scenario,
ngbk_different_nodegroup_count,
ngbk_in_redistribution_group_distribution,
ngbk_compute_permission_group_distribution,
ngbk_query_union_set_group_distribution,
ngbk_single_node_distribution);
deinit_optimizer_context(glob);
if (enable_check_implicit_cast() && g_index_vars != NIL)
check_index_column();
result->isRowTriggerShippable = parse->isRowTriggerShippable;
return result;
}
/*
* We will not rewrite full joins if the query tree contains any of these members.
*/
bool fulljoin_2_left_union_right_anti_support(Query* parse)
{
if (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && parse->commandType != CMD_MERGE)
return false;
if (parse->utilityStmt != NULL)
return false;
if (parse->hasRecursive)
return false;
if (parse->hasModifyingCTE)
return false;
if (parse->hasForUpdate)
return false;
if (parse->returningList != NIL)
return false;
if (parse->rowMarks != NIL)
return false;
if (parse->has_to_save_cmd_id)
return false;
if (parse->equalVars != NIL)
return false;
return true;
}
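/*
 * Illustration of the rewrite these checks guard (example SQL is an
 * assumption for exposition; the actual transformation is done later by
 * reduce_inequality_fulljoins()): a full join with an inequality condition,
 *     SELECT * FROM t1 FULL JOIN t2 ON t1.a < t2.b;
 * can be evaluated as a left join UNION ALL a right anti join, conceptually:
 *     SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.a < t2.b
 *     UNION ALL
 *     SELECT NULL, ..., t2.* FROM t2
 *     WHERE NOT EXISTS (SELECT 1 FROM t1 WHERE t1.a < t2.b);
 * Duplicating the query tree this way is why volatile functions, FOR UPDATE,
 * returning lists, etc. are rejected above.
 */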
/*
* return true if the funcexpr is an implicit conversion.
*/
static bool IsImplicitConversion(FuncExpr* expr)
{
if (list_length(expr->args) != 1 || expr->funcformat != COERCE_IMPLICIT_CAST) {
return false;
}
Oid srctype = exprType((Node*)linitial(expr->args));
Oid targettype = expr->funcresulttype;
HeapTuple tuple = SearchSysCache2(CASTSOURCETARGET, ObjectIdGetDatum(srctype), ObjectIdGetDatum(targettype));
if (HeapTupleIsValid(tuple)) {
Form_pg_cast castForm = (Form_pg_cast)GETSTRUCT(tuple);
if (castForm->castfunc == expr->funcid && castForm->castcontext == COERCION_CODE_IMPLICIT) {
ReleaseSysCache(tuple);
return true;
}
ReleaseSysCache(tuple);
}
return false;
}
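/*
 * Example (illustrative): for "int4col = numeric_col" the parser wraps the
 * int4 side in a FuncExpr with funcformat = COERCE_IMPLICIT_CAST; the pg_cast
 * lookup above then confirms castfunc == expr->funcid and castcontext ==
 * COERCION_CODE_IMPLICIT, so the wrapper is a genuine implicit conversion
 * rather than an explicit or assignment cast.
 */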
/*
* PreprocessOperator
* Recursively scan the query and do subquery_planner's
* preprocessing work on each OpExpr node, regenerating
* these nodes when string_digit_to_numeric is on.
*/
bool PreprocessOperator(Node* node, void* context)
{
if (node == NULL) {
return false;
}
if (IsA(node, Query)) {
return query_tree_walker((Query*)node, (bool (*)())PreprocessOperator, (void*)context, 0);
} else if (IsA(node, OpExpr)) {
OpExpr* expr = (OpExpr*)node;
/* Only regenerate the operator when opresulttype is bool. */
if (list_length(expr->args) == 2 && expr->opresulttype == BOOLOID && expr->inputcollid == 0) {
Node* ltree = (Node*)list_nth(expr->args, 0);
Node* rtree = (Node*)list_nth(expr->args, 1);
/* Determine if the left and right subtrees are implicit type conversion */
if (IsA(ltree, FuncExpr) && IsImplicitConversion((FuncExpr*)ltree) &&
((FuncExpr*)ltree)->funcresulttype != NUMERICOID) {
ltree = (Node*)linitial(((FuncExpr*)ltree)->args);
}
if (IsA(rtree, FuncExpr) && IsImplicitConversion((FuncExpr*)rtree) &&
((FuncExpr*)rtree)->funcresulttype != NUMERICOID) {
rtree = (Node*)linitial(((FuncExpr*)rtree)->args);
}
Oid ltypeId = exprType(ltree);
Oid rtypeId = exprType(rtree);
if ((IsIntType(ltypeId) && IsCharType(rtypeId)) || (IsIntType(rtypeId) && IsCharType(ltypeId))) {
HeapTuple tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(expr->opno));
if (HeapTupleIsValid(tp)) {
Form_pg_operator optup = (Form_pg_operator)GETSTRUCT(tp);
List* name = list_make1(makeString(NameStr(optup->oprname)));
/* Regenerate the opexpr node. */
OpExpr* newNode = (OpExpr*)make_op(NULL, name, ltree, rtree, expr->location, true);
Node* lexpr = (Node*)list_nth(newNode->args, 0);
Node* rexpr = (Node*)list_nth(newNode->args, 1);
ltypeId = exprType(lexpr);
rtypeId = exprType(rexpr);
if (newNode->opresulttype == BOOLOID && ltypeId == NUMERICOID && rtypeId == NUMERICOID) {
/* Determine if the new subtrees are implicit type conversion */
if (IsA(lexpr, FuncExpr) && IsImplicitConversion((FuncExpr*)lexpr)) {
exprSetInputCollation((Node*)list_nth(newNode->args, 0), exprCollation(ltree));
}
if (IsA(rexpr, FuncExpr) && IsImplicitConversion((FuncExpr*)rexpr)) {
exprSetInputCollation((Node*)list_nth(newNode->args, 1), exprCollation(rtree));
}
errno_t errorno = EOK;
errorno = memcpy_s(node, sizeof(OpExpr), (Node*)newNode, sizeof(OpExpr));
securec_check_c(errorno, "\0", "\0");
}
pfree_ext(newNode);
list_free_ext(name);
ReleaseSysCache(tp);
}
}
}
}
return expression_tree_walker(node, (bool (*)())PreprocessOperator, (void*)context);
}
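/*
 * Illustrative effect (assumes string_digit_to_numeric is on; example SQL is
 * not taken from this file): for
 *     SELECT * FROM t WHERE int_col = char_col;
 * the int-vs-char comparison is regenerated by make_op() so that both
 * operands are implicitly cast to numeric and a numeric "=" operator is used,
 * as described in the header comment above.
 */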
/**
* Check whether the current nodegroup state supports recursive CTE.
* This must be called after ng_init_nodegroup_optimizer has been called and
* is_dngather_support has been assigned.
*/
void check_is_support_recursive_cte(PlannerInfo* root)
{
if (!IS_STREAM_PLAN || !root->is_under_recursive_cte) {
return;
}
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH, "With-Recursive under multi-nodegroup scenario is not shippable");
int different_nodegroup_count = ng_get_different_nodegroup_count();
/* 1. Installation nodegroup, compute nodegroup, single nodegroup. */
if (different_nodegroup_count > 2) {
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
return;
}
/* 2. Installation nodegroup, compute nodegroup. */
if (different_nodegroup_count == 2 && ng_get_single_node_distribution() == NULL) {
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
return;
}
/* 3. Installation nodegroup, single nodegroup which is used */
if (different_nodegroup_count == 2 && u_sess->opt_cxt.is_dngather_support == true) {
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
return;
}
/* 4. Installation nodegroup, single nodegroup but not used. */
/* Installation nodegroup. */
return;
}
/* --------------------
* subquery_planner
* Invokes the planner on a subquery. We recurse to here for each
* sub-SELECT found in the query tree.
*
* glob is the global state for the current planner run.
* parse is the querytree produced by the parser & rewriter.
* parent_root is the immediate parent Query's info (NULL at the top level).
* hasRecursion is true if this is a recursive WITH query.
* tuple_fraction is the fraction of tuples we expect will be retrieved.
* tuple_fraction is interpreted as explained for grouping_planner, below.
*
* If subroot isn't NULL, we pass back the query's final PlannerInfo struct;
* among other things this tells the output sort ordering of the plan.
*
* Basically, this routine does the stuff that should only be done once
* per Query object. It then calls grouping_planner. At one time,
* grouping_planner could be invoked recursively on the same Query object;
* that's not currently true, but we keep the separation between the two
* routines anyway, in case we need it again someday.
*
* subquery_planner will be called recursively to handle sub-Query nodes
* found within the query's expressions and rangetable.
*
* Returns a query plan.
* --------------------
*/
Plan* subquery_planner(PlannerGlobal* glob, Query* parse, PlannerInfo* parent_root, bool hasRecursion,
double tuple_fraction, PlannerInfo** subroot, int options, ItstDisKey* diskeys, List* subqueryRestrictInfo)
{
int num_old_subplans = list_length(glob->subplans);
PlannerInfo* root = NULL;
Plan* plan = NULL;
List* newHaving = NIL;
bool hasOuterJoins = false;
ListCell* l = NULL;
StringInfoData buf;
char RewriteContextName[NAMEDATALEN] = {0};
MemoryContext QueryRewriteContext = NULL;
MemoryContext oldcontext = NULL;
errno_t rc = EOK;
/* We use a DEBUG5 log to print the SQL after each rewrite */
#define DEBUG_QRW(message) \
do { \
if (log_min_messages <= DEBUG5) { \
initStringInfo(&buf); \
deparse_query(root->parse, &buf, NIL, false, false, NULL, true); \
ereport(DEBUG5, (errmodule(MOD_OPT_REWRITE), errmsg("%s: %s", message, buf.data))); \
pfree_ext(buf.data); \
} \
} while (0)
/* Create a PlannerInfo data structure for this subquery */
root = makeNode(PlannerInfo);
root->parse = parse;
root->glob = glob;
root->query_level = parent_root ? parent_root->query_level + 1 : 1;
root->parent_root = parent_root;
root->plan_params = NIL;
root->planner_cxt = CurrentMemoryContext;
root->init_plans = NIL;
root->cte_plan_ids = NIL;
root->eq_classes = NIL;
root->append_rel_list = NIL;
root->rowMarks = NIL;
root->hasInheritedTarget = false;
root->grouping_map = NULL;
root->subquery_type = options;
root->param_upper = NULL;
root->hasRownumQual = false;
/*
* Allocate a memory context for query rewrite in the optimizer.
* OptimizerContext is NULL in the PBE case, which we need to consider.
*/
rc = snprintf_s(RewriteContextName, NAMEDATALEN, NAMEDATALEN - 1, "QueryRewriteContext_%d", root->query_level);
securec_check_ss(rc, "\0", "\0");
QueryRewriteContext = AllocSetContextCreate(CurrentMemoryContext,
RewriteContextName,
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcontext = MemoryContextSwitchTo(QueryRewriteContext);
/*
* Mark that the current PlannerInfo is working on a query block in recursive CTE planning.
* In general we want the sub-planning stages to know that we are under a recursive CTE;
* two cases need this:
* [1]. Call subquery_planner() to plan the query block inside the with-block
* [2]. Call subquery_planner() to plan each query block that makes up the union
* operation, so consequently inherit the "is_under_recursive_cte" property from the
* parent root
*/
if (hasRecursion || (parent_root && parent_root->is_under_recursive_cte)) {
root->is_under_recursive_cte = true;
root->is_under_recursive_tree = parent_root->is_under_recursive_tree;
} else {
root->is_under_recursive_cte = false;
}
check_is_support_recursive_cte(root);
#ifdef PGXC
root->rs_alias_index = 1;
#endif
root->hasRecursion = hasRecursion;
if (hasRecursion)
root->wt_param_id = SS_assign_special_param(root);
else
root->wt_param_id = -1;
root->qualSecurityLevel = 0;
root->non_recursive_plan = NULL;
root->subqueryRestrictInfo = subqueryRestrictInfo;
/* Mark that the current planner root is correlated as well */
if (parent_root != NULL && parent_root->is_under_recursive_cte && parent_root->is_correlated) {
root->is_correlated = true;
}
DEBUG_QRW("Before rewrite");
preprocess_const_params(root, (Node*)parse->jointree);
DEBUG_QRW("After const params replace ");
/*
* Try to substitute CTEs with subqueries.
*/
if (IS_STREAM_PLAN)
substitute_ctes_with_subqueries(root, parse, root->is_under_recursive_tree);
DEBUG_QRW("After CTE substitution");
/*
* If there is a WITH list, process each WITH query and build an initplan
* SubPlan structure for it. For a stream plan, CTEs have already been replaced, so
* there is no need to do this.
*
* For recursive CTEs we still process this the same way
*/
if (parse->cteList) {
SS_process_ctes(root);
}
#ifdef STREAMPLAN
/*
* Since count(distinct) conversion can push down a subquery, we put it ahead
* of sublink pullup to avoid duplicating the sublink pullup work
*/
if (IS_STREAM_PLAN && parse->hasAggs) {
convert_multi_count_distinct(root);
DEBUG_QRW("After multi count distinct rewrite");
}
#endif
/*
* Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
* to transform them into joins. Note that this step does not descend
* into subqueries; if we pull up any subqueries below, their SubLinks are
* processed just before pulling them up.
*/
if (parse->hasSubLinks) {
pull_up_sublinks(root);
DEBUG_QRW("After sublink pullup");
}
/* Reduce orderby clause in subquery for join */
reduce_orderby(parse, false);
DEBUG_QRW("After order by reduce");
if (u_sess->attr.attr_sql.enable_constraint_optimization) {
removeNotNullTest(root);
DEBUG_QRW("After soft constraint removal");
}
/*
* Scan the rangetable for set-returning functions, and inline them if
* possible (producing subqueries that might get pulled up next).
* Recursion issues here are handled in the same way as for SubLinks.
*/
inline_set_returning_functions(root);
if ((LAZY_AGG & u_sess->attr.attr_sql.rewrite_rule) && permit_from_rewrite_hint(root, LAZY_AGG)) {
lazyagg_main(parse);
DEBUG_QRW("After lazyagg");
}
/*
* Here we only control the select permission for plan_table_data. See checkPTRelkind() for details.
* The flag will be used in ExecCheckRTEPerms.
*/
if (parse->commandType == CMD_SELECT && checkSelectStmtForPlanTable(parse->rtable)) {
OnlySelectFromPlanTable = true;
}
#ifndef ENABLE_MULTIPLE_NODES
/* Change ROWNUM to LIMIT if possible */
preprocess_rownum(root, parse);
DEBUG_QRW("After preprocess rownum");
#endif
/*
* Check to see if any subqueries in the jointree can be merged into this
* query.
*/
parse->jointree = (FromExpr*)pull_up_subqueries(root, (Node*)parse->jointree);
DEBUG_QRW("After simple subquery pull up");
/*
* If this is a simple UNION ALL query, flatten it into an appendrel. We
* do this now because it requires applying pull_up_subqueries to the leaf
* queries of the UNION ALL, which weren't touched above because they
* weren't referenced by the jointree (they will be after we do this).
*/
if (parse->setOperations) {
flatten_simple_union_all(root);
DEBUG_QRW("After simple union all flatten");
}
/* Transform hints. */
transform_hints(root, parse, parse->hintState);
DEBUG_QRW("After transform hint");
/*
* Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
* avoid the expense of doing flatten_join_alias_vars(). Also check for
* outer joins --- if none, we can skip reduce_outer_joins(). This must be
* done after we have done pull_up_subqueries, of course.
*/
root->hasJoinRTEs = false;
root->hasLateralRTEs = false;
hasOuterJoins = false;
foreach (l, parse->rtable) {
RangeTblEntry* rte = (RangeTblEntry*)lfirst(l);
if (rte->rtekind == RTE_JOIN) {
root->hasJoinRTEs = true;
if (IS_OUTER_JOIN(rte->jointype)) {
hasOuterJoins = true;
}
}
if (rte->lateral)
root->hasLateralRTEs = true;
}
/*
* Preprocess RowMark information. We need to do this after subquery
* pullup (so that all non-inherited RTEs are present) and before
* inheritance expansion (so that the info is available for
* expand_inherited_tables to examine and modify).
*/
preprocess_rowmarks(root);
#ifdef PGXC
/*
 * On Coordinators we separate row marks into two groups:
 * one comprises row marks of types ROW_MARK_EXCLUSIVE & ROW_MARK_SHARE
 * and the other contains the rest of the row mark types.
 * The former is handled on the Coordinator in such a way that
 * FOR UPDATE/SHARE gets added to the remote query, whereas
 * the latter needs to be handled the way PG does.
 *
 * Notice: This is not a very efficient way of handling row marks.
 * Consider this join query:
 * select * from t1, t2 where t1.val = t2.val for update
 * It results in this query being fired at the Datanodes:
 * SELECT val, val2, ctid FROM ONLY t2 WHERE true FOR UPDATE OF t2
 * We are locking the complete table whereas we should have locked
 * only the rows where t1.val = t2.val is met.
 *
 * We won't really call separate_rowmarks before we support FOR UPDATE with
 * remotequery.
 */
if (!IS_STREAM_PLAN)
separate_rowmarks(root);
#endif
/*
* When the SQL does not support stream mode on the coordinator node, we must send a remotequery
* to the datanodes, and need not expand a dfs table into its dfs main table and delta table.
* Dfs table expansion is always supported on the datanodes.
*/
if (u_sess->opt_cxt.is_stream || IS_PGXC_DATANODE) {
/*
* Expand the Dfs table.
*/
expand_dfs_tables(root);
}
/*
* Expand any rangetable entries that are inheritance sets into "append
* relations". This can add entries to the rangetable, but they must be
* plain base relations not joins, so it's OK (and marginally more
* efficient) to do it after checking for join RTEs. We must do it after
* pulling up subqueries, else we'd fail to handle inherited tables in
* subqueries.
*/
expand_inherited_tables(root);
/*
* Set hasHavingQual to remember if HAVING clause is present. Needed
* because preprocess_expression will reduce a constant-true condition to
* an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
*/
root->hasHavingQual = (parse->havingQual != NULL);
/* Clear this flag; might get set in distribute_qual_to_rels */
root->hasPseudoConstantQuals = false;
/*
* Calculate how many tables are in the current query level, and give a
* rough estimate of the work mem for each relation
*/
int work_mem_orig = u_sess->opt_cxt.op_work_mem;
int esti_op_mem_orig = root->glob->estiopmem;
if (root->glob->minopmem > 0) {
int num_rel = 0;
foreach (l, parse->rtable) {
RangeTblEntry* rte = (RangeTblEntry*)lfirst(l);
if (rte->rtekind == RTE_RELATION || rte->rtekind == RTE_SUBQUERY) {
num_rel++;
}
}
if (num_rel <= 1) {
if ((parse->groupClause || parse->sortClause || parse->distinctClause))
num_rel = 2;
else
num_rel = 1;
}
root->glob->estiopmem = Max(root->glob->minopmem, (double)root->glob->estiopmem / ceil(LOG2(num_rel + 1)));
u_sess->opt_cxt.op_work_mem = Min(root->glob->estiopmem, OPT_MAX_OP_MEM);
AssertEreport(u_sess->opt_cxt.op_work_mem > 0,
MOD_OPT,
"invalid operator work mem when roughtly estimating the work memory for each relation");
}
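/*
 * Worked example (illustrative numbers): with minopmem = 4096, estiopmem =
 * 32768 and num_rel = 3, the divisor is ceil(LOG2(3 + 1)) = 2, so estiopmem
 * becomes Max(4096, 32768 / 2) = 16384 and op_work_mem is Min(16384,
 * OPT_MAX_OP_MEM).
 */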
/*
* Do expression preprocessing on targetlist and quals, as well as other
* random expressions in the querytree. Note that we do not need to
* handle sort/group expressions explicitly, because they are actually
* part of the targetlist.
*/
parse->targetList = (List*)preprocess_expression(root, (Node*)parse->targetList, EXPRKIND_TARGET);
parse->returningList = (List*)preprocess_expression(root, (Node*)parse->returningList, EXPRKIND_TARGET);
preprocess_qual_conditions(root, (Node*)parse->jointree);
parse->havingQual = preprocess_expression(root, parse->havingQual, EXPRKIND_QUAL);
foreach (l, parse->windowClause) {
WindowClause* wc = (WindowClause*)lfirst(l);
/* partitionClause/orderClause are sort/group expressions */
wc->startOffset = preprocess_expression(root, wc->startOffset, EXPRKIND_LIMIT);
wc->endOffset = preprocess_expression(root, wc->endOffset, EXPRKIND_LIMIT);
}
parse->limitOffset = preprocess_expression(root, parse->limitOffset, EXPRKIND_LIMIT);
if (parse->limitCount != NULL && !IsA(parse->limitCount, Const)) {
parse->limitCount = preprocess_expression(root, parse->limitCount, EXPRKIND_LIMIT);
}
foreach (l, parse->mergeActionList) {
MergeAction* action = (MergeAction*)lfirst(l);
action->targetList = (List*)preprocess_expression(root, (Node*)action->targetList, EXPRKIND_TARGET);
action->pulluped_targetList =
(List*)preprocess_expression(root, (Node*)(action->pulluped_targetList), EXPRKIND_TARGET);
action->qual = preprocess_expression(root, (Node*)action->qual, EXPRKIND_QUAL);
}
parse->mergeSourceTargetList =
(List*)preprocess_expression(root, (Node*)parse->mergeSourceTargetList, EXPRKIND_TARGET);
if (parse->upsertClause) {
parse->upsertClause->updateTlist = (List*)
preprocess_expression(root, (Node*)parse->upsertClause->updateTlist, EXPRKIND_TARGET);
}
root->append_rel_list = (List*)preprocess_expression(root, (Node*)root->append_rel_list, EXPRKIND_APPINFO);
/* Also need to preprocess expressions for function and values RTEs */
foreach (l, parse->rtable) {
RangeTblEntry* rte = (RangeTblEntry*)lfirst(l);
int kind;
if (rte->rtekind == RTE_RELATION) {
if (rte->tablesample) {
rte->tablesample =
(TableSampleClause*)preprocess_expression(root, (Node*)rte->tablesample, EXPRKIND_TABLESAMPLE);
}
} else if (rte->rtekind == RTE_SUBQUERY) {
/*
* We don't want to do all preprocessing yet on the subquery's
* expressions, since that will happen when we plan it. But if it
* contains any join aliases of our level, those have to get
* expanded now, because planning of the subquery won't do it.
* That's only possible if the subquery is LATERAL.
*/
if (rte->lateral && root->hasJoinRTEs)
rte->subquery = (Query *)flatten_join_alias_vars(root, (Node *) rte->subquery);
} else if (rte->rtekind == RTE_FUNCTION) {
/* Preprocess the function expression fully */
kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
rte->funcexpr = preprocess_expression(root, rte->funcexpr, kind);
} else if (rte->rtekind == RTE_VALUES) {
/* Preprocess the values lists fully */
kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
rte->values_lists = (List*)preprocess_expression(root, (Node*)rte->values_lists, kind);
}
/*
* Process each element of the securityQuals list as if it were a
* separate qual expression (as indeed it is). We need to do it this
* way to get proper canonicalization of AND/OR structure. Note that
* this converts each element into an implicit-AND sublist.
*/
ListCell* cell = NULL;
foreach (cell, rte->securityQuals) {
lfirst(cell) = preprocess_expression(root, (Node*)lfirst(cell), EXPRKIND_QUAL);
}
}
DEBUG_QRW("After preprocess expressions");
u_sess->opt_cxt.op_work_mem = work_mem_orig;
root->glob->estiopmem = esti_op_mem_orig;
/*
* In some cases we may want to transfer a HAVING clause into WHERE. We
* cannot do so if the HAVING clause contains aggregates (obviously) or
* volatile functions (since a HAVING clause is supposed to be executed
* only once per group). Also, it may be that the clause is so expensive
* to execute that we're better off doing it only once per group, despite
* the loss of selectivity. This is hard to estimate short of doing the
* entire planning process twice, so we use a heuristic: clauses
* containing subplans are left in HAVING. Otherwise, we move or copy the
* HAVING clause into WHERE, in hopes of eliminating tuples before
* aggregation instead of after.
*
* If the query has explicit grouping then we can simply move such a
* clause into WHERE; any group that fails the clause will not be in the
* output because none of its tuples will reach the grouping or
* aggregation stage. Otherwise we must have a degenerate (variable-free)
* HAVING clause, which we put in WHERE so that query_planner() can use it
* in a gating Result node, but also keep in HAVING to ensure that we
* don't emit a bogus aggregated row. (This could be done better, but it
* seems not worth optimizing.)
*
* Note that both havingQual and parse->jointree->quals are in
* implicitly-ANDed-list form at this point, even though they are declared
* as Node *.
*/
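    /*
     * Illustrative example (hypothetical table t): in
     *   select b, sum(a) from t group by b having b > 10 and sum(a) > 100;
     * "b > 10" contains no aggregates, volatile functions or subplans, so it
     * can be moved into WHERE to filter rows before aggregation, while
     * "sum(a) > 100" must stay in HAVING.
     */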
if (!parse->unique_check) {
newHaving = NIL;
foreach(l, (List *) parse->havingQual) {
Node *havingclause = (Node *)lfirst(l);
/*
             * For groupingSets, the having clause can only be evaluated in havingQual;
             * it cannot be pushed down to the lefttree's qual.
             *
             * For example:
             * select sum(a), b from t group by rollup(a, b) having b > 10;
             * this means: group by a, b
             *             group by a
             *             group by ()
             *
             * For "group by ()", we need to calculate sum(a) over all of the lefttree's rows.
*/
if (contain_agg_clause(havingclause) ||
contain_volatile_functions(havingclause) ||
contain_subplans(havingclause)
|| parse->groupingSets) {
/* keep it in HAVING */
newHaving = lappend(newHaving, havingclause);
} else if (parse->groupClause) {
/* move it to WHERE */
parse->jointree->quals = (Node *)
lappend((List *) parse->jointree->quals, havingclause);
} else {
/* put a copy in WHERE, keep it in HAVING */
parse->jointree->quals = (Node *)
lappend((List *) parse->jointree->quals,
copyObject(havingclause));
newHaving = lappend(newHaving, havingclause);
}
}
parse->havingQual = (Node *) newHaving;
}
DEBUG_QRW("After having qual rewrite");
passdown_itst_keys_to_subroot(root, diskeys);
/*
* If we have any outer joins, try to reduce them to plain inner joins.
* This step is most easily done after we've done expression
* preprocessing.
*/
if (hasOuterJoins) {
reduce_outer_joins(root);
DEBUG_QRW("After outer-to-inner conversion");
if (IS_STREAM_PLAN) {
bool support_rewrite = true;
if (!fulljoin_2_left_union_right_anti_support(root->parse))
support_rewrite = false;
if (contain_volatile_functions((Node*)root->parse))
support_rewrite = false;
contain_func_context context =
init_contain_func_context(list_make3_oid(ECEXTENSIONFUNCOID, ECHADOOPFUNCOID, RANDOMFUNCOID));
if (contains_specified_func((Node*)root->parse, &context)) {
char* func_name = get_func_name(((FuncExpr*)linitial(context.func_exprs))->funcid);
ereport(DEBUG2,
(errmodule(MOD_OPT_REWRITE),
(errmsg("[Not rewrite full Join on true]: %s functions contained.", func_name))));
pfree_ext(func_name);
list_free_ext(context.funcids);
context.funcids = NIL;
list_free_ext(context.func_exprs);
context.func_exprs = NIL;
support_rewrite = false;
}
if (support_rewrite) {
reduce_inequality_fulljoins(root);
DEBUG_QRW("After full join conversion");
}
}
}
/*
     * Check whether auto-analyze is needed for the current query level.
     * There is no need to auto-analyze a query on a single table without GROUP BY.
*/
if (u_sess->attr.attr_sql.enable_autoanalyze && !u_sess->analyze_cxt.need_autoanalyze && IS_STREAM_PLAN &&
(list_length(parse->rtable) > 1 || parse->groupClause)) {
/* inherit upper level and check for current query level */
u_sess->analyze_cxt.need_autoanalyze = true;
}
(void)MemoryContextSwitchTo(oldcontext);
/*
* Do the main planning. If we have an inherited target relation, that
* needs special processing, else go straight to grouping_planner.
*/
if (parse->resultRelation && parse->commandType != CMD_INSERT &&
rt_fetch(parse->resultRelation, parse->rtable)->inh)
plan = inheritance_planner(root);
else {
plan = grouping_planner(root, tuple_fraction);
/* If it's not SELECT, we need a ModifyTable node */
if (parse->commandType != CMD_SELECT) {
List* returningLists = NIL;
List* rowMarks = NIL;
Relation mainRel = NULL;
            Oid tableOid = rt_fetch(parse->resultRelation, parse->rtable)->relid;
            bool partKeyUpdated = targetListHasPartitionKey(parse->targetList, tableOid);
            mainRel = RelationIdGetRelation(tableOid);
bool isDfsStore = RelationIsDfsStore(mainRel);
RelationClose(mainRel);
/*
* Set up the RETURNING list-of-lists, if needed.
*/
if (parse->returningList)
returningLists = list_make1(parse->returningList);
else
returningLists = NIL;
/*
* If there was a FOR UPDATE/SHARE clause, the LockRows node will
* have dealt with fetching non-locked marked rows, else we need
* to have ModifyTable do that.
*/
if (parse->rowMarks)
rowMarks = NIL;
else
rowMarks = root->rowMarks;
#ifdef STREAMPLAN
plan = (Plan*)make_modifytable(root,
parse->commandType,
parse->canSetTag,
list_make1_int(parse->resultRelation),
list_make1(plan),
returningLists,
rowMarks,
SS_assign_special_param(root),
partKeyUpdated,
parse->mergeTarget_relation,
parse->mergeSourceTargetList,
parse->mergeActionList,
parse->upsertClause,
isDfsStore);
#else
plan = (Plan*)make_modifytable(parse->commandType,
parse->canSetTag,
list_make1_int(parse->resultRelation),
list_make1(plan),
returningLists,
rowMarks,
SS_assign_special_param(root),
partKeyUpdated,
parse->mergeTarget_relation,
parse->mergeSourceTargetList,
parse->mergeActionList,
parse->upsertClause,
isDfsStore);
#endif
#ifdef PGXC
plan = pgxc_make_modifytable(root, plan);
#endif
}
}
/*
* If any subplans were generated, or if there are any parameters to worry
* about, build initPlan list and extParam/allParam sets for plan nodes,
* and attach the initPlans to the top plan node.
*/
if (plan == NULL)
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_OPTIMIZER_INCONSISTENT_STATE),
(errmsg("Fail to generate subquery plan."))));
if (list_length(glob->subplans) != num_old_subplans || root->glob->nParamExec > 0)
SS_finalize_plan(root, plan, true);
/* Return internal info if caller wants it */
if (subroot != NULL)
*subroot = root;
/* add not-used hints information to warning string */
if (parse->hintState)
desc_hint_in_state(root, parse->hintState);
/* Fix var's if we have changed var */
if (root->var_mappings != NIL) {
fix_vars_plannode(root, plan);
}
return plan;
}
/*
* preprocess_expression
* Do subquery_planner's preprocessing work for an expression,
* which can be a targetlist, a WHERE clause (including JOIN/ON
* conditions), or a HAVING clause.
*/
Node* preprocess_expression(PlannerInfo* root, Node* expr, int kind)
{
/*
* Fall out quickly if expression is empty. This occurs often enough to
* be worth checking. Note that null->null is the correct conversion for
* implicit-AND result format, too.
*/
if (expr == NULL)
return NULL;
/*
* If the query has any join RTEs, replace join alias variables with
* base-relation variables. We must do this before sublink processing,
* else sublinks expanded out from join aliases wouldn't get processed. We
* can skip it in VALUES lists, however, since they can't contain any Vars
* at all.
*/
if (root->hasJoinRTEs && !(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES))
expr = flatten_join_alias_vars(root, expr);
/*
* Simplify constant expressions.
*
* Note: an essential effect of this is to convert named-argument function
* calls to positional notation and insert the current actual values of
* any default arguments for functions. To ensure that happens, we *must*
* process all expressions here. Previous PG versions sometimes skipped
* const-simplification if it didn't seem worth the trouble, but we can't
* do that anymore.
*
* Note: this also flattens nested AND and OR expressions into N-argument
* form. All processing of a qual expression after this point must be
* careful to maintain AND/OR flatness --- that is, do not generate a tree
* with AND directly under AND, nor OR directly under OR.
*/
expr = eval_const_expressions(root, expr);
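    /*
     * Illustrative sketch (not tied to any particular query): after this call,
     * a nested qual such as ((a AND b) AND c) is held as a single N-argument
     * AND(a, b, c), and a named-argument call like f(x := 1) has been rewritten
     * to positional form with any defaulted arguments filled in.
     */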
/*
* If it's a qual or havingQual, canonicalize it.
*/
if (kind == EXPRKIND_QUAL) {
expr = (Node*)canonicalize_qual((Expr*)expr, false);
#ifdef OPTIMIZER_DEBUG
printf("After canonicalize_qual()\n");
pprint(expr);
#endif
}
/* Expand SubLinks to SubPlans */
if (root->parse->hasSubLinks)
expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
/*
* XXX do not insert anything here unless you have grokked the comments in
* SS_replace_correlation_vars ...
*
* Replace uplevel vars with Param nodes (this IS possible in VALUES)
*/
if (root->query_level > 1)
expr = SS_replace_correlation_vars(root, expr);
/*
* If it's a qual or havingQual, convert it to implicit-AND format. (We
* don't want to do this before eval_const_expressions, since the latter
* would be unable to simplify a top-level AND correctly. Also,
* SS_process_sublinks expects explicit-AND format.)
*/
if (kind == EXPRKIND_QUAL)
expr = (Node*)make_ands_implicit((Expr*)expr);
return expr;
}
/*
* preprocess_qual_conditions
* Recursively scan the query's jointree and do subquery_planner's
* preprocessing work on each qual condition found therein.
*/
void preprocess_qual_conditions(PlannerInfo* root, Node* jtnode)
{
if (jtnode == NULL)
return;
if (IsA(jtnode, RangeTblRef)) {
/* nothing to do here */
} else if (IsA(jtnode, FromExpr)) {
FromExpr* f = (FromExpr*)jtnode;
ListCell* l = NULL;
foreach (l, f->fromlist)
preprocess_qual_conditions(root, (Node*)lfirst(l));
f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
} else if (IsA(jtnode, JoinExpr)) {
JoinExpr* j = (JoinExpr*)jtnode;
preprocess_qual_conditions(root, j->larg);
preprocess_qual_conditions(root, j->rarg);
j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
} else {
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmsg("unrecognized node type when process qual condition: %d", (int)nodeTag(jtnode))));
}
}
/*
* preprocess_phv_expression
* Do preprocessing on a PlaceHolderVar expression that's been pulled up.
*
* If a LATERAL subquery references an output of another subquery, and that
* output must be wrapped in a PlaceHolderVar because of an intermediate outer
* join, then we'll push the PlaceHolderVar expression down into the subquery
* and later pull it back up during find_lateral_references, which runs after
* subquery_planner has preprocessed all the expressions that were in the
* current query level to start with. So we need to preprocess it then.
*/
Expr *
preprocess_phv_expression(PlannerInfo *root, Expr *expr)
{
return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
}
/*
* preprocess_const_params
* Recursively scan the query's jointree and do subquery_planner's
* preprocessing work on each qual condition found therein to replace
* params with const value if possible
*/
void preprocess_const_params(PlannerInfo* root, Node* jtnode)
{
if (jtnode == NULL)
return;
if (IsA(jtnode, RangeTblRef)) {
/* nothing to do here */
} else if (IsA(jtnode, FromExpr)) {
FromExpr* f = (FromExpr*)jtnode;
ListCell* l = NULL;
foreach (l, f->fromlist)
preprocess_const_params(root, (Node*)lfirst(l));
f->quals = preprocess_const_params_worker(root, f->quals, EXPRKIND_QUAL);
} else if (IsA(jtnode, JoinExpr)) {
JoinExpr* j = (JoinExpr*)jtnode;
preprocess_const_params(root, j->larg);
preprocess_const_params(root, j->rarg);
j->quals = preprocess_const_params_worker(root, j->quals, EXPRKIND_QUAL);
} else {
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmsg("unrecognized node type: %d", (int)nodeTag(jtnode))));
}
}
/*
* preprocess_const_params_worker
* worker func for params to const replacement
* so much like preprocess_expression() but only do eval_const_expressions
*/
static Node* preprocess_const_params_worker(PlannerInfo* root, Node* expr, int kind)
{
/*
* Fall out quickly if expression is empty. This occurs often enough to
* be worth checking. Note that null->null is the correct conversion for
* implicit-AND result format, too.
*/
if (expr == NULL)
return NULL;
/*
* Simplify constant expressions.
*
* Note: an essential effect of this is to convert named-argument function
* calls to positional notation and insert the current actual values of
* any default arguments for functions. To ensure that happens, we *must*
* process all expressions here. Previous PG versions sometimes skipped
* const-simplification if it didn't seem worth the trouble, but we can't
* do that anymore.
*
* Note: this also flattens nested AND and OR expressions into N-argument
* form. All processing of a qual expression after this point must be
* careful to maintain AND/OR flatness --- that is, do not generate a tree
* with AND directly under AND, nor OR directly under OR.
*/
expr = eval_const_expressions(root, expr);
return expr;
}
/*
* inheritance_planner
* Generate a plan in the case where the result relation is an
* inheritance set.
*
* We have to handle this case differently from cases where a source relation
* is an inheritance set. Source inheritance is expanded at the bottom of the
* plan tree (see allpaths.c), but target inheritance has to be expanded at
* the top. The reason is that for UPDATE, each target relation needs a
* different targetlist matching its own column set. Fortunately,
* the UPDATE/DELETE target can never be the nullable side of an outer join,
* so it's OK to generate the plan this way.
*
* Returns a query plan.
*/
static Plan* inheritance_planner(PlannerInfo* root)
{
Query* parse = root->parse;
int parentRTindex = parse->resultRelation;
List* final_rtable = NIL;
int save_rel_array_size = 0;
RelOptInfo** save_rel_array = NULL;
RangeTblEntry** save_rte_array = NULL;
List* subplans = NIL;
List* resultRelations = NIL;
List* returningLists = NIL;
List* rowMarks = NIL;
ListCell* lc = NULL;
bool isDfsStore = false;
bool partKeyUpdated = false;
    Oid tableOid = rt_fetch(parse->resultRelation, parse->rtable)->relid;
    Relation mainRel = NULL;
    partKeyUpdated = targetListHasPartitionKey(parse->targetList, tableOid);
    mainRel = RelationIdGetRelation(tableOid);
AssertEreport(RelationIsValid(mainRel),
MOD_OPT,
"The relation descriptor is invalid"
"when generating a query plan and the result relation is an inherient plan.");
isDfsStore = RelationIsDfsStore(mainRel);
RelationClose(mainRel);
/*
* We generate a modified instance of the original Query for each target
* relation, plan that, and put all the plans into a list that will be
* controlled by a single ModifyTable node. All the instances share the
* same rangetable, but each instance must have its own set of subquery
* RTEs within the finished rangetable because (1) they are likely to get
* scribbled on during planning, and (2) it's not inconceivable that
* subqueries could get planned differently in different cases. We need
* not create duplicate copies of other RTE kinds, in particular not the
* target relations, because they don't have either of those issues. Not
* having to duplicate the target relations is important because doing so
* (1) would result in a rangetable of length O(N^2) for N targets, with
* at least O(N^3) work expended here; and (2) would greatly complicate
* management of the rowMarks list.
*/
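    /*
     * Hypothetical illustration: for "UPDATE parent SET ... FROM (SELECT ...) s"
     * with child tables c1 and c2, the subquery RTE for s gets a fresh copy in
     * each child's rangetable, while the target RTEs for c1 and c2 are shared
     * by all the per-child plans.
     */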
foreach (lc, root->append_rel_list) {
AppendRelInfo* appinfo = (AppendRelInfo*)lfirst(lc);
PlannerInfo subroot;
Plan* subplan = NULL;
Index rti;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != (uint)parentRTindex)
continue;
/*
* We need a working copy of the PlannerInfo so that we can control
* propagation of information back to the main copy.
*/
errno_t rc = memcpy_s(&subroot, sizeof(PlannerInfo), root, sizeof(PlannerInfo));
securec_check(rc, "\0", "\0");
/*
* Generate modified query with this rel as target. We first apply
* adjust_appendrel_attrs, which copies the Query and changes
* references to the parent RTE to refer to the current child RTE,
* then fool around with subquery RTEs.
*/
subroot.parse = (Query*)adjust_appendrel_attrs(root, (Node*)parse, appinfo);
/*
* The rowMarks list might contain references to subquery RTEs, so
* make a copy that we can apply ChangeVarNodes to. (Fortunately, the
* executor doesn't need to see the modified copies --- we can just
* pass it the original rowMarks list.)
*/
subroot.rowMarks = (List*)copyObject(root->rowMarks);
/*
* Add placeholders to the child Query's rangetable list to fill the
* RT indexes already reserved for subqueries in previous children.
* These won't be referenced, so there's no need to make them very
* valid-looking.
*/
while (list_length(subroot.parse->rtable) < list_length(final_rtable))
subroot.parse->rtable = lappend(subroot.parse->rtable, makeNode(RangeTblEntry));
/*
* If this isn't the first child Query, generate duplicates of all
* subquery RTEs, and adjust Var numbering to reference the
* duplicates. To simplify the loop logic, we scan the original rtable
* not the copy just made by adjust_appendrel_attrs; that should be OK
* since subquery RTEs couldn't contain any references to the target
* rel.
*/
if (final_rtable != NIL) {
ListCell* lr = NULL;
rti = 1;
foreach (lr, parse->rtable) {
RangeTblEntry* rte = (RangeTblEntry*)lfirst(lr);
if (rte->rtekind == RTE_SUBQUERY) {
Index newrti;
/*
* The RTE can't contain any references to its own RT
* index, so we can save a few cycles by applying
* ChangeVarNodes before we append the RTE to the
* rangetable.
*/
newrti = list_length(subroot.parse->rtable) + 1;
ChangeVarNodes((Node*)subroot.parse, rti, newrti, 0);
ChangeVarNodes((Node*)subroot.rowMarks, rti, newrti, 0);
rte = (RangeTblEntry*)copyObject(rte);
subroot.parse->rtable = lappend(subroot.parse->rtable, rte);
}
rti++;
}
}
/* We needn't modify the child's append_rel_list */
/* There shouldn't be any OJ info to translate, as yet */
AssertEreport(subroot.join_info_list == NIL,
MOD_OPT,
"the list of SpecialJoinInfos is not NIL when generating a modified instance of the original Query for "
"each target relation.");
Assert(subroot.lateral_info_list == NIL);
/* and we haven't created PlaceHolderInfos, either */
AssertEreport(subroot.placeholder_list == NIL,
MOD_OPT,
"the list of PlaceHolderInfos is not NIL when generating a modified instance of the original Query for "
"each target relation.");
/* hack to mark target relation as an inheritance partition */
subroot.hasInheritedTarget = true;
/* Generate plan */
subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */);
AssertEreport(subplan != NULL, MOD_OPT, "subplan is NULL when generating a plan.");
/*
* If this child rel was excluded by constraint exclusion, exclude it
* from the result plan.
*/
if (is_dummy_plan(subplan))
continue;
subplans = lappend(subplans, subplan);
/*
* If this is the first non-excluded child, its post-planning rtable
* becomes the initial contents of final_rtable; otherwise, append
* just its modified subquery RTEs to final_rtable.
*/
if (final_rtable == NIL)
final_rtable = subroot.parse->rtable;
else
final_rtable = list_concat(final_rtable, list_copy_tail(subroot.parse->rtable, list_length(final_rtable)));
/*
* We need to collect all the RelOptInfos from all child plans into
* the main PlannerInfo, since setrefs.c will need them. We use the
* last child's simple_rel_array (previous ones are too short), so we
* have to propagate forward the RelOptInfos that were already built
* in previous children.
*/
AssertEreport(subroot.simple_rel_array_size >= save_rel_array_size,
MOD_OPT,
"the allocated size of array is smaller when collecting all the RelOptInfos.");
for (rti = 1; rti < (unsigned int)save_rel_array_size; rti++) {
RelOptInfo* brel = save_rel_array[rti];
RangeTblEntry* rte = save_rte_array[rti];
if (brel != NULL) {
subroot.simple_rel_array[rti] = brel;
}
if (rte != NULL) {
subroot.simple_rte_array[rti] = rte;
}
}
save_rel_array_size = subroot.simple_rel_array_size;
save_rel_array = subroot.simple_rel_array;
save_rte_array = subroot.simple_rte_array;
/* Make sure any initplans from this rel get into the outer list */
root->init_plans = subroot.init_plans;
/* Build list of target-relation RT indexes */
resultRelations = lappend_int(resultRelations, appinfo->child_relid);
/* Build list of per-relation RETURNING targetlists */
if (parse->returningList)
returningLists = lappend(returningLists, subroot.parse->returningList);
AssertEreport(parse->mergeActionList == NIL,
MOD_OPT,
"list of actions for MERGE is not empty when generating a plan in the case where the result relation is an "
"inheritance set");
}
/* Mark result as unordered (probably unnecessary) */
root->query_pathkeys = NIL;
/*
* If we managed to exclude every child rel, return a dummy plan; it
* doesn't even need a ModifyTable node.
*/
if (subplans == NIL) {
/* although dummy, it must have a valid tlist for executor */
List* tlist = NIL;
tlist = preprocess_targetlist(root, parse->targetList);
return (Plan*)make_result(root, tlist, (Node*)list_make1(makeBoolConst(false, false)), NULL);
}
/*
* Put back the final adjusted rtable into the master copy of the Query.
*/
parse->rtable = final_rtable;
root->simple_rel_array_size = save_rel_array_size;
root->simple_rel_array = save_rel_array;
root->simple_rte_array = save_rte_array;
/*
* If there was a FOR UPDATE/SHARE clause, the LockRows node will have
* dealt with fetching non-locked marked rows, else we need to have
* ModifyTable do that.
*/
if (parse->rowMarks)
rowMarks = NIL;
else
rowMarks = root->rowMarks;
/* And last, tack on a ModifyTable node to do the UPDATE/DELETE work */
#ifdef STREAMPLAN
return make_modifytables(root,
parse->commandType,
parse->canSetTag,
resultRelations,
subplans,
returningLists,
rowMarks,
SS_assign_special_param(root),
partKeyUpdated,
isDfsStore,
0,
NULL,
NULL,
NULL);
#else
return make_modifytables(parse->commandType,
parse->canSetTag,
resultRelations,
subplans,
returningLists,
rowMarks,
SS_assign_special_param(root),
partKeyUpdated,
isDfsStore,
0,
NULL,
NULL,
NULL);
#endif
}
/*
 * @Description: Set a SortGroupClause's groupSet to true if it appears in the group by
 * clause but does not appear in collectiveGroupExpr, which means its value will be
 * altered by the grouping sets later; also delete this expr from the EquivalenceMembers.
*
* @in root: Per-query information for planning/optimization
* @in sort_group_clauses: Sort group clause, include sort, group by, partition by, distinct, etc.
* @in tlist: target lists.
 * @in collectiveGroupExpr: collective group exprs that appear in all group by clauses for the OLAP function.
*
*/
static void set_groupset_for_sortgroup_items(
PlannerInfo* root, List* sort_group_clauses, List* tlist, List* collectiveGroupExpr)
{
List* groupClause = root->parse->groupClause;
List* groupExpr = get_sortgrouplist_exprs(groupClause, tlist);
SortGroupClause* sortcl = NULL;
Expr* sortExpr = NULL;
/*
     * If this expr takes part in grouping but is not included in every group clause,
     * set groupSet to true. That means we will build a pathkey again for distinct,
     * sort or window functions.
*/
ListCell* cell = NULL;
foreach (cell, sort_group_clauses) {
sortcl = (SortGroupClause*)lfirst(cell);
sortExpr = (Expr*)get_sortgroupclause_expr(sortcl, tlist);
/*
         * This expr is in the group exprs but not in collectiveGroupExpr, so set groupSet
         * to true. The group exprs are used for the comparison since SortGroupClauses can
         * differ in some properties.
*/
if (list_member(groupExpr, sortExpr) && !list_member(collectiveGroupExpr, sortExpr)) {
sortcl->groupSet = true;
}
}
list_free_ext(groupExpr);
}
/*
 * @Description: Check whether node is a member of the group expr list.
* @in list: Group expr list.
* @in node: Expr.
*/
static bool group_member(List* list, Expr* node)
{
Var* var1 = locate_distribute_var(node);
ListCell* cell = NULL;
foreach (cell, list) {
Expr* expr = (Expr*)lfirst(cell);
if (equal(expr, node)) {
return true;
} else {
Var* var2 = locate_distribute_var(expr);
if (var2 != NULL && equal(var1, var2)) {
return true;
}
}
}
return false;
}
/*
* @Description: Set sort+group distribute keys.
* @in root - Per-query information for planning/optimization.
* @in result_plan - group agg plan.
* @in collectiveGroupExpr - collective group exprs.
*
*/
static void adjust_plan_dis_key(PlannerInfo* root, Plan* result_plan, List* collectiveGroupExpr)
{
EquivalenceClass* ec = NULL;
ListCell* cell = NULL;
ListCell* lc2 = NULL;
/* Do a copy since distribute key is shared by multiple operators */
result_plan->distributed_keys = list_copy(result_plan->distributed_keys);
foreach (cell, result_plan->distributed_keys) {
Expr* dis_key = (Expr*)lfirst(cell);
/*
         * If this distribute key is not in collectiveGroupExpr, we need to find its
         * EquivalenceClass. Once found, replace the distribute key with a member expr
         * of that class that is included in collectiveGroupExpr.
*/
if (!group_member(collectiveGroupExpr, dis_key)) {
/* Find include this dis expr equivalence class. */
ec = get_expr_eqClass(root, dis_key);
AssertEreport(ec != NULL, MOD_OPT, "invalid EquivalenceClass when setting sort+group distribute keys.");
foreach (lc2, ec->ec_members) {
EquivalenceMember* em = (EquivalenceMember*)lfirst(lc2);
/* Replace this dis_key with em_expr. */
if (group_member(collectiveGroupExpr, em->em_expr) &&
judge_node_compatible(root, (Node*)dis_key, (Node*)em->em_expr)) {
lfirst(cell) = copyObject(em->em_expr);
break;
}
}
if (lc2 == NULL) {
result_plan->distributed_keys = NIL;
return;
}
}
}
}
/*
 * @Description: We need to set SortGroupClause's groupSet when groupingSets is not null,
 * to avoid sort_pathkeys being removed when an equivalence class exists.
*
 * For example:
* select t1.a, t2.a from t1 inner join t2 on t1.a = t2.a
* group by grouping sets(t1.a, t2.a) order by 1, 2;
*
 * In this case, sort_pathkeys only contains t1.a; t2.a has already been removed because
 * t1.a = t2.a. But with grouping sets (AP function), some values of t1.a and t2.a can be
 * set to NULL so that t1.a and t2.a are no longer equal, hence t2.a cannot be removed.
 * Here we build the sort pathkeys again.
* @in root - Per-query information for planning/optimization.
* @in activeWindows - windows function list.
 * @in collectiveGroupExpr - collective group exprs if there is a grouping sets clause.
*/
template <path_key pathKey>
static void rebuild_pathkey_for_groupingSet(
PlannerInfo* root, List* tlist, List* activeWindows, List* collectiveGroupExpr)
{
Query* parse = root->parse;
if (!parse->groupingSets || !parse->groupClause) {
return;
}
/*
     * For window functions, we only need to set SortGroupClause's groupSet here; the
     * pathkeys will be made in grouping_planner's activeWindows part.
*/
if (pathKey == windows_func_pathkey) {
if (activeWindows != NIL) {
WindowClause* wc = NULL;
ListCell* l = NULL;
foreach (l, activeWindows) {
wc = (WindowClause*)lfirst(l);
set_groupset_for_sortgroup_items(root, wc->partitionClause, tlist, collectiveGroupExpr);
set_groupset_for_sortgroup_items(root, wc->orderClause, tlist, collectiveGroupExpr);
}
}
} else if (pathKey == distinct_pathkey) {
/* Make distinct pathkeys which groupSet is true. */
if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) {
set_groupset_for_sortgroup_items(root, parse->distinctClause, tlist, collectiveGroupExpr);
root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist, true);
}
} else if (pathKey == sort_pathkey) {
/* Make sort pathkeys which groupSet is true. */
if (parse->sortClause) {
set_groupset_for_sortgroup_items(root, parse->sortClause, tlist, collectiveGroupExpr);
root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, true);
}
}
}
static inline Path* choose_best_path(bool use_cheapest_path, PlannerInfo* root, Path* cheapest_path, Path* sorted_path)
{
Path* best_path;
if (use_cheapest_path) {
best_path = cheapest_path;
}
else {
best_path = sorted_path;
ereport(DEBUG2, (errmodule(MOD_OPT), (errmsg("Use presorted path instead of cheapest path."))));
/* print more details */
if (log_min_messages <= DEBUG2)
debug1_print_new_path(root, best_path, false);
}
return best_path;
}
#ifdef ENABLE_MULTIPLE_NODES
static bool has_ts_func(List* tlist)
{
FillWalkerContext fill_context;
    errno_t rc = memset_s(&fill_context, sizeof(fill_context), 0, sizeof(fill_context));
securec_check(rc, "\0", "\0");
expression_tree_walker((Node*)tlist, (walker)fill_function_call_walker, &fill_context);
if (fill_context.fill_func_calls > 0 || fill_context.fill_last_func_calls > 0 ||
fill_context.column_calls > 0) {
return true;
}
return false;
}
#endif
/* --------------------
* grouping_planner
* Perform planning steps related to grouping, aggregation, etc.
* This primarily means adding top-level processing to the basic
* query plan produced by query_planner.
*
* tuple_fraction is the fraction of tuples we expect will be retrieved
*
* tuple_fraction is interpreted as follows:
* 0: expect all tuples to be retrieved (normal case)
* 0 < tuple_fraction < 1: expect the given fraction of tuples available
* from the plan to be retrieved
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
* expected to be retrieved (ie, a LIMIT specification)
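 *
 * For example (illustrative): "SELECT ... LIMIT 10" gives tuple_fraction = 10,
 * an absolute tuple count, while a caller expecting to fetch roughly the first
 * 10% of the result would pass tuple_fraction = 0.1.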
*
* Returns a query plan. Also, root->query_pathkeys is returned as the
* actual output ordering of the plan (in pathkey format).
* --------------------
*/
static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
{
Query* parse = root->parse;
List* tlist = parse->targetList;
int64 offset_est = 0;
int64 count_est = 0;
double limit_tuples = -1.0;
Plan* result_plan = NULL;
List* current_pathkeys = NIL;
double dNumGroups[2] = {1, 1}; /* dNumGroups[0] is local distinct, dNumGroups[1] is global distinct. */
bool use_hashed_distinct = false;
bool tested_hashed_distinct = false;
bool needs_stream = false;
bool has_second_agg_sort = false;
List* collectiveGroupExpr = NIL;
RelOptInfo* rel_info = NULL;
char PlanContextName[NAMEDATALEN] = {0};
MemoryContext PlanGenerateContext = NULL;
MemoryContext oldcontext = NULL;
errno_t rc = EOK;
/*
     * Allocate a memory context for plan generation in the optimizer.
     * OptimizerContext is NULL in the PBE case, which we need to consider.
*/
rc = snprintf_s(PlanContextName, NAMEDATALEN, NAMEDATALEN - 1, "PlanGenerateContext_%d", root->query_level);
securec_check_ss(rc, "\0", "\0");
PlanGenerateContext = AllocSetContextCreate(CurrentMemoryContext,
PlanContextName,
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset) {
tuple_fraction = preprocess_limit(root, tuple_fraction, &offset_est, &count_est);
/*
* If we have a known LIMIT, and don't have an unknown OFFSET, we can
* estimate the effects of using a bounded sort.
*/
if (count_est > 0 && offset_est >= 0)
limit_tuples = (double)count_est + (double)offset_est;
}
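    /*
     * Illustrative example: LIMIT 20 OFFSET 5 gives limit_tuples = 25, so a
     * bounded (top-N) sort only needs to track 25 tuples.
     */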
if (parse->setOperations) {
List* set_sortclauses = NIL;
/*
* If there's a top-level ORDER BY, assume we have to fetch all the
* tuples. This might be too simplistic given all the hackery below
* to possibly avoid the sort; but the odds of accurate estimates here
* are pretty low anyway.
*/
if (parse->sortClause)
tuple_fraction = 0.0;
/*
* Construct the plan for set operations. The result will not need
* any work except perhaps a top-level sort and/or LIMIT. Note that
* any special work for recursive unions is the responsibility of
* plan_set_operations.
*/
result_plan = plan_set_operations(root, tuple_fraction, &set_sortclauses);
/*
* Calculate pathkeys representing the sort order (if any) of the set
* operation's result. We have to do this before overwriting the sort
* key information...
*/
current_pathkeys = make_pathkeys_for_sortclauses(root, set_sortclauses, result_plan->targetlist, true);
/*
* We should not need to call preprocess_targetlist, since we must be
* in a SELECT query node. Instead, use the targetlist returned by
* plan_set_operations (since this tells whether it returned any
* resjunk columns!), and transfer any sort key information from the
* original tlist.
*/
AssertEreport(
parse->commandType == CMD_SELECT, MOD_OPT, "unexpected command type when performing grouping planner.");
tlist = postprocess_setop_tlist((List*)copyObject(result_plan->targetlist), tlist);
/*
* Can't handle FOR UPDATE/SHARE here (parser should have checked
* already, but let's make sure).
*/
if (parse->rowMarks)
ereport(ERROR,
(errmodule(MOD_OPT),
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT"))));
/*
* Calculate pathkeys that represent result ordering requirements
*/
AssertEreport(parse->distinctClause == NIL,
MOD_OPT,
"The distinct clause is not allowed when calculating pathkeys for sortclauses.");
root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, true);
} else {
/* No set operations, do regular planning */
List* sub_tlist = NIL;
double sub_limit_tuples;
AttrNumber* groupColIdx = NULL;
bool need_tlist_eval = true;
Path* cheapest_path = NULL;
Path* sorted_path = NULL;
Path* best_path = NULL;
double numGroups[2] = {1, 1};
long localNumGroup = 1;
AggClauseCosts agg_costs;
int numGroupCols;
double path_rows;
int path_width;
bool use_hashed_grouping = false;
WindowLists* wflists = NULL;
uint32 maxref = 0;
int* tleref_to_colnum_map = NULL;
List* rollup_lists = NIL;
List* rollup_groupclauses = NIL;
        bool needSecondLevelAgg = true; /* for OLAP functions */
List* superset_key = root->dis_keys.superset_keys;
Size hash_entry_size = 0;
char PathContextName[NAMEDATALEN] = {0};
MemoryContext PathGenerateContext = NULL;
        /* Allocate a memory context for path generation in the optimizer. */
rc = snprintf_s(PathContextName, NAMEDATALEN, NAMEDATALEN - 1, "PathGenerateContext_%d", root->query_level);
securec_check_ss(rc, "\0", "\0");
PathGenerateContext = AllocSetContextCreate(CurrentMemoryContext,
PathContextName,
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcontext = MemoryContextSwitchTo(PathGenerateContext);
errno_t errorno = memset_s(&agg_costs, sizeof(AggClauseCosts), 0, sizeof(AggClauseCosts));
securec_check(errorno, "\0", "\0");
/* A recursive query should always have setOperations */
AssertEreport(!root->hasRecursion, MOD_OPT, "A recursive query is not allowed when doing regular planning.");
/* Preprocess GROUP BY clause, if any */
/* Preprocess Grouping set, if any */
if (parse->groupingSets)
parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
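        /*
         * Illustrative example: rollup(a, b) expands to the flat list of
         * grouping sets {(a, b), (a), ()}.
         */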
if (parse->groupClause) {
ListCell* lc = NULL;
foreach (lc, parse->groupClause) {
SortGroupClause* gc = (SortGroupClause*)lfirst(lc);
if (gc->tleSortGroupRef > maxref)
maxref = gc->tleSortGroupRef;
}
}
tleref_to_colnum_map = (int*)palloc((maxref + 1) * sizeof(int));
if (parse->groupingSets) {
ListCell* lc = NULL;
ListCell* lc2 = NULL;
ListCell* lc_set = NULL;
List* sets = extract_rollup_sets(parse->groupingSets);
bool isfirst = true;
            /* Keep all group-by columns in sets; each cell of sets is a rollup, and each cell contains many lists */
foreach (lc_set, sets) {
List* current_sets =
reorder_grouping_sets((List*)lfirst(lc_set), (list_length(sets) == 1 ? parse->sortClause : NIL));
List* groupclause = preprocess_groupclause(root, (List*)linitial(current_sets));
if (isfirst) {
collectiveGroupExpr = get_group_expr((List*)llast(current_sets), tlist);
} else if (collectiveGroupExpr != NIL) {
                    /* Intersect with the previous rollups' collective group exprs */
collectiveGroupExpr =
list_intersection(collectiveGroupExpr, get_group_expr((List*)llast(current_sets), tlist));
}
isfirst = false;
int ref = 0;
/*
* Now that we've pinned down an order for the groupClause for
* this list of grouping sets, we need to remap the entries in
* the grouping sets from sortgrouprefs to plain indices
* (0-based) into the groupClause for this collection of
* grouping sets.
*/
foreach (lc, groupclause) {
SortGroupClause* gc = (SortGroupClause*)lfirst(lc);
tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
}
foreach (lc, current_sets) {
foreach (lc2, (List*)lfirst(lc)) {
lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
}
}
rollup_lists = lcons(current_sets, rollup_lists);
rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
}
} else {
/* Preprocess GROUP BY clause, if any */
if (parse->groupClause)
parse->groupClause = preprocess_groupclause(root, NIL);
rollup_groupclauses = list_make1(parse->groupClause);
}
numGroupCols = list_length(parse->groupClause);
/* Preprocess targetlist */
tlist = preprocess_targetlist(root, tlist);
if (parse->upsertClause) {
UpsertExpr* upsertClause = parse->upsertClause;
upsertClause->updateTlist =
preprocess_upsert_targetlist(upsertClause->updateTlist, parse->resultRelation, parse->rtable);
}
/*
* Locate any window functions in the tlist. (We don't need to look
* anywhere else, since expressions used in ORDER BY will be in there
* too.) Note that they could all have been eliminated by constant
* folding, in which case we don't need to do any more work.
*/
if (parse->hasWindowFuncs) {
wflists = make_windows_lists(list_length(parse->windowClause));
find_window_functions((Node*)tlist, wflists);
if (wflists->numWindowFuncs > 0)
select_active_windows(root, wflists);
else
parse->hasWindowFuncs = false;
}
/*
         * Check whether this query is a correlated subquery; if so, we will
         * propagate the correlated flag from the correlative root to the current root.
*/
check_plan_correlation(root, (Node*)parse);
/*
* Generate appropriate target list for subplan; may be different from
* tlist if grouping or aggregation is needed.
*/
sub_tlist = make_subplanTargetList(root, tlist, &groupColIdx, &need_tlist_eval);
/* Set matching and superset key for planner info of current query level */
if (IS_STREAM_PLAN) {
set_root_matching_key(root, tlist);
build_grouping_itst_keys(root, wflists ? wflists->activeWindows : NULL);
}
/*
* Do aggregate preprocessing, if the query has any aggs.
*
* Note: think not that we can turn off hasAggs if we find no aggs. It
* is possible for constant-expression simplification to remove all
* explicit references to aggs, but we still have to follow the
* aggregate semantics (eg, producing only one output row).
*/
if (parse->hasAggs) {
/*
* Collect statistics about aggregates for estimating costs. Note:
* we do not attempt to detect duplicate aggregates here; a
* somewhat-overestimated cost is okay for our present purposes.
*/
count_agg_clauses(root, (Node*)tlist, &agg_costs);
count_agg_clauses(root, parse->havingQual, &agg_costs);
/*
* Preprocess MIN/MAX aggregates, if any. Note: be careful about
* adding logic between here and the optimize_minmax_aggregates
* call. Anything that is needed in MIN/MAX-optimizable cases
* will have to be duplicated in planagg.c.
*/
            /* Set u_sess->opt_cxt.query_dop to 1 to forbid parallelism of the subplan. */
int dop_tmp = u_sess->opt_cxt.query_dop;
u_sess->opt_cxt.query_dop = 1;
preprocess_minmax_aggregates(root, tlist);
/* Reset u_sess->opt_cxt.query_dop. */
u_sess->opt_cxt.query_dop = dop_tmp;
}
/*
* Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize
* them after EquivalenceClasses have been formed. The sortClause is
* certainly sort-able, but GROUP BY and DISTINCT might not be, in
* which case we just leave their pathkeys empty.
*/
        /* For grouping sets, we need to build the group pathkeys according to the lowest level's sort clause. */
if (parse->groupingSets) {
List* groupcls = (List*)llast(rollup_groupclauses);
if (groupcls && grouping_is_sortable(groupcls)) {
root->group_pathkeys = make_pathkeys_for_sortclauses(root, groupcls, tlist, false);
} else {
root->group_pathkeys = NIL;
}
} else if (parse->groupClause && grouping_is_sortable(parse->groupClause)) {
root->group_pathkeys = make_pathkeys_for_sortclauses(root, parse->groupClause, tlist, false);
} else {
root->group_pathkeys = NIL;
}
/* We consider only the first (bottom) window in pathkeys logic */
if (wflists != NULL && wflists->activeWindows != NIL) {
WindowClause* wc = NULL;
wc = (WindowClause*)linitial(wflists->activeWindows);
root->window_pathkeys = make_pathkeys_for_window(root, wc, tlist, false);
} else {
root->window_pathkeys = NIL;
}
if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) {
root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist, false);
} else {
root->distinct_pathkeys = NIL;
}
root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, false);
/*
* Figure out whether we want a sorted result from query_planner.
*
* If we have a sortable GROUP BY clause, then we want a result sorted
* properly for grouping. Otherwise, if we have window functions to
* evaluate, we try to sort for the first window. Otherwise, if
* there's a sortable DISTINCT clause that's more rigorous than the
* ORDER BY clause, we try to produce output that's sufficiently well
* sorted for the DISTINCT. Otherwise, if there is an ORDER BY
* clause, we want to sort by the ORDER BY clause.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
* superset of GROUP BY, it would be tempting to request sort by ORDER
* BY --- but that might just leave us failing to exploit an available
* sort order at all. Needs more thought. The choice for DISTINCT
* versus ORDER BY is much easier, since we know that the parser
* ensured that one is a superset of the other.
*/
if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
else if (root->window_pathkeys)
root->query_pathkeys = root->window_pathkeys;
else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys))
root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
else
root->query_pathkeys = NIL;
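        /*
         * Illustrative example (hypothetical table t): for
         *   select distinct a, b from t order by a;
         * distinct_pathkeys (a, b) is longer than sort_pathkeys (a), so we ask
         * for output sorted by (a, b), which also satisfies the ORDER BY prefix.
         */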
/*
* Figure out whether there's a hard limit on the number of rows that
* query_planner's result subplan needs to return. Even if we know a
* hard limit overall, it doesn't apply if the query has any
* grouping/aggregation operations.
*/
if (parse->groupClause || parse->groupingSets || parse->distinctClause || parse->hasAggs ||
parse->hasWindowFuncs || root->hasHavingQual)
sub_limit_tuples = -1.0;
else
sub_limit_tuples = limit_tuples;
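        /*
         * Illustrative example: "select a, count(*) from t group by a limit 10"
         * must still aggregate every input row, so no row limit is passed down
         * to the subplan; only a plain "select ... from t limit 10" can use it.
         */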
/*
* Generate the best unsorted and presorted paths for this Query (but
* note there may not be any presorted path). query_planner will also
* estimate the number of groups in the query, and canonicalize all
* the pathkeys.
*/
query_planner(root,
sub_tlist,
tuple_fraction,
sub_limit_tuples,
&cheapest_path,
&sorted_path,
dNumGroups,
rollup_groupclauses,
rollup_lists);
/* restore superset keys */
root->dis_keys.superset_keys = superset_key;
/*
* Extract rowcount and width estimates for possible use in grouping
* decisions. Beware here of the possibility that
* cheapest_path->parent is NULL (ie, there is no FROM clause). Also,
* if the final rel has been proven dummy, its rows estimate will be
* zero; clamp it to one to avoid zero-divide in subsequent
* calculations.
*/
if (cheapest_path->parent) {
path_rows = clamp_row_est(PATH_LOCAL_ROWS(cheapest_path));
path_width = cheapest_path->parent->width;
} else {
path_rows = 1; /* assume non-set result */
path_width = 100; /* arbitrary */
}
/* If grouping sets are present, we can currently do only sorted
* grouping.
*/
if (parse->groupingSets) {
use_hashed_grouping = false;
            /* We need to set numGroups because a HashAgg is necessary above the SortAgg
             * in a stream plan.
*/
numGroups[0] = dNumGroups[0];
numGroups[1] = dNumGroups[1];
localNumGroup = (long)Min(dNumGroups[0], (double)LONG_MAX);
} else if (parse->groupClause ||
(IS_STREAM_PLAN && list_length(agg_costs.exprAggs) == 1 && !agg_costs.hasDnAggs)) {
/*
* If grouping, decide whether to use sorted or hashed grouping.
*/
use_hashed_grouping = choose_hashed_grouping(root,
tuple_fraction,
limit_tuples,
path_width,
cheapest_path,
sorted_path,
dNumGroups,
&agg_costs,
&hash_entry_size);
/* Also convert # groups to long int --- but 'ware overflow! */
numGroups[0] = dNumGroups[0];
numGroups[1] = dNumGroups[1];
localNumGroup = (long)Min(dNumGroups[0], (double)LONG_MAX);
} else if (parse->distinctClause && sorted_path && !root->hasHavingQual && !parse->hasAggs &&
(wflists == NULL || !wflists->activeWindows)) {
Size hashentrysize;
/*
* Don't do it if it doesn't look like the hashtable will fit into
* work_mem.
*/
if (root->glob->vectorized)
hashentrysize = get_path_actual_total_width(cheapest_path, root->glob->vectorized, OP_HASHAGG);
else
hashentrysize = get_hash_entry_size(path_width);
/*
* We'll reach the DISTINCT stage without any intermediate
* processing, so figure out whether we will want to hash or not
* so we can choose whether to use cheapest or sorted path.
*/
use_hashed_distinct = choose_hashed_distinct(root,
tuple_fraction,
limit_tuples,
path_rows,
path_width,
cheapest_path->startup_cost,
cheapest_path->total_cost,
ng_get_dest_distribution(cheapest_path),
sorted_path->startup_cost,
sorted_path->total_cost,
ng_get_dest_distribution(sorted_path),
sorted_path->pathkeys,
dNumGroups[0],
hashentrysize);
tested_hashed_distinct = true;
}
/*
* Select the best path. If we are doing hashed grouping, we will
* always read all the input tuples, so use the cheapest-total path.
* Otherwise, trust query_planner's decision about which to use.
*/
best_path = choose_best_path((use_hashed_grouping || use_hashed_distinct || sorted_path == NULL),
root, cheapest_path, sorted_path);
(void)MemoryContextSwitchTo(PlanGenerateContext);
/* record the param */
root->param_upper = PATH_REQ_UPPER(cheapest_path);
/*
* Check to see if it's possible to optimize MIN/MAX aggregates. If
* so, we will forget all the work we did so far to choose a "regular"
* path ... but we had to do it anyway to be able to tell which way is
* cheaper.
*/
result_plan = optimize_minmax_aggregates(root, tlist, &agg_costs, best_path);
if (result_plan != NULL) {
/*
* optimize_minmax_aggregates generated the full plan, with the
* right tlist, and it has no sort order.
*/
current_pathkeys = NIL;
} else {
/*
* Normal case --- create a plan according to query_planner's
* results.
*/
bool need_sort_for_grouping = false;
result_plan = create_plan(root, best_path);
rel_info = best_path->parent;
/*
             * For a dummy plan, we should return it quickly. Meanwhile, we should
             * eliminate the agg node, or an error will be thrown out later.
             *
             * groupClause == NULL can lead to a one-row result, e.g. count(*).
             * parse->groupingSets != NULL can also lead to a one-row result, e.g. when it
             * includes group by ().
             * So these situations must not enter this branch.
*
*/
if (is_dummy_plan(result_plan) && parse->groupingSets == NIL && parse->groupClause != NIL) {
if (parse->hasAggs || parse->hasWindowFuncs) {
ListCell* lc = NULL;
foreach (lc, tlist) {
TargetEntry* tle = (TargetEntry*)lfirst(lc);
List* exprList = pull_var_clause(
(Node*)tle->expr, PVC_INCLUDE_AGGREGATES_OR_WINAGGS, PVC_RECURSE_PLACEHOLDERS);
ListCell* lc2 = NULL;
Node* node = NULL;
foreach (lc2, exprList) {
node = (Node*)lfirst(lc2);
if (IsA(node, Aggref) || IsA(node, GroupingFunc) || IsA(node, WindowFunc))
break;
}
/*
                     * For Aggref, GroupingFunc or WindowFunc exprs, we need to replace them
                     * with NULL consts, else an error will happen because the Aggref is not
                     * inside an Agg node.
*/
if (lc2 != NULL) {
tle->expr = (Expr*)makeNullConst(exprType(node), exprTypmod(node), exprCollation(node));
}
list_free_ext(exprList);
}
}
result_plan->targetlist = tlist;
return result_plan;
}
if (use_hashed_grouping && list_length(agg_costs.exprAggs) == 1 &&
(!is_execute_on_datanodes(result_plan) || is_replicated_plan(result_plan))) {
if (!grouping_is_sortable(parse->groupClause)) {
ereport(ERROR,
(errmodule(MOD_OPT),
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, while others only support "
"sorting."))));
}
use_hashed_grouping = false;
if (sorted_path != NULL && sorted_path != cheapest_path) {
best_path = sorted_path;
result_plan = create_plan(root, best_path);
}
}
current_pathkeys = best_path->pathkeys;
/* Detect if we'll need an explicit sort for grouping */
if (parse->groupClause && !use_hashed_grouping &&
!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) {
need_sort_for_grouping = true;
/*
* Always override create_plan's tlist, so that we don't sort
* useless data from a "physical" tlist.
*/
need_tlist_eval = true;
}
/*
* create_plan returns a plan with just a "flat" tlist of required
* Vars. Usually we need to insert the sub_tlist as the tlist of
* the top plan node. However, we can skip that if we determined
* that whatever create_plan chose to return will be good enough.
*/
if (need_tlist_eval) {
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
*/
if (!is_projection_capable_plan(result_plan) ||
(is_vector_scan(result_plan) && vector_engine_unsupport_expression_walker((Node*)sub_tlist))) {
result_plan = (Plan*)make_result(root, sub_tlist, NULL, result_plan);
} else {
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
*/
result_plan->targetlist = sub_tlist;
if (IsA(result_plan, PartIterator)) {
/*
* If is a PartIterator + Scan, push the PartIterator's
* tlist to Scan.
*/
result_plan->lefttree->targetlist = sub_tlist;
}
#ifdef PGXC
/*
* If the Join tree is completely shippable, adjust the
* target list of the query according to the new targetlist
* set above. For now do this only for SELECT statements.
*/
if (IsA(result_plan, RemoteQuery) && parse->commandType == CMD_SELECT) {
pgxc_rqplan_adjust_tlist(
root, (RemoteQuery*)result_plan, ((RemoteQuery*)result_plan)->is_simple ? false : true);
if (((RemoteQuery*)result_plan)->is_simple)
AssertEreport(((RemoteQuery*)result_plan)->sql_statement == NULL,
MOD_OPT,
"invalid sql statement of result plan when adjusting the targetlist of remote query.");
}
#endif /* PGXC */
}
/*
* Also, account for the cost of evaluation of the sub_tlist.
* See comments for add_tlist_costs_to_plan() for more info.
*/
add_tlist_costs_to_plan(root, result_plan, sub_tlist);
} else {
/*
* Since we're using create_plan's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
*
* We don't want any excess columns for hashagg, since we support hashagg write-out-to-disk now
*/
if (use_hashed_grouping)
disuse_physical_tlist(result_plan, best_path);
locate_grouping_columns(root, tlist, result_plan->targetlist, groupColIdx);
}
/* shuffle to another node group in FORCE mode (CNG_MODE_FORCE) */
if (IS_STREAM_PLAN && !parse->hasForUpdate &&
(parse->hasAggs || parse->groupClause != NIL || parse->groupingSets != NIL ||
parse->distinctClause != NIL || parse->sortClause != NIL ||
(wflists != NULL && wflists->activeWindows))) {
Plan* old_result_plan = result_plan;
List* groupcls = parse->groupClause;
Path* subpath = NULL;
bool can_shuffle = true;
/* deal with window agg if no group clause */
if (groupcls == NIL && wflists != NULL && wflists->activeWindows) {
WindowClause* wc1 = (WindowClause*)linitial(wflists->activeWindows);
groupcls = wc1->partitionClause;
/* need to reduce targetlist here if no group clause */
subpath = best_path;
                    /* don't shuffle if the partitionClause contains aggregates */
can_shuffle = check_windowagg_can_shuffle(wc1->partitionClause, tlist);
}
if (can_shuffle)
result_plan = ng_agg_force_shuffle(root, groupcls, result_plan, tlist, subpath);
/*
                 * 1. The stream will make the data unsorted, so mark it.
                 * 2. The groupClause cannot be NIL when adding a sort for grouping, since
                 *    the sort keys are generated from the group keys.
*/
if (old_result_plan != result_plan) {
current_pathkeys = NIL;
if (parse->groupClause != NIL) {
need_sort_for_grouping = true;
}
}
}
/*
* groupColIdx is now cast in stone, so record a mapping from
* tleSortGroupRef to column index. setrefs.c needs this to
* finalize GROUPING() operations.
*/
if (parse->groupingSets) {
AttrNumber* grouping_map = (AttrNumber*)palloc0(sizeof(AttrNumber) * (maxref + 1));
ListCell* lc = NULL;
int i = 0;
                /* All columns that take part in grouping */
foreach (lc, parse->groupClause) {
SortGroupClause* gc = (SortGroupClause*)lfirst(lc);
grouping_map[gc->tleSortGroupRef] = groupColIdx[i++];
}
root->grouping_map = grouping_map;
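                /*
                 * Illustrative example: for "group by a, b" where a has tleSortGroupRef 1
                 * in subplan column 1 and b has tleSortGroupRef 2 in subplan column 2,
                 * grouping_map becomes {0, 1, 2} indexed by tleSortGroupRef (slot 0 unused).
                 */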
result_plan = build_groupingsets_plan(root,
parse,
&tlist,
need_sort_for_grouping,
rollup_groupclauses,
rollup_lists,
&groupColIdx,
&agg_costs,
localNumGroup,
result_plan,
wflists,
&needSecondLevelAgg,
collectiveGroupExpr);
/* Delete eq class expr after grouping */
delete_eq_member(root, tlist, collectiveGroupExpr);
numGroupCols = list_length(parse->groupClause);
/*
* these are destroyed by build_grouping_chain, so make sure
* we don't try and touch them again
*/
rollup_groupclauses = NIL;
rollup_lists = NIL;
if (grouping_is_hashable(parse->groupClause)) {
                    /* for performance, hashagg is the first choice */
use_hashed_grouping = true;
} else if (grouping_is_sortable(parse->groupClause)) {
/* or do sortagg */
use_hashed_grouping = false;
} else {
ereport(ERROR,
(errmodule(MOD_OPT),
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, "
"while others only support sorting"))));
}
if (IS_STREAM_PLAN && (is_hashed_plan(result_plan) || is_rangelist_plan(result_plan))) {
if (expression_returns_set((Node*)tlist)) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"set-valued function + groupingsets");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
if (check_subplan_in_qual(tlist, result_plan->qual)) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"var in quals doesn't exist in targetlist");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
}
}
if (IS_STREAM_PLAN) {
if (is_execute_on_coordinator(result_plan) || is_execute_on_allnodes(result_plan) ||
is_replicated_plan(result_plan)) {
needs_stream = false;
} else {
needs_stream = needs_agg_stream(root, tlist, result_plan->distributed_keys, &result_plan->exec_nodes->distribution);
}
}
/*
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
bool contain_sets_expression =
expression_returns_set((Node*)tlist) || expression_returns_set((Node*)parse->havingQual);
bool next_is_second_level_group = false;
            /* No second-level agg is needed when the distribute key is in the group clause of the grouping sets. */
if (!needSecondLevelAgg) {
                /* nothing to do */
} else if (use_hashed_grouping) {
/* Hashed aggregate plan --- no sort needed */
if (IS_STREAM_PLAN &&
is_execute_on_datanodes(result_plan) &&
!is_replicated_plan(result_plan)) {
if (agg_costs.hasDnAggs || list_length(agg_costs.exprAggs) == 0) {
result_plan = generate_hashagg_plan(root,
result_plan,
tlist,
&agg_costs,
numGroupCols,
numGroups,
wflists,
groupColIdx,
extract_grouping_ops(parse->groupClause),
&needs_stream,
hash_entry_size,
AGG_LEVEL_1_INTENT,
rel_info);
} else {
Node *node = (Node *) linitial(agg_costs.exprAggs);
AssertEreport(list_length(agg_costs.exprAggs) == 1,
MOD_OPT,
"invalid length of distinct expression when generating plan for hashed aggregate.");
result_plan = get_count_distinct_partial_plan(root,
result_plan,
&tlist,
node,
agg_costs,
numGroups,
wflists,
groupColIdx,
&needs_stream,
hash_entry_size,
rel_info);
}
next_is_second_level_group = true;
} else if (!parse->groupingSets) {
/*
                 * For AP functions, there is no need to do a hashagg if this is not a
                 * stream plan; in that case, all the work is always finished in the sort agg.
*/
result_plan = (Plan *) make_agg(root,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
&agg_costs,
numGroupCols,
groupColIdx,
extract_grouping_ops(parse->groupClause),
localNumGroup,
result_plan,
wflists,
needs_stream,
true,
NIL,
false,
hash_entry_size);
next_is_second_level_group = true;
}
if (IS_STREAM_PLAN &&
needs_stream &&
is_execute_on_datanodes(result_plan) &&
!is_replicated_plan(result_plan)) {
if (next_is_second_level_group && contain_sets_expression) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"set-valued expression in qual/targetlist + two-level Groupagg\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
if (wflists != NULL && wflists->activeWindows) {
result_plan->targetlist = make_windowInputTargetList(root, result_plan->targetlist,
wflists->activeWindows);
}
result_plan = (Plan *)mark_agg_stream(root, tlist, result_plan, parse->groupClause,
AGG_LEVEL_1_INTENT, &has_second_agg_sort);
if (!has_second_agg_sort) {
current_pathkeys = NIL;
} else {
current_pathkeys = root->group_pathkeys;
}
} else {
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
} else if (parse->hasAggs) {
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;
bool count_distinct_optimization =
IS_STREAM_PLAN && is_execute_on_datanodes(result_plan) && !is_replicated_plan(result_plan);
Node* distinct_node = NULL;
List* distinct_node_list = NIL;
Plan* partial_plan = NULL;
bool two_level_sort = false;
bool distinct_needs_stream = false;
bool distinct_needs_local_stream = false;
List* orig_list = NIL;
List* replace_list = NIL;
double multiple = 0.0;
List* distributed_key = NIL;
            /* Check whether two_level_sort is needed; it is only in two cases:
             * 1. there's no group by clause, so AGG_PLAIN is used.
             * 2. there's a group by clause, and a redistribution on the group by clause is needed.
             * If so, we should remove aggdistinct and then restore it back afterwards */
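            /*
             * Illustrative examples (hypothetical table t): "select count(distinct b)
             * from t" is the first case (AGG_PLAIN), while "select a, count(distinct b)
             * from t group by a" with a redistribution on a is the second.
             */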
if (count_distinct_optimization && list_length(agg_costs.exprAggs) == 1 &&
(!(parse->groupClause && !needs_stream)) && !agg_costs.hasDnAggs && !agg_costs.hasdctDnAggs &&
agg_costs.numOrderedAggs == 0) {
ListCell* lc = NULL;
List* var_list = NIL;
List* duplicate_list = NIL;
distributed_key = get_distributekey_from_tlist(
root, tlist, parse->groupClause, result_plan->plan_rows, &multiple);
var_list = make_agg_var_list(root, tlist, &duplicate_list);
distinct_node = (Node*)linitial(agg_costs.exprAggs);
distinct_node_list = list_make1(distinct_node);
two_level_sort = needs_two_level_groupagg(root,
result_plan,
distinct_node,
distributed_key,
&distinct_needs_stream,
&distinct_needs_local_stream);
if (two_level_sort) {
foreach (lc, var_list) {
Aggref* node = (Aggref*)lfirst(lc);
if (IsA(node, Aggref) && node->aggdistinct != NIL) {
List* aggdistinct = node->aggdistinct;
Aggref* n = NULL;
node->aggdistinct = NIL;
n = (Aggref*)copyObject(node);
if (need_adjust_agg_inner_func_type(n))
n->aggtype = n->aggtrantype;
orig_list = lappend(orig_list, copyObject(n));
n->aggdistinct = aggdistinct;
replace_list = lappend(replace_list, n);
}
}
foreach (lc, duplicate_list) {
Aggref* node = (Aggref*)lfirst(lc);
if (IsA(node, Aggref) && node->aggdistinct != NIL) {
pfree_ext(node->aggdistinct);
node->aggdistinct = NIL;
}
}
}
list_free_ext(var_list);
list_free_ext(duplicate_list);
}
if (parse->groupClause) {
/* if there's count(distinct), we currently only support redistribution by the group clause */
if (count_distinct_optimization && needs_stream &&
(agg_costs.exprAggs != NIL || agg_costs.hasDnAggs)) {
distributed_key = get_distributekey_from_tlist(
root, tlist, parse->groupClause, result_plan->plan_rows, &multiple);
/* we can apply a local sortagg if the count(distinct) expr is the distribute column */
if (two_level_sort) {
if (distinct_needs_local_stream) {
result_plan =
create_local_redistribute(root, result_plan, list_make1(distinct_node), 0);
}
if (distinct_needs_stream) {
result_plan =
make_redistribute_for_agg(root, result_plan, list_make1(distinct_node), 0);
need_sort_for_grouping = true;
}
if (need_sort_for_grouping)
result_plan =
(Plan*)make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan);
result_plan = (Plan*)make_agg(root,
tlist,
(List*)parse->havingQual,
AGG_SORTED,
&agg_costs,
numGroupCols,
groupColIdx,
extract_grouping_ops(parse->groupClause),
localNumGroup,
result_plan,
wflists,
needs_stream,
true,
NIL,
0,
true);
if (wflists != NULL && wflists->activeWindows) {
/* If have windows we need alter agg's targetlist. */
result_plan->targetlist =
make_windowInputTargetList(root, result_plan->targetlist, wflists->activeWindows);
}
partial_plan = result_plan;
next_is_second_level_group = true;
}
if (distributed_key != NIL) {
result_plan = make_redistribute_for_agg(root, result_plan, distributed_key, multiple);
needs_stream = false;
} else {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Count(Distinct) + Group by\" on redistribution unsupported data type");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
need_sort_for_grouping = true;
}
/* Add a local redistribution on the local group cols. */
if (IS_STREAM_PLAN && !needs_stream && result_plan->dop > 1)
result_plan = create_local_redistribute(root, result_plan, result_plan->distributed_keys, 0);
if (need_sort_for_grouping && partial_plan == NULL &&
(IS_STREAM_PLAN || parse->groupingSets == NULL)) {
result_plan =
(Plan*)make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan);
current_pathkeys = root->group_pathkeys;
}
aggstrategy = AGG_SORTED;
} else {
if (IS_STREAM_PLAN && count_distinct_optimization) {
if (list_length(agg_costs.exprAggs) > 1 ||
agg_costs.hasDnAggs ||
agg_costs.numOrderedAggs > 0) {
/*
 * Add a gather here because listagg & array_agg rely on the sort order of
 * result_plan, and gathering may produce disordered results in an MPP
 * scenario. The user can add ORDER BY inside the agg function to avoid
 * this change; that is also what the SQL standard recommends.
 */
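/*
 * A hedged example (table/column names hypothetical):
 *     select array_agg(c2) from t;
 * gathered from multiple DNs may return elements in any order, whereas
 *     select array_agg(c2 order by c2) from t;
 * pins the element order inside the aggregate itself.
 */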
if ((u_sess->attr.attr_sql.rewrite_rule & PARTIAL_PUSH) && permit_from_rewrite_hint(root, PARTIAL_PUSH)) {
needs_stream = false;
result_plan = make_simple_RemoteQuery(result_plan, root, false);
} else {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"multi count(distinct) or agg which need order can not ship.");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
} else if (agg_costs.exprAggs != NIL) {
if (distinct_needs_local_stream)
result_plan =
create_local_redistribute(root, result_plan, list_make1(distinct_node), 0);
if (distinct_needs_stream) {
result_plan =
make_redistribute_for_agg(root, result_plan, list_make1(distinct_node), 0);
} else if (!two_level_sort) {
/* we don't support pushing down a count(distinct) expr of a non-distributable type */
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Count(Distinct)\" on redistribution unsupported data type");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
} else if (result_plan->dop > 1) {
result_plan =
create_local_redistribute(root, result_plan, result_plan->distributed_keys, 0);
}
}
}
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
if (IS_STREAM_PLAN && needs_stream && agg_costs.hasPolymorphicType) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Aggregate on polymorphic argument type \"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
/* If a two-level agg is needed and a subplan in the qual references a non-existent var, push down is unsupported */
if (IS_STREAM_PLAN && (partial_plan != NULL || needs_stream) &&
check_subplan_in_qual(tlist, (List*)parse->havingQual)) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Subplan in having qual + two-level Groupagg\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
/* If a two-level agg is needed and a set-valued expression appears in the qual/targetlist, push down is unsupported */
if (IS_STREAM_PLAN && next_is_second_level_group && contain_sets_expression) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"set-valued expression in qual/targetlist + two-level Groupagg\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
if (partial_plan != NULL) {
result_plan = mark_top_agg(root, tlist, partial_plan, result_plan, AGG_LEVEL_1_INTENT);
} else if (parse->groupingSets == NIL || IS_STREAM) {
result_plan = (Plan*)make_agg(root,
tlist,
(List*)parse->havingQual,
aggstrategy,
&agg_costs,
numGroupCols,
groupColIdx,
extract_grouping_ops(parse->groupClause),
localNumGroup,
result_plan,
wflists,
needs_stream,
true,
NIL,
0,
true);
}
next_is_second_level_group = true;
/* Save the agg plan to restore aggdistinct node */
if (distinct_node != NULL) {
list_free_ext(distinct_node_list);
if (needs_stream)
partial_plan = result_plan;
}
#ifdef STREAMPLAN
if (IS_STREAM_PLAN && needs_stream && is_execute_on_datanodes(result_plan) &&
!is_replicated_plan(result_plan)) {
if (next_is_second_level_group && contain_sets_expression) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"set-valued expression in qual/targetlist + two-level Groupagg\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
if (wflists != NULL && wflists->activeWindows)
result_plan->targetlist =
make_windowInputTargetList(root, result_plan->targetlist, wflists->activeWindows);
result_plan = (Plan*)mark_agg_stream(
root, tlist, result_plan, parse->groupClause, AGG_LEVEL_1_INTENT, &has_second_agg_sort);
if (!has_second_agg_sort)
current_pathkeys = NIL;
else
current_pathkeys = root->group_pathkeys;
}
#endif
if (partial_plan != NULL) {
partial_plan->targetlist = (List*)replace_node_clause((Node*)partial_plan->targetlist,
(Node*)orig_list,
(Node*)replace_list,
RNC_COPY_NON_LEAF_NODES);
list_free_ext(replace_list);
list_free_deep(orig_list);
}
} else if (parse->groupClause) {
/*
* GROUP BY without aggregation, so insert a group node (plus
* the appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come out
* the way the GROUP node needs it.
*/
if (IS_STREAM || parse->groupingSets == NIL) {
/*
 * For SQL that is not shippable, the group operator has already been
 * added by build_groupingsets_plan, so we skip adding it here; if the
 * query is not shippable, needs_stream must be false, and the group
 * operator will not be added.
 */
if (!needs_stream && result_plan->dop > 1)
result_plan = create_local_redistribute(root, result_plan, result_plan->distributed_keys, 0);
if (need_sort_for_grouping) {
result_plan =
(Plan*)make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan);
current_pathkeys = root->group_pathkeys;
}
/*
 * If a two-level sort+group is needed and non-var expressions exist, this
 * Group node's targetlist should be result_plan->targetlist rather than
 * tlist; otherwise an error ("Var can not be found") can happen.
 */
result_plan = (Plan*)make_group(root,
needs_stream && need_tlist_eval ? result_plan->targetlist : tlist,
(List*)parse->havingQual,
numGroupCols,
groupColIdx,
extract_grouping_ops(parse->groupClause),
dNumGroups[0],
result_plan);
next_is_second_level_group = true;
}
#ifdef STREAMPLAN
if (needs_stream) {
if (next_is_second_level_group && contain_sets_expression) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"set-valued expression in qual/targetlist + two-level Groupagg\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
if (wflists != NULL && wflists->activeWindows)
result_plan->targetlist =
make_windowInputTargetList(root, result_plan->targetlist, wflists->activeWindows);
result_plan = (Plan*)mark_group_stream(root, tlist, result_plan);
current_pathkeys = root->group_pathkeys;
}
#endif
} else if (root->hasHavingQual || parse->groupingSets) {
int nrows = list_length(parse->groupingSets);
/*
* No aggregates, and no GROUP BY, but we have a HAVING qual
* or grouping sets (which by elimination of cases above must
* consist solely of empty grouping sets, since otherwise
* groupClause will be non-empty).
*
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row for each grouping set depending on
* whether HAVING succeeds. Furthermore, there cannot be any
* variables in either HAVING or the targetlist, so we
* actually do not need the FROM table at all! We can just
* throw away the plan-so-far and generate a Result node. This
* is a sufficiently unusual corner case that it's not worth
* contorting the structure of this routine to avoid having to
* generate the plan in the first place.
*/
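/*
 * A hypothetical example of this degenerate case:
 *     select 1 having 1 = 0;
 * has no FROM, no GROUP BY and a constant HAVING qual, so a single
 * Result node deciding between 0 and 1 output rows is sufficient.
 */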
result_plan = (Plan*)make_result(root, tlist, parse->havingQual, NULL);
/*
* Doesn't seem worthwhile writing code to cons up a
* generate_series or a values scan to emit multiple rows.
* Instead just clone the result in an Append.
*/
if (nrows > 1) {
List* plans = list_make1(result_plan);
while (--nrows > 0)
plans = lappend(plans, copyObject(result_plan));
result_plan = (Plan*)make_append(plans, tlist);
}
}
#ifdef PGXC
/*
* Grouping will certainly not increase the number of rows
* Coordinator fetches from Datanode, in fact it's expected to
* reduce the number drastically. Hence, try pushing GROUP BY
* clauses and aggregates to the Datanode, thus saving bandwidth.
*/
if (IS_PGXC_COORDINATOR && !IsConnFromCoord() && !IS_STREAM)
result_plan = create_remotegrouping_plan(root, result_plan);
#endif /* PGXC */
} /* end of non-minmax-aggregate case */
/*
* Since each window function could require a different sort order, we
* stack up a WindowAgg node for each window, with sort steps between
* them as needed.
*/
if (wflists != NULL && wflists->activeWindows) {
List* window_tlist = NIL;
ListCell* l = NULL;
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the desired
* tlist. (In some cases this might not really be required, but
* it's not worth trying to avoid it.) Note that on second and
* subsequent passes through the following loop, the top-level
* node will be a WindowAgg which we know can project; so we only
* need to check once.
*/
if (!is_projection_capable_plan(result_plan)) {
result_plan = (Plan*)make_result(root, NIL, NULL, result_plan);
}
/*
* The "base" targetlist for all steps of the windowing process is
* a flat tlist of all Vars and Aggs needed in the result. (In
* some cases we wouldn't need to propagate all of these all the
* way to the top, since they might only be needed as inputs to
* WindowFuncs. It's probably not worth trying to optimize that
* though.) We also add window partitioning and sorting
* expressions to the base tlist, to ensure they're computed only
* once at the bottom of the stack (that's critical for volatile
* functions). As we climb up the stack, we'll add outputs for
* the WindowFuncs computed at each level.
*/
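/*
 * A hedged sketch of the resulting shape (names hypothetical): for
 *     select sum(a) over (partition by b), rank() over (order by c) from t;
 * two WindowAgg nodes are stacked, each preceded by a Sort matching its
 * window, while the flat base tlist (a, b, c and any other needed Vars)
 * is computed once at the bottom of the stack.
 */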
window_tlist = make_windowInputTargetList(root, tlist, wflists->activeWindows);
/*
* The copyObject steps here are needed to ensure that each plan
* node has a separately modifiable tlist. (XXX wouldn't a
* shallow list copy do for that?)
*/
if (window_tlist != NULL)
result_plan->targetlist = (List*)copyObject(window_tlist);
if (IsA(result_plan, PartIterator)) {
/*
 * If this is a PartIterator + Scan, push the PartIterator's
 * tlist down to the Scan.
 *
 * Notes: in a cstore schema, when window_tlist is NULL, we must make sure
 * the CStoreScan's targetlist does not become NULL again, so we only do
 * the copy when window_tlist is not NULL. This is safe because when the
 * CStoreScan's targetlist is NULL, we choose the first column as the
 * targetlist; see create_cstore_plan.
 */
if (window_tlist != NULL)
result_plan->lefttree->targetlist = (List*)copyObject(window_tlist);
}
/* Set group_set and rebuild the pathkeys for window functions. */
rebuild_pathkey_for_groupingSet<windows_func_pathkey>(
root, tlist, wflists->activeWindows, collectiveGroupExpr);
foreach (l, wflists->activeWindows) {
WindowClause* wc = (WindowClause*)lfirst(l);
List* window_pathkeys = NIL;
int partNumCols;
AttrNumber* partColIdx = NULL;
Oid* partOperators = NULL;
int ordNumCols;
AttrNumber* ordColIdx = NULL;
Oid* ordOperators = NULL;
window_pathkeys = make_pathkeys_for_window(root, wc, tlist, true);
/*
* This is a bit tricky: we build a sort node even if we don't
* really have to sort. Even when no explicit sort is needed,
* we need to have suitable resjunk items added to the input
* plan's tlist for any partitioning or ordering columns that
* aren't plain Vars. (In theory, make_windowInputTargetList
* should have provided all such columns, but let's not assume
* that here.) Furthermore, this way we can use existing
* infrastructure to identify which input columns are the
* interesting ones.
*/
if (window_pathkeys != NIL) {
Sort* sort_plan = NULL;
/*
 * If the window func has a 'partition by' clause,
 * then we can parallelize it.
 */
sort_plan =
make_sort_from_pathkeys(root, result_plan, window_pathkeys, -1.0, (wc->partitionClause != NIL));
if (!pathkeys_contained_in(window_pathkeys, current_pathkeys)) {
/* we do indeed need to sort */
result_plan = (Plan*)sort_plan;
current_pathkeys = window_pathkeys;
}
/* In either case, extract the per-column information */
get_column_info_for_window(root,
wc,
tlist,
sort_plan->numCols,
sort_plan->sortColIdx,
&partNumCols,
&partColIdx,
&partOperators,
&ordNumCols,
&ordColIdx,
&ordOperators);
} else {
/* empty window specification, nothing to sort */
partNumCols = 0;
partColIdx = NULL;
partOperators = NULL;
ordNumCols = 0;
ordColIdx = NULL;
ordOperators = NULL;
}
if (lnext(l)) {
/* Add the current WindowFuncs to the running tlist */
window_tlist = add_to_flat_tlist(window_tlist, wflists->windowFuncs[wc->winref]);
} else {
/* Install the original tlist in the topmost WindowAgg */
window_tlist = tlist;
}
/* ... and make the WindowAgg plan node */
result_plan = (Plan*)make_windowagg(root,
(List*)copyObject(window_tlist),
wflists->windowFuncs[wc->winref],
wc->winref,
partNumCols,
partColIdx,
partOperators,
ordNumCols,
ordColIdx,
ordOperators,
wc->frameOptions,
wc->startOffset,
wc->endOffset,
result_plan);
#ifdef STREAMPLAN
if (IS_STREAM_PLAN && is_execute_on_datanodes(result_plan) && !is_replicated_plan(result_plan)) {
result_plan = (Plan*)mark_windowagg_stream(root, result_plan, tlist, wc, current_pathkeys, wflists);
}
#endif
}
}
(void)MemoryContextSwitchTo(oldcontext);
} /* end of if (setOperations) */
oldcontext = MemoryContextSwitchTo(PlanGenerateContext);
/*
* If there is a DISTINCT clause, add the necessary node(s).
*/
bool next_is_second_level_distinct = false; /* flag for DISTINCT agg */
bool contain_sets_expression = expression_returns_set((Node*)tlist);
if (parse->distinctClause) {
double dNumDistinctRows[2];
double numDistinctRows[2];
/*
* If there was grouping or aggregation, use the current number of
* rows as the estimated number of DISTINCT rows (ie, assume the
* result was already mostly unique). If not, use the number of
* distinct-groups calculated by query_planner.
*/
if (parse->groupClause || parse->groupingSets || root->hasHavingQual || parse->hasAggs) {
dNumDistinctRows[0] = PLAN_LOCAL_ROWS(result_plan);
dNumDistinctRows[1] = result_plan->plan_rows;
} else {
dNumDistinctRows[0] = dNumGroups[0];
dNumDistinctRows[1] = dNumGroups[1];
}
/* Keep these as doubles here; the conversion to long (guarding against overflow) happens where they are passed down, e.g. to make_agg. */
numDistinctRows[0] = dNumDistinctRows[0];
numDistinctRows[1] = dNumDistinctRows[1];
/* Choose implementation method if we didn't already */
if (!tested_hashed_distinct) {
Size hashentrysize;
/*
* Don't do it if it doesn't look like the hashtable will fit into
* work_mem.
*/
if (root->glob->vectorized)
hashentrysize = get_plan_actual_total_width(result_plan, root->glob->vectorized, OP_HASHAGG);
else
hashentrysize = get_hash_entry_size(result_plan->plan_width);
/*
* At this point, either hashed or sorted grouping will have to
* work from result_plan, so we pass that as both "cheapest" and
* "sorted".
*/
use_hashed_distinct = choose_hashed_distinct(root,
tuple_fraction,
limit_tuples,
PLAN_LOCAL_ROWS(result_plan),
result_plan->plan_width,
result_plan->startup_cost,
result_plan->total_cost,
ng_get_dest_distribution(result_plan),
result_plan->startup_cost,
result_plan->total_cost,
ng_get_dest_distribution(result_plan),
current_pathkeys,
dNumDistinctRows[0],
hashentrysize);
}
/* need to judge whether we should redistribute according to the distinct clause */
if (IS_STREAM_PLAN) {
if (is_execute_on_coordinator(result_plan) || is_execute_on_allnodes(result_plan) ||
is_replicated_plan(result_plan))
needs_stream = false;
else {
List* distinct_expr = get_sortgrouplist_exprs(parse->distinctClause, parse->targetList);
needs_stream = needs_agg_stream(root, distinct_expr, result_plan->distributed_keys);
list_free_ext(distinct_expr);
}
}
if (use_hashed_distinct) {
Size hash_entry_size = MAXALIGN(result_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
/* Hashed aggregate plan --- no sort needed */
if (IS_STREAM_PLAN && is_execute_on_datanodes(result_plan) && !is_replicated_plan(result_plan)) {
result_plan = generate_hashagg_plan(root,
result_plan,
result_plan->targetlist,
NULL,
list_length(parse->distinctClause),
numDistinctRows,
NULL,
NULL,
extract_grouping_ops(parse->distinctClause),
&needs_stream,
hash_entry_size,
DISTINCT_INTENT,
rel_info);
} else {
result_plan = (Plan*)make_agg(root,
result_plan->targetlist,
NIL,
AGG_HASHED,
NULL,
list_length(parse->distinctClause),
extract_grouping_cols(parse->distinctClause, result_plan->targetlist),
extract_grouping_ops(parse->distinctClause),
(long)Min(numDistinctRows[0], (double)LONG_MAX),
result_plan,
NULL,
false,
false,
NIL,
hash_entry_size);
}
next_is_second_level_distinct = true;
if (IS_STREAM_PLAN && needs_stream && is_execute_on_datanodes(result_plan) &&
!is_replicated_plan(result_plan)) {
if (next_is_second_level_distinct && contain_sets_expression) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"set-valued expression in qual/targetlist + two-level distinct\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
result_plan = (Plan*)mark_agg_stream(
root, tlist, result_plan, parse->distinctClause, DISTINCT_INTENT, &has_second_agg_sort);
if (!has_second_agg_sort)
current_pathkeys = NIL;
else
current_pathkeys = root->group_pathkeys;
} else /* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
} else {
/*
 * Set group_set and rebuild the pathkeys: the data values can be altered
 * by the grouping set afterwards, so equal exprs can not be deleted from
 * the pathkeys. Rebuild the pathkeys with the EquivalenceClass's
 * ec_group_set set to true.
 */
rebuild_pathkey_for_groupingSet<distinct_pathkey>(root, tlist, NULL, collectiveGroupExpr);
/*
* Use a Unique node to implement DISTINCT. Add an explicit sort
* if we couldn't make the path come out the way the Unique node
* needs it. If we do have to sort, always sort by the more
* rigorous of DISTINCT and ORDER BY, to avoid a second sort
* below. However, for regular DISTINCT, don't sort now if we
* don't have to --- sorting afterwards will likely be cheaper,
* and also has the possibility of optimizing via LIMIT. But for
* DISTINCT ON, we *must* force the final sort now, else it won't
* have the desired behavior.
*/
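/*
 * A hypothetical example of why DISTINCT ON must sort now:
 *     select distinct on (a) a, b from t order by a, b;
 * the sort on (a, b) decides which b survives for each a, so it cannot
 * be deferred until after the Unique node.
 */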
List* needed_pathkeys = NIL;
if (parse->hasDistinctOn && list_length(root->distinct_pathkeys) < list_length(root->sort_pathkeys))
needed_pathkeys = root->sort_pathkeys;
else
needed_pathkeys = root->distinct_pathkeys;
/* we also need to add a sort if the sub node is parallelized. */
if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys) ||
(result_plan->dop > 1 && needed_pathkeys)) {
if (list_length(root->distinct_pathkeys) >= list_length(root->sort_pathkeys))
current_pathkeys = root->distinct_pathkeys;
else {
current_pathkeys = root->sort_pathkeys;
/* AssertEreport checks that parser didn't mess up... */
AssertEreport(pathkeys_contained_in(root->distinct_pathkeys, current_pathkeys),
MOD_OPT,
"the parser does not mess up when adding sort for pathkeys.");
}
result_plan = (Plan*)make_sort_from_pathkeys(root, result_plan, current_pathkeys, -1.0);
}
result_plan = (Plan*)make_unique(result_plan, parse->distinctClause);
set_plan_rows(
result_plan, get_global_rows(dNumDistinctRows[0], 1.0, ng_get_dest_num_data_nodes(result_plan)));
/* The Unique node won't change sort ordering */
#ifdef STREAMPLAN
if (IS_STREAM_PLAN && needs_stream && is_execute_on_datanodes(result_plan) &&
!is_replicated_plan(result_plan)) {
result_plan = (Plan*)mark_distinct_stream(
root, tlist, result_plan, parse->distinctClause, root->query_level, current_pathkeys);
}
#endif
}
}
/*
* If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we
* intentionally test parse->rowMarks not root->rowMarks here. If there
* are only non-locking rowmarks, they should be handled by the
* ModifyTable node instead.)
*/
if (parse->rowMarks) {
#ifdef ENABLE_MOT
if (!IsMOTEngineUsed()) {
#endif
result_plan = (Plan*)make_lockrows(root, result_plan);
#ifdef ENABLE_MOT
}
#endif
/*
 * The result can no longer be assumed sorted, since the redistribution
 * added for lockrows may leave the data unsorted.
 */
#ifndef PGXC
current_pathkeys = NIL;
#endif
}
/*
* If ORDER BY was given and we were not able to make the plan come out in
* the right order, add an explicit sort step.
*/
if (parse->sortClause) {
/*
 * Set group_set and rebuild the pathkeys: the data values can be altered
 * by the grouping set afterwards, so equal exprs can not be deleted from
 * the pathkeys. Rebuild the pathkeys with the EquivalenceClass's
 * ec_group_set set to true.
 */
rebuild_pathkey_for_groupingSet<sort_pathkey>(root, tlist, NULL, collectiveGroupExpr);
/* we also need to add a sort if the sub node is parallelized. */
if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys) ||
(result_plan->dop > 1 && root->sort_pathkeys)) {
result_plan = (Plan*)make_sort_from_pathkeys(root, result_plan, root->sort_pathkeys, limit_tuples);
#ifdef PGXC
#ifdef STREAMPLAN
if (IS_STREAM_PLAN && check_sort_for_upsert(root))
result_plan = make_stream_sort(root, result_plan);
#endif /* STREAMPLAN */
if (IS_PGXC_COORDINATOR && !IS_STREAM && !IsConnFromCoord())
result_plan = (Plan*)create_remotesort_plan(root, result_plan);
#endif /* PGXC */
current_pathkeys = root->sort_pathkeys;
}
}
/*
* Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
*/
if (parse->limitCount || parse->limitOffset) {
#ifdef STREAMPLAN
if (IS_STREAM_PLAN) {
bool needs_sort = !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys);
result_plan = (Plan*)make_stream_limit(root,
result_plan,
parse->limitOffset,
parse->limitCount,
offset_est,
count_est,
limit_tuples,
needs_sort);
if (needs_sort)
current_pathkeys = root->sort_pathkeys;
} else
#endif
result_plan =
(Plan*)make_limit(root, result_plan, parse->limitOffset, parse->limitCount, offset_est, count_est);
#ifdef PGXC
/* See if we can push LIMIT or OFFSET clauses to Datanodes */
if (IS_PGXC_COORDINATOR && !IsConnFromCoord() && !IS_STREAM)
result_plan = (Plan*)create_remotelimit_plan(root, result_plan);
#endif /* PGXC */
}
#ifdef STREAMPLAN
/*
 * Add a Remote Query on top of the stream plan at the end.
 * Don't add a gather for non-select statements.
 */
if (IS_STREAM_PLAN && (!IsConnFromCoord()) && root->query_level == 1 && (parse->commandType == CMD_SELECT) &&
is_execute_on_datanodes(result_plan)) {
bool single_node = (result_plan->exec_nodes != NULL && list_length(result_plan->exec_nodes->nodeList) == 1);
result_plan = make_simple_RemoteQuery(result_plan, root, false);
/*
 * If the result plan is a simple remote query and the plan carries a sort
 * pathkey, we can deduce that a sort has already been added on the
 * datanodes, so we only need to add a merge sort to the remote query when
 * multiple DNs are involved.
 */
if ((IsA(result_plan, RemoteQuery) || IsA(result_plan, Stream)) &&
!is_replicated_plan(result_plan->lefttree) && !single_node &&
root->sort_pathkeys != NIL && pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) {
Sort* sortPlan = make_sort_from_pathkeys(root, result_plan, current_pathkeys, limit_tuples);
SimpleSort* streamSort = makeNode(SimpleSort);
streamSort->numCols = sortPlan->numCols;
streamSort->sortColIdx = sortPlan->sortColIdx;
streamSort->sortOperators = sortPlan->sortOperators;
streamSort->nullsFirst = sortPlan->nullsFirst;
streamSort->sortToStore = false;
streamSort->sortCollations = sortPlan->collations;
if (IsA(result_plan, RemoteQuery)) {
((RemoteQuery*)result_plan)->sort = streamSort;
} else if (IsA(result_plan, Stream)) {
((Stream*)result_plan)->sort = streamSort;
}
}
}
/*
* Return the actual output ordering in query_pathkeys for possible use by
* an outer query level.
*/
root->query_pathkeys = current_pathkeys;
#endif
#ifdef ENABLE_MULTIPLE_NODES
if (g_instance.attr.attr_common.enable_tsdb) {
result_plan = tsdb_modifier(root, tlist, result_plan);
} else if (has_ts_func(tlist)) {
ereport(ERROR, (errcode(ERRCODE_INVALID_OPERATION), errmsg("please enable tsdb to use tsdb functions!")));
}
#endif
(void)MemoryContextSwitchTo(oldcontext);
return result_plan;
}
/*
* Given a groupclause for a collection of grouping sets, produce the
* corresponding groupColIdx.
*
* root->grouping_map maps the tleSortGroupRef to the actual column position in
* the input tuple. So we get the ref from the entries in the groupclause and
* look them up there.
*/
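/*
 * A worked example under assumed values: with grouping_map[] such that
 * tleSortGroupRef 3 -> column 1 and tleSortGroupRef 5 -> column 4, a
 * groupClause referencing refs (5, 3) yields new_grpColIdx = {4, 1}.
 */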
static AttrNumber* remap_groupColIdx(PlannerInfo* root, List* groupClause)
{
AttrNumber* grouping_map = root->grouping_map;
AttrNumber* new_grpColIdx = NULL;
ListCell* lc = NULL;
int i;
AssertEreport(grouping_map != NULL, MOD_OPT, "invalid grouping map when generating the corresponding groupColIdx.");
if (list_length(groupClause) > 0) {
new_grpColIdx = (AttrNumber*)palloc0(sizeof(AttrNumber) * list_length(groupClause));
i = 0;
foreach (lc, groupClause) {
SortGroupClause* clause = (SortGroupClause*)lfirst(lc);
new_grpColIdx[i++] = grouping_map[clause->tleSortGroupRef];
}
} else {
new_grpColIdx = NULL;
}
return new_grpColIdx;
}
/* Judge whether distribute keys exist for grouping sets */
static List* get_group_expr(List* sortrefList, List* tlist)
{
List* group_list = NULL;
ListCell* lc = NULL;
foreach (lc, sortrefList) {
int i = (int)lfirst_int(lc);
TargetEntry* tle = get_sortgroupref_tle(i, tlist);
group_list = lappend(group_list, tle->expr);
}
return group_list;
}
/*
* @Description: set aggref's distinct to NULL
* @in tlist - query's target list.
* @in havingQual - query having clause.
*/
static void set_distinct_to_null(List* tlist, List* havingQual)
{
List* tlist_var_agg = NULL;
List* havin_var_agg = NULL;
List* list_var_agg = NULL;
tlist_var_agg = pull_var_clause((Node*)tlist, PVC_INCLUDE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS);
havin_var_agg = pull_var_clause((Node*)havingQual, PVC_INCLUDE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS);
list_var_agg = list_concat(tlist_var_agg, havin_var_agg);
ListCell* lc = NULL;
/* Set distinct to NULL; in this function there is only one distinct node. */
foreach (lc, list_var_agg) {
Node* node = (Node*)lfirst(lc);
if (IsA(node, Aggref) && ((Aggref*)node)->aggdistinct) {
Aggref* tmp_agg = (Aggref*)node;
list_free_deep(tmp_agg->aggdistinct);
tmp_agg->aggdistinct = NULL;
}
}
}
/*
* @Description: Build Agg and Sort nodes to implement sorted grouping with one or more
* grouping sets. (A plain GROUP BY or just the presence of aggregates counts
* for this purpose as a single grouping set; the calling code is responsible
* for providing a non-empty rollup_groupclauses list for such cases, though
* rollup_lists may be null.)
*
* The last entry in rollup_groupclauses (which is the one the input is sorted
* on, if at all) is the one used for the returned Agg node. Any additional
* rollups are attached, with corresponding sort info, to subsidiary Agg and
* Sort nodes attached to the side of the real Agg node; these nodes don't
* participate in the plan directly, but they are both a convenient way to
* represent the required data and a convenient way to account for the costs
* of execution.
* @in root - Per-query information for planning/optimization.
* @in parse - Query tree.
* @in tlist - targetlist.
* @in need_sort_for_grouping - If we need a Sort operation on the input.
* @in rollup_groupclauses - is a list of grouping clauses for grouping sets
* @in rollup_lists - is a list of grouping sets
* @in groupColIdx - group column idx
* @in agg_costs - agg costs
* @in numGroups - is the estimated number of groups
* @in result_plan - left plan
* @in wflists - windows fun info.
* @in need_stream - if need stream
* @out - agg plan
*/
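/*
 * A hedged illustration (not from the source): for
 *     select a, b, sum(c) from t group by rollup(a, b);
 * rollup_groupclauses could arrive as [(a), (a, b)]; the last entry
 * (a, b) becomes the real Agg node fed by the (possibly sorted) input,
 * while the (a) rollup is attached as a side Sort+Agg in the chain,
 * contributing cost but not sitting on the main plan tree path.
 */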
static Plan* build_grouping_chain(PlannerInfo* root, Query* parse, List** tlist, bool need_sort_for_grouping,
List* rollup_groupclauses, List* rollup_lists, AttrNumber* groupColIdx, AggClauseCosts* agg_costs, long numGroups,
Plan* result_plan, WindowLists* wflists, bool need_stream)
{
AttrNumber* top_grpColIdx = groupColIdx;
List* chain = NIL;
List* newTlist = *tlist;
/*
* Prepare the grpColIdx for the real Agg node first, because we may need
* it for sorting
*/
if (parse->groupingSets) {
top_grpColIdx = remap_groupColIdx(root, (List*)llast(rollup_groupclauses));
}
/* Need hashagg above sort+group */
if (need_stream) {
/* Append the group by exprs to the sort agg's targetlist, because the redistribute node and hashagg node will use them */
newTlist = add_groupingIdExpr_to_tlist(*tlist);
} else if (result_plan->dop > 1) {
result_plan = create_local_redistribute(root, result_plan, result_plan->distributed_keys, 0);
}
/* If we need a Sort operation on the input, generate that. */
if (need_sort_for_grouping) {
result_plan =
(Plan*)make_sort_from_groupcols(root, (List*)llast(rollup_groupclauses), top_grpColIdx, result_plan);
}
/*
* Generate the side nodes that describe the other sort and group
* operations besides the top one.
*/
while (list_length(rollup_groupclauses) > 1) {
List* groupClause = (List*)linitial(rollup_groupclauses);
List* gsets = (List*)linitial(rollup_lists);
AttrNumber* new_grpColIdx = NULL;
Plan* sort_plan = NULL;
Plan* agg_plan = NULL;
AssertEreport(groupClause != NIL,
MOD_OPT,
"invalid group clause when generating the side nodes that describe the other sort"
"and group operations besides the top one.");
AssertEreport(gsets != NIL,
MOD_OPT,
"invalid gsets when generating the side nodes that describe the other sort"
"and group operations besides the top one.");
new_grpColIdx = remap_groupColIdx(root, groupClause);
sort_plan = (Plan*)make_sort_from_groupcols(root, groupClause, new_grpColIdx, result_plan);
/*
* sort_plan includes the cost of result_plan over again, which is not
* what we want (since it's not actually running that plan). So
* correct the cost figures.
*/
sort_plan->startup_cost -= result_plan->total_cost;
sort_plan->total_cost -= result_plan->total_cost;
agg_plan = (Plan*)make_agg(root,
*tlist,
(List*)parse->havingQual,
AGG_SORTED,
agg_costs,
list_length((List*)linitial(gsets)),
new_grpColIdx,
extract_grouping_ops(groupClause),
numGroups,
sort_plan,
wflists,
false,
false,
gsets);
sort_plan->lefttree = NULL;
chain = (List*)lappend(chain, agg_plan);
if (rollup_lists != NULL)
rollup_lists = list_delete_first(rollup_lists);
rollup_groupclauses = list_delete_first(rollup_groupclauses);
}
/*
* Now make the final Agg node
*/
{
List* groupClause = (List*)linitial(rollup_groupclauses);
List* gsets = rollup_lists ? (List*)linitial(rollup_lists) : NIL;
int numGroupCols;
ListCell* lc = NULL;
if (gsets != NULL)
numGroupCols = list_length((List*)linitial(gsets));
else
numGroupCols = list_length(parse->groupClause);
result_plan = (Plan*)make_agg(root,
newTlist,
(List*)parse->havingQual,
(numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
agg_costs,
numGroupCols,
top_grpColIdx,
extract_grouping_ops(groupClause),
numGroups,
result_plan,
wflists,
need_stream,
!need_stream,
gsets,
0,
true);
if (wflists != NULL && wflists->activeWindows) {
result_plan->targetlist = make_windowInputTargetList(root, result_plan->targetlist, wflists->activeWindows);
}
((Agg*)result_plan)->chain = chain;
if (need_stream) {
result_plan->distributed_keys = NIL;
}
/*
* Add the additional costs. But only the total costs count, since the
* additional sorts aren't run on startup.
*/
foreach (lc, chain) {
Plan* subplan = (Plan*)lfirst(lc);
result_plan->total_cost += subplan->total_cost;
/*
* Nuke stuff we don't need to avoid bloating debug output.
*/
subplan->targetlist = NIL;
subplan->qual = NIL;
subplan->lefttree->targetlist = NIL;
}
}
if (newTlist != *tlist)
*tlist = newTlist;
return result_plan;
}
/*
 * @Description: Generate a stream redistribute plan for distinct if needed; build a sort agg plan
 * for the AP function; generate a new group clause for hash agg.
* @in root - Per-query information for planning/optimization.
* @in parse - Query tree.
* @in tlist - targetlist.
* @in need_sort_for_grouping - If we need a Sort operation on the input.
* @in rollup_groupclauses - is a list of grouping clauses for grouping sets
* @in rollup_lists - is a list of grouping sets
* @in groupColIdx - group column idx
* @in agg_costs - agg costs
* @in numGroups - is the estimated number of groups
* @in result_plan - left plan
* @in wflists - windows fun info.
 * @out need_hash - whether a second-level hash agg is needed
* @in collectiveGroupExpr - collective group exprs
* @out - agg plan
*/
static Plan* build_groupingsets_plan(PlannerInfo* root, Query* parse, List** tlist, bool need_sort_for_grouping,
List* rollup_groupclauses, List* rollup_lists, AttrNumber** groupColIdx, AggClauseCosts* agg_costs, long numGroups,
Plan* result_plan, WindowLists* wflists, bool* need_hash, List* collectiveGroupExpr)
{
bool hasDistinct = false;
bool need_stream = false;
bool stream_plan = IS_STREAM_PLAN && is_execute_on_datanodes(result_plan) && !is_replicated_plan(result_plan);
/* We need to add a redistribution for distinct */
if (stream_plan) {
/* Judge whether the sort agg above needs a stream and hashagg */
need_stream = needs_agg_stream(root, collectiveGroupExpr, result_plan->distributed_keys);
if (list_length(agg_costs->exprAggs) == 1 && !agg_costs->hasdctDnAggs && !agg_costs->hasDnAggs) {
double multiple;
Node* distinct_node = (Node*)linitial(agg_costs->exprAggs);
List* distinct_node_list = list_make1(distinct_node);
bool need_redis = needs_agg_stream(root, distinct_node_list, result_plan->distributed_keys);
if (need_redis) {
distinct_node_list =
get_distributekey_from_tlist(root, NIL, distinct_node_list, result_plan->plan_rows, &multiple);
if (distinct_node_list != NIL) {
result_plan = make_redistribute_for_agg(root, result_plan, distinct_node_list, 0);
need_stream = needs_agg_stream(root, collectiveGroupExpr, result_plan->distributed_keys);
} else {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Count(Distinct)\" on redistribution unsupported data type");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
}
/*
 * Distinct will be fully computed in the sort+group operator.
 * The upper-level hash agg can not build an agg(distinct) plan, so here
 * we need to set exprAggs to NIL and set hasDistinct (used in the code
 * below) to true.
 */
hasDistinct = true;
agg_costs->exprAggs = NIL;
} else if (need_stream && (agg_costs->exprAggs != NIL || agg_costs->hasDnAggs)) { /* Array agg */
double multiple;
List* distinct_node_list =
get_distributekey_from_tlist(root, NIL, collectiveGroupExpr, result_plan->plan_rows, &multiple);
if (distinct_node_list != NIL) {
result_plan = make_redistribute_for_agg(root, result_plan, distinct_node_list, 0);
need_stream = false;
} else {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"String_agg\" or \"Array_agg\" or \"Listagg\" + \"Grouping sets\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
}
}
result_plan = build_grouping_chain(root,
parse,
tlist,
need_sort_for_grouping,
rollup_groupclauses,
rollup_lists,
*groupColIdx,
agg_costs,
numGroups,
result_plan,
wflists,
need_stream);
if (stream_plan) {
/* In this case no hash agg is needed */
if (result_plan->distributed_keys) {
*need_hash = false;
/*
 * Alter this plan's distribute keys, since the current distribute key's
 * values can be altered by the grouping set afterwards.
 * For example:
 * create table t1(location_id integer )distribute by hash(location_id);
 * create table t2(item_id varchar(20), location_id integer)distribute by hash(item_id);
 * select loc.location_id as c1, ale.location_id as c2
 * from t1 as loc, t2 ale
 * where c2 = c1
 * group by c2, grouping sets(c1, c2);
 *
 * Plan:
 *
 * Group By Key: ale.location_id, loc.location_id
 * Group By Key: ale.location_id
 *
 * c1 will be set to NULL by the grouping sets afterwards, but c2 will
 * keep its original values because it appears in all group clauses, so
 * we need to set the plan's distribute keys to c2 instead of c1.
 */
adjust_plan_dis_key(root, result_plan, collectiveGroupExpr);
}
/* This case means we need to add hashagg nodes. */
if (result_plan->distributed_keys == NIL) {
/*
 * Distinct has been fully computed in result_plan, so the upper-level
 * hashagg does not need to consider distinct at all; it only needs to
 * aggregate the results of the distinct. Therefore we set aggdistinct
 * to NULL.
 */
if (hasDistinct) {
/*
 * Set the agg's distinct to NULL. In this case agg(distinct) has already
 * been computed in the left node; the upper-level node only needs to do
 * the agg.
 */
set_distinct_to_null(*tlist, result_plan->qual);
/* we need to adjust sort_pathkeys etc., otherwise lookups may fail */
adjust_all_pathkeys_by_agg_tlist(root, *tlist, wflists);
}
/* Use the query's groupClause plus the grouping expr as the hashagg group clause */
parse->groupClause = add_groupId_to_groupExpr(parse->groupClause, *tlist);
/* Keep group keys */
*groupColIdx = (AttrNumber*)palloc0(sizeof(AttrNumber) * list_length(parse->groupClause));
locate_grouping_columns(root, *tlist, result_plan->targetlist, *groupColIdx);
}
}
return result_plan;
}
/*
* add_tlist_costs_to_plan
*
* Estimate the execution costs associated with evaluating the targetlist
* expressions, and add them to the cost estimates for the Plan node.
*
* If the tlist contains set-returning functions, also inflate the Plan's cost
* and plan_rows estimates accordingly. (Hence, this must be called *after*
* any logic that uses plan_rows to, eg, estimate qual evaluation costs.)
*
* Note: during initial stages of planning, we mostly consider plan nodes with
* "flat" tlists, containing just Vars. So their evaluation cost is zero
* according to the model used by cost_qual_eval() (or if you prefer, the cost
* is factored into cpu_tuple_cost). Thus we can avoid accounting for tlist
* cost throughout query_planner() and subroutines. But once we apply a
* tlist that might contain actual operators, sub-selects, etc, we'd better
* account for its cost. Any set-returning functions in the tlist must also
* affect the estimated rowcount.
*
* Once grouping_planner() has applied a general tlist to the topmost
* scan/join plan node, any tlist eval cost for added-on nodes should be
* accounted for as we create those nodes. Presently, of the node types we
* can add on later, only Agg, WindowAgg, and Group project new tlists (the
* rest just copy their input tuples) --- so make_agg(), make_windowagg() and
* make_group() are responsible for calling this function to account for their
* tlist costs.
*/
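/*
 * A worked example under assumed numbers: if the tlist contains
 * generate_series(1, 3), tlist_returns_set_rows() reports tlist_rows = 3,
 * so plan_rows is tripled and total_cost grows by
 * PLAN_LOCAL_ROWS(plan) * (3 - 1) * cpu_tuple_cost / 2.
 */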
void add_tlist_costs_to_plan(PlannerInfo* root, Plan* plan, List* tlist)
{
QualCost tlist_cost;
double tlist_rows;
cost_qual_eval(&tlist_cost, tlist, root);
plan->startup_cost += tlist_cost.startup;
plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple * PLAN_LOCAL_ROWS(plan);
tlist_rows = tlist_returns_set_rows(tlist);
if (tlist_rows > 1) {
/*
* We assume that execution costs of the tlist proper were all
* accounted for by cost_qual_eval. However, it still seems
* appropriate to charge something more for the executor's general
* costs of processing the added tuples. The cost is probably less
* than cpu_tuple_cost, though, so we arbitrarily use half of that.
*/
plan->total_cost += PLAN_LOCAL_ROWS(plan) * (tlist_rows - 1) * u_sess->attr.attr_sql.cpu_tuple_cost / 2;
plan->plan_rows *= tlist_rows;
}
}
/*
* Detect whether a plan node is a "dummy" plan created when a relation
* is deemed not to need scanning due to constraint exclusion.
*
* Currently, such dummy plans are Result nodes with constant FALSE
* filter quals (see set_dummy_rel_pathlist and create_append_plan).
*
* XXX this probably ought to be somewhere else, but not clear where.
*/
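/*
 * A hypothetical case that produces such a plan:
 *     select * from part_2020 where ts >= '2021-01-01';
 * when a CHECK constraint proves the scan empty, the planner emits a
 * Result node whose resconstantqual is a constant false, which this
 * function recognizes.
 */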
bool is_dummy_plan(Plan* plan)
{
if (IsA(plan, BaseResult)) {
List* rcqual = (List*)((BaseResult*)plan)->resconstantqual;
if (list_length(rcqual) == 1) {
Const* constqual = (Const*)linitial(rcqual);
if (constqual && IsA(constqual, Const)) {
if (!constqual->constisnull && !DatumGetBool(constqual->constvalue))
return true;
}
}
}
return false;
}
/*
* Create a bitmapset of the RT indexes of live base relations
*
* Helper for preprocess_rowmarks ... at this point in the proceedings,
* the only good way to distinguish baserels from appendrel children
* is to see what is in the join tree.
*/
Bitmapset* get_base_rel_indexes(Node* jtnode)
{
Bitmapset* result = NULL;
if (jtnode == NULL)
return NULL;
if (IsA(jtnode, RangeTblRef)) {
int varno = ((RangeTblRef*)jtnode)->rtindex;
result = bms_make_singleton(varno);
} else if (IsA(jtnode, FromExpr)) {
FromExpr* f = (FromExpr*)jtnode;
ListCell* l = NULL;
result = NULL;
foreach (l, f->fromlist)
result = bms_join(result, get_base_rel_indexes((Node*)lfirst(l)));
} else if (IsA(jtnode, JoinExpr)) {
JoinExpr* j = (JoinExpr*)jtnode;
result = bms_join(get_base_rel_indexes(j->larg), get_base_rel_indexes(j->rarg));
} else {
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmsg("unrecognized node type when get base relation indexes: %d", (int)nodeTag(jtnode))));
result = NULL; /* keep compiler quiet */
}
return result;
}
/*
* preprocess_rowmarks - set up PlanRowMarks if needed
*/
static void preprocess_rowmarks(PlannerInfo* root)
{
Query* parse = root->parse;
Bitmapset* rels = NULL;
List* prowmarks = NIL;
ListCell* l = NULL;
int i;
if (parse->rowMarks) {
/*
* We've got trouble if FOR UPDATE/SHARE appears inside grouping,
* since grouping renders a reference to individual tuple CTIDs
* invalid. This is also checked at parse time, but that's
* insufficient because of rule substitution, query pullup, etc.
*/
CheckSelectLocking(parse);
} else {
/*
 * We only need rowmarks for UPDATE, DELETE, MERGE INTO, or FOR UPDATE/SHARE.
*/
if (parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE &&
(parse->commandType != CMD_MERGE || (u_sess->opt_cxt.is_stream == false && IS_SINGLE_NODE == false)))
return;
}
/*
* We need to have rowmarks for all base relations except the target. We
* make a bitmapset of all base rels and then remove the items we don't
* need or have FOR UPDATE/SHARE marks for.
*/
rels = get_base_rel_indexes((Node*)parse->jointree);
if (parse->resultRelation)
rels = bms_del_member(rels, parse->resultRelation);
/*
* Convert RowMarkClauses to PlanRowMark representation.
*/
prowmarks = NIL;
foreach (l, parse->rowMarks) {
RowMarkClause* rc = (RowMarkClause*)lfirst(l);
RangeTblEntry* rte = rt_fetch(rc->rti, parse->rtable);
PlanRowMark* newrc = NULL;
/*
* Currently, it is syntactically impossible to have FOR UPDATE
* applied to an update/delete target rel. If that ever becomes
* possible, we should drop the target from the PlanRowMark list.
*/
AssertEreport(rc->rti != (uint)parse->resultRelation,
MOD_OPT,
"invalid range table index when converting RowMarkClauses to PlanRowMark representation.");
/*
* Ignore RowMarkClauses for subqueries; they aren't real tables and
* can't support true locking. Subqueries that got flattened into the
* main query should be ignored completely. Any that didn't will get
* ROW_MARK_COPY items in the next loop.
*/
if (rte->rtekind != RTE_RELATION)
continue;
/*
* Similarly, ignore RowMarkClauses for foreign tables; foreign tables
* will instead get ROW_MARK_COPY items in the next loop. (FDWs might
* choose to do something special while fetching their rows, but that
* is of no concern here.)
*/
if (rte->relkind == RELKIND_FOREIGN_TABLE || rte->relkind == RELKIND_STREAM)
continue;
rels = bms_del_member(rels, rc->rti);
newrc = makeNode(PlanRowMark);
newrc->rti = newrc->prti = rc->rti;
newrc->rowmarkId = ++(root->glob->lastRowMarkId);
if (rc->forUpdate)
newrc->markType = ROW_MARK_EXCLUSIVE;
else
newrc->markType = ROW_MARK_SHARE;
newrc->noWait = rc->noWait;
newrc->isParent = false;
newrc->bms_nodeids = ng_get_baserel_data_nodeids(rte->relid, rte->relkind);
prowmarks = lappend(prowmarks, newrc);
}
/*
* Now, add rowmarks for any non-target, non-locked base relations.
*/
i = 0;
foreach (l, parse->rtable) {
RangeTblEntry* rte = (RangeTblEntry*)lfirst(l);
PlanRowMark* newrc = NULL;
i++;
if (!bms_is_member(i, rels))
continue;
newrc = makeNode(PlanRowMark);
newrc->rti = newrc->prti = i;
newrc->rowmarkId = ++(root->glob->lastRowMarkId);
/* real tables support REFERENCE, anything else needs COPY */
if (rte->rtekind == RTE_RELATION && rte->relkind != RELKIND_FOREIGN_TABLE && rte->relkind != RELKIND_STREAM)
newrc->markType = ROW_MARK_REFERENCE;
else
newrc->markType = ROW_MARK_COPY;
newrc->noWait = false; /* doesn't matter */
newrc->isParent = false;
newrc->bms_nodeids = (RTE_RELATION == rte->rtekind && RELKIND_FOREIGN_TABLE != rte->relkind
&& RELKIND_STREAM != rte->relkind)
? ng_get_baserel_data_nodeids(rte->relid, rte->relkind)
: NULL;
prowmarks = lappend(prowmarks, newrc);
}
root->rowMarks = prowmarks;
}
#ifdef PGXC
/*
 * separate_rowmarks - In XC, Coordinators are supposed to skip handling
 *                     of row marks of type ROW_MARK_EXCLUSIVE &
 *                     ROW_MARK_SHARE. To do that we simply remove such
 *                     row marks from the list; instead they are saved in
 *                     another list that is then used to add FOR
 *                     UPDATE/SHARE to the remote query in the function
 *                     create_remotequery_plan.
*/
static void separate_rowmarks(PlannerInfo* root)
{
List* rml_1 = NULL;
List* rml_2 = NULL;
ListCell* rm = NULL;
if (IS_PGXC_DATANODE || IsConnFromCoord() || root->rowMarks == NULL)
return;
foreach (rm, root->rowMarks) {
PlanRowMark* prm = (PlanRowMark*)lfirst(rm);
if (prm->markType == ROW_MARK_EXCLUSIVE || prm->markType == ROW_MARK_SHARE)
rml_1 = lappend(rml_1, prm);
else
rml_2 = lappend(rml_2, prm);
}
list_free_ext(root->rowMarks);
root->rowMarks = rml_2;
root->xc_rowMarks = rml_1;
}
#endif /*PGXC*/
/*
* Try to obtain the clause values. We use estimate_expression_value
* primarily because it can sometimes do something useful with Params.
*/
static void estimate_limit_offset_count(PlannerInfo* root, int64* offset_est, int64* count_est)
{
Query* parse = root->parse;
Node* est = NULL;
/* Should not be called unless LIMIT or OFFSET */
AssertEreport(parse->limitCount || parse->limitOffset,
MOD_OPT,
"invalid result tuples when doing pre-estimation for LIMIT and/or OFFSET clauses.");
if (parse->limitCount) {
est = estimate_expression_value(root, parse->limitCount);
if (est && IsA(est, Const)) {
if (((Const*)est)->constisnull) {
/* NULL indicates LIMIT ALL, ie, no limit */
*count_est = 0; /* treat as not present */
} else {
*count_est = DatumGetInt64(((Const*)est)->constvalue);
if (*count_est <= 0) {
*count_est = 1; /* force to at least 1 */
}
}
} else {
*count_est = -1; /* can't estimate */
}
} else {
*count_est = 0; /* not present */
}
if (parse->limitOffset) {
est = estimate_expression_value(root, parse->limitOffset);
if (est && IsA(est, Const)) {
if (((Const*)est)->constisnull) {
/* Treat NULL as no offset; the executor will too */
*offset_est = 0; /* treat as not present */
} else {
*offset_est = DatumGetInt64(((Const*)est)->constvalue);
if (*offset_est < 0) {
*offset_est = 0; /* less than 0 is same as 0 */
}
}
} else {
*offset_est = -1; /* can't estimate */
}
} else {
*offset_est = 0; /* not present */
}
}
/*
* preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
*
* We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
* results back in *count_est and *offset_est. These variables are set to
* 0 if the corresponding clause is not present, and -1 if it's present
* but we couldn't estimate the value for it. (The "0" convention is OK
* for OFFSET but a little bit bogus for LIMIT: effectively we estimate
* LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
* usual practice of never estimating less than one row.) These values will
* be passed to make_limit, which see if you change this code.
*
* The return value is the suitably adjusted tuple_fraction to use for
* planning the query. This adjustment is not overridable, since it reflects
* plan actions that grouping_planner() will certainly take, not assumptions
* about context.
*/
static double preprocess_limit(PlannerInfo* root, double tuple_fraction, int64* offset_est, int64* count_est)
{
double limit_fraction;
estimate_limit_offset_count(root, offset_est, count_est);
if (*count_est != 0) {
/*
* A LIMIT clause limits the absolute number of tuples returned.
* However, if it's not a constant LIMIT then we have to guess; for
* lack of a better idea, assume 10% of the plan's result is wanted.
*/
if (*count_est < 0 || *offset_est < 0) {
/* LIMIT or OFFSET is an expression ... punt ... */
limit_fraction = 0.10;
} else {
/* LIMIT (plus OFFSET, if any) is max number of tuples needed */
limit_fraction = (double)*count_est + (double)*offset_est;
}
/*
* If we have absolute limits from both caller and LIMIT, use the
* smaller value; likewise if they are both fractional. If one is
* fractional and the other absolute, we can't easily determine which
* is smaller, but we use the heuristic that the absolute will usually
* be smaller.
*/
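/*
 * A worked example under assumed numbers: the caller passes
 * tuple_fraction = 0.25 and the query has LIMIT 100, so
 * limit_fraction = 100; one value is fractional and the other absolute,
 * and the absolute 100 is kept on the heuristic that it is smaller.
 */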
if (tuple_fraction >= 1.0) {
/*
* if true, both absolute
* else, caller absolute, limit fractional; use caller's value
*/
tuple_fraction = limit_fraction >= 1.0 ?
Min(tuple_fraction, limit_fraction) : tuple_fraction;
} else if (tuple_fraction > 0.0) {
/*
* if true, caller fractional, limit absolute; use limit
* else, both fractional
*/
tuple_fraction = limit_fraction >= 1.0 ?
limit_fraction : Min(tuple_fraction, limit_fraction);
} else {
/* no info from caller, just use limit */
tuple_fraction = limit_fraction;
}
} else if (*offset_est != 0 && tuple_fraction > 0.0) {
/*
* We have an OFFSET but no LIMIT. This acts entirely differently
* from the LIMIT case: here, we need to increase rather than decrease
* the caller's tuple_fraction, because the OFFSET acts to cause more
* tuples to be fetched instead of fewer. This only matters if we got
* a tuple_fraction > 0, however.
*
* As above, use 10% if OFFSET is present but unestimatable.
*/
limit_fraction = *offset_est < 0 ? 0.10 : (double)*offset_est;
/*
* If we have absolute counts from both caller and OFFSET, add them
* together; likewise if they are both fractional. If one is
* fractional and the other absolute, we want to take the larger, and
* we heuristically assume that's the fractional one.
*/
if (tuple_fraction >= 1.0) {
tuple_fraction = limit_fraction >= 1.0 ?
tuple_fraction + limit_fraction : limit_fraction;
} else {
if (limit_fraction >= 1.0) {
/* caller fractional, limit absolute; use caller's value */
} else {
/* both fractional, so add them together */
tuple_fraction += limit_fraction;
/* assume fetch all */
tuple_fraction = tuple_fraction >= 1.0 ? 0.0 : tuple_fraction;
}
}
}
return tuple_fraction;
}
/*
* preprocess_groupclause - do preparatory work on GROUP BY clause
*
* The idea here is to adjust the ordering of the GROUP BY elements
* (which in itself is semantically insignificant) to match ORDER BY,
* thereby allowing a single sort operation to both implement the ORDER BY
* requirement and set up for a Unique step that implements GROUP BY.
*
* In principle it might be interesting to consider other orderings of the
* GROUP BY elements, which could match the sort ordering of other
* possible plans (eg an indexscan) and thereby reduce cost. We don't
* bother with that, though. Hashed grouping will frequently win anyway.
*
* Note: we need no comparable processing of the distinctClause because
* the parser already enforced that that matches ORDER BY.
*/
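/*
 * A hypothetical illustration: for
 *     select a, b, count(*) from t group by b, a order by a, b;
 * the GROUP BY list is reordered to (a, b) so that one sort satisfies
 * both the ORDER BY and a sorted-grouping step.
 */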
List* preprocess_groupclause(PlannerInfo* root, List* force)
{
Query* parse = root->parse;
List* new_groupclause = NIL;
bool partial_match = false;
ListCell* sl = NULL;
ListCell* gl = NULL;
/* For grouping sets, we need to force the ordering */
if (force != NIL) {
foreach (sl, force) {
Index ref = lfirst_int(sl);
SortGroupClause* cl = get_sortgroupref_clause(ref, parse->groupClause);
if (!OidIsValid(cl->sortop)) {
Node* expr = get_sortgroupclause_expr(cl, parse->targetList);
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify an ordering operator for type %s", format_type_be(exprType(expr))),
errdetail("Grouping set columns must be able to sort their inputs.")));
}
new_groupclause = lappend(new_groupclause, cl);
}
return new_groupclause;
}
/* If no ORDER BY, nothing useful to do here */
if (parse->sortClause == NIL) {
return parse->groupClause;
}
/*
* Scan the ORDER BY clause and construct a list of matching GROUP BY
* items, but only as far as we can make a matching prefix.
*
* This code assumes that the sortClause contains no duplicate items.
*/
foreach (sl, parse->sortClause) {
SortGroupClause* sc = (SortGroupClause*)lfirst(sl);
foreach (gl, parse->groupClause) {
SortGroupClause* gc = (SortGroupClause*)lfirst(gl);
if (equal(gc, sc)) {
new_groupclause = lappend(new_groupclause, gc);
break;
}
}
if (gl == NULL) {
break; /* no match, so stop scanning */
}
}
/* Did we match all of the ORDER BY list, or just some of it? */
partial_match = (sl != NULL);
/* If no match at all, no point in reordering GROUP BY */
if (new_groupclause == NIL) {
return parse->groupClause;
}
/*
* Add any remaining GROUP BY items to the new list, but only if we were
* able to make a complete match. In other words, we only rearrange the
* GROUP BY list if the result is that one list is a prefix of the other
* --- otherwise there's no possibility of a common sort. Also, give up
* if there are any non-sortable GROUP BY items, since then there's no
* hope anyway.
*/
foreach (gl, parse->groupClause) {
SortGroupClause* gc = (SortGroupClause*)lfirst(gl);
if (list_member_ptr(new_groupclause, gc)) {
continue; /* it matched an ORDER BY item */
}
if (partial_match) {
return parse->groupClause; /* give up, no common sort possible */
}
if (!OidIsValid(gc->sortop)) {
return parse->groupClause; /* give up, GROUP BY can't be sorted */
}
new_groupclause = lappend(new_groupclause, gc);
}
/* Success --- install the rearranged GROUP BY list */
AssertEreport(list_length(parse->groupClause) == list_length(new_groupclause),
MOD_OPT,
"the length of new group clause does not match to the group clause of parse tree"
"when doing preparatory work on GROUP BY clause.");
return new_groupclause;
}
/*
* Extract lists of grouping sets that can be implemented using a single
* rollup-type aggregate pass each. Returns a list of lists of grouping sets.
*
* Input must be sorted with smallest sets first. Result has each sublist
* sorted with smallest sets first.
*
* We want to produce the absolute minimum possible number of lists here to
* avoid excess sorts. Fortunately, there is an algorithm for this; the problem
* of finding the minimal partition of a partially-ordered set into chains
* (which is what we need, taking the list of grouping sets as a poset ordered
* by set inclusion) can be mapped to the problem of finding the maximum
* cardinality matching on a bipartite graph, which is solvable in polynomial
* time with a worst case of no worse than O(n^2.5) and usually much
* better. Since our N is at most 4096, we don't need to consider fallbacks to
* heuristic or approximate methods. (Planning time for a 12-d cube is under
* half a second on my modest system even with optimization off and assertions
* on.)
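 *
 * As an illustrative example, the grouping sets
 *     (), (a), (c), (a,b), (a,b,c)
 * can be covered by two chains ordered by set inclusion,
 *     [(), (a), (a,b), (a,b,c)] and [(c)],
 * so two rollup passes suffice instead of five separate aggregations.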
*/
List* extract_rollup_sets(List* groupingSets)
{
int num_sets_raw = list_length(groupingSets);
int num_empty = 0;
int num_sets = 0; /* distinct sets */
int num_chains = 0;
List* result = NIL;
List** results;
List** orig_sets;
Bitmapset** set_masks;
int* chains = NULL;
short** adjacency;
short* adjacency_buf = NULL;
BipartiteMatchState* state = NULL;
int i;
int j;
int j_size;
ListCell* lc1 = list_head(groupingSets);
ListCell* lc = NULL;
/*
* Start by stripping out empty sets. The algorithm doesn't require this,
* but the planner currently needs all empty sets to be returned in the
* first list, so we strip them here and add them back after.
*/
while (lc1 && lfirst(lc1) == NIL) {
++num_empty;
lc1 = lnext(lc1);
}
/* bail out now if it turns out that all we had were empty sets. */
if (lc1 == NULL)
return list_make1(groupingSets);
/* ----------
* We don't strictly need to remove duplicate sets here, but if we don't,
* they tend to become scattered through the result, which is a bit
* confusing (and irritating if we ever decide to optimize them out).
* So we remove them here and add them back after.
*
* For each non-duplicate set, we fill in the following:
*
* orig_sets[i] = list of the original set lists
* set_masks[i] = bitmapset for testing inclusion
* adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
*
* chains[i] will be the result group this set is assigned to.
*
* We index all of these from 1 rather than 0 because it is convenient
* to leave 0 free for the NIL node in the graph algorithm.
* ----------
*/
orig_sets = (List**)palloc0((num_sets_raw + 1) * sizeof(List*));
set_masks = (Bitmapset**)palloc0((num_sets_raw + 1) * sizeof(Bitmapset*));
adjacency = (short**)palloc0((num_sets_raw + 1) * sizeof(short*));
adjacency_buf = (short*)palloc((num_sets_raw + 1) * sizeof(short));
j_size = 0;
j = 0;
i = 1;
for_each_cell(lc, lc1)
{
List* candidate = (List*)lfirst(lc);
Bitmapset* candidate_set = NULL;
ListCell* lc2 = NULL;
int dup_of = 0;
foreach (lc2, candidate) {
candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
}
/* we can only be a dup if we're the same length as a previous set */
if (j_size == list_length(candidate)) {
int k;
for (k = j; k < i; ++k) {
if (bms_equal(set_masks[k], candidate_set)) {
dup_of = k;
break;
}
}
} else if (j_size < list_length(candidate)) {
j_size = list_length(candidate);
j = i;
}
if (dup_of > 0) {
orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
bms_free_ext(candidate_set);
} else {
int k;
int n_adj = 0;
orig_sets[i] = list_make1(candidate);
set_masks[i] = candidate_set;
/* fill in adjacency list; no need to compare equal-size sets */
for (k = j - 1; k > 0; --k) {
if (bms_is_subset(set_masks[k], candidate_set))
adjacency_buf[++n_adj] = k;
}
if (n_adj > 0) {
adjacency_buf[0] = n_adj;
adjacency[i] = (short*)palloc((n_adj + 1) * sizeof(short));
errno_t errorno =
memcpy_s(adjacency[i], (n_adj + 1) * sizeof(short), adjacency_buf, (n_adj + 1) * sizeof(short));
securec_check_c(errorno, "\0", "\0");
} else
adjacency[i] = NULL;
++i;
}
}
num_sets = i - 1;
/*
* Apply the graph matching algorithm to do the work.
*/
state = BipartiteMatch(num_sets, num_sets, adjacency);
/*
* Now, the state->pair* fields have the info we need to assign sets to
* chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
* pair_vu[v] = u (both will be true, but we check both so that we can do
* it in one pass)
*/
chains = (int*)palloc0((num_sets + 1) * sizeof(int));
for (i = 1; i <= num_sets; ++i) {
int u = state->pair_vu[i];
int v = state->pair_uv[i];
if (u > 0 && u < i) {
chains[i] = chains[u];
} else if (v > 0 && v < i) {
chains[i] = chains[v];
} else {
chains[i] = ++num_chains;
}
}
/* build result lists. */
results = (List**)palloc0((num_chains + 1) * sizeof(List*));
for (i = 1; i <= num_sets; ++i) {
int c = chains[i];
AssertEreport(c > 0, MOD_OPT, "invalid chains item when building result lists.");
results[c] = list_concat(results[c], orig_sets[i]);
}
/* push any empty sets back on the first list. */
while (num_empty-- > 0)
results[1] = lcons(NIL, results[1]);
/* make result list */
for (i = 1; i <= num_chains; ++i)
result = lappend(result, results[i]);
/*
* Free all the things.
*
* (This is over-fussy for small sets but for large sets we could have
* tied up a nontrivial amount of memory.)
*/
BipartiteMatchFree(state);
pfree_ext(results);
pfree_ext(chains);
for (i = 1; i <= num_sets; ++i)
if (adjacency[i])
pfree_ext(adjacency[i]);
pfree_ext(adjacency);
pfree_ext(adjacency_buf);
pfree_ext(orig_sets);
for (i = 1; i <= num_sets; ++i)
bms_free_ext(set_masks[i]);
pfree_ext(set_masks);
return result;
}
/*
* Reorder the elements of a list of grouping sets such that they have correct
* prefix relationships.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first.
*
* If we're passed in a sortclause, we follow its order of columns to the
* extent possible, to minimize the chance that we add unnecessary sorts.
* (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
* gets implemented in one pass.)
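 *
 * For that example (illustration only), the input [(c), (a,b,c)] (smallest
 * first) with sort order c,b,a yields [(c,b,a), (c)] (largest first), so a
 * single sort on (c,b,a) serves both grouping sets and the ORDER BY.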
*/
List* reorder_grouping_sets(List* groupingsets, List* sortclause)
{
ListCell* lc = NULL;
ListCell* lc2 = NULL;
List* previous = NIL;
List* result = NIL;
foreach (lc, groupingsets) {
List* candidate = (List*)lfirst(lc);
List* new_elems = list_difference_int(candidate, previous);
if (list_length(new_elems) > 0) {
while (list_length(sortclause) > list_length(previous) && list_length(new_elems) > 0) {
SortGroupClause* sc = (SortGroupClause*)list_nth(sortclause, list_length(previous));
int ref = sc->tleSortGroupRef;
if (list_member_int(new_elems, ref)) {
previous = lappend_int(previous, ref);
new_elems = list_delete_int(new_elems, ref);
} else {
/* diverged from the sortclause; give up on it */
sortclause = NIL;
break;
}
}
foreach (lc2, new_elems) {
previous = lappend_int(previous, lfirst_int(lc2));
}
}
result = lcons(list_copy(previous), result);
list_free_ext(new_elems);
}
list_free_ext(previous);
return result;
}
/*
* get_optimal_hashed_path: get optimal hash path from three hashagg paths.
*
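 * The three candidate plan shapes considered are:
 *   (1) hashagg (dn) + gather + hashagg (cn)
 *   (2) redistribute (dn) + hashagg (dn)
 *   (3) hashagg (dn) + redistribute (dn) + hashagg (dn)
 *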
 * Parameters:
 * @in root: plan info node
 * @in cheapest_path: the cheapest path
 * @in needs_stream: if true, we can also generate paths with redistribution
 * @in path_width: the parent's width of the cheapest path
 * @in agg_costs: the execution costs of the aggregates' input expressions
 * @in numGroupCols: the number of grouping columns
 * @in numGroups: the estimated number of distinct groups for the group by clause
 * @in distributed_key: the distribute key for stream
 * @in multiple: the skew multiple for stream
 * @in hashentrysize: hash entry size, including space for per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
 * @in agg_strategy: the agg strategy (AGG_HASHED or AGG_PLAIN) of the costed path
 * @in/out hashed_p: the optimal hashagg path
*
* Returns: void
*/
static void get_optimal_hashed_path(PlannerInfo* root, Path* cheapest_path, bool needs_stream, int path_width,
AggClauseCosts* agg_costs, int numGroupCols, const double* numGroups, List* distributed_key, double multiple,
Size hashentrysize, AggStrategy agg_strategy, Path* hashed_p)
{
QualCost total_cost;
Plan* subplan = makeNode(Plan);
double best_cost = 0.0;
Path result_path;
errno_t rc = EOK;
rc = memset_s(&result_path, sizeof(Path), 0, sizeof(Path));
securec_check(rc, "\0", "\0");
subplan->startup_cost = cheapest_path->startup_cost;
subplan->total_cost = cheapest_path->total_cost;
subplan->plan_rows = cheapest_path->rows;
subplan->multiple = cheapest_path->multiple;
subplan->plan_width = path_width;
subplan->vec_output = false;
subplan->exec_nodes =
ng_convert_to_exec_nodes(&cheapest_path->distribution, cheapest_path->locator_type, RELATION_ACCESS_READ);
subplan->dop = cheapest_path->dop;
total_cost.startup = 0.0;
total_cost.per_tuple = 0.0;
if (root->query_level == 1) {
/* Get total cost for hashagg (dn) + gather + hashagg (cn). */
get_hashagg_gather_hashagg_path(root,
subplan,
agg_costs,
numGroupCols,
numGroups[0],
numGroups[1],
total_cost,
hashentrysize,
agg_strategy,
needs_stream,
&result_path);
if ((best_cost == 0.0) || (result_path.total_cost < best_cost)) {
best_cost = result_path.total_cost;
copy_path_costsize(hashed_p, &result_path);
}
}
if (needs_stream && (distributed_key != NIL)) {
/* Get total cost for redistribute(dn) + hashagg (dn). */
Distribution* distribution = ng_get_dest_distribution(subplan);
get_redist_hashagg_path(root,
subplan,
agg_costs,
numGroupCols,
numGroups[0],
numGroups[1],
distributed_key,
multiple,
distribution,
total_cost,
hashentrysize,
needs_stream,
&result_path);
if ((best_cost == 0.0) || (result_path.total_cost < best_cost)) {
best_cost = result_path.total_cost;
copy_path_costsize(hashed_p, &result_path);
}
/* Get total cost for hashagg (dn) + redistribute(dn) + hashagg (dn). */
get_hashagg_redist_hashagg_path(root,
subplan,
agg_costs,
numGroupCols,
numGroups[0],
numGroups[1],
distributed_key,
multiple,
distribution,
total_cost,
hashentrysize,
needs_stream,
&result_path);
/* Save the best cost for hashed path. */
if ((best_cost == 0.0) || (result_path.total_cost < best_cost)) {
copy_path_costsize(hashed_p, &result_path);
}
}
pfree_ext(subplan);
subplan = NULL;
}
/*
 * compute_hashed_path_cost: compute the hashagg path cost for the hash-vs-sort choice.
 *
 * Parameters:
 * @in root: plan info node
 * @in limit_tuples: estimated tuples for LIMIT
 * @in path_width: the parent's width of the cheapest path
 * @in cheapest_path: the cheapest path
 * @in dNumGroups: the estimated number of distinct groups for the group by clause
 * @in agg_costs: the execution costs of the aggregates' input expressions
 * @in hashentrysize: hash entry size, including space for per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
 * @in target_pathkeys: the available pathkeys for plan info
 * @in/out hashed_p: result hash path with total cost
*
* Returns: void
*/
static void compute_hashed_path_cost(PlannerInfo* root, double limit_tuples, int path_width, Path* cheapest_path,
const double* dNumGroups, AggClauseCosts* agg_costs, Size hashentrysize, List* target_pathkeys, Path* hashed_p)
{
Query* parse = root->parse;
int numGroupCols = list_length(parse->groupClause);
List* distributed_key = NIL;
double multiple = 0.0;
bool needs_stream = false;
bool need_second_hashagg = false;
/*
* See if the estimated cost is no more than doing it the other way. While
* avoiding the need for sorted input is usually a win, the fact that the
* output won't be sorted may be a loss; so we need to do an actual cost
* comparison.
*
* We need to consider cheapest_path + hashagg [+ final sort] versus
* either cheapest_path [+ sort] + group or agg [+ final sort] or
* presorted_path + group or agg [+ final sort] where brackets indicate a
* step that may not be needed. We assume query_planner() will have
* returned a presorted path only if it's a winner compared to
* cheapest_path for this purpose.
*
* These path variables are dummies that just hold cost fields; we don't
* make actual Paths for these steps.
*
 * We need two hashagg levels for the count(distinct) case, so do the estimation twice.
 * It is certain to be a stream plan if agg_costs->exprAggs is not NIL.
*/
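/*
 * For example (illustration only), for
 *     SELECT a, count(DISTINCT b) FROM t GROUP BY a;
 * the first-level hashagg groups on (a, b) to de-duplicate b, and the
 * second-level hashagg aggregates the de-duplicated rows on (a) alone.
 */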
if (agg_costs->exprAggs != NIL) {
List* group_exprs = get_sortgrouplist_exprs(parse->groupClause, parse->targetList);
List* newtlist = NIL;
List* orig_tlist = NIL;
List* duplicate_tlist = NIL;
Oid distinct_eq_op = InvalidOid;
double numGroups[2] = {0};
Node* distinct_node = (Node*)linitial(agg_costs->exprAggs);
AggStrategy strategy = (parse->groupClause != NIL) ? AGG_HASHED : AGG_PLAIN;
group_exprs = lappend(group_exprs, distinct_node);
get_num_distinct(root,
group_exprs,
PATH_LOCAL_ROWS(cheapest_path),
cheapest_path->rows,
ng_get_dest_num_data_nodes(cheapest_path),
numGroups);
/* generate a new targetlist for the first level, which is used to judge whether to redistribute or not. */
newtlist = get_count_distinct_newtlist(
root, parse->targetList, distinct_node, &orig_tlist, &duplicate_tlist, &distinct_eq_op);
/* check whether a stream is needed according to distribute_keys. */
if (IS_STREAM_PLAN && cheapest_path->locator_type != LOCATOR_TYPE_REPLICATED)
needs_stream = needs_agg_stream(root, newtlist, cheapest_path->distribute_keys);
root->query_level++;
/* generate the optimal hashagg path for the first level. */
if (needs_stream) {
distributed_key = get_distributekey_from_tlist(root, newtlist, group_exprs, cheapest_path->rows, &multiple);
if (distributed_key != NIL) {
get_optimal_hashed_path(root,
cheapest_path,
needs_stream,
path_width,
agg_costs,
numGroupCols + 1,
numGroups,
distributed_key,
multiple,
hashentrysize,
AGG_HASHED,
hashed_p);
elog(DEBUG1,
"[choose optimal hashagg]: the total cost of hashagg "
"with redistribute for the first level: %lf",
hashed_p->total_cost);
}
} else {
cost_agg(hashed_p,
root,
AGG_HASHED,
agg_costs,
numGroupCols + 1,
numGroups[0],
cheapest_path->startup_cost,
cheapest_path->total_cost,
PATH_LOCAL_ROWS(cheapest_path),
path_width,
hashentrysize);
elog(DEBUG1,
"[choose optimal hashagg]: the total cost of hashagg "
"with no redistribute for the first level: %lf",
hashed_p->total_cost);
}
/* generate the optimal hashagg path for the second level. */
root->query_level--;
if (AGG_PLAIN == strategy) {
/* only generate path of plainagg+gather+plainagg for the second level. */
get_optimal_hashed_path(root,
hashed_p,
needs_stream,
path_width,
agg_costs,
numGroupCols,
dNumGroups,
NULL,
multiple,
hashentrysize,
AGG_PLAIN,
hashed_p);
elog(DEBUG1,
"[choose optimize hashagg]: the total cost of plain hashagg "
"for the second level: %lf",
hashed_p->total_cost);
} else {
need_second_hashagg = true;
}
list_free_ext(group_exprs);
list_free_ext(duplicate_tlist);
list_free_ext(orig_tlist);
list_free_ext(newtlist);
}
/*
 * get the optimal hashagg if we only have group by, or get the second
 * hashagg for the count(distinct) with group by case.
*/
if ((agg_costs->exprAggs == NIL) || need_second_hashagg) {
Path* path = NULL;
distributed_key = NIL;
path = need_second_hashagg ? hashed_p : cheapest_path;
/* need to compare the three hashagg costs. */
if (IS_STREAM_PLAN && path->locator_type != LOCATOR_TYPE_REPLICATED) {
needs_stream = needs_agg_stream(root, parse->targetList, path->distribute_keys);
if (needs_stream) {
distributed_key =
get_distributekey_from_tlist(root, parse->targetList, parse->groupClause, path->rows, &multiple);
}
/* get the optimal hashagg path among the three paths. */
get_optimal_hashed_path(root,
path,
needs_stream,
path_width,
agg_costs,
numGroupCols,
dNumGroups,
distributed_key,
multiple,
hashentrysize,
AGG_HASHED,
hashed_p);
} else {
/* fall back to the original agg path if this is not a stream plan, or broadcast+hashagg is used in a subplan. */
cost_agg(hashed_p,
root,
AGG_HASHED,
agg_costs,
numGroupCols,
dNumGroups[0],
path->startup_cost,
path->total_cost,
PATH_LOCAL_ROWS(cheapest_path),
path_width,
hashentrysize);
}
if (need_second_hashagg) {
elog(DEBUG1,
"[choose optimize hashagg]: the total cost of hashagg with redistribute for the second level: %lf",
hashed_p->total_cost);
} else {
elog(DEBUG1,
"[choose optimize hashagg]: the total cost of hashagg with no count(distinct): %lf",
hashed_p->total_cost);
}
}
list_free_ext(distributed_key);
/* Result of hashed agg is always unsorted */
if (target_pathkeys != NULL) {
cost_sort(hashed_p,
target_pathkeys,
hashed_p->total_cost,
dNumGroups[0],
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
limit_tuples,
root->glob->vectorized);
}
elog(DEBUG1, "[final hashed path total cost]: %lf", hashed_p->total_cost);
}
/*
 * get_optimal_sorted_path: get the optimal sort path for the hash-vs-sort choice.
 *
 * Parameters:
 * @in root: plan info node
 * @in/out sorted_p: result sort path with total cost
 * @in path_width: the parent's width of the cheapest path
 * @in agg_costs: the execution costs of the aggregates' input expressions
 * @in numGroupCols: the number of columns in the group by clause
 * @in dNumGroups: the estimated number of distinct groups for the group by clause
 * @in hashentrysize: hash entry size, including space for per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
 * @in limit_tuples: estimated tuples for LIMIT
 * @in needs_stream: whether a stream is needed above the grouping
 * @in need_sort_for_grouping: whether an explicit sort is needed for grouping
*
* Returns: void
*/
static void get_optimal_sorted_path(PlannerInfo* root, Path* sorted_p, int path_width, AggClauseCosts* agg_costs,
int numGroupCols, const double* dNumGroups, Size hashentrysize, double limit_tuples, bool needs_stream,
bool need_sort_for_grouping)
{
Query* parse = root->parse;
Node* distinct_node = NULL;
List* distributed_key = NIL;
List* distinct_node_list = NIL;
double multiple = 0.0;
bool two_level_groupagg = false;
bool has_stream = false;
bool has_local_stream = false;
bool distinct_needs_stream = false;
bool distinct_needs_local_stream = false;
StreamPath stream_p, stream_local_p;
errno_t rc = EOK;
AggStrategy strategy = (parse->groupClause != NIL) ? AGG_SORTED : AGG_PLAIN;
Path* top_level_path = NULL;
rc = memset_s(&stream_p, sizeof(stream_p), 0, sizeof(stream_p));
securec_check(rc, "\0", "\0");
rc = memset_s(&stream_local_p, sizeof(stream_local_p), 0, sizeof(stream_local_p));
securec_check(rc, "\0", "\0");
if (IS_STREAM_PLAN && needs_stream) {
/* get distribute key and multiple from groupClause */
distributed_key =
get_distributekey_from_tlist(root, parse->targetList, parse->groupClause, sorted_p->rows, &multiple);
}
/* check whether a two-level groupagg is needed. */
if (IS_STREAM_PLAN && (list_length(agg_costs->exprAggs) == 1) && (!(parse->groupClause && !needs_stream)) &&
!agg_costs->hasDnAggs && !agg_costs->hasdctDnAggs && agg_costs->numOrderedAggs == 0) {
Plan* subplan = makeNode(Plan);
/* construct subplan for sort path. */
subplan->startup_cost = sorted_p->startup_cost;
subplan->total_cost = sorted_p->total_cost;
subplan->plan_rows = sorted_p->rows;
subplan->multiple = sorted_p->multiple;
subplan->plan_width = path_width;
subplan->distributed_keys = sorted_p->distribute_keys;
distinct_node = (Node*)linitial(agg_costs->exprAggs);
distinct_node_list = list_make1(distinct_node);
two_level_groupagg = needs_two_level_groupagg(
root, subplan, distinct_node, distributed_key, &distinct_needs_stream, &distinct_needs_local_stream);
pfree_ext(subplan);
subplan = NULL;
}
/*
* we should consider optimal groupagg path below:
* 1. if we have count(distinct) and group by clause
* (1) need two level groupagg, and distinct expr need stream, generate path
* redistribute(distinct_node) + groupagg + redistribute(groupby_node) + groupagg
* (2) need two level groupagg, and distinct expr need not stream, generate path
* groupagg+redistribute(groupby_node) + groupagg
* (3) need one level groupagg, and there is distribute key for group by clause, generate path
* redistribute(groupby_node) + groupagg
* 2. if we have count(distinct) and have no group by clause
* (1) distinct expr need stream, generate path
* redistribute(distinct_node) + groupagg + gather + agg
* (2) distinct expr need not stream, generate path
* groupagg + gather + agg
 * 3. if we have no count(distinct) and have group by clause, generate path
 *    groupagg + gather [+ sort + groupagg] (the CN-level sort and groupagg
 *    are added only when a stream is still needed)
*/
if (parse->groupClause) {
/* if there's count(distinct), we now only support redistribute by group clause */
if (IS_STREAM_PLAN && needs_stream && (agg_costs->exprAggs != NIL || agg_costs->hasDnAggs)) {
/* we can apply local sortagg if the count(distinct) expr is the distribute column */
if (two_level_groupagg) {
Distribution* distribution = ng_get_dest_distribution(sorted_p);
ng_copy_distribution(&stream_p.path.distribution, distribution);
ng_copy_distribution(&stream_p.consumer_distribution, distribution);
ng_copy_distribution(&stream_local_p.path.distribution, distribution);
ng_copy_distribution(&stream_local_p.consumer_distribution, distribution);
if (distinct_needs_stream) {
/* compute stream path of redistribute for distinct expr. */
stream_p.path.distribute_keys = distinct_node_list;
stream_p.subpath = sorted_p;
stream_p.type = STREAM_REDISTRIBUTE;
stream_p.path.multiple = 1.0;
cost_stream(&stream_p, path_width);
need_sort_for_grouping = true;
} else {
copy_path_costsize(&stream_p.path, sorted_p);
}
if (need_sort_for_grouping) {
cost_sort(sorted_p,
root->group_pathkeys,
stream_p.path.total_cost,
PATH_LOCAL_ROWS(&stream_p.path),
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
-1.0,
root->glob->vectorized);
copy_path_costsize(&stream_p.path, sorted_p);
}
/* compute groupagg path. */
cost_agg(sorted_p,
root,
strategy,
agg_costs,
numGroupCols,
dNumGroups[0],
stream_p.path.startup_cost,
stream_p.path.total_cost,
PATH_LOCAL_ROWS(&stream_p.path),
path_width,
hashentrysize);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
(errmsg("[choose optimize groupagg]: the total cost of groupagg with redistribute for the "
"first level: %lf",
sorted_p->total_cost))));
}
/* there's group by clause, and a redistribution on group by clause is needed. */
if (distributed_key != NIL) {
/* compute stream path of redistribute for the group by exprs. */
stream_p.path.distribute_keys = distributed_key;
Distribution* distribution = ng_get_dest_distribution(sorted_p);
ng_copy_distribution(&stream_p.path.distribution, distribution);
ng_copy_distribution(&stream_p.consumer_distribution, distribution);
stream_p.subpath = sorted_p;
stream_p.type = STREAM_REDISTRIBUTE;
stream_p.path.multiple = multiple;
cost_stream(&stream_p, path_width);
has_stream = true;
needs_stream = false;
}
}
} else { /* there's no group by clause, AGG_PLAIN used. */
if (IS_STREAM_PLAN && (agg_costs->exprAggs != NIL) && distinct_needs_stream) {
/* compute stream path of redistribute for distinct expr. */
stream_p.path.distribute_keys = distinct_node_list;
Distribution* distribution = ng_get_dest_distribution(sorted_p);
ng_copy_distribution(&stream_p.path.distribution, distribution);
ng_copy_distribution(&stream_p.consumer_distribution, distribution);
stream_p.subpath = sorted_p;
stream_p.type = STREAM_REDISTRIBUTE;
stream_p.path.multiple = 1.0;
cost_stream(&stream_p, path_width);
has_stream = true;
}
}
/* choose the top-level path according to the streams added for group by. */
if (has_stream && has_local_stream)
top_level_path = &stream_p.path;
else if (!has_stream && has_local_stream)
top_level_path = &stream_local_p.path;
else if (has_stream && !has_local_stream)
top_level_path = &stream_p.path;
else
top_level_path = sorted_p;
/* compute groupagg path. */
if (need_sort_for_grouping) {
cost_sort(sorted_p,
root->group_pathkeys,
top_level_path->total_cost,
PATH_LOCAL_ROWS(top_level_path),
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
-1.0,
root->glob->vectorized);
copy_path_costsize(top_level_path, sorted_p);
}
cost_agg(sorted_p,
root,
strategy,
agg_costs,
numGroupCols,
dNumGroups[0],
top_level_path->startup_cost,
top_level_path->total_cost,
PATH_LOCAL_ROWS(top_level_path),
path_width,
hashentrysize);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
(errmsg(
"[choose optimize groupagg]: the total cost of groupagg with redistribute for the second level: %lf",
sorted_p->total_cost))));
/* compute the cost of gather to CN and AGG_PLAIN. */
Distribution* distribution = ng_get_dest_distribution(sorted_p);
ng_copy_distribution(&stream_p.path.distribution, distribution);
stream_p.subpath = sorted_p;
stream_p.consumer_distribution.group_oid = InvalidOid;
stream_p.consumer_distribution.bms_data_nodeids = ng_get_single_node_group_nodeids();
stream_p.type = STREAM_GATHER;
stream_p.path.locator_type = LOCATOR_TYPE_REPLICATED;
cost_stream(&stream_p, path_width);
copy_path_costsize(sorted_p, &stream_p.path);
if (IS_STREAM_PLAN && needs_stream) {
/* For plain agg, there's no sort needed */
if (parse->groupClause != NIL)
cost_sort(sorted_p,
root->group_pathkeys,
sorted_p->total_cost,
PATH_LOCAL_ROWS(sorted_p),
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
-1.0,
root->glob->vectorized);
cost_agg(sorted_p,
root,
strategy,
agg_costs,
numGroupCols,
dNumGroups[1],
sorted_p->startup_cost,
sorted_p->total_cost,
PATH_LOCAL_ROWS(sorted_p),
path_width,
hashentrysize);
}
if (distinct_node_list != NIL) {
list_free_ext(distinct_node_list);
distinct_node_list = NIL;
}
if (distributed_key != NIL) {
list_free_ext(distributed_key);
distributed_key = NIL;
}
}
/*
 * compute_sorted_path_cost: compute the sort path cost for the hash-vs-sort choice.
 *
 * Parameters:
 * @in root: plan info node
 * @in limit_tuples: estimated tuples for LIMIT
 * @in path_width: the parent's width of the cheapest path
 * @in cheapest_path: the cheapest path
 * @in sorted_path: the initial sorted path
 * @in dNumGroups: the estimated number of distinct groups for the group by clause
 * @in agg_costs: the execution costs of the aggregates' input expressions
 * @in hashentrysize: hash entry size, including space for per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
* @in target_pathkeys: the available pathkeys for plan info
* @in/out sorted_p: result sort path with total cost
*
* Returns: void
*/
static void compute_sorted_path_cost(PlannerInfo* root, double limit_tuples, int path_width, Path* cheapest_path,
Path* sorted_path, const double* dNumGroups, AggClauseCosts* agg_costs, Size hashentrysize, List* target_pathkeys,
Path* sorted_p)
{
Query* parse = root->parse;
int numGroupCols = list_length(parse->groupClause);
List* current_pathkeys = NIL;
StreamPath stream_p;
bool needs_stream = false;
bool need_sort_for_grouping = false;
errno_t rc = EOK;
bool is_replicate = (!IS_STREAM_PLAN || cheapest_path->locator_type == LOCATOR_TYPE_REPLICATED);
rc = memset_s(&stream_p, sizeof(stream_p), 0, sizeof(stream_p));
securec_check(rc, "\0", "\0");
/* use the sorted path if it exists, otherwise use the cheapest path. */
if (sorted_path != NULL) {
copy_path_costsize(sorted_p, sorted_path);
sorted_p->distribute_keys = sorted_path->distribute_keys;
current_pathkeys = sorted_path->pathkeys;
Distribution* distribution = ng_get_dest_distribution(sorted_path);
ng_copy_distribution(&sorted_p->distribution, distribution);
} else {
copy_path_costsize(sorted_p, cheapest_path);
sorted_p->distribute_keys = cheapest_path->distribute_keys;
current_pathkeys = cheapest_path->pathkeys;
Distribution* distribution = ng_get_dest_distribution(cheapest_path);
ng_copy_distribution(&sorted_p->distribution, distribution);
}
if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) {
current_pathkeys = root->group_pathkeys;
need_sort_for_grouping = true;
}
if (is_replicate || !parse->hasAggs) {
if (need_sort_for_grouping) {
cost_sort(sorted_p,
root->group_pathkeys,
sorted_p->total_cost,
PATH_LOCAL_ROWS(sorted_p),
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
-1.0,
root->glob->vectorized);
}
}
if (!is_replicate)
needs_stream = needs_agg_stream(root, parse->targetList, sorted_p->distribute_keys);
/* get the optimal sorted path if there are aggregates. */
if (parse->hasAggs) {
if (is_replicate)
cost_agg(sorted_p,
root,
AGG_SORTED,
agg_costs,
numGroupCols,
dNumGroups[0],
sorted_p->startup_cost,
sorted_p->total_cost,
PATH_LOCAL_ROWS(sorted_p),
path_width,
hashentrysize);
else
get_optimal_sorted_path(root,
sorted_p,
path_width,
agg_costs,
numGroupCols,
dNumGroups,
hashentrysize,
limit_tuples,
needs_stream,
need_sort_for_grouping);
} else {
cost_group(sorted_p,
root,
numGroupCols,
dNumGroups[0],
sorted_p->startup_cost,
sorted_p->total_cost,
PATH_LOCAL_ROWS(sorted_p));
if (!is_replicate) {
/* we should also account for the cost of gather, since the sort+group plan has to include it. */
stream_p.subpath = sorted_p;
Distribution* distribution = ng_get_dest_distribution(sorted_p);
ng_copy_distribution(&stream_p.path.distribution, distribution);
ng_copy_distribution(&stream_p.consumer_distribution, distribution);
stream_p.type = STREAM_GATHER;
stream_p.path.locator_type = LOCATOR_TYPE_REPLICATED;
cost_stream(&stream_p, path_width);
copy_path_costsize(sorted_p, &stream_p.path);
/* compute sort+group cost on CN. */
if (needs_stream) {
cost_sort(sorted_p,
root->group_pathkeys,
sorted_p->total_cost,
PATH_LOCAL_ROWS(sorted_p),
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
-1.0,
root->glob->vectorized);
cost_group(sorted_p,
root,
numGroupCols,
dNumGroups[0],
sorted_p->startup_cost,
sorted_p->total_cost,
PATH_LOCAL_ROWS(sorted_p));
}
}
}
/* The Agg or Group node will preserve ordering */
if (target_pathkeys && !pathkeys_contained_in(target_pathkeys, current_pathkeys))
cost_sort(sorted_p,
target_pathkeys,
sorted_p->total_cost,
dNumGroups[0],
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
limit_tuples,
root->glob->vectorized);
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), (errmsg("[final sorted path total cost]: %lf", sorted_p->total_cost))));
}
/*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
* the hash table, and/or running many sorts in parallel, either of which
* seems like a certain loser.)
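 *
 * For example (illustration only), an ordered aggregate such as
 *     SELECT array_agg(x ORDER BY y) FROM t;
 * rules out hashing here, while a single count(DISTINCT x) may still be
 * hashable under a stream plan (see the exprAggs conditions below).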
*/
static bool grouping_is_can_hash(Query* parse, AggClauseCosts* agg_costs)
{
bool can_hash = false;
can_hash = grouping_is_hashable(parse->groupClause);
if (IS_STREAM_PLAN) {
can_hash = can_hash && (agg_costs->numOrderedAggs == 0 &&
(list_length(agg_costs->exprAggs) == 0 ||
(list_length(agg_costs->exprAggs) == 1 && !agg_costs->hasDnAggs &&
!agg_costs->hasdctDnAggs && !agg_costs->unhashable)));
} else {
can_hash = can_hash && (agg_costs->numOrderedAggs == 0 && list_length(agg_costs->exprAggs) == 0);
}
return can_hash;
}
/* Estimate per-hash-entry space at tuple width... and per-hash-entry overhead */
static Size compute_hash_entry_size(bool vectorized, Path* cheapest_path, int path_width, AggClauseCosts* agg_costs)
{
Size hash_entry_size;
if (vectorized){
hash_entry_size =
get_path_actual_total_width(cheapest_path, vectorized, OP_HASHAGG, agg_costs->numAggs);
} else {
hash_entry_size = get_hash_entry_size(path_width, agg_costs->numAggs);
}
/* plus space for pass-by-ref transition values... */
hash_entry_size += agg_costs->transitionSpace;
return hash_entry_size;
}
/*
* choose_hashed_grouping - should we use hashed grouping?
*
* Returns TRUE to select hashing, FALSE to select sorting.
*/
static bool choose_hashed_grouping(PlannerInfo* root, double tuple_fraction, double limit_tuples, int path_width,
Path* cheapest_path, Path* sorted_path, const double* dNumGroups, AggClauseCosts* agg_costs, Size* hash_entry_size)
{
Query* parse = root->parse;
bool can_hash = false;
bool can_sort = false;
Size hashentrysize;
List* target_pathkeys = NIL;
Path hashed_p, sorted_p;
errno_t rc = EOK;
can_hash = grouping_is_can_hash(parse, agg_costs);
can_sort = grouping_is_sortable(parse->groupClause) && !root->parse->unique_check;
/* Quick out if only one choice is workable */
if (!(can_hash && can_sort)) {
if (can_hash) {
return true;
} else if (can_sort) {
return false;
} else {
ereport(ERROR,
(errmodule(MOD_OPT),
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, while others only support sorting."))));
}
}
/*
 * Check the count(distinct) cases in advance.
 * case1: If the expr in count(distinct) is set-returning, only the hashagg plan is
 * supported. Normally an aggregate function cannot contain a set-returning expr
 * (the executor does not support it), but the hashagg plan pushes the set-returning
 * expr down to a subplan (only count(distinct) is supported for now; other cases may
 * be supported in the future), which makes it possible to calculate a set-returning
 * expr in count(distinct) correctly.
 *
 * case2: If the groupClause is not distributable but the expr in count(distinct) is,
 * only the hashagg plan can be shipped.
 *
 * case3: If neither the groupClause nor the expr in count(distinct) is distributable,
 * the SQL can't be shipped.
*/
if (list_length(agg_costs->exprAggs) == 1) {
/* case1 */
if (expression_returns_set((Node*)agg_costs->exprAggs)) {
return true;
}
#ifdef ENABLE_MULTIPLE_NODES
if (parse->groupClause) {
bool grp_is_distributable = grouping_is_distributable(parse->groupClause, parse->targetList);
bool expr_is_distributable = IsTypeDistributable(exprType((Node*)linitial(agg_costs->exprAggs)));
/* case2 */
if (!grp_is_distributable) {
if (expr_is_distributable) {
return true;
} else { /* case3 */
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Count(Distinct)\" on redistribution unsupported data type");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
}
}
#endif
}
/* Prefer hashagg or sort when guc is set */
if (!u_sess->attr.attr_sql.enable_hashagg && u_sess->attr.attr_sql.enable_sort)
return false;
if (!u_sess->attr.attr_sql.enable_sort && u_sess->attr.attr_sql.enable_hashagg)
return true;
/* If guc plan_mode_seed is random plan, we should choose random path between AGG_HASHED and AGG_SORTED */
if (u_sess->attr.attr_sql.plan_mode_seed != OPTIMIZE_PLAN) {
int random_option = choose_random_option(lengthof(g_agglist));
return (AGG_HASHED == g_agglist[random_option]);
}
hashentrysize = compute_hash_entry_size(root->glob->vectorized, cheapest_path, path_width, agg_costs);
*hash_entry_size = hashentrysize;
/*
* When we have both GROUP BY and DISTINCT, use the more-rigorous of
* DISTINCT and ORDER BY as the assumed required output sort order. This
* is an oversimplification because the DISTINCT might get implemented via
* hashing, but it's not clear that the case is common enough (or that our
* estimates are good enough) to justify trying to solve it exactly.
*/
target_pathkeys = list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys) ?
root->distinct_pathkeys : root->sort_pathkeys;
/* init hash path and sort path. */
rc = memset_s(&hashed_p, sizeof(hashed_p), 0, sizeof(hashed_p));
securec_check(rc, "\0", "\0");
rc = memset_s(&sorted_p, sizeof(sorted_p), 0, sizeof(sorted_p));
securec_check(rc, "\0", "\0");
/* compute the minimal total cost for hash path. */
Distribution* distribution = ng_get_dest_distribution(cheapest_path);
ng_copy_distribution(&hashed_p.distribution, distribution);
compute_hashed_path_cost(root,
limit_tuples,
path_width,
cheapest_path,
dNumGroups,
agg_costs,
hashentrysize,
target_pathkeys,
&hashed_p);
/* compute the minimal total cost for sort path. */
compute_sorted_path_cost(root,
limit_tuples,
path_width,
cheapest_path,
sorted_path,
dNumGroups,
agg_costs,
hashentrysize,
target_pathkeys,
&sorted_p);
/*
* Now make the decision using the top-level tuple fraction. First we
* have to convert an absolute count (LIMIT) into fractional form.
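 * For example (illustration only), LIMIT 10 over an estimated 1000 groups
 * gives tuple_fraction = 10 / 1000 = 0.01.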
*/
tuple_fraction = tuple_fraction >= 1.0 ? tuple_fraction / dNumGroups[0] : tuple_fraction;
if (compare_fractional_path_costs(&hashed_p, &sorted_p, tuple_fraction) < 0) {
/* Hashed is cheaper, so use it */
return true;
}
return false;
}
static void compute_distinct_sorted_path_cost(Path* sorted_p, List* sorted_pathkeys, Query* parse, PlannerInfo* root,
int numDistinctCols, Cost sorted_startup_cost, Cost sorted_total_cost, double path_rows,
Distribution* sorted_distribution, int path_width, double dNumDistinctRows, double limit_tuples)
{
List* current_pathkeys = NIL;
List* needed_pathkeys = NIL;
sorted_p->startup_cost = sorted_startup_cost;
sorted_p->total_cost = sorted_total_cost;
ng_copy_distribution(&sorted_p->distribution, sorted_distribution);
current_pathkeys = sorted_pathkeys;
if (parse->hasDistinctOn && list_length(root->distinct_pathkeys) < list_length(root->sort_pathkeys)) {
needed_pathkeys = root->sort_pathkeys;
} else {
needed_pathkeys = root->distinct_pathkeys;
}
if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) {
if (list_length(root->distinct_pathkeys) >= list_length(root->sort_pathkeys)) {
current_pathkeys = root->distinct_pathkeys;
} else {
current_pathkeys = root->sort_pathkeys;
}
cost_sort(sorted_p,
current_pathkeys,
sorted_p->total_cost,
path_rows,
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
-1.0,
root->glob->vectorized);
}
cost_group(
sorted_p, root, numDistinctCols, dNumDistinctRows, sorted_p->startup_cost, sorted_p->total_cost, path_rows);
if (parse->sortClause && !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) {
cost_sort(sorted_p,
root->sort_pathkeys,
sorted_p->total_cost,
dNumDistinctRows,
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
limit_tuples,
root->glob->vectorized);
}
}
/*
* choose_hashed_distinct - should we use hashing for DISTINCT?
*
* This is fairly similar to choose_hashed_grouping, but there are enough
* differences that it doesn't seem worth trying to unify the two functions.
* (One difference is that we sometimes apply this after forming a Plan,
* so the input alternatives can't be represented as Paths --- instead we
* pass in the costs as individual variables.)
*
* But note that making the two choices independently is a bit bogus in
* itself. If the two could be combined into a single choice operation
* it'd probably be better, but that seems far too unwieldy to be practical,
* especially considering that the combination of GROUP BY and DISTINCT
* isn't very common in real queries. By separating them, we are giving
* extra preference to using a sorting implementation when a common sort key
* is available ... and that's not necessarily wrong anyway.
*
* Returns TRUE to select hashing, FALSE to select sorting.
*/
static bool choose_hashed_distinct(PlannerInfo* root, double tuple_fraction, double limit_tuples, double path_rows,
int path_width, Cost cheapest_startup_cost, Cost cheapest_total_cost, Distribution* cheapest_distribution,
Cost sorted_startup_cost, Cost sorted_total_cost, Distribution* sorted_distribution, List* sorted_pathkeys,
double dNumDistinctRows, Size hashentrysize)
{
Query* parse = root->parse;
int numDistinctCols = list_length(parse->distinctClause);
bool can_sort = false;
bool can_hash = false;
Path hashed_p;
Path sorted_p;
errno_t rc = EOK;
rc = memset_s(&hashed_p, sizeof(Path), 0, sizeof(Path));
securec_check(rc, "\0", "\0");
rc = memset_s(&sorted_p, sizeof(Path), 0, sizeof(Path));
securec_check(rc, "\0", "\0");
/*
* If we have a sortable DISTINCT ON clause, we always use sorting. This
* enforces the expected behavior of DISTINCT ON.
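 *
 * For example (illustration only),
 *     SELECT DISTINCT ON (a) a, b FROM t ORDER BY a, b;
 * must return the first b for each a according to the sort order, which a
 * hashed implementation could not guarantee.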
*/
can_sort = grouping_is_sortable(parse->distinctClause);
if (can_sort && parse->hasDistinctOn)
return false;
can_hash = grouping_is_hashable(parse->distinctClause);
/* Quick out if only one choice is workable */
if (!(can_hash && can_sort)) {
if (can_hash) {
return true;
} else if (can_sort) {
return false;
} else {
ereport(ERROR,
(errmodule(MOD_OPT),
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement DISTINCT"),
errdetail("Some of the datatypes only support hashing, while others only support sorting."))));
}
}
/* Prefer hashagg or sort when guc is set */
if (!u_sess->attr.attr_sql.enable_hashagg && u_sess->attr.attr_sql.enable_sort)
return false;
if (!u_sess->attr.attr_sql.enable_sort && u_sess->attr.attr_sql.enable_hashagg)
return true;
/* If guc plan_mode_seed is random plan, we should choose random path between AGG_HASHED and AGG_SORTED */
if (u_sess->attr.attr_sql.plan_mode_seed != OPTIMIZE_PLAN) {
int random_option = choose_random_option(lengthof(g_agglist));
return (AGG_HASHED == g_agglist[random_option]);
}
/*
* See if the estimated cost is no more than doing it the other way. While
* avoiding the need for sorted input is usually a win, the fact that the
* output won't be sorted may be a loss; so we need to do an actual cost
* comparison.
*
* We need to consider cheapest_path + hashagg [+ final sort] versus
* sorted_path [+ sort] + group [+ final sort] where brackets indicate a
* step that may not be needed.
*
* These path variables are dummies that just hold cost fields; we don't
* make actual Paths for these steps.
*/
ng_copy_distribution(&hashed_p.distribution, cheapest_distribution);
cost_agg(&hashed_p,
root,
AGG_HASHED,
NULL,
numDistinctCols,
dNumDistinctRows,
cheapest_startup_cost,
cheapest_total_cost,
path_rows,
path_width,
hashentrysize);
/*
* Result of hashed agg is always unsorted, so if ORDER BY is present we
* need to charge for the final sort.
*/
if (parse->sortClause)
cost_sort(&hashed_p,
root->sort_pathkeys,
hashed_p.total_cost,
dNumDistinctRows,
path_width,
0.0,
u_sess->opt_cxt.op_work_mem,
limit_tuples,
root->glob->vectorized);
/*
* Now for the GROUP case. See comments in grouping_planner about the
* sorting choices here --- this code should match that code.
*/
compute_distinct_sorted_path_cost(&sorted_p,
sorted_pathkeys,
parse,
root,
numDistinctCols,
sorted_startup_cost,
sorted_total_cost,
path_rows,
sorted_distribution,
path_width,
dNumDistinctRows,
limit_tuples);
/*
* Now make the decision using the top-level tuple fraction. First we
* have to convert an absolute count (LIMIT) into fractional form.
*/
tuple_fraction = tuple_fraction >= 1.0 ? tuple_fraction / dNumDistinctRows : tuple_fraction;
if (compare_fractional_path_costs(&hashed_p, &sorted_p, tuple_fraction) < 0) {
/* Hashed is cheaper, so use it */
return true;
}
return false;
}
/*
* make_subplanTargetList
* Generate appropriate target list when grouping is required.
*
* When grouping_planner inserts grouping or aggregation plan nodes
* above the scan/join plan constructed by query_planner+create_plan,
* we typically want the scan/join plan to emit a different target list
* than the outer plan nodes should have. This routine generates the
* correct target list for the scan/join subplan.
*
* The initial target list passed from the parser already contains entries
* for all ORDER BY and GROUP BY expressions, but it will not have entries
* for variables used only in HAVING clauses; so we need to add those
* variables to the subplan target list. Also, we flatten all expressions
* except GROUP BY items into their component variables; the other expressions
* will be computed by the inserted nodes rather than by the subplan.
* For example, given a query like
* SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
* we want to pass this targetlist to the subplan:
* a+b,c,d
* where the a+b target will be used by the Sort/Group steps, and the
* other targets will be used for computing the final results.
*
* If we are grouping or aggregating, *and* there are no non-Var grouping
* expressions, then the returned tlist is effectively dummy; we do not
* need to force it to be evaluated, because all the Vars it contains
* should be present in the "flat" tlist generated by create_plan, though
* possibly in a different order. In that case we'll use create_plan's tlist,
* and the tlist made here is only needed as input to query_planner to tell
* it which Vars are needed in the output of the scan/join plan.
*
* 'tlist' is the query's target list.
* 'groupColIdx' receives an array of column numbers for the GROUP BY
* expressions (if there are any) in the returned target list.
* 'need_tlist_eval' is set true if we really need to evaluate the
* returned tlist as-is.
*
* The result is the targetlist to be passed to query_planner.
*/
static List* make_subplanTargetList(PlannerInfo* root, List* tlist, AttrNumber** groupColIdx, bool* need_tlist_eval)
{
Query* parse = root->parse;
List* sub_tlist = NIL;
List* non_group_cols = NIL;
List* non_group_vars = NIL;
int numCols;
*groupColIdx = NULL;
/*
* If we're not grouping or aggregating, there's nothing to do here;
* query_planner should receive the unmodified target list.
*/
if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual &&
!parse->hasWindowFuncs) {
*need_tlist_eval = true;
return tlist;
}
/*
* Otherwise, we must build a tlist containing all grouping columns, plus
* any other Vars mentioned in the targetlist and HAVING qual.
*/
sub_tlist = NIL;
non_group_cols = NIL;
*need_tlist_eval = false; /* only eval if not flat tlist */
get_tlist_group_vars_split(parse, tlist, &sub_tlist, &non_group_cols);
numCols = list_length(parse->groupClause);
if (numCols > 0) {
/*
* If grouping, create sub_tlist entries for all GROUP BY columns, and
* make an array showing where the group columns are in the sub_tlist.
*
* Note: with this implementation, the array entries will always be
* 1..N, but we don't want callers to assume that.
*/
AttrNumber* grpColIdx = NULL;
ListCell* tl = NULL;
int i = 1;
/* Reserve one position for adding groupingid column for grouping set query */
if (parse->groupingSets)
grpColIdx = (AttrNumber*)palloc0(sizeof(AttrNumber) * (numCols + 1));
else
grpColIdx = (AttrNumber*)palloc0(sizeof(AttrNumber) * numCols);
*groupColIdx = grpColIdx;
foreach (tl, sub_tlist) {
TargetEntry* tle = (TargetEntry*)lfirst(tl);
int colno;
colno = get_grouping_column_index(parse, tle, parse->groupClause);
AssertEreport(colno >= 0, MOD_OPT, "invalid GROUP BY column position.");
/*
* It's a grouping column, so add it to the result tlist and
* remember its resno in grpColIdx[].
*/
TargetEntry* newtle = NULL;
newtle = makeTargetEntry(tle->expr, i++, NULL, false);
newtle->ressortgroupref = tle->ressortgroupref;
lfirst(tl) = newtle;
AssertEreport(grpColIdx[colno] == 0,
MOD_OPT,
"invalid grpColIdx item when adding a grouping column to the result tlist."); /* no dups expected */
grpColIdx[colno] = newtle->resno;
if (!(newtle->expr && IsA(newtle->expr, Var)))
*need_tlist_eval = true; /* tlist contains non Vars */
}
}
/*
* Pull out all the Vars mentioned in non-group cols (plus HAVING), and
* add them to the result tlist if not already present. (A Var used
* directly as a GROUP BY item will be present already.) Note this
* includes Vars used in resjunk items, so we are covering the needs of
* ORDER BY and window specifications. Vars used within Aggrefs will be
* pulled out here, too.
*/
non_group_vars = pull_var_clause((Node*)non_group_cols, PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS);
sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
/* clean up cruft */
list_free_ext(non_group_vars);
list_free_ext(non_group_cols);
return sub_tlist;
}
/*
* build_grouping_itst_keys
* we add group by items to superset keys, so redistribution during agg is eliminated
* if superset key path is chosen
*
* Parameters:
* @in root: planner info struct for current query level
 * @in active_windows: in-use window funcs in the current query level
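 *
 * For example (illustration only), for GROUP BY a, b we record {a, b} as a
 * superset key set; a lower path already distributed by a, by b, or by
 * (a, b) then avoids a redistribution before the aggregation.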
*/
static void build_grouping_itst_keys(PlannerInfo* root, List* active_windows)
{
Query* parse = root->parse;
List* targetlist = parse->targetList;
List* groupClause = NIL;
List* superset_keys = NIL;
ListCell* lc = NULL;
/* reset for superset key of current query level */
root->dis_keys.superset_keys = NIL;
/* find the bottom-level group key, except for always-redistributed grouping sets */
if (parse->groupingSets != NIL)
groupClause = NIL;
else if (parse->groupClause != NIL)
groupClause = parse->groupClause;
else if (active_windows != NIL) {
WindowClause* wc = (WindowClause*)linitial(active_windows);
groupClause = wc->partitionClause;
} else if (parse->distinctClause != NIL)
groupClause = parse->distinctClause;
/* find corresponding super set key from group by clause */
if (groupClause != NIL) {
foreach (lc, targetlist) {
TargetEntry* tle = (TargetEntry*)lfirst(lc);
int colno;
colno = get_grouping_column_index(parse, tle, groupClause);
/* it's a group by expr if colno is not less than 0 */
if (colno >= 0 && IsTypeDistributable(exprType((Node*)tle->expr))) {
superset_keys = list_append_unique(superset_keys, tle->expr);
}
}
}
if (superset_keys != NIL)
root->dis_keys.superset_keys = list_make1(superset_keys);
}
/*
* locate_grouping_columns
* Locate grouping columns in the tlist chosen by create_plan.
*
* This is only needed if we don't use the sub_tlist chosen by
* make_subplanTargetList. We have to forget the column indexes found
* by that routine and re-locate the grouping exprs in the real sub_tlist.
*/
static void locate_grouping_columns(PlannerInfo* root, List* tlist, List* sub_tlist, AttrNumber* groupColIdx)
{
int keyno = 0;
ListCell* gl = NULL;
/*
* No work unless grouping.
*/
if (!root->parse->groupClause) {
AssertEreport(groupColIdx == NULL,
MOD_OPT,
"invalid group column index when locating grouping columns in the target list.");
return;
}
AssertEreport(
groupColIdx != NULL, MOD_OPT, "invalid group column index when locating grouping columns in the target list.");
foreach (gl, root->parse->groupClause) {
SortGroupClause* grpcl = (SortGroupClause*)lfirst(gl);
Node* groupexpr = get_sortgroupclause_expr(grpcl, tlist);
TargetEntry* te = tlist_member(groupexpr, sub_tlist);
if (te == NULL)
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
(errmsg("failed to locate grouping columns"))));
groupColIdx[keyno++] = te->resno;
}
}
/*
* postprocess_setop_tlist
* Fix up targetlist returned by plan_set_operations().
*
* We need to transpose sort key info from the orig_tlist into new_tlist.
* NOTE: this would not be good enough if we supported resjunk sort keys
* for results of set operations --- then, we'd need to project a whole
* new tlist to evaluate the resjunk columns. For now, just ereport if we
* find any resjunk columns in orig_tlist.
*/
static List* postprocess_setop_tlist(List* new_tlist, List* orig_tlist)
{
ListCell* l = NULL;
ListCell* orig_tlist_item = list_head(orig_tlist);
foreach (l, new_tlist) {
TargetEntry* new_tle = (TargetEntry*)lfirst(l);
TargetEntry* orig_tle = NULL;
/* ignore resjunk columns in setop result */
if (new_tle->resjunk)
continue;
AssertEreport(orig_tlist_item != NULL, MOD_OPT, "invalid origin targetlist item when fixing up targetlist.");
orig_tle = (TargetEntry*)lfirst(orig_tlist_item);
orig_tlist_item = lnext(orig_tlist_item);
if (orig_tle->resjunk) /* should not happen */
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_CASE_NOT_FOUND),
(errmsg("resjunk output columns are not implemented"))));
AssertEreport(new_tle->resno == orig_tle->resno,
MOD_OPT,
"The resno of new target entry does not match to the resno of origin target entry.");
new_tle->ressortgroupref = orig_tle->ressortgroupref;
}
if (orig_tlist_item != NULL)
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_CASE_NOT_FOUND),
(errmsg("resjunk output columns are not implemented"))));
return new_tlist;
}
/*
* select_active_windows
* Create a list of the "active" window clauses (ie, those referenced
* by non-deleted WindowFuncs) in the order they are to be executed.
*/
void select_active_windows(PlannerInfo* root, WindowLists* wflists)
{
List* actives = NIL;
ListCell* lc = NULL;
/* First, make a list of the active windows */
actives = NIL;
foreach (lc, root->parse->windowClause) {
WindowClause* wc = (WindowClause*)lfirst(lc);
/* It's only active if wflists shows some related WindowFuncs */
AssertEreport(
wc->winref <= wflists->maxWinRef, MOD_OPT, "the window function index is out of range of wflists");
if (wflists->windowFuncs[wc->winref] != NIL)
actives = lappend(actives, wc);
}
/*
* Now, ensure that windows with identical partitioning/ordering clauses
* are adjacent in the list. This is required by the SQL standard, which
* says that only one sort is to be used for such windows, even if they
* are otherwise distinct (eg, different names or framing clauses).
*
* There is room to be much smarter here, for example detecting whether
* one window's sort keys are a prefix of another's (so that sorting for
* the latter would do for the former), or putting windows first that
* match a sort order available for the underlying query. For the moment
* we are content with meeting the spec.
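 *
 * For example (illustration only), given
 *     ... rank() OVER w1, sum(x) OVER w2 ...
 *     WINDOW w1 AS (PARTITION BY a ORDER BY b),
 *            w2 AS (PARTITION BY a ORDER BY b ROWS UNBOUNDED PRECEDING);
 * w1 and w2 differ only in framing, so they are kept adjacent and can
 * share a single sort on (a, b).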
*/
while (actives != NIL) {
WindowClause* wc = (WindowClause*)linitial(actives);
ListCell* prev = NULL;
ListCell* next = NULL;
/* Move wc from actives to wflists->activeWindows */
actives = list_delete_first(actives);
wflists->activeWindows = lappend(wflists->activeWindows, wc);
/* Now move any matching windows from actives to wflists->activeWindows */
prev = NULL;
for (lc = list_head(actives); lc; lc = next) {
WindowClause* wc2 = (WindowClause*)lfirst(lc);
next = lnext(lc);
/* framing options are NOT to be compared here! */
if (equal(wc->partitionClause, wc2->partitionClause) && equal(wc->orderClause, wc2->orderClause)) {
actives = list_delete_cell(actives, lc, prev);
wflists->activeWindows = lappend(wflists->activeWindows, wc2);
} else
prev = lc;
}
}
}
/*
* make_windowInputTargetList
* Generate appropriate target list for initial input to WindowAgg nodes.
*
* When grouping_planner inserts one or more WindowAgg nodes into the plan,
* this function computes the initial target list to be computed by the node
* just below the first WindowAgg. This list must contain all values needed
* to evaluate the window functions, compute the final target list, and
* perform any required final sort step. If multiple WindowAggs are needed,
* each intermediate one adds its window function results onto this tlist;
* only the topmost WindowAgg computes the actual desired target list.
*
* This function is much like make_subplanTargetList, though not quite enough
* like it to share code. As in that function, we flatten most expressions
* into their component variables. But we do not want to flatten window
* PARTITION BY/ORDER BY clauses, since that might result in multiple
* evaluations of them, which would be bad (possibly even resulting in
* inconsistent answers, if they contain volatile functions). Also, we must
* not flatten GROUP BY clauses that were left unflattened by
* make_subplanTargetList, because we may no longer have access to the
* individual Vars in them.
*
* Another key difference from make_subplanTargetList is that we don't flatten
* Aggref expressions, since those are to be computed below the window
* functions and just referenced like Vars above that.
*
* 'tlist' is the query's final target list.
* 'activeWindows' is the list of active windows previously identified by
* select_active_windows.
*
* The result is the targetlist to be computed by the plan node immediately
* below the first WindowAgg node.
*/
static List* make_windowInputTargetList(PlannerInfo* root, List* tlist, List* activeWindows)
{
Query* parse = root->parse;
Bitmapset* sgrefs = NULL;
List* new_tlist = NIL;
List* flattenable_cols = NIL;
List* flattenable_vars = NIL;
ListCell* lc = NULL;
AssertEreport(parse->hasWindowFuncs,
MOD_OPT,
"the window function is empty"
"when generating appropriate target list for initial input to WindowAgg nodes.");
/*
* Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
* into a bitmapset for convenient reference below.
*/
sgrefs = NULL;
foreach (lc, activeWindows) {
WindowClause* wc = (WindowClause*)lfirst(lc);
ListCell* lc2 = NULL;
foreach (lc2, wc->partitionClause) {
SortGroupClause* sortcl = (SortGroupClause*)lfirst(lc2);
sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
}
foreach (lc2, wc->orderClause) {
SortGroupClause* sortcl = (SortGroupClause*)lfirst(lc2);
sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
}
}
/* Add in sortgroupref numbers of GROUP BY clauses, too */
foreach (lc, parse->groupClause) {
SortGroupClause* grpcl = (SortGroupClause*)lfirst(lc);
sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
}
/*
* Construct a tlist containing all the non-flattenable tlist items, and
* save aside the others for a moment.
*/
new_tlist = NIL;
flattenable_cols = NIL;
foreach (lc, tlist) {
TargetEntry* tle = (TargetEntry*)lfirst(lc);
/*
* Don't want to deconstruct window clauses or GROUP BY items. (Note
* that such items can't contain window functions, so it's okay to
* compute them below the WindowAgg nodes.)
*/
if (tle->ressortgroupref != 0 && bms_is_member(tle->ressortgroupref, sgrefs)) {
/* Don't want to deconstruct this value, so add to new_tlist */
TargetEntry* newtle = NULL;
newtle = makeTargetEntry(tle->expr, list_length(new_tlist) + 1, NULL, false);
/* Preserve its sortgroupref marking, in case it's volatile */
newtle->ressortgroupref = tle->ressortgroupref;
new_tlist = lappend(new_tlist, newtle);
} else {
/*
* Column is to be flattened, so just remember the expression for
* later call to pull_var_clause. There's no need for
* pull_var_clause to examine the TargetEntry node itself.
*/
flattenable_cols = lappend(flattenable_cols, tle->expr);
}
}
/*
* Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
* add them to the result tlist if not already present. (Some might be
* there already because they're used directly as window/group clauses.)
*
* Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that the
* Aggrefs are placed in the Agg node's tlist and not left to be computed
* at higher levels.
*/
flattenable_vars = pull_var_clause((Node*)flattenable_cols, PVC_INCLUDE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS);
    /*
     * If the GROUP BY contains an expression, the individual Vars inside it do
     * not appear in the lefttree's targetlist, so we must not add them to the
     * new targetlist. In the following example, column a is not added:
     * select coalesce(a, 1), rank() over (partition by b) from t group by coalesce(a, 1), b;
     *
     * However, if we group by unique columns, any column may be added to the
     * targetlist, so handle that special case here.
     */
if (parse->groupClause != NIL) {
List* dep_oids = get_parse_dependency_rel_list(parse->constraintDeps);
List* flattenable_vars_final = NIL;
foreach (lc, flattenable_vars) {
Node* node = (Node*)lfirst(lc);
if (IsA(node, Var)) {
if (!tlist_member(node, new_tlist)) {
if (var_from_dependency_rel(parse, (Var *)node, dep_oids) ||
var_from_sublink_pulluped(parse, (Var *) node)) {
flattenable_vars_final = lappend(flattenable_vars_final, node);
}
}
} else
flattenable_vars_final = lappend(flattenable_vars_final, node);
}
new_tlist = add_to_flat_tlist(new_tlist, flattenable_vars_final);
list_free_ext(flattenable_vars_final);
} else
new_tlist = add_to_flat_tlist(new_tlist, flattenable_vars);
/* clean up cruft */
list_free_ext(flattenable_vars);
list_free_ext(flattenable_cols);
return new_tlist;
}
/*
* make_pathkeys_for_window
* Create a pathkeys list describing the required input ordering
* for the given WindowClause.
*
* The required ordering is first the PARTITION keys, then the ORDER keys.
* In the future we might try to implement windowing using hashing, in which
* case the ordering could be relaxed, but for now we always sort.
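 *
 * As an illustrative example, a window spec "PARTITION BY a ORDER BY b"
 * yields the same pathkeys as "ORDER BY a, b": the partition clause and the
 * order clause are simply concatenated before building the pathkeys.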
*/
List* make_pathkeys_for_window(PlannerInfo* root, WindowClause* wc, List* tlist, bool canonicalize)
{
List* window_pathkeys = NIL;
List* window_sortclauses = NIL;
/* Throw error if can't sort */
if (!grouping_is_sortable(wc->partitionClause))
ereport(ERROR,
(errmodule(MOD_OPT),
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement window PARTITION BY"),
errdetail("Window partitioning columns must be of sortable datatypes."))));
if (!grouping_is_sortable(wc->orderClause))
ereport(ERROR,
(errmodule(MOD_OPT),
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement window ORDER BY"),
errdetail("Window ordering columns must be of sortable datatypes."))));
/* Okay, make the combined pathkeys */
window_sortclauses = list_concat(list_copy(wc->partitionClause), list_copy(wc->orderClause));
window_pathkeys = make_pathkeys_for_sortclauses(root, window_sortclauses, tlist, canonicalize);
list_free_ext(window_sortclauses);
return window_pathkeys;
}
/* ----------
* get_column_info_for_window
* Get the partitioning/ordering column numbers and equality operators
* for a WindowAgg node.
*
* This depends on the behavior of make_pathkeys_for_window()!
*
* We are given the target WindowClause and an array of the input column
* numbers associated with the resulting pathkeys. In the easy case, there
* are the same number of pathkey columns as partitioning + ordering columns
* and we just have to copy some data around. However, it's possible that
* some of the original partitioning + ordering columns were eliminated as
* redundant during the transformation to pathkeys. (This can happen even
* though the parser gets rid of obvious duplicates. A typical scenario is a
* window specification "PARTITION BY x ORDER BY y" coupled with a clause
* "WHERE x = y" that causes the two sort columns to be recognized as
* redundant.) In that unusual case, we have to work a lot harder to
* determine which keys are significant.
*
* The method used here is a bit brute-force: add the sort columns to a list
* one at a time and note when the resulting pathkey list gets longer. But
* it's a sufficiently uncommon case that a faster way doesn't seem worth
* the amount of code refactoring that'd be needed.
* ----------
*/
static void get_column_info_for_window(PlannerInfo* root, WindowClause* wc, List* tlist, int numSortCols,
AttrNumber* sortColIdx, int* partNumCols, AttrNumber** partColIdx, Oid** partOperators, int* ordNumCols,
AttrNumber** ordColIdx, Oid** ordOperators)
{
int numPart = list_length(wc->partitionClause);
int numOrder = list_length(wc->orderClause);
if (numSortCols == numPart + numOrder) {
/* easy case */
*partNumCols = numPart;
*partColIdx = sortColIdx;
*partOperators = extract_grouping_ops(wc->partitionClause);
*ordNumCols = numOrder;
*ordColIdx = sortColIdx + numPart;
*ordOperators = extract_grouping_ops(wc->orderClause);
} else {
List* sortclauses = NIL;
List* pathkeys = NIL;
int scidx;
ListCell* lc = NULL;
/* first, allocate what's certainly enough space for the arrays */
*partNumCols = 0;
*partColIdx = (AttrNumber*)palloc(numPart * sizeof(AttrNumber));
*partOperators = (Oid*)palloc(numPart * sizeof(Oid));
*ordNumCols = 0;
*ordColIdx = (AttrNumber*)palloc(numOrder * sizeof(AttrNumber));
*ordOperators = (Oid*)palloc(numOrder * sizeof(Oid));
sortclauses = NIL;
pathkeys = NIL;
scidx = 0;
foreach (lc, wc->partitionClause) {
SortGroupClause* sgc = (SortGroupClause*)lfirst(lc);
List* new_pathkeys = NIL;
sortclauses = lappend(sortclauses, sgc);
new_pathkeys = make_pathkeys_for_sortclauses(root, sortclauses, tlist, true);
if (list_length(new_pathkeys) > list_length(pathkeys)) {
/* this sort clause is actually significant */
(*partColIdx)[*partNumCols] = sortColIdx[scidx++];
(*partOperators)[*partNumCols] = sgc->eqop;
(*partNumCols)++;
pathkeys = new_pathkeys;
}
}
foreach (lc, wc->orderClause) {
SortGroupClause* sgc = (SortGroupClause*)lfirst(lc);
List* new_pathkeys = NIL;
sortclauses = lappend(sortclauses, sgc);
new_pathkeys = make_pathkeys_for_sortclauses(root, sortclauses, tlist, true);
if (list_length(new_pathkeys) > list_length(pathkeys)) {
/* this sort clause is actually significant */
(*ordColIdx)[*ordNumCols] = sortColIdx[scidx++];
(*ordOperators)[*ordNumCols] = sgc->eqop;
(*ordNumCols)++;
pathkeys = new_pathkeys;
}
}
/* complain if we didn't eat exactly the right number of sort cols */
if (scidx != numSortCols)
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_INVALID_OPERATION),
(errmsg("failed to deconstruct sort operators into partitioning/ordering operators"))));
}
}
/*
* expression_planner
* Perform planner's transformations on a standalone expression.
*
* Various utility commands need to evaluate expressions that are not part
* of a plannable query. They can do so using the executor's regular
* expression-execution machinery, but first the expression has to be fed
* through here to transform it from parser output to something executable.
*
* Currently, we disallow sublinks in standalone expressions, so there's no
* real "planning" involved here. (That might not always be true though.)
* What we must do is run eval_const_expressions to ensure that any function
* calls are converted to positional notation and function default arguments
* get inserted. The fact that constant subexpressions get simplified is a
* side-effect that is useful when the expression will get evaluated more than
* once. Also, we must fix operator function IDs.
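 *
 * As an illustrative example (hypothetical function), a call "f(x := 1 + 1)"
 * where f has a defaulted second argument comes out as the positional call
 * "f(2, <default>)", with the constant subexpression folded and opfuncid
 * values filled in.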
*
* Note: this must not make any damaging changes to the passed-in expression
* tree. (It would actually be okay to apply fix_opfuncids to it, but since
* we first do an expression_tree_mutator-based walk, what is returned will
* be a new node tree.)
*/
Expr* expression_planner(Expr* expr)
{
Node* result = NULL;
/*
* Convert named-argument function calls, insert default arguments and
* simplify constant subexprs
*/
result = eval_const_expressions(NULL, (Node*)expr);
/* Fill in opfuncid values if missing */
fix_opfuncids(result);
return (Expr*)result;
}
/*
* plan_cluster_use_sort
* Use the planner to decide how CLUSTER should implement sorting
*
* tableOid is the OID of a table to be clustered on its index indexOid
* (which is already known to be a btree index). Decide whether it's
* cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
* Return TRUE to use sorting, FALSE to use an indexscan.
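 *
 * As an illustrative example (hypothetical names), "CLUSTER t USING t_idx"
 * arrives here with the OIDs of t and t_idx; we cost a full-table indexscan
 * against a seqscan-plus-sort of rel->tuples rows and report whichever is
 * estimated cheaper.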
*
* Note: caller had better already hold some type of lock on the table.
*/
bool plan_cluster_use_sort(Oid tableOid, Oid indexOid)
{
PlannerInfo* root = NULL;
Query* query = NULL;
PlannerGlobal* glob = NULL;
RangeTblEntry* rte = NULL;
RelOptInfo* rel = NULL;
IndexOptInfo* indexInfo = NULL;
QualCost indexExprCost;
Cost comparisonCost;
Path* seqScanPath = NULL;
Path seqScanAndSortPath;
IndexPath* indexScanPath = NULL;
ListCell* lc = NULL;
/* Set up mostly-dummy planner state */
query = makeNode(Query);
query->commandType = CMD_SELECT;
glob = makeNode(PlannerGlobal);
root = makeNode(PlannerInfo);
root->parse = query;
root->glob = glob;
root->query_level = 1;
root->planner_cxt = CurrentMemoryContext;
root->wt_param_id = -1;
/* Build a minimal RTE for the rel */
rte = makeNode(RangeTblEntry);
rte->rtekind = RTE_RELATION;
rte->relid = tableOid;
rte->relkind = RELKIND_RELATION;
rte->lateral = false;
rte->inh = false;
rte->inFromCl = true;
query->rtable = list_make1(rte);
/* Set up RTE/RelOptInfo arrays */
setup_simple_rel_arrays(root);
/* Build RelOptInfo */
rel = build_simple_rel(root, 1, RELOPT_BASEREL);
/* Locate IndexOptInfo for the target index */
indexInfo = NULL;
foreach (lc, rel->indexlist) {
indexInfo = (IndexOptInfo*)lfirst(lc);
if (indexInfo->indexoid == indexOid)
break;
}
/*
* It's possible that get_relation_info did not generate an IndexOptInfo
* for the desired index; this could happen if it's not yet reached its
* indcheckxmin usability horizon, or if it's a system index and we're
* ignoring system indexes. In such cases we should tell CLUSTER to not
* trust the index contents but use seqscan-and-sort.
*/
if (lc == NULL) /* not in the list? */
return true; /* use sort */
/*
* Rather than doing all the pushups that would be needed to use
* set_baserel_size_estimates, just do a quick hack for rows and width.
*/
rel->rows = rel->tuples;
rel->width = get_relation_data_width(tableOid, InvalidOid, NULL);
root->total_table_pages = rel->pages;
/*
* Determine eval cost of the index expressions, if any. We need to
* charge twice that amount for each tuple comparison that happens during
* the sort, since tuplesort.c will have to re-evaluate the index
* expressions each time. (XXX that's pretty inefficient...)
*/
cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
/* Estimate the cost of seq scan + sort */
seqScanPath = create_seqscan_path(root, rel, NULL);
cost_sort(&seqScanAndSortPath,
NIL,
seqScanPath->total_cost,
RELOPTINFO_LOCAL_FIELD(root, rel, rows),
rel->width,
comparisonCost,
u_sess->attr.attr_memory.maintenance_work_mem,
-1.0,
(rel->orientation != REL_ROW_ORIENTED));
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo, NIL, NIL, NIL, NIL, NIL, ForwardScanDirection, false, NULL, NULL, 1.0);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
/*
* @@GaussDB@@
* Target : data partition
* Brief : Use the planner to decide how CLUSTER should implement sorting
* Description :
* Notes : caller had better already hold some type of lock on the table.
* Input :
* Output : Return TRUE to use sorting, FALSE to use an indexscan.
*/
bool planClusterPartitionUseSort(Relation partRel, Oid indexOid, PlannerInfo* root, RelOptInfo* relOptInfo)
{
IndexOptInfo* indexInfo = NULL;
QualCost indexExprCost;
Cost comparisonCost = 0;
Path* seqScanPath = NULL;
Path seqScanAndSortPath;
IndexPath* indexScanPath = NULL;
ListCell* lc = NULL;
/* Locate IndexOptInfo for the target index */
indexInfo = NULL;
foreach (lc, relOptInfo->indexlist) {
indexInfo = (IndexOptInfo*)lfirst(lc);
if (indexInfo->indexoid == indexOid) {
break;
}
}
/*
* It's possible that get_relation_info did not generate an IndexOptInfo
* for the desired index; this could happen if it's not yet reached its
* indcheckxmin usability horizon, or if it's a system index and we're
* ignoring system indexes. In such cases we should tell CLUSTER to not
* trust the index contents but use seqscan-and-sort.
*/
if (lc == NULL) { /* not in the list? */
return true; /* use sort */
}
/*
* Determine eval cost of the index expressions, if any. We need to
* charge twice that amount for each tuple comparison that happens during
* the sort, since tuplesort.c will have to re-evaluate the index
* expressions each time. (XXX that's pretty inefficient...)
*/
cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
/* Estimate the cost of seq scan + sort */
seqScanPath = create_seqscan_path(root, relOptInfo, NULL);
cost_sort(&seqScanAndSortPath,
NIL,
seqScanPath->total_cost,
relOptInfo->tuples,
relOptInfo->width,
comparisonCost,
u_sess->attr.attr_memory.maintenance_work_mem,
-1.0,
(relOptInfo->orientation != REL_ROW_ORIENTED));
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo, NIL, NIL, NIL, NIL, NIL, ForwardScanDirection, false, NULL, NULL, 1.0);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
/* Get aligned hash entry size from Agg width and number of agg functions */
Size get_hash_entry_size(int width, int numAggs)
{
return alloc_trunk_size(MAXALIGN(width) + MAXALIGN(sizeof(MinimalTupleData))) + hash_agg_entry_size(numAggs);
}
#ifdef STREAMPLAN
static bool needs_two_level_groupagg(PlannerInfo* root, Plan* plan, Node* distinct_node, List* distributed_key,
bool* need_redistribute, bool* need_local_redistribute)
{
Query* parse = root->parse;
bool located = false;
bool two_level_sort = false;
double multiple = 1.0;
List* locate_node_list = NIL;
List* group_exprs = get_sortgrouplist_exprs(parse->groupClause, parse->targetList);
bool force_single_group = expression_returns_set((Node*)parse->targetList);
ListCell* lc = NULL;
foreach (lc, group_exprs) {
if (equal(distinct_node, lfirst(lc))) {
located = true;
break;
}
}
list_free_ext(group_exprs);
if (!located) {
List* distinct_node_list = list_make1(distinct_node);
bool needs_redistribute_distinct = needs_agg_stream(root, distinct_node_list, plan->distributed_keys);
/*
* for smp plan, when doing agg we have to do a local redistribute even
* if the agg col is the distribute key
*/
if (!needs_redistribute_distinct && SET_DOP(plan->dop) > 1) {
*need_local_redistribute = true;
}
        /* For the AGG_PLAIN case, and for the case with no redistribution on the count(distinct)
         * expr, we can directly decide that two_level_sort is needed. Otherwise, if the group by
         * clause has too few distinct values to give all DNs work, we can use two_level_sort to
         * get better performance */
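        /*
         * Illustrative example (hypothetical tables): for
         * select count(distinct b) from t group by a;
         * where a has only a handful of distinct values, redistributing by b and
         * aggregating in two levels spreads the sort work across all datanodes.
         */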
if (!needs_redistribute_distinct) {
if ((!parse->groupClause || distributed_key) && !force_single_group) {
two_level_sort = true;
}
} else if (!parse->groupClause) {
locate_node_list = get_distributekey_from_tlist(root, NIL, distinct_node_list, plan->plan_rows, &multiple);
if (locate_node_list != NIL && !force_single_group) {
AssertEreport(list_member(locate_node_list, distinct_node),
MOD_OPT,
"The distinct node is not a member of the locate node list");
list_free_ext(locate_node_list);
two_level_sort = true;
*need_redistribute = true;
}
} else {
List* groupby_node_list = NIL;
double groupby_multiple = 1.0;
groupby_node_list =
get_distributekey_from_tlist(root, NIL, distributed_key, plan->plan_rows, &groupby_multiple);
if (groupby_node_list != NIL && groupby_multiple > 1) {
locate_node_list =
get_distributekey_from_tlist(root, NIL, distinct_node_list, plan->plan_rows, &multiple);
if (locate_node_list != NIL && multiple < groupby_multiple && !force_single_group) {
AssertEreport(list_member(locate_node_list, distinct_node),
MOD_OPT,
"The distinct node is not a member of the locate node list");
two_level_sort = true;
*need_redistribute = true;
}
list_free_ext(locate_node_list);
}
list_free_ext(groupby_node_list);
}
list_free_ext(distinct_node_list);
}
return two_level_sort;
}
#ifdef ENABLE_MULTIPLE_NODES
static List* append_distribute_var_list(List* varlist, Node* tlist_node)
{
ListCell* lc = NULL;
ListCell* lc2 = NULL;
List* expr_varlist =
pull_var_clause(tlist_node, PVC_INCLUDE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_INCLUDE_SPECIAL_EXPR);
List* resultlist = NIL;
foreach (lc, expr_varlist) {
Node* node = (Node*)lfirst(lc);
if (IsA(node, Var))
varlist = list_append_unique(varlist, node);
else {
lc2 = NULL;
AssertEreport(IsA(node, EstSPNode), MOD_OPT, "invalid node type.");
foreach (lc2, varlist) {
Node* tmp = (Node*)lfirst(lc2);
if (IsA(tmp, Var)) {
if (list_member(((EstSPNode*)node)->varlist, tmp))
break;
} else {
AssertEreport(IsA(tmp, EstSPNode), MOD_OPT, "invalid node type.");
if (((resultlist = list_intersection(((EstSPNode*)node)->varlist, ((EstSPNode*)tmp)->varlist))) !=
NIL) {
list_free_ext(resultlist);
break;
}
}
}
if (lc2 == NULL)
varlist = lappend(varlist, node);
else
list_free_ext(((EstSPNode*)node)->varlist);
}
}
list_free_ext(expr_varlist);
return varlist;
}
#endif
/*
* @Description: Generate top agg.
* @in root: Per-query information for planning/optimization.
* @in tlist: Targetlist.
 * @in agg_plan: the lower agg plan that the top agg is copied from.
 * @in stream_plan: the stream plan placed beneath the new top agg.
* @in agg_orientation: Agg strategy.
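 *
 * Illustrative example of the resulting shape for a two-level aggregation:
 * Agg (is_final) -> Stream/RemoteQuery -> Agg -> ...
 * This function builds the top (final) Agg above the given stream plan, adding
 * a Sort below it when the strategy is AGG_SORTED.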
*/
static Plan* mark_top_agg(
PlannerInfo* root, List* tlist, Plan* agg_plan, Plan* stream_plan, AggOrientation agg_orientation)
{
Plan* top_node = NULL;
Plan* leftchild = agg_plan->lefttree;
Plan* sub_plan = NULL;
agg_plan->lefttree = NULL;
if (((Agg*)agg_plan)->aggstrategy == AGG_SORTED) {
AssertEreport(
agg_orientation != DISTINCT_INTENT, MOD_OPT, "unexpected aggregate orientation when generating top agg.");
AttrNumber* groupColIdx = (AttrNumber*)palloc(list_length(root->parse->groupClause) * sizeof(AttrNumber));
locate_grouping_columns(root, agg_plan->targetlist, ((Plan*)stream_plan)->targetlist, groupColIdx);
sub_plan = (Plan*)make_sort_from_groupcols(root, root->parse->groupClause, groupColIdx, (Plan*)stream_plan);
inherit_plan_locator_info(sub_plan, stream_plan);
#ifdef ENABLE_MULTIPLE_NODES
if (IsA(stream_plan, RemoteQuery)) {
((RemoteQuery*)stream_plan)->mergesort_required = true;
sub_plan->plan_rows = PLAN_LOCAL_ROWS(sub_plan);
} else {
AssertEreport(IsA(stream_plan, Stream), MOD_OPT, "unexpected node type when generating top agg.");
((Stream*)stream_plan)->is_sorted = true;
}
#else
if (IsA(stream_plan, Stream)) {
((Stream*)stream_plan)->is_sorted = true;
}
#endif
} else
sub_plan = (Plan*)stream_plan;
top_node = (Plan*)copyObject(agg_plan);
    /* Remove the skew opt flag from the lower-layer agg; we only display the flag on the top agg. */
((Agg*)agg_plan)->skew_optimize = SKEW_RES_NONE;
    /* The having qual of the second agg node is copied from the first agg and has been
     * converted into the second agg's expressions, so remove the having qual from the
     * first aggregation.
     */
    agg_plan->qual = NIL;
    // restore the lefttree pointer of the original plan
    agg_plan->lefttree = leftchild;
top_node->startup_cost = sub_plan->startup_cost;
top_node->total_cost = sub_plan->total_cost;
top_node->lefttree = sub_plan;
top_node->targetlist = tlist;
/* Set smp info. */
if (IsA(stream_plan, Stream)) {
top_node->dop = stream_plan->dop;
} else {
top_node->dop = 1;
}
if (IsA(stream_plan, RemoteQuery))
top_node->plan_rows = PLAN_LOCAL_ROWS(top_node);
inherit_plan_locator_info(top_node, top_node->lefttree);
if (IsA(top_node, Agg)) {
Agg* agg_node = (Agg*)top_node;
if (agg_orientation != DISTINCT_INTENT && root->parse->groupClause != NIL)
locate_grouping_columns(root, top_node->targetlist, top_node->lefttree->targetlist, agg_node->grpColIdx);
agg_node->is_final = true;
if (IsA(stream_plan, RemoteQuery))
agg_node->numGroups = (long)Min(top_node->plan_rows, (double)LONG_MAX);
}
return (Plan*)top_node;
}
static Plan* mark_agg_stream(PlannerInfo* root, List* tlist, Plan* plan, List* group_or_distinct_cls,
AggOrientation agg_orientation, bool* has_second_agg_sort)
{
Plan* streamplan = NULL;
bool subplan_exec_on_dn = false;
if (plan == NULL || !IsA(plan, Agg) || is_execute_on_coordinator(plan) || is_execute_on_allnodes(plan))
return plan;
*has_second_agg_sort = false;
subplan_exec_on_dn = check_subplan_exec_datanode(root, (Node*)plan->qual);
if (root->query_level == 1 && !subplan_exec_on_dn) {
streamplan = make_simple_RemoteQuery(plan, root, false);
} else { // subquery
List* distribute_keys = NIL;
double multiple = 0.0;
distribute_keys = get_optimal_distribute_key(root, group_or_distinct_cls, plan, &multiple);
Distribution* distribution = (distribute_keys == NULL)
? ng_get_correlated_subplan_group_distribution() : ng_get_dest_distribution(plan);
streamplan = make_stream_plan(root, plan, distribute_keys, multiple, distribution);
AssertEreport(streamplan->exec_nodes != NULL,
MOD_OPT,
"The list of datanodes where to execute stream plan is empty when generating top agg.");
/* Handle of parallelism of plain agg when we create broadcast for it. */
Stream* stream = (Stream*)streamplan;
if (stream->type == STREAM_BROADCAST) {
stream->smpDesc.consumerDop = 1;
streamplan->dop = 1;
}
}
#ifdef ENABLE_MULTIPLE_NODES
if (plan == streamplan) {
        // In some cases no new stream node is added (such as when called by add_remote_subplan).
        // If that's the case, nothing else should be done; otherwise we may create an agg over
        // agg operator, which is unnecessary and incorrect for some agg functions such as avg().
return plan;
}
#endif
if (((Agg*)plan)->aggstrategy == AGG_SORTED)
*has_second_agg_sort = true;
return mark_top_agg(root, tlist, plan, streamplan, agg_orientation);
}
static Plan* mark_group_stream(PlannerInfo* root, List* tlist, Plan* result_plan)
{
Plan* streamplan = NULL;
Plan* bottom_node = NULL;
Plan* top_node = NULL;
Plan* leftchild = NULL;
Plan* sort_node = NULL;
List* groupClause = NIL;
AttrNumber* groupColIdx = NULL;
bool subplan_exec_on_dn = false;
AssertEreport(
result_plan && IsA(result_plan, Group), MOD_OPT, "The result plan is NULL or its type is not T_Group.");
groupClause = root->parse->groupClause;
groupColIdx = (AttrNumber*)palloc(sizeof(AttrNumber) * list_length(groupClause));
locate_grouping_columns(root, result_plan->targetlist, result_plan->targetlist, groupColIdx);
/*
     * If the qual contains a subplan executed on the DN, group + gather + group would make
     * the query unshippable (see finalize_node_id), so we should never generate such a plan.
*/
subplan_exec_on_dn = subplan_exec_on_dn || check_subplan_exec_datanode(root, (Node*)result_plan->qual);
if (!equal(tlist, result_plan->targetlist)) {
subplan_exec_on_dn = subplan_exec_on_dn || check_subplan_exec_datanode(root, (Node*)tlist);
}
if (root->query_level == 1 && !subplan_exec_on_dn)
streamplan = make_simple_RemoteQuery(result_plan, root, false);
else {
double multiple = 0.0;
List* groupcls = root->parse->groupClause;
List* distribute_keys = NIL;
distribute_keys = get_optimal_distribute_key(root, groupcls, result_plan, &multiple);
streamplan = make_stream_plan(root, result_plan, distribute_keys, multiple);
}
sort_node = (Plan*)make_sort_from_groupcols(root, groupClause, groupColIdx, streamplan);
bottom_node = sort_node;
leftchild = result_plan->lefttree;
result_plan->lefttree = NULL;
top_node = (Plan*)copyObject(result_plan);
    /* If the sub group-by is parallelized, the top group above the gather must run with dop 1. */
if (root->query_level == 1 && !subplan_exec_on_dn)
top_node->dop = 1;
result_plan->qual = NIL;
result_plan->lefttree = leftchild;
top_node->lefttree = bottom_node;
top_node->targetlist = tlist;
inherit_plan_locator_info(top_node, top_node->lefttree);
((Group*)top_node)->grpColIdx = groupColIdx;
return top_node;
}
/*
 * @Description: Check whether this winagg node includes window functions other than rank and row_number.
 * @in tlist: targetlist to scan.
 * @in wfc: window function.
 * @return: Return true if any window function besides rank and row_number is included, else false.
*/
static bool contain_other_windowfuncs(List* tlist, WindowFunc* wfc)
{
ListCell* cell = NULL;
foreach (cell, tlist) {
TargetEntry* tle = (TargetEntry*)lfirst(cell);
Expr* expr = tle->expr;
if (IsA(expr, WindowFunc)) {
WindowFunc* winFun = (WindowFunc*)expr;
            /* If this winFun is computed in the same node as wfc and is not rank or row_number, return true. */
if (winFun->winref == wfc->winref && winFun->winfnoid != ROWNUMBERFUNCOID &&
winFun->winfnoid != RANKFUNCOID) {
return true;
}
}
}
return false;
}
/*
 * @Description: Build a lower window agg plan.
 * @in root: Per-query information for planning/optimization.
 * @in plan: window agg plan.
 * @in wc: window clause.
 * @in partitionExprs: PARTITION BY expressions of the window clause.
 * @return: The lower plan if one is built, else NULL.
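 *
 * Illustrative example (hypothetical tables): for
 * select * from (select a, rank() over (partition by b) rn from t) s where rn <= 10;
 * a copy of the WindowAgg plus a Result filtering "rn <= 10" can run below the
 * redistribution, so each datanode ships only rows that can survive the filter.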
*/
static Plan* build_lower_winagg_plan(PlannerInfo* root, Plan* plan, WindowClause* wc, List* partitionExprs)
{
Plan* bottomPlan = NULL;
if (list_length(root->subqueryRestrictInfo) == 1) {
Expr* expr = (Expr*)(((RestrictInfo*)linitial(root->subqueryRestrictInfo))->clause);
if (IsA(expr, OpExpr) && list_length(((OpExpr*)expr)->args) == 2) {
OpExpr* op_expr = (OpExpr*)expr;
Node* arg1 = (Node*)linitial(op_expr->args);
Node* arg2 = (Node*)lsecond(op_expr->args);
            /* We only support the case where the parameter is a Const and the operator is less-than or less-equal. */
if (IsA(arg1, Var) && IsA(arg2, Const) && (op_expr->opno == INT84LTOID || op_expr->opno == INT84LEOID)) {
Var* var = (Var*)arg1;
Const* con = (Const*)arg2;
TargetEntry* tge = (TargetEntry*)list_nth(plan->targetlist, var->varattno - 1);
Node* arg = (Node*)tge->expr;
/*
                 * Only rank() and row_number() are supported, and this node's targetlist
                 * may contain no window functions other than rank() and row_number().
*/
if (IsA(arg, WindowFunc) &&
(((WindowFunc*)arg)->winfnoid == ROWNUMBERFUNCOID || ((WindowFunc*)arg)->winfnoid == RANKFUNCOID) &&
!contain_other_windowfuncs(plan->targetlist, (WindowFunc*)arg)) {
double selec = get_windowagg_selectivity(root,
wc,
(WindowFunc*)arg,
partitionExprs,
DatumGetInt32(con->constvalue),
PLAN_LOCAL_ROWS(plan),
ng_get_dest_num_data_nodes(plan));
/*
                     * When the less-than / less-equal filter selectivity is below 1/3
                     * (TWOLEVELWINFUNSELECTIVITY), generate a two-level winfunc plan.
*/
if (selec < TWOLEVELWINFUNSELECTIVITY) {
Plan* leftree = plan->lefttree;
List* winPlanTarget = plan->targetlist;
List* leftTarget = plan->lefttree->targetlist;
TargetEntry* tle = NULL;
OpExpr* winFunCondition = (OpExpr*)copyObject(expr);
/* This qual's left arg is windowagg's rank or row_number. */
linitial(winFunCondition->args) = copyObject(arg);
                        /* Copy the window agg plan to use as the lower window agg node. */
plan->lefttree = NULL;
plan->targetlist = NIL;
Plan* lowerWindowPlan = (Plan*)copyObject(plan);
                        /* The lower window plan's targetlist should be the lefttree's targetlist plus the window function. */
lowerWindowPlan->targetlist = (List*)copyObject(leftTarget);
tle = (TargetEntry*)copyObject(tge);
lowerWindowPlan->targetlist = lappend(lowerWindowPlan->targetlist, tle);
tle->resno = list_length(lowerWindowPlan->targetlist);
lowerWindowPlan->lefttree = leftree;
/* Restore plan's targetlist. */
plan->targetlist = winPlanTarget;
bottomPlan = (Plan*)make_result(
root, (List*)copyObject(leftTarget), NULL, lowerWindowPlan, list_make1(winFunCondition));
set_plan_rows(bottomPlan, clamp_row_est(plan->plan_rows * selec));
}
}
}
}
}
return bottomPlan;
}
Distribution* get_windows_best_distribution(Plan* plan)
{
if (!u_sess->attr.attr_sql.enable_dngather || !u_sess->opt_cxt.is_dngather_support) {
return ng_get_dest_distribution(plan);
}
if (plan->plan_rows <= u_sess->attr.attr_sql.dngather_min_rows) {
return ng_get_single_node_distribution();
}
return ng_get_dest_distribution(plan);
}
/*
 * @Description: Add a stream node under the windowAgg node if needed.
 * @in root: Per-query information for planning/optimization.
 * @in plan: window agg node.
* @in tlist: target list.
* @in wc: current window clause.
* @in pathkeys: current sort pathkeys.
* @in wflists: window function list.
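 *
 * Illustrative example: if the PARTITION BY keys do not match the child's
 * distribute keys, a redistribute stream (hashed on the partition keys) and a
 * re-sort are added below the WindowAgg; with no PARTITION BY, the rows are
 * gathered (merge-sorted when pathkeys exist) before the WindowAgg runs.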
*/
static Plan* mark_windowagg_stream(
PlannerInfo* root, Plan* plan, List* tlist, WindowClause* wc, List* pathkeys, WindowLists* wflists)
{
WindowAgg* wa_plan = NULL;
Plan* resultPlan = plan;
Plan* bottomPlan = NULL;
Plan* lefttree = NULL;
AssertEreport(plan && IsA(plan, WindowAgg), MOD_OPT, "The plan is null or its type is not T_WindowAgg.");
wa_plan = (WindowAgg*)plan;
bottomPlan = plan->lefttree;
if (IsA(bottomPlan, Sort))
bottomPlan = bottomPlan->lefttree;
if (wa_plan->partNumCols > 0) {
AssertEreport(
            wa_plan->partColIdx, MOD_OPT, "invalid part column index when adding stream node under windowAgg node.");
List* partitionExprs = get_sortgrouplist_exprs(wc->partitionClause, tlist);
bool need_stream = needs_agg_stream(root, partitionExprs, plan->distributed_keys, &plan->exec_nodes->distribution);
/*
         * Two cases where we need a stream:
         * 1. the partition keys are not in the subplan's distribute keys <=> need_stream = true;
         * 2. the partition keys are in the subplan's distribute keys but we want to parallelize it.
*/
if (need_stream || plan->dop > 1) {
double multiple = 0.0;
List* bestDistExpr = NIL;
/* For local redistribute in parallelization. */
if (!need_stream)
bestDistExpr = plan->distributed_keys;
else
bestDistExpr = get_optimal_distribute_key(root, partitionExprs, bottomPlan, &multiple);
/* only build two-level plan for the last windowagg */
if (wc == llast(wflists->activeWindows)) {
Plan* lower_plan = build_lower_winagg_plan(root, plan, wc, partitionExprs);
/* Can generate two level windows agg. */
if (lower_plan != NULL) {
bottomPlan = lower_plan;
resultPlan = plan;
}
}
Plan* streamplan = make_stream_plan(root, bottomPlan, bestDistExpr, multiple, get_windows_best_distribution(plan));
if (!need_stream)
((Stream*)streamplan)->smpDesc.distriType = LOCAL_DISTRIBUTE;
            /* We need a sort node on top of the stream. */
if (pathkeys != NIL) {
lefttree = (Plan*)make_sort_from_pathkeys(root, streamplan, pathkeys, -1.0, true);
} else {
lefttree = streamplan;
}
resultPlan->lefttree = lefttree;
/* Recalculate windowagg cost for two-level plan or plan multiple change */
if (wc == llast(wflists->activeWindows) || lefttree->multiple != plan->multiple) {
Path windowagg_path; /* dummy for result of cost_windowagg */
set_plan_rows(resultPlan, lefttree->plan_rows, lefttree->multiple);
cost_windowagg(&windowagg_path,
root,
wflists->windowFuncs[wa_plan->winref],
wa_plan->partNumCols,
wa_plan->ordNumCols,
lefttree->startup_cost,
lefttree->total_cost,
PLAN_LOCAL_ROWS(lefttree));
plan->startup_cost = windowagg_path.startup_cost;
plan->total_cost = windowagg_path.total_cost;
} else {
/* Add stream and lower_plan cost. */
resultPlan->startup_cost += (lefttree->startup_cost - plan->startup_cost);
resultPlan->total_cost += (lefttree->total_cost - plan->total_cost);
}
inherit_plan_locator_info(resultPlan, lefttree);
} else {
resultPlan = plan;
}
    } else { /* No PARTITION BY clause */
Plan* gatherPlan = NULL;
Sort* sortPlan = NULL;
SimpleSort* streamSort = NULL;
if (pathkeys != NIL) {
sortPlan = make_sort_from_pathkeys(root, bottomPlan, pathkeys, -1.0);
streamSort = makeNode(SimpleSort);
streamSort->numCols = sortPlan->numCols;
streamSort->sortColIdx = sortPlan->sortColIdx;
streamSort->sortOperators = sortPlan->sortOperators;
streamSort->nullsFirst = sortPlan->nullsFirst;
streamSort->sortToStore = false;
streamSort->sortCollations = sortPlan->collations;
}
        /* for a plan at a sub query level, we push it down to the DNs */
if (root->query_level == 1) {
/*
             * If we have pathkeys, we can push down the Sort to the Datanodes and then
             * merge the partially sorted results in RemoteQuery.
*/
if (pathkeys != NIL) {
gatherPlan = make_simple_RemoteQuery((Plan*)sortPlan, root, false);
if (IsA(gatherPlan, RemoteQuery)) {
((RemoteQuery*)gatherPlan)->sort = streamSort;
} else if (IsA(gatherPlan, Stream)) {
((Stream*)gatherPlan)->sort = streamSort;
}
} else {
gatherPlan = make_simple_RemoteQuery(bottomPlan, root, false);
}
} else {
if (((unsigned int)u_sess->attr.attr_sql.cost_param & COST_ALTERNATIVE_MERGESORT) ||
root->is_under_recursive_cte) {
gatherPlan = make_stream_plan(root, bottomPlan, NIL, 1.0);
if (pathkeys != NIL)
gatherPlan = (Plan*)make_sort_from_pathkeys(root, gatherPlan, pathkeys, -1.0);
} else {
bool single_node =
#ifndef ENABLE_MULTIPLE_NODES
plan->dop <= 1 &&
#endif
(bottomPlan->exec_nodes != NULL && list_length(bottomPlan->exec_nodes->nodeList) == 1);
/* If there's a sort, we need it to do merge sort */
if (IsA(plan->lefttree, Sort)) {
/*
                         * If the bottom plan already runs on a single node and needs a global sort,
                         * we should redistribute to all datanodes to do the sort and avoid a bottleneck
*/
if (single_node) {
bottomPlan = plan->lefttree->lefttree;
/* Construct group clause using targetlist. */
List* grplist = make_groupcl_for_append(root, bottomPlan->targetlist);
if (grplist != NIL) {
double multiple;
List* distkeys = NIL;
/* Get distkeys according to bias. */
distkeys = get_distributekey_from_tlist(
root, bottomPlan->targetlist, grplist, bottomPlan->plan_rows, &multiple);
/* If distribute key is much less skewed, we use it */
if (distkeys != NIL &&
multiple < u_sess->pgxc_cxt.NumDataNodes * TWOLEVELWINFUNSELECTIVITY) {
bottomPlan = make_stream_plan(root, bottomPlan, distkeys, 1.0);
plan->lefttree->lefttree = bottomPlan;
inherit_plan_locator_info((Plan*)plan->lefttree, bottomPlan);
single_node = false;
}
}
}
bottomPlan = plan->lefttree;
}
/* If there are multiple producers, add a merge sort */
if (!single_node) {
gatherPlan = make_stream_plan(root, bottomPlan, NIL, 1.0);
pick_single_node_plan_for_replication(gatherPlan);
if (pathkeys != NIL)
((Stream*)gatherPlan)->sort = streamSort;
} else
gatherPlan = bottomPlan;
}
}
plan->lefttree = gatherPlan;
inherit_plan_locator_info(plan, gatherPlan);
resultPlan = plan;
}
return resultPlan;
}
static Plan* mark_distinct_stream(
PlannerInfo* root, List* tlist, Plan* plan, List* distinctcls, Index query_level, List* current_pathkeys)
{
Plan* leftchild = NULL;
Plan* bottom_node = NULL;
Plan* top_node = NULL;
Plan* streamplan = NULL;
if (plan == NULL || !IsA(plan, Unique) || is_execute_on_coordinator(plan) || is_execute_on_allnodes(plan))
return plan;
#ifndef ENABLE_MULTIPLE_NODES
    /* if on single-node and dop is 1, there is no need to add a stream operator */
if (plan->dop == 1) {
return plan;
}
#endif
if (query_level == 1)
streamplan = make_simple_RemoteQuery(plan, root, false);
else {
List* distribute_keys = NIL;
double multiple = 0.0;
distribute_keys = get_optimal_distribute_key(root, distinctcls, plan, &multiple);
streamplan = make_stream_plan(root, plan, distribute_keys, multiple);
}
if (current_pathkeys != NULL) {
bottom_node = (Plan*)make_sort_from_pathkeys(root, streamplan, current_pathkeys, -1.0);
inherit_plan_locator_info(bottom_node, streamplan);
} else {
bottom_node = streamplan;
}
if (IsA(streamplan, RemoteQuery))
((RemoteQuery*)streamplan)->mergesort_required = true;
else
((Stream*)streamplan)->is_sorted = true;
leftchild = plan->lefttree;
plan->lefttree = NULL;
top_node = (Plan*)copyObject(plan);
// restore the lefttree pointer of original plan
plan->lefttree = leftchild;
top_node->lefttree = bottom_node;
top_node->targetlist = tlist;
inherit_plan_locator_info(top_node, bottom_node);
return top_node;
}
/*
* get_optimal_distribute_key
 *      For distinct, windowagg and group aggregation we can't get a close estimate, so we
 *      roughly check whether the matching key can be the optimal distribute key; it is
 *      chosen only if it has less bias than the actual optimal distribute key.
*
* Parameters:
* @in root: plannerinfo struct from current query level
* @in groupClause: group by clause of current query level
* @in plan: plan node to find best distribute key
* @out multiple: skew multiple of optimal distribute key
* Return:
* list of optimal distribute key
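 *
 * Illustrative example (hypothetical tables): for
 * insert into t1 select a, count(*) from t2 group by a;
 * if t1's distribute key a is a subset of the group by columns and shows no
 * skew, a is chosen so the aggregated rows already land where the insert
 * needs them, avoiding a further redistribution.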
*/
static List* get_optimal_distribute_key(PlannerInfo* root, List* groupClause, Plan* plan, double* multiple)
{
double multiple_matched = -1.0;
bool use_skew = true;
bool use_bias = true;
double skew_multiple = 0.0;
double bias_multiple = 0.0;
List* distribute_keys = NIL;
if (root->dis_keys.matching_keys != NIL &&
        list_is_subset(root->dis_keys.matching_keys, groupClause)) { /* we have an insert target table */
get_multiple_from_exprlist(
root, root->dis_keys.matching_keys, plan->plan_rows, &use_skew, use_bias, &skew_multiple, &bias_multiple);
multiple_matched = Max(skew_multiple, bias_multiple);
if (multiple_matched <= 1.0) {
*multiple = multiple_matched;
distribute_keys = root->dis_keys.matching_keys;
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), (errmsg("matching key distribution is chosen due to no skew."))));
}
}
if (distribute_keys == NIL) {
List* local_distribute_keys = NIL;
local_distribute_keys =
get_distributekey_from_tlist(root, plan->targetlist, groupClause, plan->plan_rows, multiple);
/* Compare match key with local distribute key. */
if (multiple_matched > 0.0 && multiple_matched <= *multiple) {
*multiple = multiple_matched;
distribute_keys = root->dis_keys.matching_keys;
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
(errmsg("matching key distribution is chosen due to no skewer than best distribute key."))));
} else {
distribute_keys = local_distribute_keys;
}
}
return distribute_keys;
}
/*
 * @Description: Check if the plan's grouping operators include the array equality operator
 *
 * @param[IN] plan: plan to check
 * @return: bool, true if it has one
*/
static bool has_array_operator(Plan* plan)
{
if (IsA(plan, Agg)) {
for (int i = 0; i < ((Agg*)plan)->numCols; i++) {
if (((Agg*)plan)->grpOperators[i] == ARRAY_EQ_OP) {
return true;
}
}
}
if (IsA(plan, Group)) {
for (int i = 0; i < ((Group*)plan)->numCols; i++) {
if (((Group*)plan)->grpOperators[i] == ARRAY_EQ_OP) {
return true;
}
}
}
return false;
}
/*
* @Description: Check if has column store relation
*
* @param[IN] top_plan: current plan node
* @return: bool, true if has one
*/
static bool has_column_store_relation(Plan* top_plan)
{
switch (nodeTag(top_plan)) {
/* Node which vec_output is true */
case T_DfsScan:
case T_DfsIndexScan:
case T_CStoreScan:
case T_CStoreIndexScan:
case T_CStoreIndexCtidScan:
case T_CStoreIndexHeapScan:
#ifdef ENABLE_MULTIPLE_NODES
case T_TsStoreScan:
#endif /* ENABLE_MULTIPLE_NODES */
return true;
case T_ForeignScan:
/* If it is column store relation, return true */
if (IsVecOutput(top_plan))
return true;
break;
case T_ExtensiblePlan: {
ExtensiblePlan* ext_plans = (ExtensiblePlan*)top_plan;
ListCell* lc = NULL;
/* If result table is column store, return true */
if (IsVecOutput(top_plan))
return true;
foreach (lc, ext_plans->extensible_plans) {
Plan* plan = (Plan*)lfirst(lc);
if (has_column_store_relation(plan))
return true;
}
} break;
case T_MergeAppend: {
MergeAppend* ma = (MergeAppend*)top_plan;
ListCell* lc = NULL;
foreach (lc, ma->mergeplans) {
Plan* plan = (Plan*)lfirst(lc);
if (has_column_store_relation(plan))
return true;
}
} break;
case T_Append: {
Append* append = (Append*)top_plan;
ListCell* lc = NULL;
foreach (lc, append->appendplans) {
Plan* plan = (Plan*)lfirst(lc);
if (has_column_store_relation(plan))
return true;
}
} break;
case T_ModifyTable: {
ModifyTable* mt = (ModifyTable*)top_plan;
ListCell* lc = NULL;
/* If result table is column store, return true */
if (IsVecOutput(top_plan))
return true;
foreach (lc, mt->plans) {
Plan* plan = (Plan*)lfirst(lc);
if (has_column_store_relation(plan))
return true;
}
} break;
case T_SubqueryScan: {
SubqueryScan* ss = (SubqueryScan*)top_plan;
if (ss->subplan && has_column_store_relation(ss->subplan))
return true;
} break;
case T_BitmapAnd:
case T_CStoreIndexAnd: {
BitmapAnd* ba = (BitmapAnd*)top_plan;
ListCell* lc = NULL;
foreach (lc, ba->bitmapplans) {
Plan* plan = (Plan*)lfirst(lc);
if (has_column_store_relation(plan))
return true;
}
} break;
case T_BitmapOr:
case T_CStoreIndexOr: {
BitmapOr* bo = (BitmapOr*)top_plan;
ListCell* lc = NULL;
foreach (lc, bo->bitmapplans) {
Plan* plan = (Plan*)lfirst(lc);
if (has_column_store_relation(plan))
return true;
}
} break;
case T_IndexScan:
case T_SeqScan: {
if (u_sess->attr.attr_sql.enable_force_vector_engine) {
ListCell* cell = NULL;
TargetEntry* entry = NULL;
Var* var = NULL;
foreach (cell, top_plan->targetlist) {
entry = (TargetEntry*)lfirst(cell);
if (IsA(entry->expr, Var)) {
var = (Var*)entry->expr;
if (var->varattno > 0 && var->varoattno > 0 &&
!IsTypeSupportedByCStore(var->vartype, var->vartypmod)) {
return false;
}
}
}
return true;
}
return false;
} break;
default:
if (outerPlan(top_plan)) {
if (has_column_store_relation(outerPlan(top_plan)))
return true;
}
if (innerPlan(top_plan)) {
if (has_column_store_relation(innerPlan(top_plan)))
return true;
}
break;
}
return false;
}
/*
 * @Description: Check if it is a vector scan
*
* @param[IN] plan: current plan node
* @return: bool, true if it is
*/
bool is_vector_scan(Plan* plan)
{
if (plan == NULL) {
return false;
}
switch (nodeTag(plan)) {
case T_CStoreScan:
case T_DfsScan:
case T_DfsIndexScan:
case T_CStoreIndexScan:
case T_CStoreIndexCtidScan:
case T_CStoreIndexHeapScan:
case T_CStoreIndexAnd:
case T_CStoreIndexOr:
#ifdef ENABLE_MULTIPLE_NODES
case T_TsStoreScan:
#endif /* ENABLE_MULTIPLE_NODES */
return true;
case T_PartIterator:
if (is_vector_scan(plan->lefttree))
return true;
break;
default:
break;
}
return false;
}
static bool IsTypeUnSupportedByVectorEngine(Oid typeOid)
{
/* we don't support user defined type. */
if (typeOid >= FirstNormalObjectId) {
return true;
}
for (uint32 i = 0; i < sizeof(VectorEngineUnsupportType) / sizeof(Oid); ++i) {
if (VectorEngineUnsupportType[i] == typeOid) {
return true;
}
}
return false;
}
/*
 * @Description: Check if it has unsupported expressions for the vector engine
*
* @param[IN] node: current expr node
* @return: bool, true if it has
*/
bool vector_engine_unsupport_expression_walker(Node* node)
{
if (node == NULL) {
return false;
}
    /* Find expressions the vector engine does not support */
switch (nodeTag(node)) {
case T_ArrayRef:
case T_AlternativeSubPlan:
case T_FieldSelect:
case T_FieldStore:
case T_ArrayCoerceExpr:
case T_ConvertRowtypeExpr:
case T_ArrayExpr:
case T_RowExpr:
case T_XmlExpr:
case T_CoerceToDomain:
case T_CoerceToDomainValue:
case T_CurrentOfExpr:
return true;
case T_Var: {
Var *var = (Var *)node;
if (var->varattno == InvalidAttrNumber) {
return true;
} else {
return IsTypeUnSupportedByVectorEngine(var->vartype);
}
break;
}
case T_Const: {
Const* c = (Const *)node;
return IsTypeUnSupportedByVectorEngine(c->consttype);
}
case T_Param: {
Param *par = (Param *)node;
return IsTypeUnSupportedByVectorEngine(par->paramtype);
}
default:
break;
}
return expression_tree_walker(node, (bool (*)())vector_engine_unsupport_expression_walker, (void*)NULL);
}
/*
* @Description: Try to generate vectorized plan
*
* @param[IN] top_plan: current plan node
* @param[IN] parse: query tree
* @param[IN] from_subplan: if node from subplan
* @param[IN] subroot: plan root of current subquery plan tree
* @return: Plan*, vectorized plan, fallbacked plan, or leave unchanged
*/
Plan* try_vectorize_plan(Plan* top_plan, Query* parse, bool from_subplan, PlannerInfo* subroot)
{
    /* If there is no column store relation, just leave the plan unchanged */
if (!has_column_store_relation(top_plan))
return top_plan;
/*
     * Fall back to the original non-vectorized plan if either the GUC 'enable_vector_engine'
* is turned off or the plan cannot go through vector_engine_walker.
*/
if (!u_sess->attr.attr_sql.enable_vector_engine || vector_engine_walker(top_plan, from_subplan) ||
(subroot != NULL && subroot->is_under_recursive_tree) ||
(ENABLE_PRED_PUSH_ALL(NULL) || (subroot != NULL && SUBQUERY_PREDPUSH(subroot)))) {
/*
* Distributed Recursive CTE Support
*
         * In case a SubPlan node appears under a recursive CTE's recursive plan
         * branch, we don't try to vectorize the plan; instead we fall back to
         * just adding vec2row on top of the CStore operators.
         *
         * In the future, if we support native recursive execution, e.g. a
         * VecRecursiveUnion, we need to revisit this part to lift the restriction.
         *
         * We go through fallback_plan to transfer the plan to the row engine.
         * If it's already a row-engine plan, it is left unchanged.
*/
top_plan = fallback_plan(top_plan);
} else {
top_plan = vectorize_plan(top_plan, from_subplan);
if (from_subplan && !IsVecOutput(top_plan))
top_plan = fallback_plan(top_plan);
}
if (IsVecOutput(top_plan))
top_plan = (Plan*)make_vectorow(top_plan);
return top_plan;
}
/*
* @Description: Walk through the expression tree to see if it's supported in Vector Engine
*
* @param[IN] node: points to query tree or expr node
* @param[IN] context: points to a struct that holds whatever context information
* the walker routine needs
* @return: bool, true means unsupported, false means supported
*/
static bool vector_engine_expression_walker(Node* node, DenseRank_context* context)
{
Oid funcOid = InvalidOid;
if (node == NULL)
return false;
if (IsA(node, Aggref)) {
Aggref* aggref = (Aggref*)node;
funcOid = aggref->aggfnoid;
} else if (IsA(node, WindowFunc)) {
WindowFunc* wfunc = (WindowFunc*)node;
funcOid = wfunc->winfnoid;
if (context != NULL) {
if (wfunc->winagg)
context->has_agg = true;
if (funcOid == DENSERANKFUNCOID)
context->has_denserank = true;
}
}
if (funcOid != InvalidOid) {
bool found = false;
/*
* Only ROW_NUMBER, RANK, AVG, COUNT, MAX, MIN and SUM are supported now
* and their func oid must be found in hash table g_instance.vec_func_hash.
*/
(void)hash_search(g_instance.vec_func_hash, &funcOid, HASH_FIND, &found);
/* If not found means that the Agg function is not yet implemented */
if (!found)
return true;
}
return expression_tree_walker(node, (bool (*)())vector_engine_expression_walker, context);
}
/*
* @Description: Walk through the expression tree to see if it's supported in Vector Engine,
 * if a set-returning function is present, it is not supported.
*
* @param[IN] node: points to query tree or expr node
* @param[IN] context: points to a struct that holds whatever context information
* the walker routine needs
* @return: bool, true means unsupported, false means supported
*/
static bool vector_engine_setfunc_walker(Node* node, DenseRank_context* context)
{
if (node == NULL)
return false;
if (IsA(node, FuncExpr)) {
FuncExpr* expr = (FuncExpr*)node;
if (expr->funcretset == true) {
return true;
}
}
return expression_tree_walker(node, (bool (*)())vector_engine_setfunc_walker, context);
}
/*
* @Description: Walk through the plan tree to see if it's supported in Vector Engine
*
* @param[IN] result_plan: current plan node
* @param[IN] check_rescan: if need check rescan
* @return: bool, true means unsupported, false means supported
*/
static bool vector_engine_walker(Plan* result_plan, bool check_rescan)
{
if (result_plan == NULL)
return false;
    /* if there is a set-returning function, it's not supported. */
if (vector_engine_setfunc_walker((Node*)(result_plan->targetlist), NULL))
return true;
    /* check whether there is an expression unsupported by the vector engine */
if (vector_engine_unsupport_expression_walker((Node*)result_plan->targetlist))
return true;
if (vector_engine_unsupport_expression_walker((Node*)result_plan->qual))
return true;
switch (nodeTag(result_plan)) {
/* Operators below cannot be vectorized */
case T_SeqScan:
if (result_plan->isDeltaTable) {
return false;
}
case T_IndexScan:
if (u_sess->attr.attr_sql.enable_force_vector_engine) {
ListCell* cell = NULL;
TargetEntry* entry = NULL;
Var* var = NULL;
foreach (cell, result_plan->targetlist) {
entry = (TargetEntry*)lfirst(cell);
if (IsA(entry->expr, Var)) {
var = (Var*)entry->expr;
if (var->varattno > 0 && var->varoattno > 0 &&
!IsTypeSupportedByCStore(var->vartype, var->vartypmod)) {
return true;
}
}
}
return false;
}
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
case T_FunctionScan:
case T_CteScan:
case T_LockRows:
case T_MergeAppend:
case T_RecursiveUnion:
return true;
case T_RemoteQuery:
/* ExecReScanVecRemoteQuery is not yet implemented */
if (check_rescan)
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
break;
case T_Stream: {
check_rescan = false;
Stream* sj = (Stream*)result_plan;
if (vector_engine_unsupport_expression_walker((Node*)sj->distribute_keys))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
} break;
case T_Limit: {
Limit* lm = (Limit*)result_plan;
if (vector_engine_unsupport_expression_walker((Node*)lm->limitCount))
return true;
if (vector_engine_unsupport_expression_walker((Node*)lm->limitOffset))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
} break;
case T_BaseResult: {
BaseResult* br = (BaseResult*)result_plan;
if (vector_engine_unsupport_expression_walker((Node*)br->resconstantqual))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
} break;
case T_PartIterator:
case T_SetOp:
case T_Group:
            /* Check if it contains an array operator; distribution on ARRAY types is not supported now */
if (has_array_operator(result_plan))
return true;
case T_Unique:
case T_Material:
case T_Hash:
case T_Sort:
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
break;
case T_Agg: {
/* Check if targetlist contains unsupported feature */
if (vector_engine_expression_walker((Node*)(result_plan->targetlist), NULL))
return true;
/* Check if qual contains unsupported feature */
if (vector_engine_expression_walker((Node*)(result_plan->qual), NULL))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
            /* Check if it contains an array operator; distribution on ARRAY types is not supported now */
if (has_array_operator(result_plan))
return true;
} break;
case T_WindowAgg: {
/* Only default window clause is supported now */
if (((WindowAgg*)result_plan)->frameOptions !=
(FRAMEOPTION_RANGE | FRAMEOPTION_START_UNBOUNDED_PRECEDING | FRAMEOPTION_END_CURRENT_ROW))
return true;
/* Check if targetlist contains unsupported feature */
DenseRank_context context;
context.has_agg = false;
context.has_denserank = false;
if (vector_engine_expression_walker((Node*)(result_plan->targetlist), &context))
return true;
            /* Only a single denserank is supported now */
if (context.has_agg && context.has_denserank)
return true;
/*
* WindowAgg nodes never have quals, since they can only occur at the
* logical top level of a query (ie, after any WHERE or HAVING filters)
*/
WindowAgg* wa = (WindowAgg*)result_plan;
if (vector_engine_unsupport_expression_walker((Node*)wa->startOffset))
return true;
if (vector_engine_unsupport_expression_walker((Node*)wa->endOffset))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
} break;
case T_MergeJoin: {
MergeJoin* mj = (MergeJoin*)result_plan;
if (vector_engine_unsupport_expression_walker((Node*)mj->mergeclauses))
return true;
            /* Find unsupported expr in *Join* clause */
if (vector_engine_unsupport_expression_walker((Node*)mj->join.joinqual))
return true;
if (vector_engine_unsupport_expression_walker((Node*)mj->join.nulleqqual))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
if (vector_engine_walker(result_plan->righttree, check_rescan))
return true;
} break;
case T_NestLoop: {
NestLoop* nl = (NestLoop*)result_plan;
            /* Find unsupported expr in *Join* clause */
if (vector_engine_unsupport_expression_walker((Node*)nl->join.joinqual))
return true;
if (vector_engine_unsupport_expression_walker((Node*)nl->join.nulleqqual))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
if (IsA(result_plan->righttree, Material) && result_plan->righttree->allParam == NULL)
check_rescan = false;
else
check_rescan = true;
if (vector_engine_walker(result_plan->righttree, check_rescan))
return true;
} break;
case T_HashJoin: {
/* Vector Hash Full Join is not yet implemented */
Join* j = (Join*)result_plan;
if (j->jointype == JOIN_FULL)
return true;
HashJoin* hj = (HashJoin*)result_plan;
            /* Find unsupported expr in *Hash* clause */
if (vector_engine_unsupport_expression_walker((Node*)hj->hashclauses))
return true;
            /* Find unsupported expr in *Join* clause */
if (vector_engine_unsupport_expression_walker((Node*)hj->join.joinqual))
return true;
if (vector_engine_unsupport_expression_walker((Node*)hj->join.nulleqqual))
return true;
if (vector_engine_walker(result_plan->lefttree, check_rescan))
return true;
if (vector_engine_walker(result_plan->righttree, check_rescan))
return true;
} break;
case T_Append: {
Append* append = (Append*)result_plan;
ListCell* lc = NULL;
foreach (lc, append->appendplans) {
Plan* plan = (Plan*)lfirst(lc);
if (vector_engine_walker(plan, check_rescan))
return true;
}
} break;
case T_ModifyTable: {
ModifyTable* mt = (ModifyTable*)result_plan;
ListCell* lc = NULL;
foreach (lc, mt->plans) {
Plan* plan = (Plan*)lfirst(lc);
if (vector_engine_walker(plan, check_rescan))
return true;
}
} break;
case T_SubqueryScan: {
SubqueryScan* ss = (SubqueryScan*)result_plan;
if (ss->subplan && vector_engine_walker(ss->subplan, check_rescan))
return true;
} break;
case T_ForeignScan: {
ForeignScan* fscan = (ForeignScan*)result_plan;
if (IsSpecifiedFDWFromRelid(fscan->scan_relid, GC_FDW) ||
IsSpecifiedFDWFromRelid(fscan->scan_relid, LOG_FDW)) {
result_plan->vec_output = false;
return true;
}
} break;
case T_ExtensiblePlan: {
ExtensiblePlan* ext_plan = (ExtensiblePlan*)result_plan;
ListCell* lc = NULL;
foreach (lc, ext_plan->extensible_plans) {
Plan* plan = (Plan*)lfirst(lc);
if (vector_engine_walker(plan, check_rescan))
return true;
}
} break;
default:
break;
}
return false;
}
/*
* @Description: Fallback plan, generate hybrid row-column plan
*
* @param[IN] result_plan: current plan node
* @return: Plan*, fallbacked plan
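 *
 * Illustrative example: a CStoreScan feeding a row-engine join gets a VecToRow
 * adapter placed on top (see make_vectorow), so the column-store scan still
 * runs batched while the rest of the plan stays in the row engine.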
*/
static Plan* fallback_plan(Plan* result_plan)
{
if (result_plan == NULL)
return NULL;
switch (nodeTag(result_plan)) {
/* Add Row Adapter */
case T_CStoreScan:
case T_DfsScan:
case T_DfsIndexScan:
case T_CStoreIndexScan:
case T_CStoreIndexHeapScan:
case T_CStoreIndexCtidScan:
#ifdef ENABLE_MULTIPLE_NODES
case T_TsStoreScan:
#endif /* ENABLE_MULTIPLE_NODES */
result_plan = (Plan*)make_vectorow(build_vector_plan(result_plan));
break;
/* vec_output was set to 'true' initially, change to 'false' in row plan */
case T_ForeignScan:
result_plan->vec_output = false;
break;
case T_ExtensiblePlan: {
ListCell* lc = NULL;
ExtensiblePlan* ext_plans = (ExtensiblePlan*) result_plan;
foreach (lc, ext_plans->extensible_plans) {
Plan* plan = (Plan*)lfirst(lc);
plan = (Plan*)fallback_plan(plan);
if (IsVecOutput(plan)) {
plan = (Plan*)make_vectorow(plan);
}
lfirst(lc) = plan;
}
} break;
case T_RemoteQuery:
if (!IsVecOutput(result_plan) && IsVecOutput(result_plan->lefttree) &&
IsA(result_plan->lefttree, ModifyTable)) {
result_plan->type = T_VecRemoteQuery;
result_plan->vec_output = true;
result_plan = (Plan*)make_vectorow(result_plan);
}
result_plan->lefttree = fallback_plan(result_plan->lefttree);
break;
case T_Limit:
case T_PartIterator:
case T_SetOp:
case T_Group:
case T_Unique:
case T_BaseResult:
case T_Sort:
case T_Stream:
case T_Material:
case T_WindowAgg:
case T_Hash:
case T_Agg:
case T_RowToVec:
case T_VecRemoteQuery:
result_plan->lefttree = fallback_plan(result_plan->lefttree);
break;
case T_MergeJoin:
case T_NestLoop:
case T_HashJoin:
case T_RecursiveUnion:
result_plan->lefttree = fallback_plan(result_plan->lefttree);
result_plan->righttree = fallback_plan(result_plan->righttree);
break;
case T_Append: {
Append* append = (Append*)result_plan;
ListCell* lc = NULL;
foreach (lc, append->appendplans) {
Plan* plan = (Plan*)lfirst(lc);
plan = (Plan*)fallback_plan(plan);
if (IsVecOutput(plan)) {
plan = (Plan*)make_vectorow(plan);
}
lfirst(lc) = plan;
}
} break;
case T_ModifyTable: {
ModifyTable* mt = (ModifyTable*)result_plan;
ListCell* lc = NULL;
foreach (lc, mt->plans) {
Plan* plan = (Plan*)lfirst(lc);
if (IsVecOutput(result_plan)) {
result_plan->type = T_VecModifyTable;
if (!IsVecOutput(plan))
lfirst(lc) = (Plan*)fallback_plan((Plan*)make_rowtovec(plan));
else if (IsA(plan, CStoreScan) || IsA(plan, CStoreIndexScan))
break;
} else
lfirst(lc) = (Plan*)fallback_plan(plan);
}
} break;
case T_SubqueryScan: {
SubqueryScan* ss = (SubqueryScan*)result_plan;
if (ss->subplan)
ss->subplan = (Plan*)fallback_plan(ss->subplan);
} break;
case T_MergeAppend: {
MergeAppend* ma = (MergeAppend*)result_plan;
ListCell* lc = NULL;
foreach (lc, ma->mergeplans) {
Plan* plan = (Plan*)lfirst(lc);
lfirst(lc) = (Plan*)fallback_plan(plan);
}
} break;
case T_SeqScan: {
((SeqScan*)result_plan)->executeBatch = false;
result_plan->vec_output = false;
} break;
default:
break;
}
return result_plan;
}
/*
* @Description: Generate vectorized plan
*
* @param[IN] result_plan: current plan node
* @param[IN] ignore_remotequery: if ignore RemoteQuery node
* @return: Plan*, vectorized plan
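 *
 * Illustrative example: for Agg -> CStoreScan the child comes back vectorized,
 * so the Agg is rebuilt through build_vector_plan; for Agg -> SeqScan the plan
 * stays in the row engine unless the scan is marked to output batches (delta
 * table or enable_force_vector_engine).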
*/
Plan* vectorize_plan(Plan* result_plan, bool ignore_remotequery)
{
if (result_plan == NULL)
return NULL;
switch (nodeTag(result_plan)) {
/*
         * For column-store Scan nodes, just build the vector plan directly.
*/
case T_DfsScan:
case T_DfsIndexScan:
case T_CStoreScan:
case T_CStoreIndexCtidScan:
case T_CStoreIndexHeapScan:
case T_CStoreIndexScan:
#ifdef ENABLE_MULTIPLE_NODES
case T_TsStoreScan:
#endif /* ENABLE_MULTIPLE_NODES */
result_plan = build_vector_plan(result_plan);
break;
case T_ForeignScan:
if (IsVecOutput(result_plan))
return build_vector_plan(result_plan);
break;
case T_ExtensiblePlan:
{
ExtensiblePlan* ext_plans = (ExtensiblePlan*)result_plan;
ListCell* lc = NULL;
List* newPlans = NIL;
foreach (lc, ext_plans->extensible_plans) {
Plan* plan = (Plan*)lfirst(lc);
lfirst(lc) = vectorize_plan(plan, ignore_remotequery);
if (IsVecOutput(result_plan) &&
!IsVecOutput(plan)) {
if (IsA(plan, ForeignScan)) {
build_vector_plan(plan);
} else {
plan = (Plan*)make_rowtovec(plan);
}
} else if (!IsVecOutput(result_plan) && IsVecOutput(plan))
plan = (Plan*)make_vectorow(plan);
newPlans = lappend(newPlans, plan);
}
ext_plans->extensible_plans = newPlans;
if (IsVecOutput(result_plan)) {
build_vector_plan(result_plan);
}
break;
}
case T_SeqScan: {
if (result_plan->isDeltaTable || u_sess->attr.attr_sql.enable_force_vector_engine) {
((SeqScan*)result_plan)->executeBatch = true;
result_plan->vec_output = true;
}
break;
}
case T_IndexScan: {
if (u_sess->attr.attr_sql.enable_force_vector_engine) {
result_plan = (Plan*)make_rowtovec(result_plan);
}
break;
}
case T_ValuesScan: {
result_plan = (Plan*)make_rowtovec(result_plan);
} break;
/*
 * For nodes that support vectorization, build a vector node if the child
 * is vectorized or enable_force_vector_engine is on.
*/
case T_RemoteQuery:
if (ignore_remotequery)
return result_plan;
case T_Limit:
case T_PartIterator:
case T_SetOp:
case T_Group:
case T_Unique:
case T_BaseResult:
case T_Sort:
case T_Stream:
case T_Material:
case T_WindowAgg:
result_plan->lefttree = vectorize_plan(result_plan->lefttree, ignore_remotequery);
if (result_plan->lefttree && IsVecOutput(result_plan->lefttree))
return build_vector_plan(result_plan);
else if ((result_plan->lefttree && !IsVecOutput(result_plan->lefttree)) &&
u_sess->attr.attr_sql.enable_force_vector_engine) {
result_plan->lefttree = (Plan*)make_rowtovec(result_plan->lefttree);
return build_vector_plan(result_plan);
} else if (IsA(result_plan, BaseResult) && result_plan->lefttree == NULL) {
make_dummy_targetlist(result_plan);
result_plan = (Plan*)make_rowtovec(result_plan);
return result_plan;
}
break;
case T_MergeJoin:
case T_NestLoop:
result_plan->lefttree = vectorize_plan(result_plan->lefttree, ignore_remotequery);
result_plan->righttree = vectorize_plan(result_plan->righttree, ignore_remotequery);
if (IsVecOutput(result_plan->lefttree) && IsVecOutput(result_plan->righttree)) {
return build_vector_plan(result_plan);
}
if (u_sess->attr.attr_sql.enable_force_vector_engine) {
if (!IsVecOutput(result_plan->lefttree))
result_plan->lefttree = (Plan*)make_rowtovec(result_plan->lefttree);
if (!IsVecOutput(result_plan->righttree))
result_plan->righttree = (Plan*)make_rowtovec(result_plan->righttree);
return build_vector_plan(result_plan);
} else {
if (IsVecOutput(result_plan->lefttree))
result_plan->lefttree = (Plan*)make_vectorow(result_plan->lefttree);
if (IsVecOutput(result_plan->righttree))
result_plan->righttree = (Plan*)make_vectorow(result_plan->righttree);
return result_plan;
}
/*
 * For nodes with a single child that support vectorization, we just set
 * the vector flag according to the child's flag.
*/
case T_Hash:
break;
case T_Agg: {
result_plan->lefttree = vectorize_plan(result_plan->lefttree, ignore_remotequery);
if (IsVecOutput(result_plan->lefttree))
return build_vector_plan(result_plan);
} break;
/*
 * For nodes whose two children support vectorization, we try to go vectorized.
*/
case T_HashJoin: {
/* HashJoin supports vector right now */
result_plan->lefttree = vectorize_plan(result_plan->lefttree, ignore_remotequery);
result_plan->righttree->lefttree = vectorize_plan(result_plan->righttree->lefttree, ignore_remotequery);
if (IsVecOutput(result_plan->lefttree) && IsVecOutput(result_plan->righttree->lefttree)) {
/* Remove hash node */
result_plan->righttree = result_plan->righttree->lefttree;
return build_vector_plan(result_plan);
} else {
if (IsVecOutput(result_plan->lefttree))
result_plan->lefttree = (Plan*)make_vectorow(result_plan->lefttree);
if (IsVecOutput(result_plan->righttree->lefttree))
result_plan->righttree->lefttree = (Plan*)make_vectorow(result_plan->righttree->lefttree);
}
} break;
case T_Append: {
Append* append = (Append*)result_plan;
ListCell* lc = NULL;
bool isVec = true;
foreach (lc, append->appendplans) {
Plan* plan = (Plan*)lfirst(lc);
plan = vectorize_plan(plan, ignore_remotequery);
lfirst(lc) = plan;
if (!IsVecOutput(plan)) {
if (u_sess->attr.attr_sql.enable_force_vector_engine)
lfirst(lc) = (Plan*)make_rowtovec(plan);
isVec = false;
}
}
if (isVec == true || u_sess->attr.attr_sql.enable_force_vector_engine) {
return build_vector_plan(result_plan);
} else {
foreach (lc, append->appendplans) {
Plan* plan = (Plan*)lfirst(lc);
if (IsVecOutput(plan)) {
lfirst(lc) = (Plan*)make_vectorow(plan);
}
}
return result_plan;
}
} break;
case T_ModifyTable:
/* ModifyTable doesn't support vector right now */
{
ModifyTable* mt = (ModifyTable*)result_plan;
ListCell* lc = NULL;
List* newPlans = NIL;
foreach (lc, mt->plans) {
Plan* plan = (Plan*)lfirst(lc);
lfirst(lc) = vectorize_plan(plan, ignore_remotequery);
if (IsVecOutput(result_plan) &&
                    !IsVecOutput(plan)) { // If we ever support a vectorized ModifyTable, remove this
if (IsA(plan, ForeignScan)) {
build_vector_plan(plan);
} else {
plan = (Plan*)make_rowtovec(plan);
}
} else if (!IsVecOutput(result_plan) && IsVecOutput(plan))
plan = (Plan*)make_vectorow(plan);
newPlans = lappend(newPlans, plan);
}
mt->plans = newPlans;
if (IsVecOutput(result_plan)) {
build_vector_plan(result_plan);
}
break;
}
case T_SubqueryScan:
/* SubqueryScan supports vector right now */
{
SubqueryScan* ss = (SubqueryScan*)result_plan;
if (ss->subplan)
ss->subplan = vectorize_plan(ss->subplan, ignore_remotequery);
                if (IsVecOutput(ss->subplan)) { // If we ever support a vectorized ModifyTable, remove this
build_vector_plan(result_plan);
}
}
break;
default:
break;
}
return result_plan;
}
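/*
 * Illustrative example of the HashJoin rewrite above (a sketch, not part of
 * the original code): when both join children produce vectorized output,
 * the Hash node is removed and the join is retagged as VecHashJoin:
 *
 *     HashJoin                       VecHashJoin
 *       -> CStoreScan        ==>       -> CStoreScan
 *       -> Hash                        -> CStoreScan
 *            -> CStoreScan
 *
 * If either side stays row-oriented, the vectorized side is wrapped in a
 * VecToRow adapter instead and the join remains a row HashJoin.
 */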
/*
* @Description: Generate vectorized plan
*
* @param[IN] result_plan: current plan node
* @return: Plan*, vectorized plan node
*/
static Plan* build_vector_plan(Plan* plan)
{
make_dummy_targetlist(plan);
plan->vec_output = true;
/*
* For nodetype T_CStoreIndexHeapScan/T_CStoreIndexCtidScan/
 * T_CStoreIndexAnd/T_CStoreIndexOr, we have already dealt with the
 * column-store case in create_scan_plan.
*
*/
switch (nodeTag(plan)) {
case T_NestLoop:
plan->type = T_VecNestLoop;
break;
case T_MergeJoin:
plan->type = T_VecMergeJoin;
break;
case T_WindowAgg:
plan->type = T_VecWindowAgg;
break;
case T_Limit:
plan->type = T_VecLimit;
break;
case T_Agg:
plan->type = T_VecAgg;
break;
case T_DfsScan:
case T_DfsIndexScan:
case T_CStoreScan:
case T_CStoreIndexCtidScan:
case T_CStoreIndexHeapScan:
case T_CStoreIndexScan:
#ifdef ENABLE_MULTIPLE_NODES
case T_TsStoreScan:
#endif /* ENABLE_MULTIPLE_NODES */
break;
case T_Hash: // we should remove hash node in the vector plan
break;
case T_HashJoin:
plan->type = T_VecHashJoin;
break;
case T_RemoteQuery:
plan->type = T_VecRemoteQuery;
break;
case T_Stream:
plan->type = T_VecStream;
break;
case T_SubqueryScan:
plan->type = T_VecSubqueryScan;
break;
case T_BaseResult:
plan->type = T_VecResult;
break;
case T_PartIterator:
plan->type = T_VecPartIterator;
break;
case T_ForeignScan:
plan->type = T_VecForeignScan;
break;
case T_Append:
plan->type = T_VecAppend;
break;
case T_Group:
plan->type = T_VecGroup;
break;
case T_Unique:
plan->type = T_VecUnique;
break;
case T_SetOp:
plan->type = T_VecSetOp;
break;
case T_ModifyTable:
plan->type = T_VecModifyTable;
break;
case T_Sort:
plan->type = T_VecSort;
break;
case T_Material:
plan->type = T_VecMaterial;
break;
default:
plan->vec_output = false;
break;
}
return plan;
}
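/*
 * Usage sketch (illustrative): callers retag a row node in place, e.g.
 *
 *     if (IsVecOutput(result_plan->lefttree))
 *         return build_vector_plan(result_plan);  // e.g. T_Sort -> T_VecSort
 *
 * Node tags without a vectorized counterpart fall into the default branch,
 * which resets vec_output to false.
 */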
/*
* cost_agg_convert_to_path
 * convert the sub-plan to a path before we calculate the agg cost of each hash agg method
*
* @param (in) plan:
* the sub-plan
*
* @return:
* the converted path
*/
static Path* cost_agg_convert_to_path(Plan* plan)
{
Path* path = makeNode(Path);
path->type = T_Path;
path->pathtype = plan->type;
/*
 * This distribution is used for cost estimation;
 * we should get it from the exec nodes (not the data nodes).
*/
ExecNodes* exec_nodes = ng_get_dest_execnodes(plan);
Distribution* distribution = ng_convert_to_distribution(exec_nodes);
ng_set_distribution(&path->distribution, distribution);
path->locator_type = ng_get_dest_locator_type(plan);
path->distribute_keys = ng_get_dest_distribute_keys(plan);
if (IsLocatorReplicated(path->locator_type)) {
path->rows = PLAN_LOCAL_ROWS(plan);
} else {
path->rows = plan->plan_rows;
}
path->multiple = plan->multiple;
path->startup_cost = plan->startup_cost;
path->total_cost = plan->total_cost;
return path;
}
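/*
 * Usage note (inferred from the callers below): the returned Path is a
 * throwaway costing skeleton -- only the distribution, locator type,
 * distribute keys, row count and costs are filled in, which is all that
 * cost_stream() and cost_agg() read when the hash agg methods are compared.
 */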
/*
* cost_agg_do_redistribute
 * add a redistribute path node and calculate its cost
* when we choose optimal method from all hash agg methods
*
* @param (in) subpath:
* the subpath
* @param (in) distributed_key:
* distribute key of redistribute stream node
* @param (in) multiple:
* the multiple
* @param (in) target_distribution:
* the target node group
* @param (in) width:
* the path width
* @param (in) vec_output:
 * marks whether it is a vector plan node
* @param (in) dop:
 * the dop of SMP
 * @param (in) needs_stream:
 * whether the SMP stream must be a remote split redistribute (otherwise a local redistribute is used)
 *
* @return:
* the stream path
*/
static StreamPath* cost_agg_do_redistribute(Path* subpath, List* distributed_key, double multiple,
Distribution* target_distribution, int width, bool vec_output, int dop, bool needs_stream)
{
StreamPath* spath = makeNode(StreamPath);
spath->path.type = T_StreamPath;
spath->path.pathtype = vec_output ? T_VecStream : T_Stream;
spath->path.multiple = multiple;
Distribution* distribution = ng_get_dest_distribution(subpath);
ng_set_distribution(&spath->path.distribution, distribution);
spath->path.locator_type = LOCATOR_TYPE_HASH;
spath->path.distribute_keys = distributed_key;
spath->type = STREAM_REDISTRIBUTE;
spath->subpath = subpath;
ng_set_distribution(&spath->consumer_distribution, target_distribution);
if (dop > 1) {
spath->smpDesc = create_smpDesc(dop, dop, needs_stream ? REMOTE_SPLIT_DISTRIBUTE : LOCAL_DISTRIBUTE);
}
cost_stream(spath, width);
return spath;
}
/*
* cost_agg_do_gather
 * add a gather path node and calculate its cost
* when we choose optimal method from all hash agg methods
*
* @param (in) subpath:
* the subpath
* @param (in) width:
* the path width
* @param (in) vec_output:
 * marks whether it is a vector plan node
*
* @return:
* the gather path
*/
static StreamPath* cost_agg_do_gather(Path* subpath, int width, bool vec_output)
{
StreamPath* spath = makeNode(StreamPath);
spath->path.type = T_StreamPath;
spath->path.pathtype = vec_output ? T_VecStream : T_Stream;
spath->path.multiple = 1.0;
Distribution* producer_distribution = ng_get_dest_distribution(subpath);
ng_set_distribution(&spath->path.distribution, producer_distribution);
spath->path.locator_type = LOCATOR_TYPE_REPLICATED;
spath->type = STREAM_GATHER;
spath->subpath = subpath;
/* It's in CN, NOT really in DN_0. Just make local rows and global rows calculation happy */
Distribution* consumer_distribution = ng_get_single_node_group_distribution();
ng_set_distribution(&spath->consumer_distribution, consumer_distribution);
cost_stream(spath, width);
return spath;
}
/*
* cost_agg_do_agg
 * add an agg path node and calculate its cost
* when we choose optimal method from all hash agg methods
*
* @return:
* the agg path
*/
static Path* cost_agg_do_agg(Path* subpath, PlannerInfo* root, AggStrategy agg_strategy, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, Size hashentrysize, QualCost total_cost, int width, bool vec_output, int dop)
{
Path* agg_path = makeNode(Path);
agg_path->type = T_Path;
agg_path->pathtype = vec_output ? T_VecAgg : T_Agg;
Distribution* distribution = ng_get_dest_distribution(subpath);
ng_set_distribution(&agg_path->distribution, distribution);
agg_path->locator_type = subpath->locator_type;
agg_path->distribute_keys = subpath->distribute_keys;
cost_agg(agg_path,
root,
agg_strategy,
aggcosts,
numGroupCols,
numGroups,
subpath->startup_cost,
subpath->total_cost,
PATH_LOCAL_ROWS(subpath),
width,
hashentrysize,
dop);
agg_path->startup_cost += total_cost.startup;
agg_path->total_cost += total_cost.startup + total_cost.per_tuple * PATH_LOCAL_ROWS(agg_path);
return agg_path;
}
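/*
 * Cost composition sketch (illustrative): on top of cost_agg(), the QualCost
 * 'total_cost' (having qual plus targetlist evaluation) is charged once at
 * startup and then per local output row:
 *
 *     agg_path->total_cost += total_cost.startup
 *         + total_cost.per_tuple * PATH_LOCAL_ROWS(agg_path);
 */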
/*
* get_hashagg_gather_hashagg_path: get result path for agg(dn)->gather->agg(cn).
*
* Parameters:
* @in root: plan info node
* @in lefttree: the current plan
* @in aggcosts: the execution costs of the aggregates' input expressions
* @in numGroupCols: the column num of group by clause
* @in numGroups: the local distinct of group by clause for the first level
 * @in final_groups: the global distinct of the group by clause for the final level
 * @in total_cost: the initial total cost for the qual
 * @in hashentrysize: hash entry size, including space for the per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
* @in result_path: the result path for agg(dn)->gather->agg(cn) with total cost
*
* Returns: void
*/
static void get_hashagg_gather_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, QualCost total_cost, Size hashentrysize,
AggStrategy agg_strategy, bool needs_stream, Path* result_path)
{
Path* subpath = cost_agg_convert_to_path(lefttree);
Path* agg_path_1 = cost_agg_do_agg(subpath,
root,
agg_strategy,
aggcosts,
numGroupCols,
numGroups,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop);
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Agg method1 cost after dn agg: %lf", agg_path_1->total_cost)));
StreamPath* gather_path = cost_agg_do_gather(agg_path_1, lefttree->plan_width, lefttree->vec_output);
ereport(
DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Agg method1 cost after gather: %lf", gather_path->path.total_cost)));
if (needs_stream) {
/* The agg above gather can not be parallelized. */
Path* agg_path_2 = cost_agg_do_agg((Path*)gather_path,
root,
agg_strategy,
aggcosts,
numGroupCols,
final_groups,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
1);
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Agg method1 cost after cn agg: %lf", agg_path_2->total_cost)));
debug_print_agg_detail(root, AGG_HASHED, DN_AGG_CN_AGG, agg_path_2, &gather_path->path, agg_path_1);
copy_path_costsize(result_path, agg_path_2);
} else {
if (root->query_level == 1) {
debug_print_agg_detail(root, AGG_HASHED, DN_AGG_CN_AGG, &gather_path->path, agg_path_1);
copy_path_costsize(result_path, &gather_path->path);
} else {
debug_print_agg_detail(root, AGG_HASHED, DN_AGG_CN_AGG, agg_path_1);
copy_path_costsize(result_path, agg_path_1);
}
}
}
/*
* get_redist_hashagg_gather_hashagg_path
* get result path for redist->agg(dn)->gather->agg(cn).
*
* Parameters:
* @in root: plan info node
* @in lefttree: the current plan
* @in aggcosts: the execution costs of the aggregates' input expressions
* @in numGroupCols: the column num of group by clause
* @in numGroups: the local distinct of group by clause for the first level
 * @in final_groups: the global distinct of the group by clause for the final level
* @in distributed_key_less_skew: the less skewed distribute key
* @in multiple_less_skew: the multiple of the less skewed distribute key
* @in target_distribution: the target node group
* @in total_cost: the initial total cost for qual
 * @in hashentrysize: hash entry size, including space for the per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
* @in result_path: the result path for agg(dn)->gather->agg(cn) with total cost
*
* Returns: void
*/
static void get_redist_hashagg_gather_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, List* distributed_key_less_skew, double multiple_less_skew,
Distribution* target_distribution, QualCost total_cost, Size hashentrysize, AggStrategy agg_strategy,
bool needs_stream, Path* result_path)
{
Path* subpath = cost_agg_convert_to_path(lefttree);
AssertEreport(target_distribution != NULL && target_distribution->bms_data_nodeids != NULL,
MOD_OPT,
"invalid target distribution information or its bitmap set is null.");
StreamPath* redist_path = cost_agg_do_redistribute(subpath,
distributed_key_less_skew,
multiple_less_skew,
target_distribution,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop,
needs_stream);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method 4 (1+) cost after dn redist: %lf", redist_path->path.total_cost)));
Path* agg_path_1 = cost_agg_do_agg((Path*)redist_path,
root,
agg_strategy,
aggcosts,
numGroupCols,
numGroups,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop);
ereport(
DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Agg method 4 (1+) cost after dn agg: %lf", agg_path_1->total_cost)));
StreamPath* gather_path = cost_agg_do_gather(agg_path_1, lefttree->plan_width, lefttree->vec_output);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method 4 (1+) cost after gather: %lf", gather_path->path.total_cost)));
if (needs_stream) {
/* The agg above gather can not be parallelized. */
Path* agg_path_2 = cost_agg_do_agg((Path*)gather_path,
root,
agg_strategy,
aggcosts,
numGroupCols,
final_groups,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
1);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method 4 (1+) cost after cn agg: %lf", agg_path_2->total_cost)));
debug_print_agg_detail(root,
AGG_HASHED,
DN_REDISTRIBUTE_AGG_CN_AGG,
agg_path_2,
&gather_path->path,
agg_path_1,
&redist_path->path);
copy_path_costsize(result_path, agg_path_2);
} else {
if (root->query_level == 1) {
debug_print_agg_detail(
root, AGG_HASHED, DN_REDISTRIBUTE_AGG_CN_AGG, &gather_path->path, agg_path_1, &redist_path->path);
copy_path_costsize(result_path, &gather_path->path);
} else {
debug_print_agg_detail(root, AGG_HASHED, DN_REDISTRIBUTE_AGG_CN_AGG, agg_path_1, &redist_path->path);
copy_path_costsize(result_path, agg_path_1);
}
}
}
/*
* get_redist_hashagg_path: get result path for distributecost() + aggcost() + gathercost().
*
* Parameters:
* @in root: plan info node
* @in lefttree: the current plan
* @in aggcosts: the execution costs of the aggregates' input expressions
* @in numGroupCols: the column num of group by clause
* @in numGroups: the local distinct of group by clause for the first level
* @in final_groups: the global distinct of group by clause for the final level
* @in distributed_key: the distribute key for stream
* @in multiple: the multiple for stream
* @in total_cost: the initial total cost for qual
 * @in hashentrysize: hash entry size, including space for the per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
* @in result_path: the result path for distributecost() + aggcost() + gathercost() with total cost
*
* Returns: void
*/
static void get_redist_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts, int numGroupCols,
double numGroups, double final_groups, List* distributed_key, double multiple, Distribution* target_distribution,
QualCost total_cost, Size hashentrysize, bool needs_stream, Path* result_path)
{
Path* subpath = cost_agg_convert_to_path(lefttree);
if (target_distribution == NULL || (
target_distribution->bms_data_nodeids == NULL &&
target_distribution->group_oid == InvalidOid)) {
target_distribution = ng_get_installation_group_distribution();
}
StreamPath* redist_path = cost_agg_do_redistribute(subpath,
distributed_key,
multiple,
target_distribution,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop,
needs_stream);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method2 cost after redistribute: %lf", redist_path->path.total_cost)));
double numGroups_agg_path =
clamp_row_est(get_local_rows(final_groups, 1.0, false, ng_get_dest_num_data_nodes((Path*)redist_path)));
Path* agg_path = cost_agg_do_agg((Path*)redist_path,
root,
AGG_HASHED,
aggcosts,
numGroupCols,
numGroups_agg_path,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop);
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Agg method2 cost after agg: %lf", agg_path->total_cost)));
if (root->query_level == 1) {
StreamPath* gather_path = cost_agg_do_gather(agg_path, lefttree->plan_width, lefttree->vec_output);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method2 cost after gather: %lf", gather_path->path.total_cost)));
debug_print_agg_detail(root, AGG_HASHED, DN_REDISTRIBUTE_AGG, &gather_path->path, agg_path, &redist_path->path);
copy_path_costsize(result_path, &gather_path->path);
} else {
debug_print_agg_detail(root, AGG_HASHED, DN_REDISTRIBUTE_AGG, agg_path, &redist_path->path);
copy_path_costsize(result_path, agg_path);
}
result_path->distribute_keys = distributed_key;
}
/*
* get_hashagg_redist_hashagg_path: get result path for aggcost() + distributecost() + aggcost() + gathercost().
*
* Parameters:
* @in root: plan info node
* @in lefttree: the current plan
* @in aggcosts: the execution costs of the aggregates' input expressions
* @in numGroupCols: the column num of group by clause
* @in numGroups: the local distinct of group by clause for the first level
* @in final_groups: the global distinct of group by clause for the final level
* @in distributed_key: the distribute key for stream
* @in multiple: the multiple for stream
* @in total_cost: the initial total cost for qual
 * @in hashentrysize: hash entry size, including space for the per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
* @in result_path: the result path for aggcost() + distributecost() + aggcost() + gathercost() with total cost
*
* Returns: void
*/
static void get_hashagg_redist_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, List* distributed_key, double multiple,
Distribution* target_distribution, QualCost total_cost, Size hashentrysize, bool needs_stream, Path* result_path)
{
Path* subpath = cost_agg_convert_to_path(lefttree);
if (target_distribution == NULL || (
target_distribution->bms_data_nodeids == NULL &&
target_distribution->group_oid == InvalidOid)) {
target_distribution = ng_get_installation_group_distribution();
}
Path* agg_path_1 = cost_agg_do_agg(subpath,
root,
AGG_HASHED,
aggcosts,
numGroupCols,
numGroups,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop);
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Agg method3 cost after first agg: %lf", agg_path_1->total_cost)));
StreamPath* redist_path = cost_agg_do_redistribute(agg_path_1,
distributed_key,
multiple,
target_distribution,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop,
needs_stream);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method3 cost after redistribute: %lf", redist_path->path.total_cost)));
double numGroups_agg_path_2 =
clamp_row_est(get_local_rows(final_groups, 1.0, false, ng_get_dest_num_data_nodes((Path*)redist_path)));
Path* agg_path_2 = cost_agg_do_agg((Path*)redist_path,
root,
AGG_HASHED,
aggcosts,
numGroupCols,
numGroups_agg_path_2,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop);
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Agg method3 cost after second agg: %lf", agg_path_2->total_cost)));
if (root->query_level == 1) {
StreamPath* gather_path = cost_agg_do_gather(agg_path_2, lefttree->plan_width, lefttree->vec_output);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method3 cost after gather: %lf", gather_path->path.total_cost)));
debug_print_agg_detail(
root, AGG_HASHED, DN_AGG_REDISTRIBUTE_AGG, &gather_path->path, agg_path_2, &redist_path->path, agg_path_1);
copy_path_costsize(result_path, &gather_path->path);
} else {
debug_print_agg_detail(root, AGG_HASHED, DN_AGG_REDISTRIBUTE_AGG, agg_path_2, &redist_path->path, agg_path_1);
copy_path_costsize(result_path, agg_path_2);
}
result_path->distribute_keys = distributed_key;
}
/*
* get_redist_hashagg_redist_hashagg_path:
* get result path for redist -> agg -> distribute -> agg -> gather.
*
* Parameters:
* @in root: plan info node
* @in lefttree: the current plan
* @in aggcosts: the execution costs of the aggregates' input expressions
* @in numGroupCols: the column num of group by clause
* @in numGroups: the local distinct of group by clause for the first level
* @in final_groups: the global distinct of group by clause for the final level
* @in distributed_key_less_skew: the less skewed distribute key
* @in multiple_less_skew: the multiple of the less skewed distribute key
* @in target_distribution: the target node group
* @in distributed_key: the distribute key for stream
* @in multiple: the multiple for stream
* @in total_cost: the initial total cost for qual
 * @in hashentrysize: hash entry size, including space for the per-tuple width, space for pass-by-ref
 * transition values, and the per-hash-entry overhead
* @in result_path: the result path for aggcost() + distributecost() + aggcost() + gathercost() with total cost
*
* Returns: void
*/
static void get_redist_hashagg_redist_hashagg_path(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, double final_groups, List* distributed_key_less_skew, double multiple_less_skew,
Distribution* target_distribution, List* distributed_key, double multiple, QualCost total_cost, Size hashentrysize,
bool needs_stream, Path* result_path)
{
Path* subpath = cost_agg_convert_to_path(lefttree);
if (target_distribution == NULL || (
target_distribution->bms_data_nodeids == NULL &&
target_distribution->group_oid == InvalidOid)) {
target_distribution = ng_get_installation_group_distribution();
}
StreamPath* redist_path_1 = cost_agg_do_redistribute(subpath,
distributed_key_less_skew,
multiple_less_skew,
target_distribution,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop,
needs_stream);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
errmsg("Agg method 5 (3+) cost after first redistribute: %lf", redist_path_1->path.total_cost)));
Path* agg_path_1 = cost_agg_do_agg((Path*)redist_path_1,
root,
AGG_HASHED,
aggcosts,
numGroupCols,
numGroups,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method 5 (3+) cost after first agg: %lf", agg_path_1->total_cost)));
StreamPath* redist_path_2 = cost_agg_do_redistribute(agg_path_1,
distributed_key,
multiple,
target_distribution,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop,
needs_stream);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
errmsg("Agg method 5 (3+) cost after second redistribute: %lf", redist_path_2->path.total_cost)));
double numGroups_agg_path_2 =
clamp_row_est(get_local_rows(final_groups, 1.0, false, ng_get_dest_num_data_nodes((Path*)redist_path_2)));
Path* agg_path_2 = cost_agg_do_agg((Path*)redist_path_2,
root,
AGG_HASHED,
aggcosts,
numGroupCols,
numGroups_agg_path_2,
hashentrysize,
total_cost,
lefttree->plan_width,
lefttree->vec_output,
lefttree->dop);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method 5 (3+) cost after second agg: %lf", agg_path_2->total_cost)));
if (root->query_level == 1) {
StreamPath* gather_path = cost_agg_do_gather(agg_path_2, lefttree->plan_width, lefttree->vec_output);
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("Agg method 5 (3+) cost after gather: %lf", gather_path->path.total_cost)));
debug_print_agg_detail(root,
AGG_HASHED,
DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG,
&gather_path->path,
agg_path_2,
&redist_path_2->path,
agg_path_1,
&redist_path_1->path);
copy_path_costsize(result_path, &gather_path->path);
} else {
debug_print_agg_detail(root,
AGG_HASHED,
DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG,
agg_path_2,
&redist_path_2->path,
agg_path_1,
&redist_path_1->path);
copy_path_costsize(result_path, agg_path_2);
}
result_path->distribute_keys = distributed_key;
}
/*
 * @Description: Confirm whether the distributed_key is skewed when doing redistribution.
 * There are two cases: hint skew and null skew.
 * First, confirm whether the distributed_key has hint skew: SKEW_RES_HINT.
 * Second, confirm whether the distributed_key has null skew because of an outer join in the sub plan: SKEW_RES_RULE.
 * If skew exists, prefer to do the agg first to avoid redistribution skew.
*/
static uint32 get_hashagg_skew(AggSkewInfo* skew_info, List* distribute_keys)
{
    /* If the GUC 'skew_option' is set to off, just return. */
if (u_sess->opt_cxt.skew_strategy_opt == SKEW_OPT_OFF)
return SKEW_RES_NONE;
if (distribute_keys == NIL)
return SKEW_RES_NONE;
skew_info->setDistributeKeys(distribute_keys);
skew_info->findStreamSkewInfo();
return skew_info->getSkewInfo();
}
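/*
 * Note (inferred from the callers below): the return value is a bitmask of
 * SKEW_RES_* flags -- e.g. SKEW_RES_HINT for hint-declared skew and
 * SKEW_RES_RULE for null skew caused by outer joins -- and is tested with
 * '&' in get_optimal_hashagg().
 */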
DistrbutionPreferenceType get_agg_distribution_perference_type(Plan* plan)
{
if (!u_sess->attr.attr_sql.enable_dngather || !u_sess->opt_cxt.is_dngather_support) {
return DPT_SHUFFLE;
}
if (plan->plan_rows <= u_sess->attr.attr_sql.dngather_min_rows) {
return DPT_SINGLE;
}
return DPT_SHUFFLE;
}
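/*
 * Example (illustrative GUC values): with enable_dngather = on and
 * dngather_min_rows = 500, an agg whose input is estimated at 100 rows
 * prefers a single-node computing group (DPT_SINGLE), while larger inputs
 * keep the shuffled distribution (DPT_SHUFFLE).
 */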
/*
 * Agg's single-node distribution comparison function.
*/
bool compare_agg_single_node_distribution(Distribution* new_distribution, Distribution* old_distribution,
double new_cost, double old_cost)
{
if (!u_sess->attr.attr_sql.enable_dngather || !u_sess->opt_cxt.is_dngather_support) {
return new_cost < old_cost;
}
bool better_distribution = false;
bool is_new_single_node_distribution = ng_is_single_node_group_distribution(new_distribution);
bool is_old_single_node_distribution = ng_is_single_node_group_distribution(old_distribution);
if (is_new_single_node_distribution && !is_old_single_node_distribution) {
better_distribution = true;
} else if (!is_new_single_node_distribution && is_old_single_node_distribution) {
better_distribution = false;
} else {
better_distribution = new_cost < old_cost;
}
return better_distribution;
}
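/*
 * Decision table for the comparison above (a sketch): a single-node
 * distribution always wins over a multi-node one; cost breaks the tie only
 * when both candidates fall into the same class.
 *
 *     new single-node | old single-node | result
 *     ----------------+-----------------+---------------------
 *           yes       |       no        | true
 *           no        |       yes       | false
 *           same      |      same       | new_cost < old_cost
 */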
/*
 * Choose the cheapest plan for agg from the following candidate paths
 * 1.  agg(dn)->gather->agg(cn)                                     (DN_AGG_CN_AGG)
 * 1+. redistribute(dn)->agg(dn)->gather->agg(cn)                   (DN_REDISTRIBUTE_AGG_CN_AGG)
 * 2.  redistribute(dn)->agg(dn)->gather                            (DN_REDISTRIBUTE_AGG)
 * 3.  agg(dn)->redistribute(dn)->agg(dn)->gather                   (DN_AGG_REDISTRIBUTE_AGG)
 * 3+. redistribute(dn)->agg(dn)->redistribute(dn)->agg(dn)->gather (DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG)
*/
static SAggMethod get_optimal_hashagg(PlannerInfo* root, Plan* lefttree, const AggClauseCosts* aggcosts,
int numGroupCols, double numGroups, List* distributed_key, List* target_list, double final_groups, double multiple,
List* distribute_key_less_skew, double multiple_less_skew, AggOrientation agg_orientation, Cost* final_cost,
Distribution** final_distribution, bool need_stream, AggSkewInfo* skew_info, uint32 aggmethod_filter)
{
Query* parse = root->parse;
double best_cost = 0.0;
Distribution* best_target_distribution = NULL;
SAggMethod option = DN_AGG_CN_AGG;
QualCost qual_cost, tlist_cost, total_cost;
Path result_path;
    errno_t rc = EOK; /* Initialize rc to keep the compiler silent */
bool force_slvl_agg = aggmethod_filter & FOREC_SLVL_AGG;
bool disallow_cn_agg = aggmethod_filter & DISALLOW_CN_AGG;
/*
 * Confirm whether the distributed_key is skewed when doing redistribution.
 * Two cases are checked: hint skew and null skew.
 * Note the priority order:
 * force_slvl_agg > plan_mode_seed > SKEW_RES_HINT > best_agg_plan > SKEW_RES_RULE
*/
uint32 has_skew = SKEW_RES_NONE;
uint32 has_skew_for_redisfirst = SKEW_RES_NONE;
if (!force_slvl_agg) {
has_skew_for_redisfirst = get_hashagg_skew(skew_info, distribute_key_less_skew);
has_skew = get_hashagg_skew(skew_info, distributed_key);
}
/* Get target computing node group list with heuristic method */
List* distribution_list = ng_get_agg_candidate_distribution_list(lefttree, root->is_correlated,
get_agg_distribution_perference_type(lefttree));
    /* If the GUC u_sess->attr.attr_sql.plan_mode_seed requests a random plan, we should choose a random
     * path among the candidate hash agg methods */
if (u_sess->attr.attr_sql.plan_mode_seed != OPTIMIZE_PLAN) {
int hashagg_option_num = 0;
int random_option = 0;
/* If the distribute keys is NIL, we can only choose DN_AGG_CN_AGG. */
if (distributed_key == NIL) {
return DN_AGG_CN_AGG;
} else {
if (root->query_level == 1) {
/*
 * When the agg's group-by keys include the distribute key,
 * we do not need a stream; but when this case runs in parallel,
 * we have to add a local redistribute, so we cannot choose
 * DN_AGG_CN_AGG.
*/
if (lefttree->dop <= 1 || need_stream)
hashagg_option_num = ALL_HASHAGG_OPTION;
else
hashagg_option_num = HASHAGG_OPTION_WITH_STREAM;
} else {
hashagg_option_num = HASHAGG_OPTION_WITH_STREAM;
}
}
if (list_length(distribution_list) != 1) {
random_option = choose_random_option(list_length(distribution_list));
*final_distribution = (Distribution*)list_nth(distribution_list, random_option);
}
if (force_slvl_agg)
random_option = 0;
else
random_option = choose_random_option(hashagg_option_num);
return g_hashagg_option_list[random_option];
}
if (parse->havingQual != NULL)
cost_qual_eval(&qual_cost, (List*)parse->havingQual, root);
else {
rc = memset_s(&qual_cost, sizeof(QualCost), 0, sizeof(QualCost));
securec_check(rc, "\0", "\0");
}
cost_qual_eval(&tlist_cost, target_list, root);
total_cost.startup = qual_cost.startup + tlist_cost.startup;
total_cost.per_tuple = qual_cost.per_tuple + tlist_cost.per_tuple;
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), errmsg("Local groups %lf, final groups: %lf.", numGroups, final_groups)));
rc = memset_s(&result_path, sizeof(Path), 0, sizeof(Path));
securec_check(rc, "\0", "\0");
/* If lefttree is parallel, we need local redistribute, thus DN_AGG_CN_AGG is not allowed. */
if (((root->query_level == 1 && (lefttree->dop <= 1 || need_stream)) ||
(root->query_level > 1 && !need_stream && lefttree->dop <= 1)) &&
!force_slvl_agg && !disallow_cn_agg) {
if (u_sess->attr.attr_sql.best_agg_plan == OPTIMAL_AGG ||
u_sess->attr.attr_sql.best_agg_plan == DN_AGG_CN_AGG || (SKEW_RES_HINT & has_skew)) {
/* 1. get total cost for hashagg (dn) + gather + hashagg (cn). */
get_hashagg_gather_hashagg_path(root,
lefttree,
aggcosts,
numGroupCols,
numGroups,
final_groups,
total_cost,
0,
AGG_HASHED,
need_stream,
&result_path);
if (best_cost == 0.0 || result_path.total_cost < best_cost) {
best_cost = result_path.total_cost;
best_target_distribution = NULL;
option = DN_AGG_CN_AGG;
}
}
/* We consider this kind of path only when we successfully got a less skewed distribute key */
if (((u_sess->attr.attr_sql.best_agg_plan == OPTIMAL_AGG && !(has_skew_for_redisfirst & SKEW_RES_RULE)) ||
u_sess->attr.attr_sql.best_agg_plan == DN_REDISTRIBUTE_AGG_CN_AGG) &&
distribute_key_less_skew != NULL &&
(!equal_distributekey(root, distributed_key, distribute_key_less_skew)) &&
!(has_skew_for_redisfirst & SKEW_RES_HINT)) {
ListCell* lc = NULL;
foreach (lc, distribution_list) {
Distribution* target_distribution = (Distribution*)lfirst(lc);
/* 1+. get total cost for redistribute(dn) + hashagg(dn) + gather + hashagg(cn). */
get_redist_hashagg_gather_hashagg_path(root,
lefttree,
aggcosts,
numGroupCols,
numGroups,
final_groups,
distribute_key_less_skew,
multiple_less_skew,
target_distribution,
total_cost,
0,
AGG_HASHED,
need_stream,
&result_path);
/*2. Compare new cost with the last.*/
bool better_distribution = compare_agg_single_node_distribution(target_distribution, best_target_distribution, result_path.total_cost, best_cost);
if (1 == u_sess->opt_cxt.query_dop && (best_cost == 0.0 || better_distribution) &&
best_cost < NG_FORBIDDEN_COST) {
best_cost = result_path.total_cost;
best_target_distribution = target_distribution;
option = DN_REDISTRIBUTE_AGG_CN_AGG;
}
}
}
}
if (distributed_key != NIL) {
ListCell* lc = NULL;
foreach (lc, distribution_list) {
Distribution* target_distribution = (Distribution*)lfirst(lc);
if ((u_sess->attr.attr_sql.best_agg_plan == OPTIMAL_AGG && !(SKEW_RES_RULE & has_skew)) ||
u_sess->attr.attr_sql.best_agg_plan == DN_REDISTRIBUTE_AGG || force_slvl_agg) {
if (force_slvl_agg || !(SKEW_RES_HINT & has_skew)) {
/* 2. get total cost for redistribute(dn) + hashagg (dn) + gather. */
get_redist_hashagg_path(root,
lefttree,
aggcosts,
numGroupCols,
numGroups,
final_groups,
distributed_key,
multiple,
target_distribution,
total_cost,
0,
need_stream,
&result_path);
/* Compare new cost with the last.*/
bool better_distribution = compare_agg_single_node_distribution(target_distribution, best_target_distribution, result_path.total_cost, best_cost);
if (best_cost == 0.0 || better_distribution) {
best_cost = result_path.total_cost;
best_target_distribution = target_distribution;
option = DN_REDISTRIBUTE_AGG;
}
}
}
if ((u_sess->attr.attr_sql.best_agg_plan == OPTIMAL_AGG ||
u_sess->attr.attr_sql.best_agg_plan == DN_AGG_REDISTRIBUTE_AGG || (SKEW_RES_HINT & has_skew)) &&
!force_slvl_agg) {
/* 3. get total cost for hashagg (dn) + redistribute(dn) + hashagg (dn) + gather. */
get_hashagg_redist_hashagg_path(root,
lefttree,
aggcosts,
numGroupCols,
numGroups,
final_groups,
distributed_key,
multiple,
target_distribution,
total_cost,
0,
need_stream,
&result_path);
/* Compare new cost with the last.*/
bool better_distribution = compare_agg_single_node_distribution(target_distribution, best_target_distribution, result_path.total_cost, best_cost);
if (best_cost == 0.0 || better_distribution) {
best_cost = result_path.total_cost;
best_target_distribution = target_distribution;
option = DN_AGG_REDISTRIBUTE_AGG;
}
}
/* We consider this kind of path only when we successfully got a less skewed distribute key */
if (((u_sess->attr.attr_sql.best_agg_plan == OPTIMAL_AGG && !(SKEW_RES_RULE & has_skew)) ||
u_sess->attr.attr_sql.best_agg_plan == DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG) &&
distribute_key_less_skew != NULL && multiple_less_skew < multiple &&
(!equal_distributekey(root, distributed_key, distribute_key_less_skew)) && !force_slvl_agg &&
!(SKEW_RES_HINT & has_skew_for_redisfirst)) {
/* 3+. get total cost for redistribute(dn) + hashagg(dn) + redistribute(dn) + hashagg(dn) + gather */
get_redist_hashagg_redist_hashagg_path(root,
lefttree,
aggcosts,
numGroupCols,
numGroups,
final_groups,
distribute_key_less_skew,
multiple_less_skew,
target_distribution,
distributed_key,
multiple,
total_cost,
0,
need_stream,
&result_path);
/* Compare new cost with the last.*/
bool better_distribution = compare_agg_single_node_distribution(target_distribution, best_target_distribution, result_path.total_cost, best_cost);
if (1 == u_sess->opt_cxt.query_dop && (best_cost == 0.0 || better_distribution) &&
best_cost < NG_FORBIDDEN_COST) {
best_cost = result_path.total_cost;
best_target_distribution = target_distribution;
option = DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG;
}
}
}
}
*final_cost = best_cost;
*final_distribution = best_target_distribution;
/* Add optimal info to log for hint skew and null skew. */
if (!force_slvl_agg && u_sess->attr.attr_sql.plan_mode_seed == OPTIMIZE_PLAN && (has_skew & SKEW_RES_HINT)) {
ereport(DEBUG1,
(errmodule(MOD_OPT_SKEW),
errmsg(
"[SkewAgg : SKEW_RES_HINT] The optimal hash agg method is: %d (cost: %lf).", option, *final_cost)));
} else if (!force_slvl_agg && u_sess->attr.attr_sql.plan_mode_seed == OPTIMIZE_PLAN &&
u_sess->attr.attr_sql.best_agg_plan == OPTIMAL_AGG && (has_skew & SKEW_RES_RULE)) {
ereport(DEBUG1,
(errmodule(MOD_OPT_SKEW),
errmsg(
"[SkewAgg : SKEW_RES_RULE] The optimal hash agg method is: %d (cost: %lf).", option, *final_cost)));
} else {
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("The optimal hash agg method is: %d (cost: %lf).", option, *final_cost)));
}
if (*final_distribution) {
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
errmsg("The optimal hash agg computing group has %d datanode(s).",
bms_num_members((*final_distribution)->bms_data_nodeids))));
}
if (agg_orientation != AGG_LEVEL_2_1_INTENT && u_sess->attr.attr_sql.best_agg_plan != OPTIMAL_AGG) {
if (distributed_key == NIL)
return DN_AGG_CN_AGG;
else if (force_slvl_agg)
return DN_REDISTRIBUTE_AGG;
/*
* If distributed_key has skew, then choose the lowest cost plan between
* DN_AGG_CN_AGG and DN_AGG_REDISTRIBUTE_AGG.
*/
else if (SKEW_RES_HINT & has_skew)
return option;
return (SAggMethod)u_sess->attr.attr_sql.best_agg_plan;
}
return option;
}
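/*
 * Reading the DEBUG1 output above (a summary of the helper functions' log
 * tags, mapped to the SAggMethod they cost):
 *     "method1"       -> DN_AGG_CN_AGG
 *     "method2"       -> DN_REDISTRIBUTE_AGG
 *     "method3"       -> DN_AGG_REDISTRIBUTE_AGG
 *     "method 4 (1+)" -> DN_REDISTRIBUTE_AGG_CN_AGG
 *     "method 5 (3+)" -> DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG
 */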
/*
 * Compare the agg's distribution with its child's to reduce stream plans.
*/
bool is_agg_distribution_compalible_with_child(Distribution* aggDistribution, Distribution* childDistribution)
{
if (ng_is_single_node_group_distribution(aggDistribution)
&& ng_is_single_node_group_distribution(childDistribution)) {
return true;
}
    // Other Distribution types may be compatible too, but for now
    // we only handle the single-node distribution.
return false;
}
static Plan* generate_hashagg_plan(PlannerInfo* root, Plan* plan, List* final_list, AggClauseCosts* agg_costs,
int numGroupCols, const double* numGroups, WindowLists* wflists, AttrNumber* groupColIdx, Oid* groupColOps,
bool* needs_stream, Size hash_entry_size, AggOrientation agg_orientation, RelOptInfo* rel_info)
{
SAggMethod agg_option = DN_AGG_CN_AGG;
SAggMethod final_agg_mp_option = DN_AGG_CN_AGG;
Plan* agg_plan = NULL;
List* distributed_key = NIL;
List* distribute_key_less_skew = NIL;
Query* parse = root->parse;
List* groupClause = groupColIdx != NULL ? parse->groupClause : parse->distinctClause;
List* qual = NIL;
AttrNumber* local_groupColIdx =
groupColIdx != NULL ? groupColIdx : extract_grouping_cols(parse->distinctClause, plan->targetlist);
bool trans_agg = groupColIdx != NULL ? true : false;
double temp_num_groups[2];
double final_groups = numGroups[1];
double local_distinct;
List* group_exprs = NIL;
double multiple = 0.0;
double multiple_less_skew = 0.0;
Distribution* final_distribution = NULL;
int dop = plan->dop > 1 ? plan->dop : 1;
AggSkewInfo* skew_info = NULL;
uint32 skew_opt = SKEW_RES_NONE;
uint32 aggmethod_filter = ALLOW_ALL_AGG;
/* Confirm whether the distributed_key has skew */
if (SKEW_OPT_OFF != u_sess->opt_cxt.skew_strategy_opt)
skew_info = New(CurrentMemoryContext) AggSkewInfo(root, plan, rel_info);
temp_num_groups[0] = numGroups[0];
temp_num_groups[1] = numGroups[1];
/*
 * If the plan is an Agg, the having qual has been moved to plan->qual.
 * For a distinct clause, we need not move the having qual, which must be filtered in the lower-level agg operator;
 * if the having qual were moved, errors could happen because this agg plan's targetlist is parse->targetlist.
*/
if (IsA(plan, Agg) && plan->qual && groupClause != parse->distinctClause) {
qual = plan->qual;
plan->qual = NIL;
} else {
qual = groupColIdx != NULL ? (List*)parse->havingQual : NIL;
}
if (groupColIdx == NULL) {
group_exprs = get_sortgrouplist_exprs(groupClause, final_list);
} else {
int i;
for (i = 0; i < numGroupCols; i++) {
TargetEntry* tle = (TargetEntry*)list_nth(plan->targetlist, groupColIdx[i] - 1);
group_exprs = lappend(group_exprs, tle->expr);
}
if (numGroupCols != list_length(groupClause))
get_num_distinct(root,
group_exprs,
PLAN_LOCAL_ROWS(plan),
plan->plan_rows,
ng_get_dest_num_data_nodes(plan),
temp_num_groups);
}
/* Estimate distinct for hashagg. */
local_distinct = estimate_agg_num_distinct(root, group_exprs, plan, temp_num_groups);
/*
 * If there are subplans in the qual, look for the subplans' vars in the targetlist; if they are absent, only
 * dn_redistribute_agg is supported, because a two-level agg uses the final list as the targetlist of the
 * first-level agg while the qual is evaluated at the second level, so the vars would not be found there.
*/
bool subplan_in_qual = check_subplan_in_qual(final_list, qual);
    /* string_agg only supports dn_redistribute_agg */
bool has_dnagg = (agg_costs != NULL && (agg_costs->hasdctDnAggs || agg_costs->hasDnAggs));
/*
 * If the qual contains a subplan executed on DNs, DN_AGG_CN_AGG and DN_REDISTRIBUTE_AGG_CN_AGG would make
 * the query unshippable (see finalize_node_id), so we should never generate such a plan.
*/
bool subplan_exec_on_dn = check_subplan_exec_datanode(root, (Node*)qual);
/*
 * If the tlist contains expressions that return sets, force a single-level agg to avoid
 * evaluating them twice, which may cause wrong results.
*/
bool contain_sets_expr = expression_returns_set((Node*)final_list);
if (subplan_in_qual || has_dnagg || contain_sets_expr)
aggmethod_filter |= FOREC_SLVL_AGG;
else if (subplan_exec_on_dn)
aggmethod_filter |= DISALLOW_CN_AGG;
if ((!*needs_stream && (!ng_is_multiple_nodegroup_scenario())) || root->glob->insideRecursion) {
agg_option = DN_AGG_CN_AGG;
} else {
double multiple_matched = -1.0;
bool choose_matched = false;
bool use_skew = true;
bool use_bias = true;
double skew_multiple = 0.0;
double bias_multiple = 0.0;
List* local_distributed_key = NIL;
List* desired_exprs = NIL;
double desired_exprs_numdistinct[2];
/* get final groups */
final_groups = temp_num_groups[1];
/*
* Choose the optimal distribute key.
 * First, we find the ideal target distribute key, in two cases: 1. the target distribute key of the
 * table being inserted into; 2. the actual group clause of the count(distinct) case, stored in
 * local_distributed_key.
 * Then, if the ideal target distribute key has a multiple of no more than 1, we use it; otherwise we find
 * the optimal distribute key over all the group by clauses, stored in distributed_key. If the multiple
 * of the ideal key is no more than that of the optimal key, we still use the ideal key.
 * Finally, we compare the cost of the ideal key against the optimal key plus its redistribution cost, and
 * choose the better one to do the redistribution.
*/
if (root->dis_keys.matching_keys != NIL && parse->groupingSets == NIL) { /* don't count grouping set case */
ListCell* lc = NULL;
foreach (lc, root->dis_keys.matching_keys) {
if (!list_member(group_exprs, lfirst(lc))) {
break;
}
}
if (lc == NULL) {
get_multiple_from_exprlist(root,
root->dis_keys.matching_keys,
plan->plan_rows,
&use_skew,
use_bias,
&skew_multiple,
&bias_multiple);
multiple_matched = Max(skew_multiple, bias_multiple);
if (multiple_matched <= 1.0) {
multiple = multiple_matched;
choose_matched = true;
distributed_key = root->dis_keys.matching_keys;
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG), errmsg("matching key distribution is chosen due to no skew.")));
} else {
local_distributed_key = root->dis_keys.matching_keys;
}
}
}
if (multiple_matched == -1 && parse->groupingSets == NIL) { /* don't count grouping set case */
if (root->dis_keys.superset_keys != NIL) {
ListCell* lc = NULL;
double desired_multiple = -1.0;
/* loop all the possible superset key to find keys with lowest multiple */
foreach (lc, root->dis_keys.superset_keys) {
List* superset_keys = (List*)lfirst(lc);
desired_exprs = list_intersection(superset_keys, group_exprs);
List* new_local_distributed_key = get_distributekey_from_tlist(
root, final_list, desired_exprs, plan->plan_rows, &desired_multiple, skew_info);
if (multiple_matched == -1.0 || desired_multiple < multiple_matched) {
multiple_matched = desired_multiple;
local_distributed_key = new_local_distributed_key;
}
if (multiple_matched <= 1.0) {
break;
}
}
} else if (numGroupCols != list_length(groupClause)) { /* the first level of count(distinct) */
desired_exprs = get_sortgrouplist_exprs(groupClause, parse->targetList);
local_distributed_key = get_distributekey_from_tlist(
root, final_list, desired_exprs, plan->plan_rows, &multiple_matched, skew_info);
}
if (desired_exprs != NIL && local_distributed_key != NIL) {
if (multiple_matched <= 1.0) {
choose_matched = true;
distributed_key = local_distributed_key;
multiple = multiple_matched;
}
/*
 * Get the distinct count of the desired exprs to compute the cost of the
 * agg+redistribute+agg and redistribute+agg paths,
 * in order to judge which distribute key we want to use.
*/
get_num_distinct(root,
desired_exprs,
clamp_row_est(final_groups / ng_get_dest_num_data_nodes(plan) / dop),
final_groups,
ng_get_dest_num_data_nodes(plan),
desired_exprs_numdistinct,
NULL);
}
}
if (!choose_matched) {
distributed_key =
get_distributekey_from_tlist(root, final_list, group_exprs, plan->plan_rows, &multiple, skew_info);
if (multiple_matched > 0.0 && multiple_matched <= multiple) {
multiple = multiple_matched;
choose_matched = true;
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
errmsg("matching key distribution is chosen due to no more skew than best distribute key.")));
}
}
/* Generate less skew distribute key for potential shuffle */
if (Abs(plan->multiple - 1.0) > 0.001 || plan->distributed_keys == NIL) {
List* final_list_exprs = get_tlist_exprs(plan->targetlist, false);
distribute_key_less_skew = get_distributekey_from_tlist(
root, NIL, final_list_exprs, plan->plan_rows, &multiple_less_skew, skew_info);
}
/* set skew optimization method. */
if (skew_info != NULL) {
skew_opt = skew_info->getSkewInfo();
}
if (aggmethod_filter == FOREC_SLVL_AGG) {
if (distributed_key == NIL) {
if (subplan_in_qual) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Subplan in having qual + Group by\" on redistribution unsupported data type");
securec_check_ss_c(sprintf_rc, "\0", "\0");
} else {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"String_agg/Array_agg/Listagg + Group by\" on redistribution unsupported data type");
securec_check_ss_c(sprintf_rc, "\0", "\0");
}
mark_stream_unsupport();
*needs_stream = false;
return plan;
} else {
Cost final_cost;
agg_option = get_optimal_hashagg(root,
plan,
agg_costs,
numGroupCols,
local_distinct,
distributed_key,
final_list,
final_groups,
multiple,
distribute_key_less_skew,
multiple_less_skew,
agg_orientation,
&final_cost,
&final_distribution,
*needs_stream,
skew_info,
aggmethod_filter);
}
} else {
Cost final_cost;
agg_option = get_optimal_hashagg(root,
plan,
agg_costs,
numGroupCols,
local_distinct,
distributed_key,
final_list,
final_groups,
multiple,
distribute_key_less_skew,
multiple_less_skew,
agg_orientation,
&final_cost,
&final_distribution,
*needs_stream,
skew_info,
aggmethod_filter);
if (!choose_matched && multiple_matched > 0.0) {
Cost final_cost_matched;
SAggMethod agg_option_matched;
Cost redistribute_cost = 0.0;
Cost agg_redis_cost = 0.0;
Cost cheapest_cost = 0.0;
double glbrows;
agg_option_matched = get_optimal_hashagg(root,
plan,
agg_costs,
numGroupCols,
local_distinct,
local_distributed_key,
final_list,
final_groups,
multiple_matched,
distribute_key_less_skew,
multiple_less_skew,
agg_orientation,
&final_cost_matched,
&final_distribution,
*needs_stream,
skew_info,
aggmethod_filter);
unsigned int path_num_datanodes = ng_get_dest_num_data_nodes(plan);
/* redistribution cost of redistribute+agg path */
compute_stream_cost(STREAM_REDISTRIBUTE,
plan->exec_nodes ? plan->exec_nodes->baselocatortype : LOCATOR_TYPE_REPLICATED,
clamp_row_est(final_groups / path_num_datanodes),
final_groups,
multiple_matched,
plan->plan_width,
false,
local_distributed_key,
&redistribute_cost,
&glbrows,
path_num_datanodes,
path_num_datanodes);
if (desired_exprs != NIL) {
/* redistribution cost of agg+redistribute+agg path */
compute_stream_cost(STREAM_REDISTRIBUTE,
plan->exec_nodes ? plan->exec_nodes->baselocatortype : LOCATOR_TYPE_REPLICATED,
desired_exprs_numdistinct[0],
desired_exprs_numdistinct[0] * path_num_datanodes,
multiple_matched,
plan->plan_width,
false,
local_distributed_key,
&agg_redis_cost,
&glbrows,
path_num_datanodes,
path_num_datanodes);
cheapest_cost =
final_cost +
Min(agg_redis_cost * (1 + desired_exprs_numdistinct[0] /
clamp_row_est(final_groups / path_num_datanodes / dop)),
redistribute_cost);
} else {
cheapest_cost = final_cost + redistribute_cost;
}
if (final_cost_matched <= cheapest_cost) {
choose_matched = true;
agg_option = agg_option_matched;
multiple = multiple_matched;
ereport(DEBUG1,
(errmodule(MOD_OPT_AGG),
errmsg("matching key distribution is chosen due to less cost than best distribute key.")));
}
}
if (choose_matched && local_distributed_key != NIL) {
distributed_key = local_distributed_key;
}
}
}
/*
* The final agg has two parallel methods:
* 1: local redistribute + agg
* 2: agg + local redistribute + agg
* We can still use the get_optimal_hashagg() function to
* get the best parallel agg path for final hashagg.
*/
if (agg_option == DN_AGG_CN_AGG && plan->dop > 1 && !*needs_stream && is_local_redistribute_needed(plan)) {
Cost final_cost_matched;
Distribution* final_distribution_matched = NULL;
final_agg_mp_option = get_optimal_hashagg(root,
plan,
agg_costs,
numGroupCols,
local_distinct,
plan->distributed_keys,
final_list,
final_groups,
multiple,
NIL,
0.0,
agg_orientation,
&final_cost_matched,
&final_distribution_matched,
*needs_stream,
skew_info,
aggmethod_filter);
/*
* if plan->dop > 1 && 1 == best_agg_plan,
* DN_AGG_CN_AGG should be replaced by DN_AGG_REDISTRIBUTE_AGG.
*/
if (final_agg_mp_option == DN_AGG_CN_AGG) {
final_agg_mp_option = DN_AGG_REDISTRIBUTE_AGG;
}
}
// Single node distribution.
if (is_agg_distribution_compalible_with_child(final_distribution, &(plan->exec_nodes->distribution))) {
plan = (Plan*)make_agg(root,
plan->targetlist,
qual,
AGG_HASHED,
agg_costs,
numGroupCols,
local_groupColIdx,
groupColOps,
final_groups,
plan,
wflists,
*needs_stream,
trans_agg,
NIL,
hash_entry_size,
true,
agg_orientation);
if (skew_info != NULL) {
if (skew_opt == SKEW_RES_NONE) {
skew_opt = skew_info->getSkewInfo();
}
((Agg*)plan)->skew_optimize = skew_opt;
delete skew_info;
}
return plan;
}
if (agg_option == DN_REDISTRIBUTE_AGG_CN_AGG || agg_option == DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG) {
/* add first stream node */
AssertEreport(distribute_key_less_skew != NULL, MOD_OPT, "invalid distribute key less skew.");
plan = make_redistribute_for_agg(root, plan, distribute_key_less_skew, multiple_less_skew, final_distribution);
}
if (agg_option == DN_AGG_REDISTRIBUTE_AGG || agg_option == DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG ||
final_agg_mp_option == DN_AGG_REDISTRIBUTE_AGG) {
/*
 * Do not pass need_stream here: a local redistribute may be needed
 * for SMP even though need_stream is false
*/
agg_plan = (Plan*)make_agg(root,
final_list,
qual,
AGG_HASHED,
agg_costs,
numGroupCols,
local_groupColIdx,
groupColOps,
(long)Min(local_distinct, (double)LONG_MAX),
plan,
wflists,
true, /* pass true instead of need_stream */
trans_agg,
NIL,
hash_entry_size,
true,
agg_orientation);
if (wflists != NULL && wflists->activeWindows) {
agg_plan->targetlist = make_windowInputTargetList(root, agg_plan->targetlist, wflists->activeWindows);
}
} else {
agg_plan = plan;
}
if (agg_option == DN_REDISTRIBUTE_AGG || agg_option == DN_AGG_REDISTRIBUTE_AGG ||
agg_option == DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG) {
/* add distribute stream plan */
plan = make_redistribute_for_agg(root, agg_plan, distributed_key, multiple, final_distribution);
*needs_stream = false;
} else if (final_agg_mp_option == DN_REDISTRIBUTE_AGG || final_agg_mp_option == DN_AGG_REDISTRIBUTE_AGG) {
        /* Parallelize the final agg. */
plan = create_local_redistribute(root, agg_plan, agg_plan->distributed_keys, multiple);
}
if (agg_option == DN_AGG_REDISTRIBUTE_AGG || agg_option == DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG ||
final_agg_mp_option == DN_AGG_REDISTRIBUTE_AGG) {
Path hashed_p;
errno_t rc = EOK;
rc = memset_s(&hashed_p, sizeof(Path), 0, sizeof(Path));
securec_check(rc, "\0", "\0");
plan = mark_top_agg(root, final_list, agg_plan, plan, agg_orientation);
plan->plan_rows = final_groups;
((Agg*)plan)->numGroups = (long)Min(plan->plan_rows, (double)LONG_MAX);
/* add new agg node cost */
Distribution* distribution = ng_get_dest_distribution(plan);
ng_copy_distribution(&hashed_p.distribution, distribution);
cost_agg(&hashed_p,
root,
AGG_HASHED,
agg_costs,
numGroupCols,
PLAN_LOCAL_ROWS(plan),
plan->startup_cost,
plan->total_cost,
PLAN_LOCAL_ROWS(plan->lefttree),
plan->lefttree->plan_width,
hash_entry_size,
plan->dop,
&((Agg*)plan)->mem_info);
/* Consider the selectivity of having qual */
if (plan->qual != NIL && plan->plan_rows >= HAVING_THRESHOLD) {
plan->plan_rows = clamp_row_est(plan->plan_rows * DEFAULT_MATCH_SEL);
}
plan->startup_cost = hashed_p.startup_cost;
plan->total_cost = hashed_p.total_cost;
} else {
long rows;
if (agg_option == DN_AGG_CN_AGG && final_agg_mp_option == DN_AGG_CN_AGG)
rows = (long)Min(clamp_row_est(local_distinct), (double)LONG_MAX);
else {
unsigned int num_datanodes = ng_get_dest_num_data_nodes(plan);
rows = (long)Min(clamp_row_est(final_groups / num_datanodes), (double)LONG_MAX);
}
plan = (Plan*)make_agg(root,
final_list,
qual,
AGG_HASHED,
agg_costs,
numGroupCols,
local_groupColIdx,
groupColOps,
rows,
plan,
wflists,
*needs_stream,
trans_agg,
NIL,
hash_entry_size,
true,
agg_orientation);
}
if (skew_info != NULL) {
if (skew_opt == SKEW_RES_NONE) {
skew_opt = skew_info->getSkewInfo();
}
((Agg*)plan)->skew_optimize = skew_opt;
delete skew_info;
}
return plan;
}
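/*
 * Summary of the nodes added above for each chosen method (a sketch of the
 * code paths, not new behavior):
 *     DN_AGG_CN_AGG                       : single-level agg; the CN-level
 *                                           agg, if any, is added by the caller
 *     DN_REDISTRIBUTE_AGG                 : redistribute -> agg (needs_stream cleared)
 *     DN_AGG_REDISTRIBUTE_AGG             : agg -> redistribute -> top agg
 *     DN_REDISTRIBUTE_AGG_CN_AGG          : redistribute (less-skewed key) -> agg
 *     DN_REDISTRIBUTE_AGG_REDISTRIBUTE_AGG: redistribute (less-skewed key) ->
 *                                           agg -> redistribute -> top agg
 * Under SMP, final_agg_mp_option may additionally insert a local redistribute.
 */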
/*
* @Description: Find the informational constraint info by Var.
* @in var: the specified var, find constraint on the column of var.
* @in relid: Relation id.
* @in conType: Constraint type.
* @return: true or false.
*/
bool findConstraintByVar(Var* var, Oid relid, constraintType conType)
{
Relation conrel;
HeapTuple htup;
bool result = false;
ScanKeyData skey[1];
SysScanDesc conscan;
ScanKeyInit(&skey[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid));
conrel = heap_open(ConstraintRelationId, AccessShareLock);
conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true, NULL, 1, skey);
    /* A foreign table can only have one informational constraint now. */
while (HeapTupleIsValid(htup = systable_getnext(conscan))) {
bool isNull = false;
Datum adatum;
int16* attnums = NULL;
ArrayType* arr = NULL;
adatum = SysCacheGetAttr(CONSTROID, htup, Anum_pg_constraint_conkey, &isNull);
arr = DatumGetArrayTypeP(adatum);
attnums = (int16*)ARR_DATA_PTR(arr);
/*
 * Currently, the foreign table supports only primary key and unique constraints;
 * multi-column constraints are unsupported, so the length of the attnums array is 1.
*/
Form_pg_constraint conform = (Form_pg_constraint)GETSTRUCT(htup);
/* This constraint is an informational constraint and can be used when building the plan. */
if (var->varattno == attnums[0] && conform->consoft && conform->conopt) {
if (conType == UNIQUE_CONSTRAINT) {
/* Primary key and unique constraints both have a uniqueness effect. */
if (CONSTRAINT_PRIMARY == conform->contype || CONSTRAINT_UNIQUE == conform->contype) {
result = true;
break;
}
} else if (conType == NOT_NULL_CONSTRAINT) { /* A primary key also has a not-null effect. */
if (CONSTRAINT_PRIMARY == conform->contype) {
result = true;
break;
}
}
}
}
systable_endscan(conscan);
heap_close(conrel, AccessShareLock);
return result;
}
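/*
 * Usage sketch (hypothetical caller, for illustration only): before treating
 * rows from a foreign table as distinct on a column, a caller could probe for
 * an informational unique constraint on the Var's column:
 *
 * if (findConstraintByVar(var, rte->relid, UNIQUE_CONSTRAINT))
 * ... the planner may assume uniqueness on this column ...
 */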
/*
* get_count_distinct_newtlist: get a new tlist by merging the orig tlist with the distinct node.
*
* Parameters:
* @in root: plan info node
* @in tlist: the final targetlist
* @in distinct_node: the node for distinct expr
* @in/out orig_tlist: group by exprs and aggref exprs from final targetlist
* @in/out duplicate_tlist: the TargetEntry nodes that exist in both the final targetlist and the group by clause
* @in distinct_eq_op: the equality operator oid for count(distinct)
*
* Returns: the new tlist produced by merging the orig tlist with the distinct node
*/
static List* get_count_distinct_newtlist(
PlannerInfo* root, List* tlist, Node* distinct_node, List** orig_tlist, List** duplicate_tlist, Oid* distinct_eq_op)
{
List* new_tlist = NIL;
ListCell* lc = NULL;
ListCell* lc2 = NULL;
int i = 0;
/* Extract group by exprs and aggref exprs from final targetlist */
*orig_tlist = make_agg_var_list(root, tlist, duplicate_tlist);
i = 0;
/* Make each expr a TargetEntry in the sub targetlist, and replace count(distinct(b)) with b */
foreach (lc, *orig_tlist) {
Node* n = (Node*)lfirst(lc);
Node* expr = NULL;
TargetEntry* tle = NULL;
/* We only add one distinct node to new targetlist */
if (IsA(n, Aggref) && ((Aggref*)n)->aggdistinct != NIL) {
Aggref* agg_node = (Aggref*)n;
expr = distinct_node;
if (!OidIsValid(*distinct_eq_op))
*distinct_eq_op = ((SortGroupClause*)linitial(agg_node->aggdistinct))->eqop;
else {
AssertEreport(*distinct_eq_op == ((SortGroupClause*)linitial(agg_node->aggdistinct))->eqop,
MOD_OPT,
"The equality operator of distinct node is not the head of aggdistinct.");
continue;
}
} else
expr = (Node*)copyObject(n);
if (IsA(n, TargetEntry)) {
((TargetEntry*)expr)->resno = i + 1;
new_tlist = lappend(new_tlist, expr);
} else {
foreach (lc2, tlist) {
TargetEntry* te = (TargetEntry*)lfirst(lc2);
if (equal(te->expr, n)) {
tle = flatCopyTargetEntry(te);
tle->expr = (Expr*)expr;
tle->resno = i + 1;
break;
}
}
if (tle == NULL)
tle = makeTargetEntry((Expr*)expr, i + 1, NULL, false);
new_tlist = lappend(new_tlist, tle);
}
i++;
}
return new_tlist;
}
/*
* get_count_distinct_param: get new targetlist and group cols for count(distinct) and group by clause.
*
* Parameters:
* @in root: plan info node
* @in subplan: input plan
* @in tlist: the final targetlist
* @in distinct_node: the node for distinct expr
* @in/out numGrpColsNew: the number of new group cols, covering count(distinct) and the orig tlist
* @in groupColIdx: the idx for the original group by clause
* @in/out groupColIdx_new: the new idx array for the new group cols
* @in/out groupingOps_new: the new grouping operators, including the equality operator oid for count(distinct)
* @in/out orig_tlist: group by exprs and aggref exprs from the final targetlist
* @in/out duplicate_tlist: the TargetEntry nodes that exist in both the final targetlist and the group by clause
* @in/out newtlist: the new tlist produced by merging the orig tlist with the distinct node
*
* Returns: void
*/
static void get_count_distinct_param(PlannerInfo* root, Plan** subplan, List* tlist, Node* distinct_node,
int* numGrpColsNew, AttrNumber* groupColIdx, AttrNumber** groupColIdx_new, Oid** groupingOps_new, List** orig_tlist,
List** duplicate_tlist, List** newtlist)
{
Query* parse = root->parse;
Oid* orig_grouping_ops = extract_grouping_ops(parse->groupClause);
int numGroupCols = list_length(parse->groupClause);
ListCell* lc = NULL;
bool located = false;
int i;
Oid distinct_eq_op = InvalidOid;
Plan* result_plan = *subplan;
/* Initialize new groupCols for additional level of hashagg */
Oid* groupingOps_tmp = (Oid*)palloc0(sizeof(Oid) * (numGroupCols + 1));
AttrNumber* groupColIdx_tmp = (AttrNumber*)palloc0(sizeof(AttrNumber) * (numGroupCols + 1));
if (numGroupCols != 0) {
errno_t rc = EOK; /* Initialize rc to keep the compiler silent */
rc = memcpy_s(groupingOps_tmp, sizeof(Oid) * (numGroupCols + 1), orig_grouping_ops, sizeof(Oid) * numGroupCols);
securec_check(rc, "\0", "\0");
rc = memcpy_s(
groupColIdx_tmp, sizeof(AttrNumber) * (numGroupCols + 1), groupColIdx, sizeof(AttrNumber) * numGroupCols);
securec_check(rc, "\0", "\0");
}
/* construct new tlist as merge orig tlist with distinct node. */
*newtlist = get_count_distinct_newtlist(root, tlist, distinct_node, orig_tlist, duplicate_tlist, &distinct_eq_op);
/* Add groupCol and groupOp for new group by item */
foreach (lc, result_plan->targetlist) {
TargetEntry* te = (TargetEntry*)lfirst(lc);
if (equal(te->expr, distinct_node)) {
located = true;
for (i = 0; i < numGroupCols; i++)
if (te->resno == groupColIdx_tmp[i])
break;
if (i == numGroupCols) {
groupColIdx_tmp[numGroupCols] = te->resno;
} else
*numGrpColsNew = numGroupCols;
break;
}
}
/* If the count(distinct) expr is not in the target list yet, add it to the target list for aggregation */
if (!located) {
if (!is_projection_capable_plan(result_plan)) {
result_plan = (Plan*)make_result(root, (List*)copyObject(result_plan->targetlist), NULL, result_plan);
*subplan = result_plan;
}
TargetEntry* newtlist_entry =
makeTargetEntry((Expr*)distinct_node, list_length(result_plan->targetlist) + 1, NULL, true);
result_plan->targetlist = lappend(result_plan->targetlist, newtlist_entry);
groupColIdx_tmp[numGroupCols] = newtlist_entry->resno;
}
groupingOps_tmp[numGroupCols] = distinct_eq_op;
*groupColIdx_new = groupColIdx_tmp;
*groupingOps_new = groupingOps_tmp;
return;
}
/*
* get_count_distinct_partial_plan: return a hashagg supporting count(distinct) plan
*
* A new hashagg level should be formed here to support count(distinct) case, like:
* select a, count(distinct(b)), sum(c) from t group by a;
* We should first group by a, b, output is a, b, sum(c), then group by a,
* output is a, count(b), sum(c).
*/
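/*
 * Illustration only: the rewrite described above is logically equivalent to
 *
 * SELECT a, count(b), sum(sum_c)
 * FROM (SELECT a, b, sum(c) AS sum_c FROM t GROUP BY a, b) s
 * GROUP BY a;
 *
 * where the inner level deduplicates (a, b) pairs, so the outer count(b)
 * yields count(distinct b).
 */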
static Plan* get_count_distinct_partial_plan(PlannerInfo* root, Plan* result_plan, List** final_tlist,
Node* distinct_node, AggClauseCosts agg_costs, const double* numGroups, WindowLists* wflists,
AttrNumber* groupColIdx, bool* needs_stream, Size hash_entry_size, RelOptInfo* rel_info)
{
Query* parse = root->parse;
Oid* groupingOps_new = NULL;
AttrNumber* groupColIdx_new = NULL;
Oid* orig_grouping_ops = extract_grouping_ops(parse->groupClause);
List* tlist = *final_tlist;
List* new_tlist = NIL;
List* orig_tlist = NIL;
List* duplicate_tlist = NIL;
int numGroupCols = list_length(parse->groupClause);
ListCell* lc = NULL;
ListCell* lc2 = NULL;
ListCell* lc3 = NULL;
bool located = false;
int numGrpColsNew = numGroupCols + 1;
Node* qual = NULL;
/* get new groupcols and targetlist for count(distinct) and group by clause. */
get_count_distinct_param(root,
&result_plan,
tlist,
distinct_node,
&numGrpColsNew,
groupColIdx,
&groupColIdx_new,
&groupingOps_new,
&orig_tlist,
&duplicate_tlist,
&new_tlist);
*needs_stream = needs_agg_stream(root, new_tlist, result_plan->distributed_keys);
if (check_subplan_in_qual(new_tlist, (List*)parse->havingQual)) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Subplan in having qual + Count(distinct)\"");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
}
/* Since we don't want to finalize to the CN for this level of hashagg, mark it as a subquery */
root->query_level++;
qual = parse->havingQual;
parse->havingQual = NULL;
result_plan = generate_hashagg_plan(root,
result_plan,
new_tlist,
&agg_costs,
numGrpColsNew,
numGroups,
NULL,
groupColIdx_new,
groupingOps_new,
needs_stream,
hash_entry_size,
AGG_LEVEL_2_1_INTENT,
rel_info);
parse->havingQual = qual;
root->query_level--;
if (!IsA(result_plan, Agg) || *needs_stream) {
errno_t sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
NOTPLANSHIPPING_LENGTH,
"\"Count(Distinct)\" on redistribution unsupported data type");
securec_check_ss_c(sprintf_rc, "\0", "\0");
mark_stream_unsupport();
/* Set the plan targetlist to final_tlist; this plan will be discarded. */
result_plan->targetlist = *final_tlist;
return result_plan;
}
/* We should make proper changes to the generated first-level plan */
if (((Agg*)result_plan)->is_final && numGrpColsNew == numGroupCols + 1) {
Agg* agg_plan = (Agg*)result_plan;
List* sub_targetlist = result_plan->lefttree->targetlist;
int sub_seq_no = 1;
foreach (lc, sub_targetlist) {
TargetEntry* te = (TargetEntry*)lfirst(lc);
if (equal(te->expr, distinct_node))
break;
sub_seq_no++;
}
AssertEreport(sub_seq_no <= list_length(sub_targetlist),
MOD_OPT,
"invalid sub_seq_no when getting a hashagg supporting count(distinct) plan.");
agg_plan->grpColIdx[numGroupCols] = sub_seq_no;
}
((Agg*)result_plan)->is_final = false;
((Agg*)result_plan)->single_node = false;
/*
* Make proper changes of some properties to the final target list and having qual,
* which are actually referred to by orig_tlist and duplicate_tlist. The following
* changes are made: 1. change count(distinct(b)) to count(b); 2. increase the
* aggstage of non-count_distinct nodes.
*/
lc2 = list_head(result_plan->targetlist);
located = false;
foreach (lc, orig_tlist) {
Node* n = (Node*)lfirst(lc);
List* matched_node = NIL;
ListCell* lc4 = NULL;
bool next = true;
foreach (lc3, duplicate_tlist) {
if (equal(lfirst(lc3), n))
matched_node = lappend(matched_node, lfirst(lc3));
}
if (IsA(n, Aggref) && ((Aggref*)n)->aggdistinct != NULL) {
foreach (lc4, matched_node) {
Aggref* m = (Aggref*)lfirst(lc4);
list_free_deep(((Aggref*)m)->aggdistinct);
((Aggref*)m)->aggdistinct = NULL;
}
list_free_deep(((Aggref*)n)->aggdistinct);
((Aggref*)n)->aggdistinct = NULL;
if (!located)
located = true;
else
next = false;
} else if (IsA(n, Aggref)) {
TargetEntry* te = (TargetEntry*)lfirst(lc2);
AssertEreport(IsA(te->expr, Aggref), MOD_OPT, "The type of expression is not T_Aggref.");
foreach (lc4, matched_node) {
Aggref* m = (Aggref*)lfirst(lc4);
((Aggref*)m)->aggstage = ((Aggref*)te->expr)->aggstage + 1;
}
((Aggref*)n)->aggstage = ((Aggref*)te->expr)->aggstage + 1;
((Aggref*)te->expr)->aggtype = ((Aggref*)te->expr)->aggtrantype;
te->resjunk = false;
}
list_free_ext(matched_node);
matched_node = NIL;
if (next)
lc2 = lnext(lc2);
}
if (numGroupCols != 0)
locate_grouping_columns(root, tlist, result_plan->targetlist, groupColIdx);
*needs_stream = needs_agg_stream(root, tlist, result_plan->distributed_keys);
*final_tlist = tlist;
if (numGroupCols != 0)
result_plan = generate_hashagg_plan(root,
result_plan,
tlist,
&agg_costs,
numGroupCols,
numGroups,
wflists,
groupColIdx,
orig_grouping_ops,
needs_stream,
hash_entry_size,
AGG_LEVEL_2_2_INTENT,
rel_info);
else
result_plan = (Plan*)make_agg(root,
tlist,
(List*)parse->havingQual,
AGG_PLAIN,
&agg_costs,
0,
NULL,
NULL,
(long)Min(numGroups[0], (double)LONG_MAX),
result_plan,
wflists,
*needs_stream,
true,
NIL,
hash_entry_size,
true,
AGG_LEVEL_2_2_INTENT);
return result_plan;
}
#ifdef ENABLE_MULTIPLE_NODES
static void free_est_varlist(List* varlist)
{
ListCell* lc = NULL;
foreach (lc, varlist) {
EstSPNode* node = (EstSPNode*)lfirst(lc);
if (IsA(node, EstSPNode)) {
list_free_ext(node->varlist);
}
}
list_free_ext(varlist);
}
#endif
double get_bias_from_varlist(PlannerInfo* root, List* varlist, double rows, bool isCoalesceExpr)
{
double bias = 1.0;
ListCell* lc = NULL;
foreach (lc, varlist) {
Node* node = (Node*)lfirst(lc);
double mcf = 1.0;
if (IsA(node, Var)) {
mcf = get_node_mcf(root, node, (isCoalesceExpr ? rows : 0.0));
bias *= mcf;
} else {
EstSPNode* sp = (EstSPNode*)node;
double var_bias;
AssertEreport(IsA(sp, EstSPNode), MOD_OPT, "The node type is not T_EstSPNode.");
if (IsA(sp->expr, CoalesceExpr))
var_bias = get_bias_from_varlist(root, sp->varlist, rows, true);
else if (IsA(sp->expr, Aggref))
var_bias = 0.0;
else
var_bias = get_bias_from_varlist(root, sp->varlist, 0.0);
bias *= Max(DEFAULT_SPECIAL_EXPR_BIASE, var_bias / u_sess->pgxc_cxt.NumDataNodes);
}
}
bias = Max(1.0, bias * u_sess->pgxc_cxt.NumDataNodes);
return bias;
}
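/*
 * Worked example (illustrative numbers, not from the source): for a single
 * Var whose most common value covers 30% of the rows (mcf = 0.3) on a
 * 4-datanode cluster, bias = Max(1.0, 0.3 * 4) = 1.2, i.e. the busiest
 * datanode is expected to receive about 1.2x the average share of rows
 * after redistribution on this key.
 */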
void get_multiple_from_exprlist(PlannerInfo* root, List* exprList, double rows, bool* useskewmultiple,
bool usebiasmultiple, double* skew_multiple, double* bias_multiple)
{
#ifdef ENABLE_MULTIPLE_NODES
ListCell* lc = NULL;
List* varlist = NIL;
Node* expr = NULL;
if (*useskewmultiple) {
/* Estimate distinct for expr in global datanode. */
double ndistinct = estimate_num_groups(root, exprList, rows, u_sess->pgxc_cxt.NumDataNodes, STATS_TYPE_GLOBAL);
/* we can't increase ndistinct by add more distribute column */
if (ndistinct == rows)
*useskewmultiple = false;
*skew_multiple = get_skew_ratio(ndistinct);
}
if (usebiasmultiple) {
foreach (lc, exprList) {
expr = (Node*)lfirst(lc);
varlist = append_distribute_var_list(varlist, expr);
}
/* Get multiple from varlist */
if (varlist == NIL)
*bias_multiple = u_sess->pgxc_cxt.NumDataNodes;
else
*bias_multiple = get_bias_from_varlist(root, varlist, rows);
}
free_est_varlist(varlist);
#else
*skew_multiple = 1.0;
*bias_multiple = 0.0;
#endif
return;
}
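/*
 * Illustrative reading of the two outputs (single-node builds just return
 * the trivial 1.0/0.0 pair above): skew_multiple reflects how few distinct
 * values the candidate keys have relative to the datanode count, while
 * bias_multiple reflects how concentrated the most common values are; a
 * caller such as get_distributekey_from_tlist() treats a key with both
 * skew_multiple == 1.0 and bias_multiple <= 1.0 as evenly distributable.
 */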
static Node* get_multiple_from_expr(
PlannerInfo* root, Node* expr, double rows, double* skew_multiple, double* bias_multiple)
{
List* groupExprs = NIL;
Oid datatype = exprType((Node*)(expr));
bool use_skew_multiple = true;
if (!OidIsValid(datatype) || !IsTypeDistributable(datatype))
return NULL;
groupExprs = list_make1(expr);
get_multiple_from_exprlist(root, groupExprs, rows, &use_skew_multiple, true, skew_multiple, bias_multiple);
list_free_ext(groupExprs);
return expr;
}
static List* add_multiple_to_list(Node* expr, double multiple, List* varMultipleList)
{
ExprMultipleData* pstExprMultipleData = NULL;
ListCell* prev_node = NULL;
/* Add the multiple to varMultipleList in ascending order. */
prev_node = NULL;
if (varMultipleList != NULL) {
ListCell* lc = NULL;
ExprMultipleData* vmd = NULL;
foreach (lc, varMultipleList) {
vmd = (ExprMultipleData*)lfirst(lc);
if (vmd->multiple > multiple)
break;
if (equal(vmd->expr, expr))
return varMultipleList;
prev_node = lc;
}
}
pstExprMultipleData = (ExprMultipleData*)palloc(sizeof(ExprMultipleData));
pstExprMultipleData->expr = expr;
pstExprMultipleData->multiple = multiple;
if (prev_node == NULL)
varMultipleList = lcons(pstExprMultipleData, varMultipleList);
else
lappend_cell(varMultipleList, prev_node, pstExprMultipleData);
return varMultipleList;
}
/* Get distribute keys for group by or partition by operations; may be a single column or multiple columns */
static List* get_mix_diskey_by_exprlist(
PlannerInfo* root, List* exprMultipleList, double rows, double* result_multiple, AggSkewInfo* skew_info = NULL)
{
List* distkey = NIL;
Node* expr = NULL;
ExprMultipleData* exprMultipleData = (ExprMultipleData*)linitial(exprMultipleList);
double bias_multiple = 0.0;
double skew_multiple = 0.0;
bool useskewmultiple = true;
bool usebiasmultiple = true;
int group_num = 2;
expr = get_multiple_from_expr(root, exprMultipleData->expr, rows, &skew_multiple, &bias_multiple);
if (expr != NULL) {
useskewmultiple = skew_multiple == 1.0 ? false : true;
usebiasmultiple = bias_multiple <= 1.0 ? false : true;
distkey = lappend(distkey, exprMultipleData->expr);
}
if (list_length(exprMultipleList) >= group_num) {
while (group_num <= list_length(exprMultipleList)) {
skew_multiple = 1.0;
bias_multiple = 0.0;
exprMultipleData = (ExprMultipleData*)list_nth(exprMultipleList, group_num - 1);
distkey = lappend(distkey, exprMultipleData->expr);
get_multiple_from_exprlist(
root, distkey, rows, &useskewmultiple, usebiasmultiple, &skew_multiple, &bias_multiple);
useskewmultiple = skew_multiple == 1 ? false : true;
usebiasmultiple = bias_multiple <= 1 ? false : true;
group_num++;
if ((skew_multiple == 1) && (bias_multiple <= 1)) {
/* Check if this distribute key is skew. */
if (skew_info != NULL && (list_length(distkey) < list_length(exprMultipleList))) {
uint32 skew_opt = get_hashagg_skew(skew_info, distkey);
if ((skew_opt & SKEW_RES_HINT) || (skew_opt & SKEW_RES_STAT)) {
ereport(DEBUG1,
(errmodule(MOD_OPT_SKEW),
(errmsg("[SkewAgg] The distribute keys have skew according to [SKEW_RES:%u],"
" we will add more column into distribute keys.",
skew_opt))));
continue;
}
}
/* !usebiasmultiple && !useskewmultiple */
*result_multiple = 1;
return distkey;
}
}
}
*result_multiple = Max(bias_multiple, skew_multiple);
AssertEreport(*result_multiple >= 1, MOD_OPT, "invalid result of multiple columns.");
return distkey;
}
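/*
 * Example walk-through (hypothetical statistics): for GROUP BY a, b where
 * column a alone is skewed (multiple > 1), the loop above appends b and
 * re-estimates the pair (a, b); if the combined key reaches
 * skew_multiple == 1 and bias_multiple <= 1 (and no hint/statistics skew is
 * reported), (a, b) is returned with *result_multiple = 1, otherwise the
 * widest key is returned together with the remaining multiple.
 */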
List* get_distributekey_from_tlist(
PlannerInfo* root, List* tlist, List* groupcls, double rows, double* result_multiple, void* skew_info)
{
ListCell* lcell = NULL;
List* distkey = NIL;
double multiple = 0.0;
double bias_multiple = 0.0;
double skew_multiple = 0.0;
List* exprMultipleList = NIL;
foreach (lcell, groupcls) {
Node* expr = (Node*)lfirst(lcell);
if (IsA(expr, SortGroupClause))
expr = get_sortgroupclause_expr((SortGroupClause*)expr, tlist);
expr = get_multiple_from_expr(root, expr, rows, &skew_multiple, &bias_multiple);
if (expr != NULL) {
/*
* we can't estimate skew of grouping sets because nulls are
* added, so just add all columns and set multiple to 1
*/
if (root->parse->groupingSets) {
distkey = lappend(distkey, expr);
*result_multiple = 1;
continue;
}
if ((skew_multiple == 1.0) && (bias_multiple <= 1.0)) {
*result_multiple = 1;
list_free_ext(exprMultipleList);
return list_make1(expr);
} else if ((u_sess->pgxc_cxt.NumDataNodes == skew_multiple) &&
(u_sess->pgxc_cxt.NumDataNodes ==
bias_multiple)) { /* All the exprs are const, so return the first expr. */
if (distkey == NULL)
distkey = lappend(distkey, expr);
*result_multiple = u_sess->pgxc_cxt.NumDataNodes;
continue;
} else {
if (skew_multiple == 1.0) {
/*
* If distinct num of multiple has no skew, we should use bias multiple to
* compute mix multiple.
*/
multiple = bias_multiple;
}
else if (bias_multiple <= 1.0) /* mcf has no skew, handle skew_multiple */
multiple = skew_multiple;
else
multiple = Max(bias_multiple, skew_multiple);
exprMultipleList = add_multiple_to_list(expr, multiple, exprMultipleList);
}
}
}
if (exprMultipleList != NULL) {
distkey = get_mix_diskey_by_exprlist(root, exprMultipleList, rows, result_multiple, (AggSkewInfo*)skew_info);
list_free_ext(exprMultipleList);
}
return distkey;
}
static void copy_path_costsize(Path* dest, Path* src)
{
set_path_rows(dest, src->rows, src->multiple);
dest->startup_cost = src->startup_cost;
dest->total_cost = src->total_cost;
dest->locator_type = src->locator_type;
}
#endif
static ExecNodes* initExecNodes()
{
ExecNodes* exec_nodes = (ExecNodes*)makeNode(ExecNodes);
exec_nodes->baselocatortype = LOCATOR_TYPE_HASH;
exec_nodes->accesstype = RELATION_ACCESS_READ;
exec_nodes->primarynodelist = NIL;
exec_nodes->en_expr = NULL;
return exec_nodes;
}
// ----- new functions for node group support
ExecNodes* getExecNodesByGroupName(const char* gname)
{
ExecNodes* exec_nodes = NULL;
Oid* members = NULL;
int nmembers = 0;
Oid groupoid = get_pgxc_groupoid(gname);
if (groupoid == InvalidOid) {
return get_all_data_nodes(LOCATOR_TYPE_HASH);
}
/* First check if we already have default nodegroup set */
nmembers = get_pgxc_groupmembers(groupoid, &members);
AssertEreport(nmembers > 0, MOD_OPT, "invalid number of pgxc group members.");
exec_nodes = initExecNodes();
/* Creating executing-node list */
for (int i = 0; i < nmembers; i++) {
int nodeId = PGXCNodeGetNodeId(members[i], PGXC_NODE_DATANODE);
exec_nodes->nodeList = lappend_int(exec_nodes->nodeList, nodeId);
}
Distribution* distribution = ng_convert_to_distribution(exec_nodes->nodeList);
ng_set_distribution(&exec_nodes->distribution, distribution);
exec_nodes->distribution.group_oid = ng_get_group_groupoid(gname);
return exec_nodes;
}
ExecNodes* getRelationExecNodes(Oid tableoid)
{
ExecNodes* exec_nodes = NULL;
Oid* members = NULL;
int nmembers = 0;
/*
* For system tables, PGXC doesn't record them in pgxc_class, so we consider
* them to belong to the installation group
*/
if (tableoid < FirstNormalObjectId) {
return getExecNodesByGroupName(PgxcGroupGetInstallationGroup());
}
/* Otherwise we get the first rtable's exec_nodes as the default */
nmembers = get_pgxc_classnodes(tableoid, &members);
AssertEreport(nmembers > 0, MOD_OPT, "invalid number of datanodes.");
exec_nodes = initExecNodes();
/* Creating executing-node list */
for (int i = 0; i < nmembers; i++) {
int nodeId = PGXCNodeGetNodeId(members[i], PGXC_NODE_DATANODE);
exec_nodes->nodeList = lappend_int(exec_nodes->nodeList, nodeId);
}
Distribution* distribution = ng_convert_to_distribution(exec_nodes->nodeList);
ng_set_distribution(&exec_nodes->distribution, distribution);
char relkind = get_rel_relkind(tableoid);
exec_nodes->distribution.group_oid = ng_get_baserel_groupoid(tableoid, relkind);
return exec_nodes;
}
/* Append the groupingId expr to the target list and
* generate group by clauses that include the groupingId expr
*/
static List* add_groupingIdExpr_to_tlist(List* tlist)
{
List* newTlist = NULL;
ListCell* lc = NULL;
bool include_groupId = false;
foreach (lc, tlist) {
TargetEntry* tl = (TargetEntry*)lfirst(lc);
if (IsA(tl->expr, GroupingId)) {
tl->resjunk = true;
include_groupId = true;
break;
}
}
/* Add groupingId to targetlist */
if (include_groupId == false) {
GroupingId* groupId = makeNode(GroupingId);
TargetEntry* tle = makeTargetEntry((Expr*)groupId, list_length(tlist) + 1, "groupingid", true);
newTlist = lappend(newTlist, tle);
}
newTlist = list_concat((List*)copyObject(tlist), newTlist);
return newTlist;
}
List* add_groupId_to_groupExpr(List* query_group, List* tlist)
{
int groupMaxLen = 0;
ListCell* target_lc = NULL;
TargetEntry* target_group_id = NULL;
/* Get max ressortgroupref from target list */
foreach (target_lc, tlist) {
TargetEntry* tg = (TargetEntry*)lfirst(target_lc);
if ((int)tg->ressortgroupref > groupMaxLen) {
groupMaxLen = tg->ressortgroupref;
}
if (IsA(tg->expr, GroupingId)) {
target_group_id = tg;
}
}
if (target_group_id != NULL) {
List* grouplist = NIL;
/* Add the groupingId expr to the group by clause */
groupMaxLen++;
target_group_id->ressortgroupref = groupMaxLen;
/* Add the grouping() expr to the group by clause */
SortGroupClause* grpcl = makeNode(SortGroupClause);
grpcl->tleSortGroupRef = groupMaxLen;
grpcl->eqop = INT4EQOID;
grpcl->sortop = INT4LTOID;
grpcl->nulls_first = false;
grpcl->hashable = true;
grouplist = lappend(list_copy(query_group), grpcl);
return grouplist;
} else
return query_group;
}
/* set_root_matching_key
* for insert, update, delete statement, set matching keys in plannerinfo
*
* Parameters:
* root: plannerinfo struct in current query level
* targetlist: final output targetlist
*/
static void set_root_matching_key(PlannerInfo* root, List* targetlist)
{
Query* parse = root->parse;
/* we only set interesting matching key for non-select query */
if (parse->commandType != CMD_SELECT) {
RangeTblEntry* rte = rt_fetch(parse->resultRelation, parse->rtable); /* target UDI (update/delete/insert) relation */
List* partAttrNum = rte->partAttrNum; /* partition columns in target relation */
/* only hash-distributed tables are of concern */
if (rte->locator_type == LOCATOR_TYPE_HASH) {
ListCell* lc1 = NULL;
ListCell* lc2 = NULL;
bool isinvalid = false;
foreach (lc1, partAttrNum) {
int num = lfirst_int(lc1);
TargetEntry* tle = NULL;
/* For insert and update statements, we only check the expr position */
if (parse->commandType == CMD_INSERT || parse->commandType == CMD_UPDATE) {
tle = (TargetEntry*)list_nth(targetlist, num - 1);
/* If exprs are all distributable, record them in matching key and matching nos */
if (IsTypeDistributable(exprType((Node*)tle->expr))) {
Var* var = locate_distribute_var(tle->expr);
if (var != NULL)
root->dis_keys.matching_keys = lappend(root->dis_keys.matching_keys, tle->expr);
else
isinvalid = true;
} else
isinvalid = true;
} else {
/*
* for update and delete statements, we check the exact expr, since the
* position does not match the final target list.
* find the exact matching expr.
*/
foreach (lc2, targetlist) {
tle = (TargetEntry*)lfirst(lc2);
Var* var = (Var*)tle->expr;
if (IsA(var, Var) && var->varno == (Index)parse->resultRelation &&
var->varattno == (AttrNumber)num)
break;
}
if (lc2 != NULL && IsTypeDistributable(exprType((Node*)tle->expr))) {
root->dis_keys.matching_keys = lappend(root->dis_keys.matching_keys, tle->expr);
} else {
isinvalid = true;
}
}
}
/* free the already allocated space if no matching key at all */
if (isinvalid) {
list_free_ext(root->dis_keys.matching_keys);
root->dis_keys.matching_keys = NIL;
}
}
}
}
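/*
 * Example (assumed schema, for illustration): for
 * "INSERT INTO t(a, b) SELECT x, y FROM s" where t is hash-distributed by
 * column a, the targetlist entry at a's position (x) is checked; if x is
 * distributable and reducible to a Var, it is recorded as a matching key,
 * otherwise the whole matching-key list is dropped.
 */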
/*
* @Description: give the current plan a dummy targetlist if its targetlist is NULL, for the vector engine.
*
* @in plan: current plan
*/
static void make_dummy_targetlist(Plan* plan)
{
/*
* vector engine doesn't support empty targetlist
* Notes: We should exclude ModifyTable nodes, because they also have empty targetlist
*/
if (plan->targetlist == NIL && !IsA(plan, ModifyTable) &&
!(plan->lefttree != NULL && IsA(plan->lefttree, VecModifyTable))) {
Const* c = make_const(NULL, makeString("Dummy"), 0);
plan->targetlist = lappend(plan->targetlist, makeTargetEntry((Expr*)c, 1, NULL, true));
}
}
/*
* passdown_itst_keys_to_subroot:
* Pass the interested keys to the root of the lower query level
* Parameters:
* @in root: planner info of current query level
* @in diskeys: interested distribute keys of current subquery rel
*/
static void passdown_itst_keys_to_subroot(PlannerInfo* root, ItstDisKey* diskeys)
{
/* for subquery, we should inherit superset key and matching key from parent root */
if (diskeys != NULL) {
ListCell* lc = NULL;
List* result = NIL;
foreach (lc, diskeys->superset_keys) {
ListCell* lc2 = NULL;
List* superset_keys = (List*)lfirst(lc);
result = NIL;
foreach (lc2, superset_keys) {
Expr* key = (Expr*)lfirst(lc2);
result = add_itst_node_to_list(result, root->parse->targetList, key, false);
}
if (result != NIL)
root->dis_keys.superset_keys = lappend(root->dis_keys.superset_keys, result);
}
root->dis_keys.superset_keys = remove_duplicate_superset_keys(root->dis_keys.superset_keys);
result = NIL;
foreach (lc, diskeys->matching_keys) {
Expr* key = (Expr*)lfirst(lc);
result = add_itst_node_to_list(result, root->parse->targetList, key, true);
if (result == NIL)
break;
}
root->dis_keys.matching_keys = result;
}
}
/*
* add_itst_node_to_list
* add single node of interested keys to the target interested dis keys
* Parameters:
* @in result_list: input list to add node
* @in target_list: targetlist of current parse tree
* @in node: the input interested node
* @is_matching_key: whether to add to matching key
* Return:
* result dis keys with node added, or NIL if failed in exact match mode
*/
static List* add_itst_node_to_list(List* result_list, List* target_list, Expr* node, bool is_matching_key)
{
Var* var = locate_distribute_var(node);
if (var != NULL) {
/*
* Any item of interested key is (type cast) of var, so the varattno
* point to the key in subquery targetlist
*/
TargetEntry* tle = (TargetEntry*)list_nth(target_list, var->varattno - 1);
if (is_matching_key)
result_list = lappend(result_list, tle->expr);
else
result_list = list_append_unique(result_list, tle->expr);
} else if (is_matching_key) {
/* superset key can be partially inherited, and we remove duplicate for optimization */
list_free_ext(result_list);
result_list = NIL;
}
return result_list;
}
/*
* vector_engine_preprocess_walker:
* Before join planning, check the tables and expressions in the query to see
* if a vectorized plan can be generated.
* Parameters:
* @in node: parse or sub node to be checked
* @in context: the context to mark base tables in the query
* Return:
* true if there's a row-store table or an expression that the vector engine doesn't support
*/
static bool vector_engine_preprocess_walker(Node* node, void* rtables)
{
if (node == NULL) {
return false;
} else if (IsA(node, RangeTblRef)) {
/*
* If a row relation is found, all the plan will change to row engine.
* For subquery, we should recursively call this walker routine.
*/
int varno = ((RangeTblRef*)node)->rtindex;
RangeTblEntry* rte = rt_fetch(varno, (List*)rtables);
if (rte->rtekind == RTE_RELATION) {
if (rte->orientation == REL_ROW_ORIENTED)
return true;
} else if (rte->rtekind == RTE_SUBQUERY) {
Query* subquery = rte->subquery;
if (vector_engine_preprocess_walker((Node*)subquery, rtables)) {
return true;
}
}
} else if (IsA(node, Query)) {
Query* subquery = (Query*)node;
if (subquery->hasAggs || subquery->windowClause) {
/* Check if targetlist contains vector unsupported feature */
if (vector_engine_expression_walker((Node*)(subquery->targetList), NULL))
return true;
/* Check if qual contains vector unsupported feature */
if (vector_engine_expression_walker((Node*)(subquery->havingQual), NULL))
return true;
}
/* Check if it contains an unsupported windowagg option */
if (subquery->windowClause) {
ListCell* lc = NULL;
foreach (lc, subquery->windowClause) {
WindowClause* wc = (WindowClause*)lfirst(lc);
if (wc->frameOptions !=
(FRAMEOPTION_RANGE | FRAMEOPTION_START_UNBOUNDED_PRECEDING | FRAMEOPTION_END_CURRENT_ROW)) {
return true;
}
}
}
if (query_tree_walker(subquery, (bool (*)())vector_engine_preprocess_walker, (void*)subquery->rtable, 0)) {
return true;
}
}
return expression_tree_walker(node, (bool (*)())vector_engine_preprocess_walker, (void*)rtables);
}
/*
* @Description: return true if the plan node is ForeignScan node for HDFS/OBS
* foreign table.
* @param[IN] plan : current plan node
*/
static bool is_dfs_node(Plan* plan)
{
if (IsA(plan, VecForeignScan) || IsA(plan, ForeignScan)) {
ForeignScan* fs = (ForeignScan*)plan;
AssertEreport(InvalidOid != fs->scan_relid, MOD_OPT, "invalid oid of scan relation.");
ServerTypeOption srvType = getServerType(fs->scan_relid);
if (T_OBS_SERVER == srvType || T_HDFS_SERVER == srvType || T_TXT_CSV_OBS_SERVER == srvType) {
/* OBS and HDFS foreign table can NOT be in the same plan. */
if (((T_OBS_SERVER == srvType || T_TXT_CSV_OBS_SERVER == srvType) &&
u_sess->opt_cxt.srvtype == T_HDFS_SERVER) ||
((u_sess->opt_cxt.srvtype == T_OBS_SERVER || u_sess->opt_cxt.srvtype == T_TXT_CSV_OBS_SERVER) &&
T_HDFS_SERVER == srvType)) {
ereport(ERROR,
(errmodule(MOD_ACCELERATE),
errcode(ERRCODE_OPTIMIZER_INCONSISTENT_STATE),
errmsg("OBS and HDFS foreign table can NOT be in the same plan.")));
}
u_sess->opt_cxt.srvtype = srvType;
return true;
}
}
return false;
}
/*
* @Description: traverse the plan tree to find ForeignScan node for HDFS/OBS
* foreign table.
*
* @param[IN] plan : current plan node
* @return: if true, there are ForeignScan node(s) for HDFS/OBS foreign table in
* the plan tree.
*/
static bool dfs_node_exists(Plan* plan)
{
bool found = false;
check_stack_depth();
/* special case for append and modifytable nodes */
if (IsA(plan, Append) || IsA(plan, ModifyTable) || IsA(plan, SubqueryScan) || IsA(plan, MergeAppend)) {
Plan* child = NULL;
List* plans = NIL;
ListCell* lc = NULL;
switch (nodeTag(plan)) {
case T_Append:
// VecAppend is the same as Append, so plan is casted to Append here.
plans = ((Append*)plan)->appendplans;
break;
case T_ModifyTable:
// VecModifyTable is the same as ModifyTable, so plan is casted to ModifyTable here.
plans = ((ModifyTable*)plan)->plans;
break;
case T_SubqueryScan:
plans = lappend(plans, ((SubqueryScan*)plan)->subplan);
break;
case T_MergeAppend:
plans = ((MergeAppend*)plan)->mergeplans;
break;
default:
break;
}
/* no leaf node */
if (plans == NIL) {
return false;
}
foreach (lc, plans) {
child = (Plan*)lfirst(lc);
/*
* do NOT walk the plan tree any more once a ForeignScan for an HDFS/OBS
* foreign table is found
*/
if (dfs_node_exists(child))
return true;
}
/* there is no ForeignScan node in any subtree, so return false */
return false;
}
if (plan->lefttree == NULL && plan->righttree == NULL) {
/* plan is leaf node */
return is_dfs_node(plan);
}
/*
* return true directly if a ForeignScan for an HDFS/OBS foreign table is
* found in the lefttree
*/
if (plan->lefttree)
found = dfs_node_exists(plan->lefttree);
if (found) {
return true;
}
/* walk righttree */
if (plan->righttree)
found = dfs_node_exists(plan->righttree);
if (found) {
return true;
}
/* there is no ForeignScan node in any subtree, so return false */
return false;
}
static bool contains_pushdown_constraint(Plan* plan)
{
/* is there subplan in plan node? */
List* subplan = check_subplan_list(plan);
/* if there are subplans in the child tree, do NOT push down the child plan
* tree to the compute pool.
*/
if (list_length(subplan) != 0) {
ereport(DEBUG1,
(errmsg("Can not push down the plan node "
"because there is/are subplan(s) in the targetlist or qual of the plan node.")));
return true;
}
/* is there (vartype > FirstNormalObjectId) in plan node? */
List* rs = check_vartype_list(plan);
/* if vartype > FirstNormalObjectId, do NOT push down the plan node to the
* compute pool.
*/
if (list_length(rs) != 0) {
ereport(DEBUG1,
(errmsg("Can not push down the plan node "
"because there is user-defined data type in the targetlist or qual of the plan node.")));
return true;
}
/* is there (func_oid > FirstNormalObjectId) in plan node? */
rs = check_func_list(plan);
/* if func_oid > FirstNormalObjectId, do NOT push down the plan node to the
* compute pool.
*/
if (list_length(rs) != 0) {
ereport(DEBUG1,
(errmsg("Can not push down the plan node "
"because there is user-defined function in the targetlist or qual of the plan node.")));
return true;
}
return false;
}
/*
* @Description: return true if the plan node can run on the compute pool.
*
* @param[IN] plan : current plan node
*/
static bool is_pushdown_node(Plan* plan)
{
const char* planname = "unknown";
bool found = false;
if (IsA(plan, Agg)) {
found = true;
planname = "Agg";
}
if (IsA(plan, ForeignScan)) {
planname = "ForeignScan";
ForeignScan* fs = (ForeignScan*)plan;
AssertEreport(InvalidOid != fs->scan_relid, MOD_OPT, "invalid oid of scan relation.");
ServerTypeOption srvType = getServerType(fs->scan_relid);
if (T_OBS_SERVER == srvType || T_HDFS_SERVER == srvType || T_TXT_CSV_OBS_SERVER == srvType) {
AssertEreport(InvalidOid != fs->scan_relid, MOD_OPT, "invalid oid of scan relation.");
found = (fs->objectNum > u_sess->pgxc_cxt.NumDataNodes);
Relation rel = heap_open(fs->scan_relid, NoLock);
const char* relname = RelationGetRelationName(rel);
heap_close(rel, NoLock);
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("relname: %s, file number: %ld, dn number: %d",
relname,
fs->objectNum,
u_sess->pgxc_cxt.NumDataNodes)));
if (!found) {
ereport(LOG,
(errmodule(MOD_ACCELERATE),
errmsg("%s: relname: %s, file number: %ld, dn number: %d",
planname,
relname,
fs->objectNum,
u_sess->pgxc_cxt.NumDataNodes)));
}
}
}
if (!found) {
return false;
}
if (contains_pushdown_constraint(plan))
return false;
return true;
}
/*
* @Description: modify the subplan to insert 2 gather nodes atop the child.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
* @return: Plan*: the accelerated sub plan (with 2 gather nodes inserted atop the child)
*/
static Plan* insert_gather_node(Plan* child, PlannerInfo* root)
{
RemoteQuery* plan_router = NULL;
RemoteQuery* scan_gather = NULL;
Index dummy_rtindex;
char* rte_name = NULL;
RangeTblEntry* dummy_rte = NULL; /* RTE for the remote query node being added. */
/* make "PLAN ROUTER" RemoteQuery node */
plan_router = makeNode(RemoteQuery);
plan_router->position = PLAN_ROUTER;
/*
* Create and append the dummy range table entry to the range table.
* Note that this modifies the master copy the caller passed us, otherwise
* e.g. EXPLAIN VERBOSE will fail to find the rte the Vars built below.
* NOTICE: If there is only a single table, should we set the table name as
* the name of the rte?
*/
rte_name = "_PLAN_ROUTER_";
dummy_rte = make_dummy_remote_rte(rte_name, makeAlias("_PLAN_ROUTER_", NIL));
root->parse->rtable = lappend(root->parse->rtable, dummy_rte);
dummy_rtindex = list_length(root->parse->rtable);
plan_router->scan.scanrelid = dummy_rtindex;
plan_router->scan.plan.targetlist = child->targetlist;
/*
* RemoteQuery is inserted between upper node and foreign scan, and exec_nodes
* is meaningless for RemoteQuery and foreign scan. BUT upper node needs
* exec_nodes, so do a copy of exec_nodes here for upper node.
*/
inherit_plan_locator_info((Plan*)plan_router, child);
/*
* ExecInitRemoteQuery() uses base_tlist as scan tupledesc, so do a copy of
* the plan->targetlist of child plan node here.
*/
plan_router->base_tlist = NIL;
/* appropriate values for data fields in RemoteQuery */
plan_router->is_simple = true;
plan_router->read_only = true;
plan_router->is_temp = false;
plan_router->force_autocommit = true;
plan_router->exec_type = EXEC_ON_DATANODES;
plan_router->spool_no_data = true;
plan_router->poll_multi_channel = true;
/* get better estimates */
plan_router->scan.plan.exec_type = EXEC_ON_DATANODES;
// queryonobs: the cost estimate here needs more thought
plan_router->scan.plan.total_cost = child->total_cost;
plan_router->scan.plan.startup_cost = child->startup_cost;
plan_router->scan.plan.plan_rows = child->plan_rows;
plan_router->scan.plan.plan_width = child->plan_width;
/* make "SCAN GATHER" RemoteQuery node */
scan_gather = (RemoteQuery*)copyObject(plan_router);
scan_gather->position = SCAN_GATHER;
scan_gather->poll_multi_channel = true;
rte_name = "_SCAN_GATHER_";
dummy_rte = make_dummy_remote_rte(rte_name, makeAlias("_SCAN_GATHER_", NIL));
root->parse->rtable = lappend(root->parse->rtable, dummy_rte);
dummy_rtindex = list_length(root->parse->rtable);
scan_gather->scan.scanrelid = dummy_rtindex;
/* assemble the child, "plan router" and "scan gather" together */
scan_gather->scan.plan.lefttree = child;
plan_router->scan.plan.lefttree = (Plan*)scan_gather;
add_metadata(child, root);
return (Plan*)plan_router;
}
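/*
 * Resulting plan shape, as assembled above:
 *
 * upper node
 *   -> RemoteQuery (PLAN_ROUTER)
 *        -> RemoteQuery (SCAN_GATHER)
 *             -> child (e.g. a ForeignScan on the HDFS/OBS table)
 */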
/*
* @Description: append metadata to the plan node.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
* @return: void
*/
static void add_metadata(Plan* plan, PlannerInfo* root)
{
RangeTblEntry* rte = NULL;
while (plan->lefttree)
plan = plan->lefttree;
if (IsA(plan, ForeignScan)) {
ForeignScan* scan_plan = (ForeignScan*)plan;
rte = planner_rt_fetch(scan_plan->scan.scanrelid, root);
/*
* put the meta data, the configuration options of the foreign table into
* the ForeignScan node.
*/
AssertEreport(rte->relid != InvalidOid, MOD_OPT, "invalid oid of foreign scan relation.");
if (isObsOrHdfsTableFormTblOid(rte->relid) || IS_OBS_CSV_TXT_FOREIGN_TABLE(rte->relid)) {
/* package the meta data of the HDFS/OBS foreign table. */
Relation r = heap_open(rte->relid, NoLock);
scan_plan->rel = make_relmeta(r);
/* package the configuration options of the HDFS/OBS foreign table. */
scan_plan->options = setForeignOptions(rte->relid);
heap_close(r, NoLock);
}
}
}
/*
* @Description: adjust pl size if file format is orc.
*
* @param[IN] relid : relation oid of the obs foreign table
* @param[IN] plan_width : current plan width in ForeignScan node.
* @param[IN] pl_size : input pl size
* @param[IN/OUT] width: if not null, return the width of all columns in one relation.
* @return: the adjusted pl size
*/
uint64 adjust_plsize(Oid relid, uint64 plan_width, uint64 pl_size, uint64* width)
{
uint64 rel_width = 0;
Relation rel = heap_open(relid, NoLock);
TupleDesc desc = rel->rd_att;
for (int i = 0; i < desc->natts; i++) {
Oid typoid = desc->attrs[i]->atttypid;
int32 typmod = desc->attrs[i]->atttypmod;
rel_width += get_typavgwidth(typoid, typmod);
}
heap_close(rel, NoLock);
if (width != NULL)
*width = rel_width;
double real_rate = 1.0;
if (rel_width > 0 && plan_width > 0 && plan_width < rel_width)
real_rate = double(plan_width) / double(rel_width);
pl_size = uint64((double)pl_size / real_rate);
return pl_size;
}
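/*
 * Worked example (illustrative numbers): if the relation's total column
 * width is 100 bytes but the query only touches columns worth 20 bytes
 * (plan_width = 20), then real_rate = 0.2 and the PL is scaled to
 * pl_size / 0.2, i.e. 5x larger, since an orc reader skips the unused 80%
 * of the data.
 */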
/*
* @Description: check whether push down the plan to the compute pool.
*
* @param[IN] plan : current plan node
* @param[IN] relname : the relation name of current ForeignScan node.
* @return: if true, the plan will run in the compute pool.
*
* The PL does not need to be adjusted if the data format is text/csv, because
* all data in the files/objects will be scanned.
*
* If the data format is orc, we need to adjust the PL according to the plan
* width of the current foreign scan node and the total width of all attributes
* in the relation. Because an orc file contains metadata, the reader can
* skip data that is unused in the query.
* For example, if there are 10 columns in a relation but only 2 are used in
* some query, the reader can skip the 8 unused columns' data in the orc file.
*/
static bool estimate_acceleration_cost_for_obs(Plan* plan, const char* relname)
{
AssertEreport(plan, MOD_OPT, "invalid plan node.");
ereport(DEBUG5, (errmodule(MOD_ACCELERATE), errmsg("in %s", __FUNCTION__)));
ForeignScan* fsplan = (ForeignScan*)get_foreign_scan(plan);
List* fOptions = getFdwOptions(fsplan->scan_relid);
char* file_format = getFTOptionValue(fOptions, OPTION_NAME_FORMAT);
AssertEreport(file_format, MOD_OPT, "invalid file format.");
ComputePoolConfig** conf = get_cp_conninfo();
uint64 pl_size = conf[0]->pl;
pl_size *= 1024;
int rpthreshold = conf[0]->rpthreshold;
if (rpthreshold < 2) {
rpthreshold = 2;
}
uint64 rel_width = 0;
if (!pg_strcasecmp(file_format, "orc"))
pl_size = adjust_plsize(fsplan->scan_relid, (uint64)((Plan*)fsplan)->plan_width, pl_size, &rel_width);
int fnum = 0;
uint64 totalSize = get_datasize(plan, u_sess->opt_cxt.srvtype, &fnum);
uint64 size_per_file;
if (0 == fnum)
size_per_file = 0;
else
size_per_file = totalSize / fnum;
uint64 fnum_per_thread = fnum / (u_sess->pgxc_cxt.NumDataNodes * u_sess->opt_cxt.query_dop);
uint64 size_per_thread = totalSize / (u_sess->pgxc_cxt.NumDataNodes * u_sess->opt_cxt.query_dop);
if (unlikely(pl_size == 0)) {
ereport(ERROR, (errcode(ERRCODE_DIVISION_BY_ZERO), errmsg("pl_size should not be zero")));
}
uint64 rp_per_thread = size_per_thread / pl_size + 1;
bool fnum_cond = fnum_per_thread < 2;
bool rp_cond = rp_per_thread < (uint64)rpthreshold;
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("cp_runtime_info->freerp: %d, config->pl: %dKB",
u_sess->wlm_cxt->cp_runtime_info->freerp,
conf[0]->pl)));
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("u_sess->pgxc_cxt.NumDataNodes: %d, query_dop: %d",
u_sess->pgxc_cxt.NumDataNodes,
u_sess->opt_cxt.query_dop)));
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("relname: %s, totalSize: %lu, filenum: %d, size_per_file: %lu, fnum_per_thread: %lu, rp_per_thread: "
"%lu",
relname,
totalSize,
fnum,
size_per_file,
fnum_per_thread,
rp_per_thread)));
/* save the estimate detail to fdw_private list. */
if (u_sess->attr.attr_sql.show_acce_estimate_detail && u_sess->opt_cxt.srvtype == T_OBS_SERVER) {
uint64 orig_pl = uint64(conf[0]->pl) * 1024;
StringInfo estimate_detail = makeStringInfo();
appendStringInfo(
estimate_detail, "file format: %s, pl: %lu, rp threshold: %d, ", file_format, orig_pl, rpthreshold);
appendStringInfo(estimate_detail, "total size: %lu, file number: %d, ", totalSize, fnum);
appendStringInfo(
estimate_detail, "data size/thread: %lu, file number/thread: %lu, ", size_per_thread, fnum_per_thread);
appendStringInfo(estimate_detail,
"relation width: %lu, plan width: %d, adjusted pl: %lu, ",
rel_width,
fsplan->scan.plan.plan_width,
pl_size);
appendStringInfo(estimate_detail, "rp/thread: %lu", rp_per_thread);
Value* val = makeString(estimate_detail->data);
DefElem* elem = makeDefElem((char*)ESTIMATION_ITEM, (Node*)val);
fsplan->fdw_private = lappend(fsplan->fdw_private, elem);
}
if (fnum_cond || rp_cond) {
ereport(DEBUG1, (errmodule(MOD_ACCELERATE), errmsg("scan %s at the local cluster", relname)));
return false;
}
ereport(DEBUG1, (errmodule(MOD_ACCELERATE), errmsg("scan %s at the compute pool", relname)));
return true;
}
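/*
 * Worked example (illustrative numbers, text/csv format so the PL is not
 * adjusted): on a 4-datanode cluster with query_dop = 1, 400 objects
 * totalling 40GB and pl = 1GB, fnum_per_thread = 100 and
 * rp_per_thread = 10GB / 1GB + 1 = 11, so the scan is pushed to the compute
 * pool provided rpthreshold does not exceed 11; with only 4 objects,
 * fnum_per_thread < 2 would keep the scan local.
 */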
static void init_optimizer_context(PlannerGlobal* glob)
{
glob->plannerContext = (PlannerContext*)palloc0(sizeof(PlannerContext));
glob->plannerContext->plannerMemContext = AllocSetContextCreate(CurrentMemoryContext,
"PlannerContext",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
if (u_sess->opt_cxt.skew_strategy_opt != SKEW_OPT_OFF) {
glob->plannerContext->dataSkewMemContext = AllocSetContextCreate(glob->plannerContext->plannerMemContext,
"DataSkewContext",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
}
}
static void deinit_optimizer_context(PlannerGlobal* glob)
{
if (IS_NEED_FREE_MEMORY_CONTEXT(glob->plannerContext->plannerMemContext)) {
MemoryContextDelete(glob->plannerContext->plannerMemContext);
glob->plannerContext->plannerMemContext = NULL;
glob->plannerContext->dataSkewMemContext = NULL;
}
}
#ifdef ENABLE_UT
bool estimate_acceleration_cost_for_HDFS(Plan* plan, const char* relname)
#else
static bool estimate_acceleration_cost_for_HDFS(Plan* plan, const char* relname)
#endif
{
AssertEreport(plan, MOD_OPT, "invalid plan node.");
ereport(DEBUG5, (errmodule(MOD_ACCELERATE), errmsg("in %s", __FUNCTION__)));
if (u_sess->wlm_cxt->cp_runtime_info->dnnum == 0) {
ereport(DEBUG1, (errmodule(MOD_ACCELERATE), "No available dn in the compute pool."));
return false;
}
int fnum = 0;
uint64 totalSize = get_datasize(plan, u_sess->opt_cxt.srvtype, &fnum);
uint64 size_per_file;
if (0 == fnum)
size_per_file = 0;
else
size_per_file = totalSize / fnum;
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE), errmsg("relname: %s, totalSize: %lu, filenum: %d", relname, totalSize, fnum)));
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("u_sess->pgxc_cxt.NumDataNodes: %d, query_dop: %d",
u_sess->pgxc_cxt.NumDataNodes,
u_sess->opt_cxt.query_dop)));
uint64 fnum_per_thread = fnum / (u_sess->pgxc_cxt.NumDataNodes * u_sess->opt_cxt.query_dop);
uint64 size_per_thread = size_per_file * fnum_per_thread;
if (fnum_per_thread < 2 || size_per_thread < (uint64)u_sess->attr.attr_sql.acce_min_datasize_per_thread * 1024) {
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("scan %s at local cluster, reason: fnum_per_thread(%lu) < 2 || size_per_thread(%lu) < "
"u_sess->attr.attr_sql.acce_min_datasize_per_thread(%dMB)",
relname,
fnum_per_thread,
size_per_thread,
u_sess->attr.attr_sql.acce_min_datasize_per_thread / 1024)));
return false;
}
ereport(DEBUG1, (errmodule(MOD_ACCELERATE), errmsg("scan %s at the compute pool", relname)));
return true;
}
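/*
 * Worked example (illustrative numbers; acce_min_datasize_per_thread is
 * assumed to be expressed in KB, as the conversion above suggests): with
 * 4 datanodes, query_dop = 1 and 80 files of 128MB each, fnum_per_thread
 * = 20 and size_per_thread = 2560MB, so the scan is pushed down as long as
 * the configured minimum data size per thread stays below 2560MB.
 */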
/*
* @Description: estimate whether pushing the plan down to the compute pool is worthwhile.
*
* @param[IN] plan : current plan node
* @return: true if the plan node can be pushed down.
*/
static bool estimate_acceleration_cost(Plan* plan)
{
ForeignScan* fs = (ForeignScan*)get_foreign_scan(plan);
Relation rel = heap_open(fs->scan_relid, NoLock);
const char* rname = RelationGetRelationName(rel);
char* relname = pstrdup(rname);
heap_close(rel, NoLock);
DistributeBy* dist_type = getTableDistribution(fs->scan_relid);
AssertEreport(dist_type, MOD_OPT, "The distributeBy object is null.");
if (NULL == dist_type) {
ereport(DEBUG1, (errmodule(MOD_ACCELERATE), "no distribute mode in relation: %s.", relname));
return false;
}
if (DISTTYPE_ROUNDROBIN != dist_type->disttype) {
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE), errmsg("only foreign tables with the roundrobin option can run in the compute pool.")));
return false;
}
if (u_sess->opt_cxt.srvtype == T_HDFS_SERVER) {
return estimate_acceleration_cost_for_HDFS(plan, relname);
} else if (u_sess->opt_cxt.srvtype == T_OBS_SERVER || u_sess->opt_cxt.srvtype == T_TXT_CSV_OBS_SERVER) {
return estimate_acceleration_cost_for_obs(plan, relname);
} else {
return false;
}
}
/*
* @Description: traverse the plan tree to add "PLAN ROUTER" and "SCAN GATHER"
* node for HDFS/OBS foreign table.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
* @return: if true, the plan is a left-hand tree and can be pushed down to
* the compute pool.
*/
static bool walk_plan(Plan* plan, PlannerInfo* root)
{
check_stack_depth();
if (IsA(plan, Append) || IsA(plan, ModifyTable) || IsA(plan, SubqueryScan) || IsA(plan, MergeAppend)) {
walk_set_plan(plan, root);
return false;
} else {
return walk_normal_plan(plan, root);
}
}
/*
* @Description: traverse the subplans of the append and modifytable node to add
* "PLAN ROUTER" and "SCAN GATHER" node for HDFS/OBS foreign table.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
*/
static void walk_set_plan(Plan* plan, PlannerInfo* root)
{
RelOptInfo* rel = NULL;
List* plans = NIL;
switch (nodeTag(plan)) {
case T_Append:
plans = ((Append*)plan)->appendplans;
break;
case T_ModifyTable:
plans = ((ModifyTable*)plan)->plans;
break;
case T_SubqueryScan:
if (((SubqueryScan*)plan)->subplan == NULL)
return;
plans = lappend(plans, ((SubqueryScan*)plan)->subplan);
/* Need to look up the subquery's RelOptInfo, since we need its subroot */
rel = find_base_rel(root, ((SubqueryScan*)plan)->scan.scanrelid);
AssertEreport(rel->subroot, MOD_OPT, "invalid subroot for the relation.");
root = rel->subroot;
break;
case T_MergeAppend:
plans = ((MergeAppend*)plan)->mergeplans;
break;
default:
break;
}
if (plans == NIL) {
return;
}
ListCell* lc = NULL;
List* new_plans = NIL;
foreach (lc, plans) {
Plan* child = (Plan*)lfirst(lc);
if (walk_plan(child, root) && estimate_acceleration_cost(child)) {
child = insert_gather_node(child, root);
u_sess->opt_cxt.has_obsrel = true;
u_sess->opt_cxt.disable_dop_change = true;
}
new_plans = lappend(new_plans, child);
}
switch (nodeTag(plan)) {
case T_Append:
((Append*)plan)->appendplans = new_plans;
break;
case T_ModifyTable:
((ModifyTable*)plan)->plans = new_plans;
break;
case T_SubqueryScan:
((SubqueryScan*)plan)->subplan = (Plan*)linitial(new_plans);
rel->subplan = ((SubqueryScan*)plan)->subplan;
break;
case T_MergeAppend:
((MergeAppend*)plan)->mergeplans = new_plans;
break;
default:
break;
}
}
/*
* @Description: traverse the lefttree and righttree to add "PLAN ROUTER" and
* "SCAN GATHER" node for HDFS/OBS foreign table.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
* @return: case 1: true if the plan is a leaf node of type T_ForeignScan or
* T_VecForeignScan;
* case 2: true if the plan is a left-hand tree whose lefttree subplan
* returns true and the plan itself is T_Agg or T_VecAgg;
* false for all other cases.
*/
static bool walk_normal_plan(Plan* plan, PlannerInfo* root)
{
bool left_found = false;
bool right_found = false;
/* can sub-tree pushdown ? */
if (plan->lefttree)
left_found = walk_plan(plan->lefttree, root);
if (plan->righttree)
right_found = walk_plan(plan->righttree, root);
/* leaf node */
if (plan->lefttree == NULL && plan->righttree == NULL) {
/* T_VecForeignScan, T_ForeignScan */
if (is_pushdown_node(plan))
return true;
return false;
}
/*
* intermediate node
*
* left-hand tree
*/
if (left_found && plan->righttree == NULL && is_pushdown_node(plan)) {
return true;
}
/* find right position and insert gather node */
if (left_found && estimate_acceleration_cost(plan->lefttree)) {
plan->lefttree = insert_gather_node(plan->lefttree, root);
u_sess->opt_cxt.has_obsrel = true;
u_sess->opt_cxt.disable_dop_change = true;
}
if (right_found && estimate_acceleration_cost(plan->righttree)) {
plan->righttree = insert_gather_node(plan->righttree, root);
u_sess->opt_cxt.has_obsrel = true;
u_sess->opt_cxt.disable_dop_change = true;
}
return false;
}
/*
* @Description: return true if HDFS/OBS foreign scan node found.
*
* @param[IN] plantree : the root of the plan tree
* @param[IN] glob : for subplan
* @return: true if HDFS/OBS foreign scan node found
*/
static bool has_dfs_node(Plan* plantree, PlannerGlobal* glob)
{
bool has_obsrel = false;
/* test guc option: u_sess->attr.attr_sql.acceleration_with_compute_pool */
if (!u_sess->attr.attr_sql.acceleration_with_compute_pool) {
return false;
}
if (!IS_PGXC_COORDINATOR || IsInitdb || u_sess->analyze_cxt.is_under_analyze) {
return false;
}
ereport(DEBUG5, (errmodule(MOD_ACCELERATE), errmsg("in %s", __FUNCTION__)));
has_obsrel = dfs_node_exists(plantree);
if (has_obsrel) {
return true;
} else {
ListCell* lp = NULL;
foreach (lp, glob->subplans) {
Plan* plan = (Plan*)lfirst(lp);
has_obsrel = dfs_node_exists(plan);
if (has_obsrel) {
return true;
}
}
}
return false;
}
/*
* @Description: return true if the prechecks pass.
*
* @return: true if the prechecks pass
*/
static bool precheck_before_accelerate()
{
ereport(DEBUG5, (errmodule(MOD_ACCELERATE), errmsg("in %s", __FUNCTION__)));
char* version = NULL;
bool available = false;
MemoryContext current_ctx;
PGXCNodeAllHandles* handles = NULL;
available = true;
current_ctx = CurrentMemoryContext;
PG_TRY();
{
handles = connect_compute_pool(u_sess->opt_cxt.srvtype);
get_cp_runtime_info(handles->datanode_handles[0]);
if (u_sess->wlm_cxt->cp_runtime_info == NULL) {
ereport(ERROR,
(errmodule(MOD_ACCELERATE),
errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("Failed to get the runtime info from the compute pool.")));
}
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("cp_runtime_info->dnnum : %d", u_sess->wlm_cxt->cp_runtime_info->dnnum)));
if (u_sess->opt_cxt.srvtype == T_OBS_SERVER) {
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("cp_runtime_info->freerp : %d", u_sess->wlm_cxt->cp_runtime_info->freerp)));
} else if (u_sess->opt_cxt.srvtype == T_HDFS_SERVER) {
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("cp active statements : %d", u_sess->wlm_cxt->cp_runtime_info->freerp)));
}
if (u_sess->wlm_cxt->cp_runtime_info->version)
ereport(DEBUG1,
(errmodule(MOD_ACCELERATE),
errmsg("cp_runtime_info->version: %s", u_sess->wlm_cxt->cp_runtime_info->version)));
if (!check_version_compatibility(u_sess->wlm_cxt->cp_runtime_info->version)) {
ereport(ERROR,
(errmodule(MOD_ACCELERATE),
errcode(ERRCODE_OPTIMIZER_INCONSISTENT_STATE),
errmsg("version is not compatible between local cluster and the compute pool")));
}
}
PG_CATCH();
{
/*
* the compute pool is unavailable, so reset the memory context and clear
* the error stack.
*/
MemoryContextSwitchTo(current_ctx);
/* Save error info */
ErrorData* edata = CopyErrorData();
ereport(WARNING,
(errmodule(MOD_ACCELERATE),
errmsg("The compute pool is unavailable temporarily "
"when acceleration_with_compute_pool is on!\nreason: %s",
edata->message)));
FlushErrorState();
FreeErrorData(edata);
available = false;
}
PG_END_TRY();
if (version != NULL)
pfree_ext(version);
release_conn_to_compute_pool();
if (available == false) {
return false;
}
return true;
}
/*
* @Description: Try to accelerate the plan by pushing scan/agg nodes down to
* the compute pool; this applies only to HDFS/OBS foreign tables.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
* @return: Plan*: the accelerated plan (with 2 gather nodes inserted), or the plan unchanged
*/
static Plan* try_accelerate_plan(Plan* plan, PlannerInfo* root, PlannerGlobal* glob)
{
/* test guc option: u_sess->attr.attr_sql.acceleration_with_compute_pool */
if (!u_sess->attr.attr_sql.acceleration_with_compute_pool) {
return plan;
}
if (!IS_PGXC_COORDINATOR || IsInitdb || u_sess->analyze_cxt.is_under_analyze) {
return plan;
}
if (is_feature_disabled(EXPRESS_CLUSTER) == true) {
ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Express Cluster is not supported.")));
return plan;
}
ereport(DEBUG5, (errmodule(MOD_ACCELERATE), errmsg("in %s", __FUNCTION__)));
u_sess->opt_cxt.has_obsrel = false;
/* some precheck before do real job. */
if (false == precheck_before_accelerate()) {
return plan;
}
/* try to modify plan to add "PLAN ROUTER" and "SCAN GATHER" node */
(void)walk_plan(plan, root);
List* new_subplans = NIL;
ListCell* lc1 = NULL;
ListCell* lc2 = NULL;
forboth(lc1, glob->subplans, lc2, glob->subroots)
{
Plan* aplan = (Plan*)lfirst(lc1);
PlannerInfo* aroot = (PlannerInfo*)lfirst(lc2);
(void)walk_plan(aplan, aroot);
new_subplans = lappend(new_subplans, aplan);
}
glob->subplans = new_subplans;
return plan;
}
bool enable_check_implicit_cast()
{
if (u_sess->attr.attr_common.check_implicit_conversions_for_indexcol &&
!u_sess->attr.attr_sql.enable_fast_query_shipping && IS_PGXC_COORDINATOR && !IsConnFromCoord())
return true;
return false;
}
static void find_implicit_cast_var(Query* query)
{
g_index_vars = NIL;
ImplicitCastVarContext* ctx = (ImplicitCastVarContext*)palloc0(sizeof(ImplicitCastVarContext));
(void)implicit_cast_var_walker((Node*)query, (void*)ctx);
g_index_vars = ctx->vars;
}
static bool implicit_cast_var_walker(Node* node, void* context)
{
if (node == NULL)
return false;
ImplicitCastVarContext* ctx = (ImplicitCastVarContext*)context;
if (IsA(node, Query)) {
Query* query = (Query*)node;
ctx->queries = lappend(ctx->queries, query);
bool result = query_tree_walker(query, (bool (*)())implicit_cast_var_walker, context, 0);
ctx->queries = list_delete_ptr(ctx->queries, query);
return result;
} else if (IsA(node, FuncExpr)) {
FuncExpr* func = (FuncExpr*)node;
if (func->funcformat == COERCE_IMPLICIT_CAST && list_length(func->args) == 1) {
Node* arg = (Node*)linitial(func->args);
save_implicit_cast_var(arg, context);
return false;
}
} else
return expression_tree_walker(node, (bool (*)())implicit_cast_var_walker, (void*)context);
return false;
}
static void save_implicit_cast_var(Node* node, void* context)
{
if (!IsA(node, Var))
return;
Var* var = (Var*)node;
if (var->varlevelsup != 0 || var->varattno < 1)
return;
ImplicitCastVarContext* ctx = (ImplicitCastVarContext*)context;
Query* query = (Query*)llast(ctx->queries);
RangeTblEntry* rtable = (RangeTblEntry*)list_nth(query->rtable, var->varno - 1);
if (rtable == NULL)
return;
if (rtable->rtekind != RTE_RELATION)
return;
Relation rel = heap_open(rtable->relid, AccessShareLock);
IndexVar* new_node = makeNode(IndexVar);
new_node->relid = rtable->relid;
new_node->attno = var->varattno;
new_node->indexcol = false;
new_node->relname = pstrdup(rtable->relname);
new_node->attname = pstrdup(rel->rd_att->attrs[var->varattno - 1]->attname.data);
heap_close(rel, AccessShareLock);
ctx->vars = lappend(ctx->vars, new_node);
}
/*
 * Notify the user to check the plan for a potential performance problem when
 * no index path is created for an index column that requires a type
 * conversion.
 */
static void check_index_column()
{
bool found = false;
ListCell* lc = NULL;
StringInfo si;
si = makeStringInfo();
foreach (lc, g_index_vars) {
IndexVar* var = (IndexVar*)lfirst(lc);
if (var == NULL || !var->indexcol)
continue;
if (!var->indexpath) {
found = true;
appendStringInfo(si, "\"%s\".\"%s\", ", var->relname, var->attname);
}
}
g_index_vars = NIL;
if (found) {
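        /* Truncate the trailing ", " left by the last append above. */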
si->data[si->len - 2] = '\0';
ereport(ERROR,
(errcode(ERRCODE_WARNING),
errmsg("There is no optional index path for index column: %s.\n"
"Please check for potential performance problem.",
si->data)));
}
}
#ifdef ENABLE_MULTIPLE_NODES
/*
* @Description: find (vec)agg -> (vec)foreignscan structure.
*
* @param[IN] plan : current plan node
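 *
 * The matched shape is:
 *     (Vec)Agg                -- no subplan, vartype, or user-defined function
 *       -> (Vec)ForeignScan   -- gc_fdw type, no local qual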
*/
static bool find_right_agg(Plan* plan)
{
/* check plan structure, just for agg -> foreignscan */
if (plan && (IsA(plan, Agg) || IsA(plan, VecAgg))) {
ereport(DEBUG1, (errmodule(MOD_COOP_ANALYZE), errmsg("Agg node found")));
        /* Is there any subplan, vartype, or user-defined function in the agg node? */
if (contains_pushdown_constraint(plan))
return false;
} else
return false;
if (plan->lefttree && (IsA(plan->lefttree, ForeignScan) || IsA(plan->lefttree, VecForeignScan))) {
ereport(DEBUG1, (errmodule(MOD_COOP_ANALYZE), errmsg("ForeignScan node found")));
} else
return false;
/* check fdw type */
ForeignScan* fscan = (ForeignScan*)plan->lefttree;
if (IsSpecifiedFDWFromRelid(fscan->scan_relid, GC_FDW)) {
ereport(DEBUG1, (errmodule(MOD_COOP_ANALYZE), errmsg("ForeignScan node is gc_fdw type")));
} else
return false;
    /* Is there any subplan, vartype, or user-defined function in the foreignscan node? */
if (contains_pushdown_constraint((Plan*)fscan))
return false;
/* if there is local qual in foreignscan node, don't pushdown agg to remote server. */
if (plan->lefttree->qual)
return false;
return true;
}
/*
* @Description: traverse the plan to deparse agg node.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
*/
static bool walk_plan_for_coop_analyze(Plan* plan, PlannerInfo* root)
{
Query* query = root->parse;
    /*
     * These optimizations do not work in the presence of window functions,
     * because of the target list adjustments. The targetlist set for the
     * passed-in Group/Agg plan nodes contains window functions if any, but
     * gets changed while planning for windowing. So, for now, stay away :)
     */
if (query->hasWindowFuncs)
return false;
    /*
     * For AP functions (grouping sets), all aggregation operators must be
     * computed on the coordinator when the plan cannot be pushed down,
     * because group-by has to be evaluated more than once and the different
     * groups must be separated by GROUPINGID.
     */
if (query->groupingSets)
return false;
check_stack_depth();
if (IsA(plan, Append) || IsA(plan, VecAppend) || IsA(plan, ModifyTable) || IsA(plan, VecModifyTable) ||
IsA(plan, SubqueryScan) || IsA(plan, VecSubqueryScan) || IsA(plan, MergeAppend) || IsA(plan, VecMergeAppend)) {
walk_set_plan_for_coop_analyze(plan, root);
return false;
} else {
return walk_normal_plan_for_coop_analyze(plan, root);
}
}
/*
* @Description: traverse the subplans of the append and modifytable node.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
*/
static void walk_set_plan_for_coop_analyze(Plan* plan, PlannerInfo* root)
{
RelOptInfo* rel = NULL;
List* plans = NIL;
if (IsA(plan, Append) || IsA(plan, VecAppend)) {
plans = ((Append*)plan)->appendplans;
}
if (IsA(plan, ModifyTable) || IsA(plan, VecModifyTable)) {
plans = ((ModifyTable*)plan)->plans;
}
if (IsA(plan, SubqueryScan) || IsA(plan, VecSubqueryScan)) {
if (((SubqueryScan*)plan)->subplan == NULL)
return;
plans = lappend(plans, ((SubqueryScan*)plan)->subplan);
/* Need to look up the subquery's RelOptInfo, since we need its subroot */
int relid = ((SubqueryScan*)plan)->scan.scanrelid;
AssertEreport(relid > 0, MOD_OPT, "invalid oid of scan relation.");
if (relid < root->simple_rel_array_size) {
rel = root->simple_rel_array[relid];
if (rel != NULL && rel->subroot && rel->reloptkind == RELOPT_BASEREL && rel->rtekind == RTE_SUBQUERY) {
root = rel->subroot;
} else
return;
} else
return;
}
if (IsA(plan, MergeAppend) || IsA(plan, VecMergeAppend)) {
plans = ((MergeAppend*)plan)->mergeplans;
}
if (plans == NIL) {
return;
}
ListCell* lc = NULL;
List* new_plans = NIL;
foreach (lc, plans) {
Plan* child = (Plan*)lfirst(lc);
if (walk_plan_for_coop_analyze(child, root) && find_right_agg(child)) {
child = deparse_agg_node(child, root);
}
new_plans = lappend(new_plans, child);
}
if (IsA(plan, Append) || IsA(plan, VecAppend)) {
((Append*)plan)->appendplans = new_plans;
}
if (IsA(plan, ModifyTable) || IsA(plan, VecModifyTable)) {
((ModifyTable*)plan)->plans = new_plans;
}
if (IsA(plan, SubqueryScan) || IsA(plan, VecSubqueryScan)) {
((SubqueryScan*)plan)->subplan = (Plan*)linitial(new_plans);
rel->subplan = ((SubqueryScan*)plan)->subplan;
}
if (IsA(plan, MergeAppend) || IsA(plan, VecMergeAppend)) {
((MergeAppend*)plan)->mergeplans = new_plans;
}
}
/*
* @Description: traverse the lefttree and righttree to find (vec)agg -> (vec)foreignscan
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
 * @return: case 1: true if plan is a leaf node of type (Vec)Agg or
 *          (Vec)ForeignScan;
 *          case 2: true if plan has only a left-hand subtree whose walk
 *          returned true and plan itself is a (Vec)Agg or (Vec)ForeignScan;
 *          false for all other cases.
*/
static bool walk_normal_plan_for_coop_analyze(Plan* plan, PlannerInfo* root)
{
bool left_found = false;
bool right_found = false;
/* can sub-tree pushdown ? */
if (plan->lefttree)
left_found = walk_plan_for_coop_analyze(plan->lefttree, root);
if (plan->righttree)
right_found = walk_plan_for_coop_analyze(plan->righttree, root);
/* leaf node */
if (plan->lefttree == NULL && plan->righttree == NULL) {
if (IsA(plan, Agg) || IsA(plan, ForeignScan) || IsA(plan, VecAgg) || IsA(plan, VecForeignScan))
return true;
return false;
}
/*
* intermediate node
*
* left-hand tree
*/
if (left_found && plan->righttree == NULL &&
(IsA(plan, Agg) || IsA(plan, ForeignScan) || IsA(plan, VecAgg) || IsA(plan, VecForeignScan))) {
return true;
}
/* find right position and deparse agg node */
if (left_found && find_right_agg(plan->lefttree)) {
plan->lefttree = deparse_agg_node(plan->lefttree, root);
}
if (right_found && find_right_agg(plan->righttree)) {
plan->righttree = deparse_agg_node(plan->righttree, root);
}
return false;
}
/*
 * Return true if a GC_FDW foreign (or stream) relation exists in the range table.
 */
static bool has_pgfdw_rel(PlannerInfo* root)
{
for (int i = 1; i < root->simple_rel_array_size; i++) {
RangeTblEntry* rte = root->simple_rte_array[i];
if (rte->rtekind == RTE_RELATION && (rte->relkind == RELKIND_FOREIGN_TABLE || rte->relkind == RELKIND_STREAM)
&& IsSpecifiedFDWFromRelid(rte->relid, GC_FDW)) {
return true;
}
}
return false;
}
#endif
/*
* @Description: just for cooperation analysis on client cluster,
* try to deparse agg node to remote sql in ForeignScan node.
*
 * @param[IN] plan : current plan node
 * @param[IN] root : PlannerInfo*
 * @return: Plan*: the plan with agg functions deparsed into the remote SQL of
 *          ForeignScan nodes where possible, otherwise unchanged
*/
static Plan* try_deparse_agg(Plan* plan, PlannerInfo* root, PlannerGlobal* glob)
#ifndef ENABLE_MULTIPLE_NODES
{
return plan;
}
#else
{
if (IS_PGXC_DATANODE || !u_sess->attr.attr_sql.enable_agg_pushdown_for_cooperation_analysis)
return plan;
MemoryContext current_ctx = CurrentMemoryContext;
    /* walk the plan tree to deparse agg nodes into the remote SQL of foreign scans. */
PG_TRY();
{
        if (has_pgfdw_rel(root)) {
            /* try to modify the plan to convert the agg node to SQL */
(void)walk_plan_for_coop_analyze(plan, root);
}
}
PG_CATCH();
{
        /* reset the memory context and clear the error stack. */
MemoryContextSwitchTo(current_ctx);
/* Save error info */
ErrorData* edata = CopyErrorData();
StringInfo warning = makeStringInfo();
appendStringInfo(warning, "Failed to deparse agg node. cause: %s", edata->message);
glob->hint_warning = lappend(glob->hint_warning, makeString(warning->data));
FlushErrorState();
FreeErrorData(edata);
}
PG_END_TRY();
    /* walk all sub-plan trees to deparse agg nodes into the remote SQL of foreign scans. */
ListCell* lc1 = NULL;
ListCell* lc2 = NULL;
forboth(lc1, glob->subplans, lc2, glob->subroots)
{
Plan* aplan = (Plan*)lfirst(lc1);
PlannerInfo* aroot = (PlannerInfo*)lfirst(lc2);
        if (!has_pgfdw_rel(aroot))
continue;
current_ctx = CurrentMemoryContext;
PG_TRY();
{
(void)walk_plan_for_coop_analyze(aplan, aroot);
}
PG_CATCH();
{
            /* reset the memory context and clear the error stack. */
MemoryContextSwitchTo(current_ctx);
/* Save error info */
ErrorData* edata = CopyErrorData();
StringInfo warning = makeStringInfo();
appendStringInfo(warning, "Failed to deparse agg node. cause: %s", edata->message);
glob->hint_warning = lappend(glob->hint_warning, makeString(warning->data));
FlushErrorState();
FreeErrorData(edata);
}
PG_END_TRY();
}
return plan;
}
#endif
/*
 * @Description: find the RemoteQuery node in a normal (left/right tree) plan
 * and modify the nodeList in its exec_nodes. Just for cooperation analysis on
 * the source data cluster: reassign the DN list scanned by the RemoteQuery
 * node for the request from the client cluster.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
*/
static void find_remotequery_in_normal_plan(Plan* plan, PlannerInfo* root)
{
if (IsA(plan, RemoteQuery) || IsA(plan, VecRemoteQuery)) {
/*
         * currently, the SQL from the client cluster includes just ONE relation,
         * so root->simple_rte_array[1] is always valid.
*/
RangeTblEntry* rte = root->simple_rte_array[1];
RemoteQuery* rq = (RemoteQuery*)plan;
rq->exec_nodes->nodeList = reassign_nodelist(rte, rq->exec_nodes->nodeList);
return;
}
if (plan->lefttree)
find_remotequery(plan->lefttree, root);
if (plan->righttree)
find_remotequery(plan->righttree, root);
}
/*
 * @Description: find the RemoteQuery nodes under multi-child plan nodes
 * (Append/ModifyTable/SubqueryScan/MergeAppend) and modify the nodeList in
 * their exec_nodes. Just for cooperation analysis on the source data cluster:
 * reassign the DN list scanned by the RemoteQuery node for the request from
 * the client cluster.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
*/
static void find_remotequery_in_set_plan(Plan* plan, PlannerInfo* root)
{
RelOptInfo* rel = NULL;
List* plans = NIL;
if (IsA(plan, Append)) {
plans = ((Append*)plan)->appendplans;
}
if (IsA(plan, ModifyTable)) {
plans = ((ModifyTable*)plan)->plans;
}
if (IsA(plan, SubqueryScan)) {
if (((SubqueryScan*)plan)->subplan == NULL)
return;
plans = lappend(plans, ((SubqueryScan*)plan)->subplan);
/* Need to look up the subquery's RelOptInfo, since we need its subroot */
rel = find_base_rel(root, ((SubqueryScan*)plan)->scan.scanrelid);
/* special case for inlist join */
if (rel->subroot == NULL) {
            /* rel->alternatives always has exactly one element. */
            AssertEreport(rel->alternatives, MOD_OPT, "invalid alternative relation.");
AssertEreport(list_length(rel->alternatives) == 1, MOD_OPT, "invalid length of alternative relation list.");
RelOptInfo* subquery_rel = (RelOptInfo*)linitial(rel->alternatives);
AssertEreport(subquery_rel->subroot, MOD_OPT, "invalid PlannerInfo object for subquery relation.");
root = subquery_rel->subroot;
} else
root = rel->subroot;
}
if (IsA(plan, MergeAppend)) {
plans = ((MergeAppend*)plan)->mergeplans;
}
if (plans == NIL) {
return;
}
ListCell* lc = NULL;
foreach (lc, plans) {
Plan* child = (Plan*)lfirst(lc);
find_remotequery(child, root);
}
}
/*
 * @Description: entry point: find the RemoteQuery node in the plan tree and
 * modify the nodeList in its exec_nodes. Just for cooperation analysis on the
 * source data cluster: reassign the DN list scanned by the RemoteQuery node
 * for the request from the client cluster.
*
* @param[IN] plan : current plan node
* @param[IN] root : PlannerInfo*
*/
static void find_remotequery(Plan* plan, PlannerInfo* root)
{
check_stack_depth();
    /* gc_fdw_max_idx > 0 means that the request comes from the client cluster */
if (IS_PGXC_COORDINATOR && u_sess->pgxc_cxt.is_gc_fdw && u_sess->pgxc_cxt.gc_fdw_max_idx > 0) {
if (IsA(plan, Append) || IsA(plan, ModifyTable) || IsA(plan, SubqueryScan) || IsA(plan, MergeAppend)) {
find_remotequery_in_set_plan(plan, root);
} else {
find_remotequery_in_normal_plan(plan, root);
}
}
}
/*
 * Check whether the planned statement contains a RecursiveUnion subplan
 * (only meaningful when enable_stream_recursive is on).
 */
bool ContainRecursiveUnionSubplan(PlannedStmt* pstmt)
{
bool with_recursive = false;
ListCell* lc = NULL;
    Assert(pstmt != NULL);
    List* subplans = pstmt->subplans;
if (!u_sess->attr.attr_sql.enable_stream_recursive || subplans == NIL) {
return false;
}
foreach (lc, subplans) {
Plan* plan = (Plan*)lfirst(lc);
if (plan && IsA(plan, RecursiveUnion)) {
with_recursive = true;
break;
}
}
return with_recursive;
}
/*
 * @Description: get all RemoteQuery nodes and fill them into u_sess->exec_cxt.remotequery_list.
* @in stmt - PlannedStmt information
* @in queryString - executed statement or NULL when simple query
* @out - void
*/
void GetRemoteQuery(PlannedStmt* stmt, const char* queryString)
{
FindRQContext context;
context.rqs = NIL;
context.include_all_plans = false;
context.has_modify_table = false;
context.under_mergeinto = false;
context.elevel = 0;
ListCell* lc = NULL;
if (stmt == NULL)
return;
GetRemoteQueryWalker(stmt->planTree, &context, queryString);
foreach (lc, stmt->subplans) {
Plan* plan = (Plan*)lfirst(lc);
GetRemoteQueryWalker(plan, &context, queryString);
}
u_sess->exec_cxt.remotequery_list = context.rqs;
context.rqs = NIL;
}
/*
 * @Description: walker to find RemoteQuery plan nodes.
* @in plan - Plan information
* @in context - FindRQContext
* @in queryString - execute sql statement in pbe.
* @out - void
*/
void GetRemoteQueryWalker(Plan* plan, void* context, const char* queryString)
{
if (plan == NULL)
return;
if (IsA(plan, RemoteQuery) || IsA(plan, VecRemoteQuery)) {
RemoteQuery* rq = (RemoteQuery*)plan;
if (rq->position == PLAN_ROUTER || rq->position == SCAN_GATHER)
return;
        /* is_simple being true means the query is not pushed down */
if (rq->is_simple)
return;
/* remember queryString in sql_statement */
if (queryString != NULL) {
rq->execute_statement = (char*)queryString;
} else {
rq->execute_statement = rq->sql_statement;
}
FindRQContext* ctx = (FindRQContext*)context;
ctx->rqs = lappend(ctx->rqs, plan);
return;
}
PlanTreeWalker(plan, GetRemoteQueryWalker, context, queryString);
}
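/* Merge the node list of an ExecNodes into the DN list accumulated in the context. */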
static void
collect_exec_nodes(ExecNodes *exec_nodes, void *context)
{
Assert(context != NULL);
if (exec_nodes == NULL) {
return;
}
FindNodesContext *ctx = (FindNodesContext *)context;
ctx->nodeList = list_concat_unique_int(ctx->nodeList, exec_nodes->nodeList);
}
/* Process a single plan node: record its exec nodes, count RemoteQuery nodes, and flag writes. */
static void gtm_process_top_node(Plan *plan, void *context)
{
FindNodesContext *ctx = (FindNodesContext*)context;
if (IsA(plan, RemoteQuery) || IsA(plan, VecRemoteQuery)) {
RemoteQuery *rq = (RemoteQuery*)plan;
/* Refer to ng_get_dest_execnodes */
if (rq->exec_nodes != NULL) {
collect_exec_nodes(rq->exec_nodes, context);
} else {
collect_exec_nodes(plan->exec_nodes, context);
}
if (rq->position != PLAN_ROUTER && rq->position != SCAN_GATHER) {
ctx->remote_query_count++;
}
    } else if (IsA(plan, ModifyTable) || IsA(plan, VecModifyTable)) {
        ctx->has_modify_table = true;
} else if (IsA(plan, Stream) || IsA(plan, VecStream)) {
        Stream *st = (Stream*)plan;
collect_exec_nodes(st->scan.plan.exec_nodes, context);
collect_exec_nodes(st->consumer_nodes, context);
}
}
/*
* ONLY used in GTM-Free mode.
*
* This function will iterate the plan tree and find:
* 1) How many RemoteQuery/VecRemoteQuery in this plan
* 2) The DNs on which the query will execute.
* 3) Will this query write to the database?
*/
static void
gtm_free_rqs_nodes_walker(Plan *plan, void *context)
{
ListCell *lc = NULL;
List *children_nodes = NIL;
Assert(context != NULL);
if (plan == NULL) {
return;
}
gtm_process_top_node(plan, context);
/* Find the children node and call gtm_free_rqs_nodes_walker recursively. */
if (IsA(plan, Append) || IsA(plan, VecAppend)) {
children_nodes = ((Append*)plan)->appendplans;
} else if (IsA(plan, ModifyTable) || IsA(plan, VecModifyTable)) {
        /* list_concat would destroy the plan tree, so use lappend */
foreach(lc, ((ModifyTable*)plan)->plans) {
children_nodes = lappend(children_nodes, lfirst(lc));
}
foreach(lc, ((ModifyTable*)plan)->remote_plans) {
children_nodes = lappend(children_nodes, lfirst(lc));
}
foreach(lc, ((ModifyTable*)plan)->remote_insert_plans) {
children_nodes = lappend(children_nodes, lfirst(lc));
}
foreach(lc, ((ModifyTable*)plan)->remote_update_plans) {
children_nodes = lappend(children_nodes, lfirst(lc));
}
foreach(lc, ((ModifyTable*)plan)->remote_delete_plans) {
children_nodes = lappend(children_nodes, lfirst(lc));
}
} else if (IsA(plan, MergeAppend) || IsA(plan, VecMergeAppend)) {
children_nodes = ((MergeAppend*)plan)->mergeplans;
} else if (IsA(plan, SubqueryScan) || IsA(plan, VecSubqueryScan)) {
children_nodes = lappend(children_nodes, ((SubqueryScan*)plan)->subplan);
} else {
if (plan->lefttree) {
children_nodes = lappend(children_nodes, plan->lefttree);
}
if (plan->righttree) {
children_nodes = lappend(children_nodes, plan->righttree);
}
}
foreach(lc, children_nodes) {
Plan *child = (Plan*)lfirst(lc);
gtm_free_rqs_nodes_walker(child, context);
}
}
/*
 * @Description: generic plan tree walker: apply the callback to each child plan.
* @in plan - Plan information
* @in walker - function pointer
* @in context - FindRQContext
* @in queryString - execute sql statement in pbe.
* @out - void
*/
void PlanTreeWalker(Plan* plan, void (*walker)(Plan*, void*, const char*), void* context, const char* queryString)
{
ListCell* lc = NULL;
List* plans = NIL;
if (plan == NULL)
return;
if (IsA(plan, Append) || IsA(plan, VecAppend)) {
plans = ((Append*)plan)->appendplans;
} else if (IsA(plan, ModifyTable) || IsA(plan, VecModifyTable)) {
plans = ((ModifyTable*)plan)->plans;
} else if (IsA(plan, MergeAppend) || IsA(plan, VecMergeAppend)) {
plans = ((MergeAppend*)plan)->mergeplans;
} else if (IsA(plan, SubqueryScan) || IsA(plan, VecSubqueryScan)) {
plans = lappend(plans, ((SubqueryScan*)plan)->subplan);
} else {
if (plan->lefttree)
plans = lappend(plans, plan->lefttree);
if (plan->righttree)
plans = lappend(plans, plan->righttree);
}
if (plans == NIL)
return;
foreach (lc, plans) {
Plan* child = (Plan*)lfirst(lc);
walker(child, context, queryString);
}
}
/*
* Fill FindNodesContext, see FindNodesContext for more information.
*
* ONLY used in GTM-Free mode.
*/
static void
collect_query_info(PlannedStmt *stmt, FindNodesContext *context)
{
Assert(stmt != NULL);
Assert(context != NULL);
context->remote_query_count = 0;
context->has_modify_table = false;
context->nodeList = NIL;
gtm_free_rqs_nodes_walker(stmt->planTree, context);
ListCell* lc = NULL;
foreach(lc, stmt->subplans) {
Plan *plan = (Plan*)lfirst(lc);
gtm_free_rqs_nodes_walker(plan, context);
}
}
/*
 * Report an error if 1) the query needs to be split into multiple queries and
 * 2) the query needs to write to the database.
 * See FindNodesContext for more information.
*
* ONLY used in GTM-Free mode.
*/
static void
block_write_when_split_queries(PlannedStmt *stmt, FindNodesContext *context, int elevel)
{
if (context->remote_query_count > 1 && context->has_modify_table) {
ereport(elevel,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("INSERT/UPDATE/DELETE/MERGE contains multiple remote queries under GTM-free mode"),
errhint("modify your SQL to generate light-proxy or fast-query-shipping plan")));
}
}
/*
* ONLY used in GTM-Free mode.
*
* If the query needs more than one DN to be involved in:
*
* When multinode hint is used:
* allow the query to be executed, no warning/error
* When multinode hint is not used:
* if application_type is:
* not_perfect_sharding_type(Default): allow the query to be executed, no warning/error
* perfect_sharding_type: report ERROR
*/
static void
block_query_need_multi_nodes(PlannedStmt *stmt, FindNodesContext *context, int elevel)
{
if (u_sess->attr.attr_sql.application_type != PERFECT_SHARDING_TYPE) {
return;
}
if (stmt->multi_node_hint) {
return;
}
if (list_length(context->nodeList) > 1) {
const char *sql = t_thrd.postgres_cxt.debug_query_string;
if (sql == NULL) {
sql = "No sql in this query, please check other warnings and ignore this one.";
}
ereport(elevel,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Your SQL needs more than one datanode to be involved in. SQL Statement:\n%s", sql),
errhint("(1) use hint /*+ multinode */\n"
" (2) make your SQL be perfect sharding."
"\nChoose either of the above options is OK.")));
}
}
/*
* ONLY used in GTM-Free mode.
*
 * In GTM-Free mode, we should apply some restrictions to the query.
 * check_gtm_free_plan is the ENTRY point that checks whether the query
 * satisfies these restrictions.
*/
void check_gtm_free_plan(PlannedStmt *stmt, int elevel)
{
if (stmt == NULL)
return;
/* only check under gtm free */
if (!g_instance.attr.attr_storage.enable_gtm_free)
return;
if (u_sess->attr.attr_sql.enable_cluster_resize)
return;
/* no need to check on DN for now */
if (IS_PGXC_DATANODE || IS_SINGLE_NODE)
return;
FindNodesContext context;
collect_query_info(stmt, &context);
block_write_when_split_queries(stmt, &context, elevel);
block_query_need_multi_nodes(stmt, &context, elevel);
}
/*
 * @Description: plan walker to find a Stream (e.g. BROADCAST) below MERGE INTO.
* @in node - Plan information
* @in context - FindRQContext
* @in queryString - execute sql statement in pbe.
* @out - void
*/
static void check_plan_mergeinto_replicate_walker(Plan* node, void* context, const char *queryString)
{
Assert(context != NULL);
if (node == NULL) {
return;
}
FindRQContext *ctx = (FindRQContext*)context;
if (ctx->under_mergeinto) {
if (IsA(node, Stream) || IsA(node, VecStream)) {
ereport(ctx->elevel,
(errmodule(MOD_OPT),
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("MERGE INTO on replicated table does not yet support followed stream.")));
}
}
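    /* Once a MERGE-INTO ModifyTable has been passed, flag the whole subtree below it. */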
ctx->under_mergeinto = ctx->under_mergeinto ||
(IsA(node, ModifyTable) && ((ModifyTable*)node)->operation == CMD_MERGE) ||
(IsA(node, VecModifyTable) && ((VecModifyTable*)node)->operation == CMD_MERGE);
PlanTreeWalker(node, check_plan_mergeinto_replicate_walker, context, NULL);
}
/**
 * @Description: check the plan tree of a MERGE INTO statement with a
 * replicated target relation. Here we walk through the entire plan tree
 * to find the pattern:
 *     MODIFYTABLE (MERGE INTO)
 *     ...
 *     └─ STREAM
 * where STREAM appears under MERGE INTO if the merge target is replicated.
 * This is forbidden because when join results are streamed to another
 * datanode, the junkfilter ctid becomes a mess. This may be an overkill
 * since the planner only allows all-replicated relations in the source
 * subquery; however, we double-check the plan here for extra safety.
 * This should later be fixed by replacing ctid with a PK or attributes
 * that carry unique and not-null constraints.
* @in stmt - Plan information
* @out - void
*/
void check_plan_mergeinto_replicate(PlannedStmt* stmt, int elevel)
{
if (stmt == NULL || stmt->commandType != CMD_MERGE) {
return;
}
    /* No need to check if none of the targets is replicated */
bool no_replicate = true;
ListCell* lc = NULL;
foreach(lc, stmt->resultRelations) {
Index rti = lfirst_int(lc);
RangeTblEntry* target = rt_fetch(rti, stmt->rtable);
if (GetLocatorType(target->relid) == LOCATOR_TYPE_REPLICATED) {
no_replicate = false;
break;
}
}
if (no_replicate) {
return;
}
FindRQContext context;
context.rqs = NIL;
context.include_all_plans = false;
context.has_modify_table = false;
context.under_mergeinto = false;
context.elevel = elevel;
PlanTreeWalker((Plan*)stmt->planTree, check_plan_mergeinto_replicate_walker, (void*) &context, NULL);
}
/*
* @Description:
* For mergeinto commands with replicated target table,
* we only allow source relation/subquery to be fully
* replicated.
*
* @param[IN] parse: parsed query tree.
*
* @return void
*/
void check_entry_mergeinto_replicate(Query* parse)
{
RangeTblEntry* target = rt_fetch(parse->mergeTarget_relation, parse->rtable);
Assert(target != NULL);
/* only deal with replicated target */
if (GetLocatorType(target->relid) != LOCATOR_TYPE_REPLICATED) {
return;
}
ListCell* lc = NULL;
foreach(lc, parse->rtable) {
RangeTblEntry* rte = (RangeTblEntry*)lfirst(lc);
/* only deal with plain relations */
if (rte->rtekind != RTE_RELATION)
continue;
if (GetLocatorType(rte->relid) != LOCATOR_TYPE_REPLICATED) {
ereport(ERROR,
(errmodule(MOD_OPT),
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("MERGE INTO on replicated table does not yet support using distributed tables.")));
}
}
}
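/*
 * Return true if this (sub)query is planned beneath a statement with an
 * UPSERT clause while its own ORDER BY ... LIMIT does not already pin the
 * row ordering, so the caller can decide whether a Sort must be preserved.
 */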
static bool check_sort_for_upsert(PlannerInfo* root)
{
PlannerInfo* plan_info = root;
Query* parse = plan_info->parse;
    /* ORDER BY LIMIT can guarantee the ordering */
if (parse->limitCount || parse->limitOffset)
return false;
while (plan_info->parent_root != NULL) {
plan_info = plan_info->parent_root;
parse = plan_info->parse;
if (parse != NULL && parse->upsertClause != NULL) {
return true;
}
}
return false;
}
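/*
 * Return the list of child plans of a multi-child plan node (ModifyTable,
 * Append, MergeAppend, BitmapAnd/BitmapOr and their vectorized or
 * column-store variants); NIL for any other node type.
 */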
List* get_plan_list(Plan* plan)
{
List* plan_list = NIL;
switch (nodeTag(plan)) {
case T_ModifyTable:
case T_VecModifyTable: {
ModifyTable* mt = (ModifyTable*)plan;
Assert(mt->plans != NULL);
plan_list = mt->plans;
break;
}
case T_Append:
case T_VecAppend: {
Append* append = (Append*)plan;
Assert(append->appendplans != NULL);
plan_list = append->appendplans;
break;
}
case T_MergeAppend:
case T_VecMergeAppend: {
MergeAppend* append = (MergeAppend*)plan;
Assert(append->mergeplans != NULL);
plan_list = append->mergeplans;
break;
}
case T_BitmapAnd:
case T_BitmapOr:
case T_CStoreIndexAnd:
case T_CStoreIndexOr: {
BitmapAnd* ba = (BitmapAnd*)plan;
Assert(ba->bitmapplans != NULL);
plan_list = ba->bitmapplans;
break;
}
default:
/* do nothing */
break;
}
return plan_list;
}
/*
* @Description: find ForeignScan node in left-hand tree.
*
* @param[IN] plan : the root of the sub plan tree
* @return: Plan*: ForeignScan node found.
*/
Plan* get_foreign_scan(Plan* plan)
{
Assert(plan);
ereport(DEBUG5, (errmodule(MOD_ACCELERATE), errmsg("in %s", __FUNCTION__)));
bool found = false;
while (plan != NULL) {
if (T_ForeignScan == nodeTag(plan) || T_VecForeignScan == nodeTag(plan)) {
found = true;
break;
}
plan = plan->lefttree;
}
if (!found) {
ereport(ERROR,
(errcode(ERRCODE_NO_DATA_FOUND), errmodule(MOD_ACCELERATE), errmsg("Fail to find ForeignScan node!")));
}
return plan;
}