Files
openGauss-server/src/gausskernel/optimizer/path/pgxcpath_single.cpp
wuchenglin 58e6120553 !2998 SRF执行优化
* fix srf factcheck case
* SRF执行优化
2023-03-06 09:29:04 +00:00

232 lines
8.8 KiB
C++

/* ---------------------------------------------------------------------------------------
*
* pgxcpath_single.cpp
* Routines to find possible remote query paths for various relations and
* their costs.
*
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
* Portions Copyright (c) 2012 Postgres-XC Development Group
*
* IDENTIFICATION
* src/gausskernel/optimizer/path/pgxcpath_single.cpp
*
* ---------------------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/tablecmds.h"
#include "nodes/makefuncs.h"
#include "optimizer/cost.h"
#include "optimizer/paths.h"
#include "optimizer/pathnode.h"
#include "optimizer/pgxcship.h"
#include "optimizer/restrictinfo.h"
#include "parser/parsetree.h"
#include "pgxc/pgxc.h"
#include "optimizer/pgxcplan.h"
/*
 * Silence -Wunused-function for this translation unit. In the code visible in
 * this file, the two static helpers declared below are only called from the
 * ENABLE_MULTIPLE_NODES branches of the exported functions, so a build without
 * that macro leaves them unreferenced; the pragma presumably exists to keep
 * such builds warning-free.
 */
#pragma GCC diagnostic ignored "-Wunused-function"
/* Returns the RemoteQuery path already present in rel's pathlist, or NULL. */
static RemoteQueryPath* pgxc_find_remotequery_path(RelOptInfo* rel);
/* Builds (but does not register) a RemoteQuery path for a base or join rel. */
static RemoteQueryPath* create_remotequery_path(PlannerInfo* root, RelOptInfo* rel, ExecNodes* exec_nodes,
RemoteQueryPath* leftpath, RemoteQueryPath* rightpath, JoinType jointype, List* join_restrictlist);
/*
 * create_remotequery_path
 *    Build a RemoteQueryPath for the given RelOptInfo (a base rel or a join
 *    rel), meaning that the rows of this rel are to be obtained by querying
 *    the datanode/s listed in exec_nodes.
 *
 * Parameters:
 *    root              - planner state; its range table is consulted for base rels
 *    rel               - the relation the path is built for
 *    exec_nodes        - set of datanodes the query should be sent to
 *    leftpath          - RemoteQuery path of the left input (join rels only)
 *    rightpath         - RemoteQuery path of the right input (join rels only)
 *    jointype          - type of the JOIN (join rels only)
 *    join_restrictlist - restrictinfo list of the JOIN (join rels only)
 *
 * For a base relation the join-related arguments carry no information and are
 * simply stored in the node; callers pass NULL for the pointers.
 *
 * Besides filling in the node, the function records
 *    - whether any relation involved is a temporary relation,
 *    - whether the applicable quals are shippable,
 *    - whether the rel's target list is shippable,
 * and finally asks cost_remotequery() to cost the path.
 *
 * Errors are raised when a join rel lacks one of its input paths, when a base
 * rel's range table entry is not a plain relation, or when the rel is of a
 * kind other than base / other-member / join.
 *
 * The path is returned to the caller; it is not added to rel's pathlist here.
 */
static RemoteQueryPath* create_remotequery_path(PlannerInfo* root, RelOptInfo* rel, ExecNodes* exec_nodes,
    RemoteQueryPath* leftpath, RemoteQueryPath* rightpath, JoinType jointype, List* join_restrictlist)
{
    RemoteQueryPath* remote_path = makeNode(RemoteQueryPath);
    const bool is_join = (rel->reloptkind == RELOPT_JOINREL);
    bool quals_unshippable = false;

    /* A join can only be described in terms of both of its inputs. */
    if (is_join && (leftpath == NULL || rightpath == NULL)) {
        elog(ERROR, "a join rel requires both the left path and right path");
    }

    /* Generic Path header. */
    remote_path->path.parent = rel;
    remote_path->path.pathtype = T_RemoteQuery;
    remote_path->path.pathtarget = rel->reltarget;
    remote_path->path.pathkeys = NIL; /* result is always unordered */
    /* PGXC_TODO: do we want to care about it */
    remote_path->path.param_info = NULL;

    /* RemoteQuery specific part: target nodes and join description. */
    remote_path->rqpath_en = exec_nodes;
    remote_path->jointype = jointype;
    remote_path->join_restrictlist = join_restrictlist;
    remote_path->leftpath = leftpath;
    remote_path->rightpath = rightpath;

    if (rel->reloptkind == RELOPT_BASEREL || rel->reloptkind == RELOPT_OTHER_MEMBER_REL) {
        RangeTblEntry* rte = rt_fetch(rel->relid, root->parse->rtable);

        /* Only plain relations can be scanned remotely. */
        if (rte->rtekind != RTE_RELATION) {
            elog(ERROR, "can not create remote path for ranges of type %d", rte->rtekind);
        }

        remote_path->rqhas_temp_rel = IsTempTable(rte->relid);

        /* The quals that matter for a scan are the rel's own restrictions. */
        List* scan_quals = extract_actual_clauses(rel->baserestrictinfo, false);
        quals_unshippable = !pgxc_is_expr_shippable((Expr*)scan_quals, NULL);
    } else if (is_join) {
        /* A join touches a temp relation as soon as either input does. */
        remote_path->rqhas_temp_rel = leftpath->rqhas_temp_rel || rightpath->rqhas_temp_rel;

        /* The quals that matter for a join are its restriction clauses. */
        List* join_quals = extract_actual_clauses(join_restrictlist, false);
        quals_unshippable = !pgxc_is_expr_shippable((Expr*)join_quals, NULL);
    } else {
        elog(ERROR, "can not create remote path for relation of type %d", rel->reloptkind);
    }

    remote_path->rqhas_unshippable_qual = quals_unshippable;
    remote_path->rqhas_unshippable_tlist = !pgxc_is_expr_shippable((Expr*)rel->reltarget->exprs, NULL);

    /* set cost properly */
    cost_remotequery(remote_path, root, rel);

    return remote_path;
}
/*
 * create_plainrel_rqpath
 *    Create a RemoteQuery path for a plain relation residing on datanode/s and
 *    add it to the pathlist of the corresponding RelOptInfo.
 *
 * Parameters:
 *    root - planner state for the query being planned
 *    rel  - the base relation to build the path for
 *    rte  - range table entry of that relation
 *
 * Returns true if a remote query path was created and added, false otherwise.
 * The caller can decide whether to add the ordinary scan paths depending on
 * the return value.
 *
 * When ENABLE_MULTIPLE_NODES is not defined the function does nothing and
 * always returns false.
 */
extern bool create_plainrel_rqpath(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte)
{
#ifndef ENABLE_MULTIPLE_NODES
    return false;
#else
    List* quals = NIL;
    ExecNodes* exec_nodes = NULL;

    /*
     * On the Coordinator we always want to use the remote query path, unless
     * the relation is local to the Coordinator or the query is to be executed
     * entirely on the Coordinator.
     */
    if (!IS_PGXC_COORDINATOR || IsConnFromCoord() || root->parse->is_local) {
        return false;
    }

    /* Work out which nodes have to be contacted, given the rel's quals. */
    quals = extract_actual_clauses(rel->baserestrictinfo, false);
    exec_nodes = GetRelationNodesByQuals(rte->relid, rel->relid, (Node*)quals, RELATION_ACCESS_READ);
    if (exec_nodes == NULL) {
        return false;
    }

    /* Remember the distribution column for value-distributed relations. */
    if (IsExecNodesDistributedByValue(exec_nodes)) {
        Var* dist_var = pgxc_get_dist_var(rel->relid, rte, rel->reltarget->exprs);
        exec_nodes->en_dist_vars = list_make1(dist_var);
    }

    /*
     * We don't have subpaths for a plain base relation, so the join-related
     * arguments carry no information. JOIN_INNER is passed as the placeholder
     * join type (rather than a bare integer literal) so that the argument has
     * the declared enum type, and NIL is used for the empty restriction list.
     */
    add_path(rel, (Path*)create_remotequery_path(root, rel, exec_nodes, NULL, NULL, JOIN_INNER, NIL));
    return true;
#endif
}
/*
 * pgxc_find_remotequery_path
 *    Walk rel's pathlist and return the first entry that is a
 *    RemoteQueryPath node, or NULL when the list holds none.
 *
 * Each rel is expected to carry at most one RemoteQuery path, but that is not
 * verified here: the scan stops at the first match.
 */
static RemoteQueryPath* pgxc_find_remotequery_path(RelOptInfo* rel)
{
    RemoteQueryPath* found = NULL;
    ListCell* lc = NULL;

    foreach (lc, rel->pathlist) {
        Path* candidate = (Path*)lfirst(lc);

        if (!IsA(candidate, RemoteQueryPath)) {
            continue;
        }

        found = (RemoteQueryPath*)candidate;
        break;
    }

    return found;
}
/*
 * create_joinrel_rqpath
 *    If both rels being joined have RemoteQuery paths, check whether the join
 *    itself is shippable to the datanodes and, if so, add a RemoteQuery path
 *    for the JOIN to joinrel's pathlist.
 *
 * Parameters:
 *    root         - planner state for the query being planned
 *    joinrel      - the join relation that receives the new path
 *    outerrel     - outer input relation of the join
 *    innerrel     - inner input relation of the join
 *    restrictlist - restrictinfo list applicable to the join
 *    jointype     - type of the JOIN
 *    sjinfo       - not referenced by this function
 *
 * Nothing is added when remote join optimization is disabled, when either
 * input lacks a RemoteQuery path or carries an unshippable qual, when an
 * outer join has an unshippable join qual, or when the node lists of the two
 * inputs can not be merged.
 *
 * When ENABLE_MULTIPLE_NODES is not defined the function does nothing.
 */
extern void create_joinrel_rqpath(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel,
    List* restrictlist, JoinType jointype, SpecialJoinInfo* sjinfo)
{
#ifndef ENABLE_MULTIPLE_NODES
    return;
#else
    List* join_clauses = NIL;
    List* filter_clauses = NIL;

    /* If GUC does not allow remote join optimization, so be it */
    if (!enable_remotejoin) {
        return;
    }

    RemoteQueryPath* inner_rq = pgxc_find_remotequery_path(innerrel);
    RemoteQueryPath* outer_rq = pgxc_find_remotequery_path(outerrel);

    /*
     * If one of the relations does not have a RemoteQuery path, the join can
     * not be shipped to the datanodes.
     */
    if (inner_rq == NULL || outer_rq == NULL) {
        return;
    }

    /*
     * If one of the relations has an unshippable qual, that qual needs to be
     * evaluated before joining the two relations, hence the JOIN is not
     * shippable.
     * PGXC_TODO: for an INNER join this condition could be relaxed by
     * attaching the unshippable qual to the join itself, shipping the join
     * and evaluating the qual on the join result. Not attempted for now.
     */
    if (inner_rq->rqhas_unshippable_qual || outer_rq->rqhas_unshippable_qual) {
        return;
    }

    ExecNodes* inner_nodes = inner_rq->rqpath_en;
    ExecNodes* outer_nodes = outer_rq->rqpath_en;
    if (inner_nodes == NULL || outer_nodes == NULL) {
        elog(ERROR, "No node list provided for remote query path");
    }

    /*
     * Split the restrictions into JOIN clauses and other clauses so that the
     * shippability of a JOIN between distributed relations can be checked.
     */
    extract_actual_join_clauses(restrictlist, &join_clauses, &filter_clauses);

    if (jointype == JOIN_INNER) {
        /*
         * For INNER JOIN there is no distinction between JOIN and non-JOIN
         * clauses, so let the JOIN reduction algorithm take all of them into
         * consideration when deciding whether the JOIN is reducible.
         */
        join_clauses = list_concat(join_clauses, filter_clauses);
    } else if (!pgxc_is_expr_shippable((Expr*)join_clauses, NULL)) {
        /*
         * An OUTER JOIN whose joining qual is not shippable can not be
         * shipped, since that would impact the JOIN result.
         */
        return;
    }

    /*
     * If the node lists on both sides of the JOIN can be merged, the JOIN is
     * shippable.
     */
    ExecNodes* merged_nodes = pgxc_is_join_shippable(inner_nodes,
        outer_nodes,
        inner_rq->rqhas_unshippable_tlist,
        outer_rq->rqhas_unshippable_tlist,
        jointype,
        (Node*)join_clauses);
    if (merged_nodes == NULL) {
        return;
    }

    /* The outer path is the left input and the inner path the right input. */
    add_path(joinrel,
        (Path*)create_remotequery_path(root, joinrel, merged_nodes, outer_rq, inner_rq, jointype, restrictlist));
#endif
}