Support parallel joins, and make related improvements.

The core innovation of this patch is the introduction of the concept
of a partial path; that is, a path which if executed in parallel will
generate a subset of the output rows in each process.  Gathering a
partial path produces an ordinary (complete) path.  This allows us to
generate paths for parallel joins by joining a partial path for one
side (which at the baserel level is currently always a Partial Seq
Scan) to an ordinary path on the other side.  This is subject to
various restrictions at present, especially that this strategy seems
unlikely to be sensible for merge joins, so only nested loops and
hash joins paths are generated.

This also allows an Append node to be pushed below a Gather node in
the case of a partitioned table.

Testing revealed that early versions of this patch made poor decisions
in some cases, which turned out to be caused by the fact that the
original cost model for Parallel Seq Scan wasn't very good.  So this
patch tries to make some modest improvements in that area.

There is much more to be done in the area of generating good parallel
plans in all cases, but this seems like a useful step forward.

Patch by me, reviewed by Dilip Kumar and Amit Kapila.
This commit is contained in:
Robert Haas
2016-01-20 14:29:22 -05:00
parent a7de3dc5c3
commit 45be99f8cd
15 changed files with 875 additions and 119 deletions

View File

@ -458,6 +458,7 @@ typedef struct RelOptInfo
List *reltargetlist; /* Vars to be output by scan of relation */
List *pathlist; /* Path structures */
List *ppilist; /* ParamPathInfos used in pathlist */
List *partial_pathlist; /* partial Paths */
struct Path *cheapest_startup_path;
struct Path *cheapest_total_path;
struct Path *cheapest_unique_path;
@ -761,6 +762,8 @@ typedef struct Path
RelOptInfo *parent; /* the relation this path can build */
ParamPathInfo *param_info; /* parameterization info, or NULL if none */
bool parallel_aware; /* engage parallel-aware logic? */
bool parallel_safe; /* OK to use as part of parallel plan? */
int parallel_degree; /* desired parallel degree; 0 = not parallel */
/* estimated size/costs for path (see costsize.c for more info) */
double rows; /* estimated number of result tuples */
@ -1064,7 +1067,6 @@ typedef struct GatherPath
{
Path path;
Path *subpath; /* path for each worker */
int num_workers; /* number of workers sought to help */
bool single_copy; /* path must not be executed >1x */
} GatherPath;

View File

@ -72,7 +72,7 @@ extern double clamp_row_est(double nrows);
extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info, int nworkers);
ParamPathInfo *param_info);
extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info);
extern void cost_index(IndexPath *path, PlannerInfo *root,

View File

@ -29,9 +29,12 @@ extern void add_path(RelOptInfo *parent_rel, Path *new_path);
extern bool add_path_precheck(RelOptInfo *parent_rel,
Cost startup_cost, Cost total_cost,
List *pathkeys, Relids required_outer);
extern void add_partial_path(RelOptInfo *parent_rel, Path *new_path);
extern bool add_partial_path_precheck(RelOptInfo *parent_rel,
Cost total_cost, List *pathkeys);
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer, int nworkers);
Relids required_outer, int parallel_degree);
extern Path *create_samplescan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
extern IndexPath *create_index_path(PlannerInfo *root,
@ -59,19 +62,18 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
List *tidquals, Relids required_outer);
extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths,
Relids required_outer);
Relids required_outer, int parallel_degree);
extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
List *pathkeys,
Relids required_outer);
extern ResultPath *create_result_path(List *quals);
extern ResultPath *create_result_path(RelOptInfo *rel, List *quals);
extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath, SpecialJoinInfo *sjinfo);
extern GatherPath *create_gather_path(PlannerInfo *root,
RelOptInfo *rel, Path *subpath, Relids required_outer,
int nworkers);
RelOptInfo *rel, Path *subpath, Relids required_outer);
extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
List *pathkeys, Relids required_outer);
extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,

View File

@ -50,6 +50,8 @@ extern RelOptInfo *make_one_rel(PlannerInfo *root, List *joinlist);
extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
List *initial_rels);
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
#endif