mirror of
https://git.postgresql.org/git/postgresql.git
synced 2026-02-12 09:27:04 +08:00
Add TID Range Scans to support efficient scanning ranges of TIDs
This adds a new executor node named TID Range Scan. The query planner will generate paths for TID Range scans when quals are discovered on base relations which search for ranges on the table's ctid column. These ranges may be open at either end. For example, WHERE ctid >= '(10,0)'; will return all tuples on page 10 and over. To support this, two new optional callback functions have been added to table AM. scan_set_tidrange is used to set the scan range to just the given range of TIDs. scan_getnextslot_tidrange fetches the next tuple in the given range. For AMs where scanning ranges of TIDs would not make sense, these functions can be set to NULL in the TableAmRoutine. The query planner won't generate TID Range Scan Paths in that case. Author: Edmund Horner, David Rowley Reviewed-by: David Rowley, Tomas Vondra, Tom Lane, Andres Freund, Zhihong Yu Discussion: https://postgr.es/m/CAMyN-kB-nFTkF=VA_JPwFNo08S0d-Yk0F741S2B7LDmYAi8eyA@mail.gmail.com
This commit is contained in:
@ -121,7 +121,11 @@ extern void heap_endscan(TableScanDesc scan);
|
||||
extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction);
|
||||
extern bool heap_getnextslot(TableScanDesc sscan,
|
||||
ScanDirection direction, struct TupleTableSlot *slot);
|
||||
|
||||
extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
|
||||
ItemPointer maxtid);
|
||||
extern bool heap_getnextslot_tidrange(TableScanDesc sscan,
|
||||
ScanDirection direction,
|
||||
TupleTableSlot *slot);
|
||||
extern bool heap_fetch(Relation relation, Snapshot snapshot,
|
||||
HeapTuple tuple, Buffer *userbuf);
|
||||
extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
|
||||
|
||||
@ -36,6 +36,10 @@ typedef struct TableScanDescData
|
||||
int rs_nkeys; /* number of scan keys */
|
||||
struct ScanKeyData *rs_key; /* array of scan key descriptors */
|
||||
|
||||
/* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */
|
||||
ItemPointerData rs_mintid;
|
||||
ItemPointerData rs_maxtid;
|
||||
|
||||
/*
|
||||
* Information about type and behaviour of the scan, a bitmask of members
|
||||
* of the ScanOptions enum (see tableam.h).
|
||||
|
||||
@ -49,18 +49,19 @@ typedef enum ScanOptions
|
||||
SO_TYPE_BITMAPSCAN = 1 << 1,
|
||||
SO_TYPE_SAMPLESCAN = 1 << 2,
|
||||
SO_TYPE_TIDSCAN = 1 << 3,
|
||||
SO_TYPE_ANALYZE = 1 << 4,
|
||||
SO_TYPE_TIDRANGESCAN = 1 << 4,
|
||||
SO_TYPE_ANALYZE = 1 << 5,
|
||||
|
||||
/* several of SO_ALLOW_* may be specified */
|
||||
/* allow or disallow use of access strategy */
|
||||
SO_ALLOW_STRAT = 1 << 5,
|
||||
SO_ALLOW_STRAT = 1 << 6,
|
||||
/* report location to syncscan logic? */
|
||||
SO_ALLOW_SYNC = 1 << 6,
|
||||
SO_ALLOW_SYNC = 1 << 7,
|
||||
/* verify visibility page-at-a-time? */
|
||||
SO_ALLOW_PAGEMODE = 1 << 7,
|
||||
SO_ALLOW_PAGEMODE = 1 << 8,
|
||||
|
||||
/* unregister snapshot at scan end? */
|
||||
SO_TEMP_SNAPSHOT = 1 << 8
|
||||
SO_TEMP_SNAPSHOT = 1 << 9
|
||||
} ScanOptions;
|
||||
|
||||
/*
|
||||
@ -325,6 +326,34 @@ typedef struct TableAmRoutine
|
||||
ScanDirection direction,
|
||||
TupleTableSlot *slot);
|
||||
|
||||
/*-----------
|
||||
* Optional functions to provide scanning for ranges of ItemPointers.
|
||||
* Implementations must either provide both of these functions, or neither
|
||||
* of them.
|
||||
*
|
||||
* Implementations of scan_set_tidrange must themselves handle
|
||||
* ItemPointers of any value, i.e., they must handle each of the following:
|
||||
*
|
||||
* 1) mintid or maxtid is beyond the end of the table; and
|
||||
* 2) mintid is above maxtid; and
|
||||
* 3) item offset for mintid or maxtid is beyond the maximum offset
|
||||
* allowed by the AM.
|
||||
*
|
||||
* Implementations can assume that scan_set_tidrange is always called
|
||||
* before scan_getnextslot_tidrange or after scan_rescan and before any
|
||||
* further calls to scan_getnextslot_tidrange.
|
||||
*/
|
||||
void (*scan_set_tidrange) (TableScanDesc scan,
|
||||
ItemPointer mintid,
|
||||
ItemPointer maxtid);
|
||||
|
||||
/*
|
||||
* Return next tuple from `scan` that's in the range of TIDs defined by
|
||||
* scan_set_tidrange.
|
||||
*/
|
||||
bool (*scan_getnextslot_tidrange) (TableScanDesc scan,
|
||||
ScanDirection direction,
|
||||
TupleTableSlot *slot);
|
||||
|
||||
/* ------------------------------------------------------------------------
|
||||
* Parallel table scan related functions.
|
||||
@ -1015,6 +1044,64 @@ table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableS
|
||||
return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* TID Range scanning related functions.
|
||||
* ----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* table_beginscan_tidrange is the entry point for setting up a TableScanDesc
|
||||
* for a TID range scan.
|
||||
*
|
||||
* The scan is started on `rel` under `snapshot` through the table AM's
|
||||
* scan_begin callback, and the AM is then told to restrict the scan to the
|
||||
* range given by `mintid` and `maxtid`.  Returns the new scan descriptor.
|
||||
*
|
||||
* Note that scan_set_tidrange is an optional table AM callback, yet it is
|
||||
* invoked here without a NULL check.
|
||||
*/
|
||||
static inline TableScanDesc
|
||||
table_beginscan_tidrange(Relation rel, Snapshot snapshot,
|
||||
ItemPointer mintid,
|
||||
ItemPointer maxtid)
|
||||
{
|
||||
TableScanDesc sscan;
|
||||
uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; /* tag as TID range scan; permit page-at-a-time visibility checks */
|
||||
|
||||
sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags); /* NOTE(review): 0/NULL/NULL presumably mean "no scan keys, not parallel" -- confirm against scan_begin's declaration */
|
||||
|
||||
/* Set the range of TIDs to scan */
|
||||
sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
|
||||
|
||||
return sscan;
|
||||
}
|
||||
|
||||
/*
|
||||
* table_rescan_tidrange resets the scan position and sets the minimum and
|
||||
* maximum TID range to scan for a TableScanDesc created by
|
||||
* table_beginscan_tidrange.
|
||||
*
|
||||
* The AM's scan_rescan callback runs first and scan_set_tidrange second, so
|
||||
* the new `mintid`/`maxtid` range is always installed after the rescan.
|
||||
*/
|
||||
static inline void
|
||||
table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
|
||||
ItemPointer maxtid)
|
||||
{
|
||||
/* Ensure table_beginscan_tidrange() was used. */
|
||||
Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
|
||||
|
||||
sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false); /* NOTE(review): NULL/false arguments presumably mean "no new key, no option changes" -- confirm against scan_rescan's declaration */
|
||||
sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); /* install the new range after the rescan */
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch the next tuple from `sscan` for a TID range scan created by
|
||||
* table_beginscan_tidrange().  Stores the tuple in `slot` and returns true,
|
||||
* or returns false if no more tuples exist in the range.
|
||||
*
|
||||
* This is a thin wrapper: `direction` and `slot` are handed unchanged to the
|
||||
* table AM's scan_getnextslot_tidrange callback, whose result is returned.
|
||||
*/
|
||||
static inline bool
|
||||
table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
|
||||
TupleTableSlot *slot)
|
||||
{
|
||||
/* Ensure table_beginscan_tidrange() was used. */
|
||||
Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
|
||||
|
||||
return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
|
||||
direction,
|
||||
slot);
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Parallel table scan related functions.
|
||||
|
||||
@ -237,15 +237,15 @@
|
||||
oprname => '<', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
|
||||
oprcom => '>(tid,tid)', oprnegate => '>=(tid,tid)', oprcode => 'tidlt',
|
||||
oprrest => 'scalarltsel', oprjoin => 'scalarltjoinsel' },
|
||||
{ oid => '2800', descr => 'greater than',
|
||||
{ oid => '2800', oid_symbol => 'TIDGreaterOperator', descr => 'greater than',
|
||||
oprname => '>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
|
||||
oprcom => '<(tid,tid)', oprnegate => '<=(tid,tid)', oprcode => 'tidgt',
|
||||
oprrest => 'scalargtsel', oprjoin => 'scalargtjoinsel' },
|
||||
{ oid => '2801', descr => 'less than or equal',
|
||||
{ oid => '2801', oid_symbol => 'TIDLessEqOperator', descr => 'less than or equal',
|
||||
oprname => '<=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
|
||||
oprcom => '>=(tid,tid)', oprnegate => '>(tid,tid)', oprcode => 'tidle',
|
||||
oprrest => 'scalarlesel', oprjoin => 'scalarlejoinsel' },
|
||||
{ oid => '2802', descr => 'greater than or equal',
|
||||
{ oid => '2802', oid_symbol => 'TIDGreaterEqOperator', descr => 'greater than or equal',
|
||||
oprname => '>=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
|
||||
oprcom => '<=(tid,tid)', oprnegate => '<(tid,tid)', oprcode => 'tidge',
|
||||
oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' },
|
||||
|
||||
24
src/include/executor/nodeTidrangescan.h
Normal file
24
src/include/executor/nodeTidrangescan.h
Normal file
@ -0,0 +1,24 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* nodeTidrangescan.h
|
||||
*
|
||||
*	  prototypes for nodeTidrangescan.c
|
||||
*
|
||||
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/executor/nodeTidrangescan.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef NODETIDRANGESCAN_H
|
||||
#define NODETIDRANGESCAN_H
|
||||
|
||||
#include "nodes/execnodes.h"
|
||||
|
||||
extern TidRangeScanState *ExecInitTidRangeScan(TidRangeScan *node,
|
||||
EState *estate, int eflags);
|
||||
extern void ExecEndTidRangeScan(TidRangeScanState *node);
|
||||
extern void ExecReScanTidRangeScan(TidRangeScanState *node);
|
||||
|
||||
#endif /* NODETIDRANGESCAN_H */
|
||||
@ -1624,6 +1624,24 @@ typedef struct TidScanState
|
||||
HeapTupleData tss_htup;
|
||||
} TidScanState;
|
||||
|
||||
/* ----------------
|
||||
* TidRangeScanState information
|
||||
*
|
||||
* Executor state for a TID Range Scan node.
|
||||
*
|
||||
* trss_tidexprs list of TidOpExpr structs (see nodeTidrangescan.c)
|
||||
* trss_mintid the lowest TID in the scan range
|
||||
* trss_maxtid the highest TID in the scan range
|
||||
* trss_inScan is a scan currently in progress?
|
||||
* ----------------
|
||||
*/
|
||||
typedef struct TidRangeScanState
|
||||
{
|
||||
ScanState ss; /* its first field is NodeTag */
|
||||
List *trss_tidexprs; /* list of TidOpExpr structs */
|
||||
ItemPointerData trss_mintid; /* lowest TID in the scan range */
|
||||
ItemPointerData trss_maxtid; /* highest TID in the scan range */
|
||||
bool trss_inScan; /* is a scan currently in progress? */
|
||||
} TidRangeScanState;
|
||||
|
||||
/* ----------------
|
||||
* SubqueryScanState information
|
||||
*
|
||||
|
||||
@ -59,6 +59,7 @@ typedef enum NodeTag
|
||||
T_BitmapIndexScan,
|
||||
T_BitmapHeapScan,
|
||||
T_TidScan,
|
||||
T_TidRangeScan,
|
||||
T_SubqueryScan,
|
||||
T_FunctionScan,
|
||||
T_ValuesScan,
|
||||
@ -116,6 +117,7 @@ typedef enum NodeTag
|
||||
T_BitmapIndexScanState,
|
||||
T_BitmapHeapScanState,
|
||||
T_TidScanState,
|
||||
T_TidRangeScanState,
|
||||
T_SubqueryScanState,
|
||||
T_FunctionScanState,
|
||||
T_TableFuncScanState,
|
||||
@ -229,6 +231,7 @@ typedef enum NodeTag
|
||||
T_BitmapAndPath,
|
||||
T_BitmapOrPath,
|
||||
T_TidPath,
|
||||
T_TidRangePath,
|
||||
T_SubqueryScanPath,
|
||||
T_ForeignPath,
|
||||
T_CustomPath,
|
||||
|
||||
@ -621,6 +621,10 @@ typedef struct PartitionSchemeData *PartitionScheme;
|
||||
* to simplify matching join clauses to those lists.
|
||||
*----------
|
||||
*/
|
||||
|
||||
/* Bitmask of flags supported by table AMs */
|
||||
#define AMFLAG_HAS_TID_RANGE (1 << 0)
|
||||
|
||||
typedef enum RelOptKind
|
||||
{
|
||||
RELOPT_BASEREL,
|
||||
@ -710,6 +714,8 @@ typedef struct RelOptInfo
|
||||
PlannerInfo *subroot; /* if subquery */
|
||||
List *subplan_params; /* if subquery */
|
||||
int rel_parallel_workers; /* wanted number of parallel workers */
|
||||
uint32 amflags; /* Bitmask of optional features supported by
|
||||
* the table AM */
|
||||
|
||||
/* Information about foreign tables and foreign joins */
|
||||
Oid serverid; /* identifies server for the table or join */
|
||||
@ -1323,6 +1329,18 @@ typedef struct TidPath
|
||||
List *tidquals; /* qual(s) involving CTID = something */
|
||||
} TidPath;
|
||||
|
||||
/*
|
||||
* TidRangePath represents a scan by a contiguous range of TIDs
|
||||
*
|
||||
* tidrangequals is an implicitly AND'ed list of qual expressions of the form
|
||||
* "CTID relop pseudoconstant", where relop is one of >,>=,<,<=.
|
||||
*/
|
||||
typedef struct TidRangePath
|
||||
{
|
||||
Path path;
|
||||
List *tidrangequals; /* implicitly AND'ed "CTID relop pseudoconstant" quals */
|
||||
} TidRangePath;
|
||||
|
||||
/*
|
||||
* SubqueryScanPath represents a scan of an unflattened subquery-in-FROM
|
||||
*
|
||||
|
||||
@ -485,6 +485,19 @@ typedef struct TidScan
|
||||
List *tidquals; /* qual(s) involving CTID = something */
|
||||
} TidScan;
|
||||
|
||||
/* ----------------
|
||||
* tid range scan node
|
||||
*
|
||||
* tidrangequals is an implicitly AND'ed list of qual expressions of the form
|
||||
* "CTID relop pseudoconstant", where relop is one of >,>=,<,<=.
|
||||
* ----------------
|
||||
*/
|
||||
typedef struct TidRangeScan
|
||||
{
|
||||
Scan scan;
|
||||
List *tidrangequals; /* implicitly AND'ed qual(s) of the form CTID relop pseudoconstant */
|
||||
} TidRangeScan;
|
||||
|
||||
/* ----------------
|
||||
* subquery scan node
|
||||
*
|
||||
|
||||
@ -83,6 +83,9 @@ extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
|
||||
extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
|
||||
extern void cost_tidscan(Path *path, PlannerInfo *root,
|
||||
RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info);
|
||||
extern void cost_tidrangescan(Path *path, PlannerInfo *root,
|
||||
RelOptInfo *baserel, List *tidrangequals,
|
||||
ParamPathInfo *param_info);
|
||||
extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
|
||||
RelOptInfo *baserel, ParamPathInfo *param_info);
|
||||
extern void cost_functionscan(Path *path, PlannerInfo *root,
|
||||
|
||||
@ -63,6 +63,10 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
|
||||
List *bitmapquals);
|
||||
extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
|
||||
List *tidquals, Relids required_outer);
|
||||
extern TidRangePath *create_tidrangescan_path(PlannerInfo *root,
|
||||
RelOptInfo *rel,
|
||||
List *tidrangequals,
|
||||
Relids required_outer);
|
||||
extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
|
||||
List *subpaths, List *partial_subpaths,
|
||||
List *pathkeys, Relids required_outer,
|
||||
|
||||
@ -202,5 +202,7 @@ typedef ItemPointerData *ItemPointer;
|
||||
|
||||
extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);
|
||||
extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2);
|
||||
extern void ItemPointerInc(ItemPointer pointer);
|
||||
extern void ItemPointerDec(ItemPointer pointer);
|
||||
|
||||
#endif /* ITEMPTR_H */
|
||||
|
||||
Reference in New Issue
Block a user