Files
postgresql/src/include/access/relscan.h
Peter Geoghegan 0fbceae841 Show index search count in EXPLAIN ANALYZE, take 2.
Expose the count of index searches/index descents in EXPLAIN ANALYZE's
output for index scan/index-only scan/bitmap index scan nodes.  This
information is particularly useful with scans that use ScalarArrayOp
quals, where the number of index searches can be unpredictable due to
implementation details that interact with physical index characteristics
(at least with nbtree SAOP scans, since Postgres 17 commit 5bf748b8).
The information shown also provides useful context when EXPLAIN ANALYZE
runs a plan with an index scan node that successfully applied the skip
scan optimization (set to be added to nbtree by an upcoming patch).
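
For example (an editorial illustration, not output taken from the commit or
its regression tests), an EXPLAIN ANALYZE plan fragment might now include a
line like:

    ->  Index Only Scan using tenk1_unique1 on tenk1
          Index Cond: (unique1 = ANY ('{1, 2, 3}'::integer[]))
          Index Searches: 3

where "Index Searches:" is the new counter; whether it reads 3 or 1 here
depends on how many distinct index descents the SAOP scan actually needed.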

The instrumentation works by teaching all index AMs to increment a new
nsearches counter whenever a new index search begins.  The counter is
incremented at exactly the same point that index AMs already increment
the pg_stat_*_indexes.idx_scan counter (we're counting the same event,
but at the scan level rather than the relation level).  Parallel queries
have workers copy their local counter struct into shared memory when an
index scan node ends -- even when it isn't a parallel-aware scan node.
An earlier version of this patch that only worked with parallel-aware
scans became commit 5ead85fb (though that was quickly reverted by commit
d00107cd following "debug_parallel_query=regress" buildfarm failures).
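
As a rough C sketch of that pattern (editorial, paraphrasing rather than
quoting the committed code), the increment site inside an index AM looks
something like:

    /*
     * Starting another descent of the index: count it at the same spot
     * where the AM already counts relation-level index scans.
     */
    pgstat_count_index_scan(scan->indexRelation);
    if (scan->instrument)
        scan->instrument->nsearches++;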

Our approach doesn't match the approach used when tracking other index
scan related costs (e.g., "Rows Removed by Filter:").  It is comparable
to the approach used in similar cases involving costs that are only
readily accessible inside an access method, not from the executor proper
(e.g., "Heap Blocks:" output for a Bitmap Heap Scan, which was recently
enhanced to show per-worker costs by commit 5a1e6df3, using essentially
the same scheme as the one used here).  It is necessary for index AMs to
have direct responsibility for maintaining the new counter, since the
counter might need to be incremented multiple times per amgettuple call
(or per amgetbitmap call).  But it is also necessary for the executor
proper to manage the shared memory now used to transfer each worker's
counter struct to the leader.
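
A hedged executor-side sketch (field and type names paraphrased from the
patch, not verbatim): when a worker's index scan node shuts down, it
publishes its local counters into a per-worker slot in shared memory:

    if (node->iss_SharedInfo != NULL)
    {
        /* each worker owns one element of the shared array */
        IndexScanInstrumentation *dst =
            &node->iss_SharedInfo->winstrument[ParallelWorkerNumber];

        *dst = node->iss_Instrument;    /* copy local counter struct out */
    }

The leader later walks that array to produce the per-worker numbers that
EXPLAIN ANALYZE prints.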

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Robert Haas <robertmhaas@gmail.com>
Reviewed-By: Tomas Vondra <tomas@vondra.me>
Reviewed-By: Masahiro Ikeda <ikedamsh@oss.nttdata.com>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzkRqvaqR2CTNqTZP0z6FuL4-3ED6eQB0yx38XBNj1v-4Q@mail.gmail.com
Discussion: https://postgr.es/m/CAH2-Wz=PKR6rB7qbx+Vnd7eqeB5VTcrW=iJvAsTsKbdG+kW_UA@mail.gmail.com
2025-03-11 09:20:50 -04:00

/*-------------------------------------------------------------------------
*
* relscan.h
* POSTGRES relation scan descriptor definitions.
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/access/relscan.h
*
*-------------------------------------------------------------------------
*/
#ifndef RELSCAN_H
#define RELSCAN_H

#include "access/htup_details.h"
#include "access/itup.h"
#include "nodes/tidbitmap.h"
#include "port/atomics.h"
#include "storage/buf.h"
#include "storage/relfilelocator.h"
#include "storage/spin.h"
#include "utils/relcache.h"

struct ParallelTableScanDescData;

/*
* Generic descriptor for table scans. This is the base-class for table scans,
* which needs to be embedded in the scans of individual AMs.
*/
typedef struct TableScanDescData
{
    /* scan parameters */
    Relation    rs_rd;          /* heap relation descriptor */
    struct SnapshotData *rs_snapshot;   /* snapshot to see */
    int         rs_nkeys;       /* number of scan keys */
    struct ScanKeyData *rs_key; /* array of scan key descriptors */

    /*
     * Scan type-specific members
     */
    union
    {
        /* Iterator for Bitmap Table Scans */
        TBMIterator rs_tbmiterator;

        /*
         * Range of ItemPointers for table_scan_getnextslot_tidrange() to
         * scan.
         */
        struct
        {
            ItemPointerData rs_mintid;
            ItemPointerData rs_maxtid;
        }           tidrange;
    }           st;

    /*
     * Information about type and behaviour of the scan, a bitmask of members
     * of the ScanOptions enum (see tableam.h).
     */
    uint32      rs_flags;

    struct ParallelTableScanDescData *rs_parallel;  /* parallel scan
                                                     * information */
} TableScanDescData;
typedef struct TableScanDescData *TableScanDesc;

/*
 * Shared state for parallel table scan.
 *
 * Each backend participating in a parallel table scan has its own
 * TableScanDesc in backend-private memory, and those objects all contain a
 * pointer to this structure.  The information here must be sufficient to
 * properly initialize each new TableScanDesc as workers join the scan, and
 * it must tell those workers what to scan.
 */
typedef struct ParallelTableScanDescData
{
    RelFileLocator phs_locator; /* physical relation to scan */
    bool        phs_syncscan;   /* report location to syncscan logic? */
    bool        phs_snapshot_any;   /* SnapshotAny, not phs_snapshot_data? */
    Size        phs_snapshot_off;   /* data for snapshot */
} ParallelTableScanDescData;
typedef struct ParallelTableScanDescData *ParallelTableScanDesc;
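
/*
 * Editorial usage sketch (not part of the original header): the leader
 * sizes and fills this struct in dynamic shared memory, after which every
 * participant starts its own scan from it, roughly:
 *
 *     Size        sz = table_parallelscan_estimate(rel, snapshot);
 *     ParallelTableScanDesc pscan = shm_toc_allocate(toc, sz);
 *
 *     table_parallelscan_initialize(rel, pscan, snapshot);
 *     ...
 *     TableScanDesc scan = table_beginscan_parallel(rel, pscan);
 */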

/*
 * Shared state for parallel table scans, for block oriented storage.
 */
typedef struct ParallelBlockTableScanDescData
{
    ParallelTableScanDescData base;

    BlockNumber phs_nblocks;    /* # blocks in relation at start of scan */
    slock_t     phs_mutex;      /* mutual exclusion for setting startblock */
    BlockNumber phs_startblock; /* starting block number */
    pg_atomic_uint64 phs_nallocated;    /* number of blocks allocated to
                                         * workers so far. */
} ParallelBlockTableScanDescData;
typedef struct ParallelBlockTableScanDescData *ParallelBlockTableScanDesc;

/*
 * Per backend state for parallel table scan, for block-oriented storage.
 */
typedef struct ParallelBlockTableScanWorkerData
{
    uint64      phsw_nallocated;    /* Current # of blocks into the scan */
    uint32      phsw_chunk_remaining;   /* # blocks left in this chunk */
    uint32      phsw_chunk_size;    /* The number of blocks to allocate in
                                     * each I/O chunk for the scan */
} ParallelBlockTableScanWorkerData;
typedef struct ParallelBlockTableScanWorkerData *ParallelBlockTableScanWorker;
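
/*
 * Editorial sketch of how the shared and per-worker structs cooperate
 * (illustrative only; the real logic is table_block_parallelscan_nextpage()):
 * a worker claims a chunk of blocks by atomically advancing the shared
 * counter, then consumes that chunk privately:
 *
 *     uint64 claimed = pg_atomic_fetch_add_u64(&pbscan->phs_nallocated,
 *                                              pbscanwork->phsw_chunk_size);
 *
 *     pbscanwork->phsw_nallocated = claimed;
 *     pbscanwork->phsw_chunk_remaining = pbscanwork->phsw_chunk_size;
 */
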
/*
 * Base class for fetches from a table via an index.  This is the base-class
 * for such scans, which needs to be embedded in the respective struct for
 * individual AMs.
 */
typedef struct IndexFetchTableData
{
    Relation    rel;
} IndexFetchTableData;

struct IndexScanInstrumentation;

/*
* We use the same IndexScanDescData structure for both amgettuple-based
* and amgetbitmap-based index scans. Some fields are only relevant in
* amgettuple-based scans.
*/
typedef struct IndexScanDescData
{
    /* scan parameters */
    Relation    heapRelation;   /* heap relation descriptor, or NULL */
    Relation    indexRelation;  /* index relation descriptor */
    struct SnapshotData *xs_snapshot;   /* snapshot to see */
    int         numberOfKeys;   /* number of index qualifier conditions */
    int         numberOfOrderBys;   /* number of ordering operators */
    struct ScanKeyData *keyData;    /* array of index qualifier descriptors */
    struct ScanKeyData *orderByData;    /* array of ordering op descriptors */
    bool        xs_want_itup;   /* caller requests index tuples */
    bool        xs_temp_snap;   /* unregister snapshot at scan end? */

    /* signaling to index AM about killing index tuples */
    bool        kill_prior_tuple;   /* last-returned tuple is dead */
    bool        ignore_killed_tuples;   /* do not return killed entries */
    bool        xactStartedInRecovery;  /* prevents killing/seeing killed
                                         * tuples */

    /* index access method's private state */
    void       *opaque;         /* access-method-specific info */

    /*
     * Instrumentation counters maintained by all index AMs during both
     * amgettuple calls and amgetbitmap calls (unless field remains NULL)
     */
    struct IndexScanInstrumentation *instrument;

    /*
     * In an index-only scan, a successful amgettuple call must fill either
     * xs_itup (and xs_itupdesc) or xs_hitup (and xs_hitupdesc) to provide the
     * data returned by the scan.  It can fill both, in which case the heap
     * format will be used.
     */
    IndexTuple  xs_itup;        /* index tuple returned by AM */
    struct TupleDescData *xs_itupdesc;  /* rowtype descriptor of xs_itup */
    HeapTuple   xs_hitup;       /* index data returned by AM, as HeapTuple */
    struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */

    ItemPointerData xs_heaptid; /* result */
    bool        xs_heap_continue;   /* T if must keep walking, potential
                                     * further results */
    IndexFetchTableData *xs_heapfetch;  /* table-AM state for fetching the
                                         * tuples that xs_heaptid points to */

    bool        xs_recheck;     /* T means scan keys must be rechecked */

    /*
     * When fetching with an ordering operator, the values of the ORDER BY
     * expressions of the last returned tuple, according to the index.  If
     * xs_recheckorderby is true, these need to be rechecked just like the
     * scan keys, and the values returned here are a lower-bound on the actual
     * values.
     */
    Datum      *xs_orderbyvals;
    bool       *xs_orderbynulls;
    bool        xs_recheckorderby;

    /* parallel index scan information, in shared memory */
    struct ParallelIndexScanDescData *parallel_scan;
} IndexScanDescData;

/* Generic structure for parallel scans */
typedef struct ParallelIndexScanDescData
{
    RelFileLocator ps_locator;  /* physical table relation to scan */
    RelFileLocator ps_indexlocator; /* physical index relation to scan */
    Size        ps_offset_ins;  /* offset to SharedIndexScanInstrumentation */
    Size        ps_offset_am;   /* offset to AM-specific structure */
    char        ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
} ParallelIndexScanDescData;
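
/*
 * Editorial note (not in the original header): ps_offset_ins and
 * ps_offset_am are byte offsets into this variable-size struct, so a
 * participant locates the instrumentation area roughly like:
 *
 *     SharedIndexScanInstrumentation *sis =
 *         (SharedIndexScanInstrumentation *)
 *         (((char *) piscan) + piscan->ps_offset_ins);
 */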

struct TupleTableSlot;

/* Struct for storage-or-index scans of system tables */
typedef struct SysScanDescData
{
    Relation    heap_rel;       /* catalog being scanned */
    Relation    irel;           /* NULL if doing heap scan */
    struct TableScanDescData *scan; /* only valid in storage-scan case */
    struct IndexScanDescData *iscan;    /* only valid in index-scan case */
    struct SnapshotData *snapshot;  /* snapshot to unregister at end of scan */
    struct TupleTableSlot *slot;    /* slot holding the currently returned
                                     * tuple */
} SysScanDescData;

#endif /* RELSCAN_H */