Files
postgresql/src/include/executor/instrument.h
Fujii Masao d8735b8b46 Fix issues in pg_stat_wal.
1) Previously there were both pgstat_send_wal() and pgstat_report_wal()
   in order to send WAL activity to the stats collector. With the former being
   used by wal writer, the latter by most other processes. They were a bit
   redundant and so this commit merges them into pgstat_send_wal() to
   simplify the code.

2) Previously WAL global statistics counters were calculated and then
   compared with zero-filled buffer in order to determine whether any WAL
   activity has happened since the last submission. These calculation and
   comparison were not cheap. This was regularly exercised even in read-only
   workloads. This commit fixes the issue by making some WAL activity
   counters directly be checked to determine if there's WAL activity stats
   to send.

3) Previously pgstat_report_stat() did not check if there's WAL activity
   stats to send as part of the "Don't expend a clock check if nothing to do"
   check at the top. It's probably rare to have pending WAL stats without
   also passing one of the other conditions, but for safely this commit
   changes pgstat_report_stats() so that it checks also some WAL activity
   counters at the top.

This commit also adds the comments about the design of WAL stats.

Reported-by: Andres Freund
Author: Masahiro Ikeda
Reviewed-by: Kyotaro Horiguchi, Atsushi Torikoshi, Andres Freund, Fujii Masao
Discussion: https://postgr.es/m/20210324232224.vrfiij2rxxwqqjjb@alap3.anarazel.de
2021-05-19 11:38:34 +09:00

117 lines
4.7 KiB
C

/*-------------------------------------------------------------------------
*
* instrument.h
* definitions for run-time statistics collection
*
*
* Copyright (c) 2001-2021, PostgreSQL Global Development Group
*
* src/include/executor/instrument.h
*
*-------------------------------------------------------------------------
*/
#ifndef INSTRUMENT_H
#define INSTRUMENT_H
#include "portability/instr_time.h"
/*
* BufferUsage and WalUsage counters keep being incremented infinitely,
* i.e., must never be reset to zero, so that we can calculate how much
* the counters are incremented in an arbitrary period.
*/
typedef struct BufferUsage
{
int64 shared_blks_hit; /* # of shared buffer hits */
int64 shared_blks_read; /* # of shared disk blocks read */
int64 shared_blks_dirtied; /* # of shared blocks dirtied */
int64 shared_blks_written; /* # of shared disk blocks written */
int64 local_blks_hit; /* # of local buffer hits */
int64 local_blks_read; /* # of local disk blocks read */
int64 local_blks_dirtied; /* # of local blocks dirtied */
int64 local_blks_written; /* # of local disk blocks written */
int64 temp_blks_read; /* # of temp blocks read */
int64 temp_blks_written; /* # of temp blocks written */
instr_time blk_read_time; /* time spent reading */
instr_time blk_write_time; /* time spent writing */
} BufferUsage;
/*
* WalUsage tracks only WAL activity like WAL records generation that
* can be measured per query and is displayed by EXPLAIN command,
* pg_stat_statements extension, etc. It does not track other WAL activity
* like WAL writes that it's not worth measuring per query. That's tracked
* by WAL global statistics counters in WalStats, instead.
*/
typedef struct WalUsage
{
int64 wal_records; /* # of WAL records produced */
int64 wal_fpi; /* # of WAL full page images produced */
uint64 wal_bytes; /* size of WAL records produced */
} WalUsage;
/* Flag bits included in InstrAlloc's instrument_options bitmask */
typedef enum InstrumentOption
{
INSTRUMENT_TIMER = 1 << 0, /* needs timer (and row counts) */
INSTRUMENT_BUFFERS = 1 << 1, /* needs buffer usage */
INSTRUMENT_ROWS = 1 << 2, /* needs row count */
INSTRUMENT_WAL = 1 << 3, /* needs WAL usage */
INSTRUMENT_ALL = PG_INT32_MAX
} InstrumentOption;
typedef struct Instrumentation
{
/* Parameters set at node creation: */
bool need_timer; /* true if we need timer data */
bool need_bufusage; /* true if we need buffer usage data */
bool need_walusage; /* true if we need WAL usage data */
bool async_mode; /* true if node is in async mode */
/* Info about current plan cycle: */
bool running; /* true if we've completed first tuple */
instr_time starttime; /* start time of current iteration of node */
instr_time counter; /* accumulated runtime for this node */
double firsttuple; /* time for first tuple of this cycle */
double tuplecount; /* # of tuples emitted so far this cycle */
BufferUsage bufusage_start; /* buffer usage at start */
WalUsage walusage_start; /* WAL usage at start */
/* Accumulated statistics across all completed cycles: */
double startup; /* total startup time (in seconds) */
double total; /* total time (in seconds) */
double ntuples; /* total tuples produced */
double ntuples2; /* secondary node-specific tuple counter */
double nloops; /* # of run cycles for this node */
double nfiltered1; /* # of tuples removed by scanqual or joinqual */
double nfiltered2; /* # of tuples removed by "other" quals */
BufferUsage bufusage; /* total buffer usage */
WalUsage walusage; /* total WAL usage */
} Instrumentation;
typedef struct WorkerInstrumentation
{
int num_workers; /* # of structures that follow */
Instrumentation instrument[FLEXIBLE_ARRAY_MEMBER];
} WorkerInstrumentation;
extern PGDLLIMPORT BufferUsage pgBufferUsage;
extern PGDLLIMPORT WalUsage pgWalUsage;
extern Instrumentation *InstrAlloc(int n, int instrument_options,
bool async_mode);
extern void InstrInit(Instrumentation *instr, int instrument_options);
extern void InstrStartNode(Instrumentation *instr);
extern void InstrStopNode(Instrumentation *instr, double nTuples);
extern void InstrUpdateTupleCount(Instrumentation *instr, double nTuples);
extern void InstrEndLoop(Instrumentation *instr);
extern void InstrAggNode(Instrumentation *dst, Instrumentation *add);
extern void InstrStartParallelQuery(void);
extern void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage);
extern void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage);
extern void BufferUsageAccumDiff(BufferUsage *dst,
const BufferUsage *add, const BufferUsage *sub);
extern void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add,
const WalUsage *sub);
#endif /* INSTRUMENT_H */