Add new replication mode synchronous_commit = 'remote_apply'.

In this mode, the master waits for the transaction to be applied on
the remote side, not just written to disk.  That means that you can
count on a transaction started on the standby to see all commits
previously acknowledged by the master.

To make this work, the standby sends a reply after replaying each
commit record generated with synchronous_commit >= 'remote_apply'.
This introduces a small inefficiency: the extra replies will be sent
even by standbys that aren't the current synchronous standby.  But
previously-existing synchronous_commit levels make no attempt at all
to optimize which replies are sent based on what the primary cares
about, so this is no worse, and at least avoids any extra replies for
people not using the feature at all.

Thomas Munro, reviewed by Michael Paquier and by me.  Some additional
tweaks by me.
This commit is contained in:
Robert Haas
2016-03-29 21:16:12 -04:00
parent a898b409f6
commit 314cbfc5da
16 changed files with 208 additions and 66 deletions

View File

@ -60,7 +60,9 @@ typedef enum
SYNCHRONOUS_COMMIT_LOCAL_FLUSH, /* wait for local flush only */
SYNCHRONOUS_COMMIT_REMOTE_WRITE, /* wait for local flush and remote
* write */
SYNCHRONOUS_COMMIT_REMOTE_FLUSH /* wait for local and remote flush */
SYNCHRONOUS_COMMIT_REMOTE_FLUSH, /* wait for local and remote flush */
SYNCHRONOUS_COMMIT_REMOTE_APPLY /* wait for local flush and remote
* apply */
} SyncCommitLevel;
/* Define the default setting for synchonous_commit */
@ -144,10 +146,13 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
* EOXact... routines which run at the end of the original transaction
* completion.
*/
#define XACT_COMPLETION_APPLY_FEEDBACK (1U << 29)
#define XACT_COMPLETION_UPDATE_RELCACHE_FILE (1U << 30)
#define XACT_COMPLETION_FORCE_SYNC_COMMIT (1U << 31)
/* Access macros for above flags */
#define XactCompletionApplyFeedback(xinfo) \
((xinfo & XACT_COMPLETION_APPLY_FEEDBACK) != 0)
#define XactCompletionRelcacheInitFileInval(xinfo) \
((xinfo & XACT_COMPLETION_UPDATE_RELCACHE_FILE) != 0)
#define XactCompletionForceSyncCommit(xinfo) \

View File

@ -267,6 +267,8 @@ extern bool CheckPromoteSignal(void);
extern void WakeupRecovery(void);
extern void SetWalWriterSleeping(bool sleeping);
extern void XLogRequestWalReceiverReply(void);
extern void assign_max_wal_size(int newval, void *extra);
extern void assign_checkpoint_completion_target(double newval, void *extra);

View File

@ -31,7 +31,7 @@
/*
* Each page of XLOG file has a header like this:
*/
#define XLOG_PAGE_MAGIC 0xD088 /* can be used as WAL version indicator */
#define XLOG_PAGE_MAGIC 0xD089 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{

View File

@ -23,8 +23,9 @@
#define SYNC_REP_NO_WAIT -1
#define SYNC_REP_WAIT_WRITE 0
#define SYNC_REP_WAIT_FLUSH 1
#define SYNC_REP_WAIT_APPLY 2
#define NUM_SYNC_REP_WAIT_MODE 2
#define NUM_SYNC_REP_WAIT_MODE 3
/* syncRepState */
#define SYNC_REP_NOT_WAITING 0
@ -35,7 +36,7 @@
extern char *SyncRepStandbyNames;
/* called by user backend */
extern void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN);
extern void SyncRepWaitForLSN(XLogRecPtr lsn, bool commit);
/* called at backend exit */
extern void SyncRepCleanupAtProcExit(void);

View File

@ -112,10 +112,17 @@ typedef struct
slock_t mutex; /* locks shared variables shown above */
/*
* force walreceiver reply? This doesn't need to be locked; memory
* barriers for ordering are sufficient.
*/
bool force_reply;
/*
* Latch used by startup process to wake up walreceiver after telling it
* where to start streaming (after setting receiveStart and
* receiveStartTLI).
* receiveStartTLI), and also to tell it to send apply feedback to the
* primary whenever specially marked commit records are applied.
*/
Latch latch;
} WalRcvData;
@ -138,7 +145,7 @@ extern PGDLLIMPORT walrcv_startstreaming_type walrcv_startstreaming;
typedef void (*walrcv_endstreaming_type) (TimeLineID *next_tli);
extern PGDLLIMPORT walrcv_endstreaming_type walrcv_endstreaming;
typedef int (*walrcv_receive_type) (int timeout, char **buffer);
typedef int (*walrcv_receive_type) (char **buffer, int *wait_fd);
extern PGDLLIMPORT walrcv_receive_type walrcv_receive;
typedef void (*walrcv_send_type) (const char *buffer, int nbytes);
@ -162,5 +169,6 @@ extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
extern int GetReplicationApplyDelay(void);
extern int GetReplicationTransferLatency(void);
extern void WalRcvForceReply(void);
#endif /* _WALRECEIVER_H */