From 12ed218eb0e710a7b6305b27eddc5b52e46d193a Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 28 Sep 2020 19:18:06 +0800 Subject: [PATCH] =?UTF-8?q?gs=5Fprobackup=20=E5=8A=9F=E8=83=BD=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bin/Makefile | 6 +- src/bin/pg_probackup/Makefile | 94 + src/bin/pg_probackup/atomics.cpp | 249 ++ src/bin/pg_probackup/atomics.h | 528 ++++ src/bin/pg_probackup/atomics/arch-arm.h | 26 + src/bin/pg_probackup/atomics/arch-hppa.h | 17 + src/bin/pg_probackup/atomics/arch-ia64.h | 29 + src/bin/pg_probackup/atomics/arch-ppc.h | 29 + src/bin/pg_probackup/atomics/arch-x86.h | 252 ++ src/bin/pg_probackup/atomics/fallback.h | 169 + src/bin/pg_probackup/atomics/generic-acc.h | 106 + src/bin/pg_probackup/atomics/generic-gcc.h | 286 ++ src/bin/pg_probackup/atomics/generic-msvc.h | 101 + src/bin/pg_probackup/atomics/generic-sunpro.h | 106 + src/bin/pg_probackup/atomics/generic-xlc.h | 142 + src/bin/pg_probackup/atomics/generic.h | 401 +++ src/bin/pg_probackup/backup.cpp | 2357 ++++++++++++++ src/bin/pg_probackup/catalog.cpp | 2775 +++++++++++++++++ src/bin/pg_probackup/configuration.cpp | 1492 +++++++++ src/bin/pg_probackup/configuration.h | 107 + src/bin/pg_probackup/configure.cpp | 699 +++++ src/bin/pg_probackup/data.cpp | 2239 +++++++++++++ src/bin/pg_probackup/delete.cpp | 1115 +++++++ src/bin/pg_probackup/dir.cpp | 1856 +++++++++++ src/bin/pg_probackup/fetch.cpp | 110 + src/bin/pg_probackup/file.cpp | 2743 ++++++++++++++++ src/bin/pg_probackup/file.h | 151 + src/bin/pg_probackup/help.cpp | 754 +++++ src/bin/pg_probackup/init.cpp | 131 + src/bin/pg_probackup/json.cpp | 147 + src/bin/pg_probackup/json.h | 33 + src/bin/pg_probackup/logger.cpp | 712 +++++ src/bin/pg_probackup/logger.h | 67 + src/bin/pg_probackup/merge.cpp | 1399 +++++++++ src/bin/pg_probackup/parray.cpp | 213 ++ src/bin/pg_probackup/parray.h | 36 + src/bin/pg_probackup/parsexlog.cpp | 1890 +++++++++++ src/bin/pg_probackup/pg_lzcompress.cpp | 773 +++++ src/bin/pg_probackup/pg_lzcompress.h | 91 + src/bin/pg_probackup/pg_probackup.cpp | 818 +++++ src/bin/pg_probackup/pg_probackup.h | 1123 +++++++ src/bin/pg_probackup/pgut.cpp | 1546 +++++++++ src/bin/pg_probackup/pgut.h | 107 + src/bin/pg_probackup/psprintf.cpp | 198 ++ src/bin/pg_probackup/ptrack.cpp | 198 ++ src/bin/pg_probackup/remote.cpp | 261 ++ src/bin/pg_probackup/remote.h | 24 + src/bin/pg_probackup/restore.cpp | 1826 +++++++++++ src/bin/pg_probackup/s_lock.cpp | 398 +++ src/bin/pg_probackup/show.cpp | 1116 +++++++ src/bin/pg_probackup/thread.cpp | 109 + src/bin/pg_probackup/thread.h | 41 + src/bin/pg_probackup/util.cpp | 591 ++++ src/bin/pg_probackup/validate.cpp | 682 ++++ src/common/backend/catalog/builtin_funcs.ini | 8 + src/common/backend/utils/misc/Makefile | 2 +- src/common/backend/utils/misc/guc.cpp | 3 - .../backend/utils/misc/pg_controldata.cpp | 277 ++ .../process/postmaster/postmaster.cpp | 2 + src/include/access/xlog_internal.h | 20 + src/include/pg_getopt.h | 56 + src/include/pgtar.h | 26 + src/include/storage/bufpage.h | 5 + src/include/utils/builtins.h | 4 + src/include/utils/pg_crc.h | 22 +- src/lib/pgcommon/fe_memutils.cpp | 6 + src/test/regress/expected/opr_sanity.out | 4 +- src/test/regress/expected/rangefuncs.out | 3 +- .../expected/single_node_opr_sanity.out | 6 +- 69 files changed, 33901 insertions(+), 12 deletions(-) create mode 100644 src/bin/pg_probackup/Makefile create mode 100644 src/bin/pg_probackup/atomics.cpp create mode 100644 
src/bin/pg_probackup/atomics.h create mode 100644 src/bin/pg_probackup/atomics/arch-arm.h create mode 100644 src/bin/pg_probackup/atomics/arch-hppa.h create mode 100644 src/bin/pg_probackup/atomics/arch-ia64.h create mode 100644 src/bin/pg_probackup/atomics/arch-ppc.h create mode 100644 src/bin/pg_probackup/atomics/arch-x86.h create mode 100644 src/bin/pg_probackup/atomics/fallback.h create mode 100644 src/bin/pg_probackup/atomics/generic-acc.h create mode 100644 src/bin/pg_probackup/atomics/generic-gcc.h create mode 100644 src/bin/pg_probackup/atomics/generic-msvc.h create mode 100644 src/bin/pg_probackup/atomics/generic-sunpro.h create mode 100644 src/bin/pg_probackup/atomics/generic-xlc.h create mode 100644 src/bin/pg_probackup/atomics/generic.h create mode 100644 src/bin/pg_probackup/backup.cpp create mode 100644 src/bin/pg_probackup/catalog.cpp create mode 100644 src/bin/pg_probackup/configuration.cpp create mode 100644 src/bin/pg_probackup/configuration.h create mode 100644 src/bin/pg_probackup/configure.cpp create mode 100644 src/bin/pg_probackup/data.cpp create mode 100644 src/bin/pg_probackup/delete.cpp create mode 100644 src/bin/pg_probackup/dir.cpp create mode 100644 src/bin/pg_probackup/fetch.cpp create mode 100644 src/bin/pg_probackup/file.cpp create mode 100644 src/bin/pg_probackup/file.h create mode 100644 src/bin/pg_probackup/help.cpp create mode 100644 src/bin/pg_probackup/init.cpp create mode 100644 src/bin/pg_probackup/json.cpp create mode 100644 src/bin/pg_probackup/json.h create mode 100644 src/bin/pg_probackup/logger.cpp create mode 100644 src/bin/pg_probackup/logger.h create mode 100644 src/bin/pg_probackup/merge.cpp create mode 100644 src/bin/pg_probackup/parray.cpp create mode 100644 src/bin/pg_probackup/parray.h create mode 100644 src/bin/pg_probackup/parsexlog.cpp create mode 100644 src/bin/pg_probackup/pg_lzcompress.cpp create mode 100644 src/bin/pg_probackup/pg_lzcompress.h create mode 100644 src/bin/pg_probackup/pg_probackup.cpp create mode 100644 src/bin/pg_probackup/pg_probackup.h create mode 100644 src/bin/pg_probackup/pgut.cpp create mode 100644 src/bin/pg_probackup/pgut.h create mode 100644 src/bin/pg_probackup/psprintf.cpp create mode 100644 src/bin/pg_probackup/ptrack.cpp create mode 100644 src/bin/pg_probackup/remote.cpp create mode 100644 src/bin/pg_probackup/remote.h create mode 100644 src/bin/pg_probackup/restore.cpp create mode 100644 src/bin/pg_probackup/s_lock.cpp create mode 100644 src/bin/pg_probackup/show.cpp create mode 100644 src/bin/pg_probackup/thread.cpp create mode 100644 src/bin/pg_probackup/thread.h create mode 100644 src/bin/pg_probackup/util.cpp create mode 100644 src/bin/pg_probackup/validate.cpp create mode 100644 src/common/backend/utils/misc/pg_controldata.cpp create mode 100644 src/include/pg_getopt.h create mode 100644 src/include/pgtar.h diff --git a/src/bin/Makefile b/src/bin/Makefile index 7d3f7fa4a..62620d4ac 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -32,7 +32,8 @@ SUBDIRS = \ gs_cgroup \ gsqlerr \ pg_upgrade \ - pg_basebackup + pg_basebackup \ + pg_probackup ifeq ($(PORTNAME), win32) SUBDIRS += pgevent @@ -51,7 +52,8 @@ SUBDIRS = \ pg_resetxlog \ gs_guc \ gsqlerr \ - pg_basebackup + pg_basebackup \ + pg_probackup endif diff --git a/src/bin/pg_probackup/Makefile b/src/bin/pg_probackup/Makefile new file mode 100644 index 000000000..dc625cc57 --- /dev/null +++ b/src/bin/pg_probackup/Makefile @@ -0,0 +1,94 @@ +#------------------------------------------------------------------------- +# +# Makefile for 
src/bin/pg_probackup +# +# Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group +# +#------------------------------------------------------------------------- + +PROGRAM = gs_probackup + +# utils +OBJS = configuration.o json.o logger.o \ + parray.o pgut.o thread.o remote.o file.o psprintf.o \ + atomics.o s_lock.o + +OBJS += backup.o catalog.o configure.o data.o \ + delete.o dir.o fetch.o help.o init.o merge.o \ + ptrack.o pg_probackup.o restore.o show.o util.o \ + validate.o parsexlog.o + +OBJS += datapagemap.o receivelog.o streamutil.o \ + xlogreader.o pg_lzcompress.o +OBJS += $(top_builddir)/src/lib/pgcommon/libpgcommon.a + +EXTRA_CLEAN = datapagemap.cpp datapagemap.h \ + receivelog.cpp receivelog.h streamutil.cpp streamutil.h \ + xlogreader.cpp instr_time.h + +subdir = src/bin/pg_probackup +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global +EXTRA_CLEAN += logging.h + +CFLAGS = -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0 -fsigned-char -DHAVE_LIBZ -DSTREAMPLAN -DPGXC -O0 -g -DENABLE_GSTRACE -fpermissive +override CXXFLAGS := -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0 -fsigned-char -DHAVE_LIBZ -DSTREAMPLAN -DPGXC -O0 -g -DENABLE_GSTRACE -fpermissive +LDFLAGS += -L$(LZ4_LIB_PATH) +LIBS += -lgssapi_krb5_gauss -lgssrpc_gauss -lkrb5_gauss -lkrb5support_gauss -lk5crypto_gauss -lcom_err_gauss -llz4 +PG_CPPFLAGS = -I$(libpq_srcdir) ${PTHREAD_CFLAGS} -Isrc -I$(top_builddir)/$(subdir) -I$(LZ4_INCLUDE_PATH) -I$(ZLIB_INCLUDE_PATH) +override CPPFLAGS := -DFRONTEND $(CPPFLAGS) $(PG_CPPFLAGS) +PG_LIBS_INTERNAL = $(libpq_pgport) ${PTHREAD_CFLAGS} + +all: $(PROGRAM) + +gs_probackup: $(OBJS) | submake-libpq submake-libpgport + $(CC) $(CXXFLAGS) $(OBJS) $(LIBS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) -o $@$(X) + +configuration.o: datapagemap.h +backup.o: receivelog.h streamutil.h +ifneq (,$(wildcard $(top_builddir)/src/bin/pg_rewind/logging.h)) +datapagemap.o: logging.h +endif +$(top_builddir)/src/lib/pgcommon/libpgcommon.a: + $(MAKE) -C $(top_builddir)/src/lib/pgcommon libpgcommon.a +atomics.h: $(top_builddir)/src/include/utils/atomics.h + rm -f $@ && $(LN_S) $(top_builddir)/src/include/utils/atomics.h $@ +instr_time.h: $(top_builddir)/src/include/portability/instr_time.h + rm -f $@ && $(LN_S) $(top_builddir)/src/include/portability/instr_time.h $@ +datapagemap.cpp: $(top_builddir)/src/bin/pg_rewind/datapagemap.cpp + rm -f $@ && $(LN_S) $(top_builddir)/src/bin/pg_rewind/datapagemap.cpp $@ +datapagemap.h: $(top_builddir)/src/bin/pg_rewind/datapagemap.h + rm -f $@ && $(LN_S) $(top_builddir)/src/bin/pg_rewind/datapagemap.h $@ +receivelog.cpp: $(top_builddir)/src/bin/pg_basebackup/receivelog.cpp + rm -f $@ && $(LN_S) $(top_builddir)/src/bin/pg_basebackup/receivelog.cpp $@ +ifneq (,$(wildcard $(top_builddir)/src/bin/pg_basebackup/walmethods.c)) +receivelog.h: src/walmethods.h $(top_builddir)/src/bin/pg_basebackup/receivelog.h +else +receivelog.h: $(top_builddir)/src/bin/pg_basebackup/receivelog.h +endif + rm -f $@ && $(LN_S) $(top_builddir)/src/bin/pg_basebackup/receivelog.h $@ +streamutil.cpp: $(top_builddir)/src/bin/pg_basebackup/streamutil.cpp + rm -f $@ && $(LN_S) $(top_builddir)/src/bin/pg_basebackup/streamutil.cpp $@ +streamutil.h: $(top_builddir)/src/bin/pg_basebackup/streamutil.h + rm -f $@ && $(LN_S) $(top_builddir)/src/bin/pg_basebackup/streamutil.h $@ +xlogreader.cpp: $(top_builddir)/src/gausskernel/storage/access/transam/xlogreader.cpp + rm -f $@ && $(LN_S) 
$(top_builddir)/src/gausskernel/storage/access/transam/xlogreader.cpp $@ +logging.h: $(top_builddir)/src/bin/pg_rewind/logging.h + rm -f $@ && $(LN_S) $(top_builddir)/src/bin/pg_rewind/logging.h $@ + +ifeq ($(PORTNAME), aix) + CC=xlc_r +endif + +install: all installdirs + $(INSTALL_PROGRAM) gs_probackup$(X) '$(DESTDIR)$(bindir)/gs_probackup$(X)' + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(bindir)' + +uninstall: + rm -f '$(DESTDIR)$(bindir)/gs_probackup$(X)' +.PHONY : clean +clean distclean maintainer-clean: + rm -f gs_probackup $(OBJS) $(EXTRA_CLEAN) diff --git a/src/bin/pg_probackup/atomics.cpp b/src/bin/pg_probackup/atomics.cpp new file mode 100644 index 000000000..a45e99d5c --- /dev/null +++ b/src/bin/pg_probackup/atomics.cpp @@ -0,0 +1,249 @@ +/*------------------------------------------------------------------------- + * + * atomics.c + * Non-Inline parts of the atomics implementation + * + * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/port/atomics.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" + +#ifdef FRONTEND +#undef FRONTEND +#include "atomics.h" +#define FRONTEND +#else +#include "atomics.h" +#endif +//#undef HAVE_SPINLOCKS +#include "storage/spin.h" + + +#ifdef PG_HAVE_MEMORY_BARRIER_EMULATION +#ifdef WIN32 +#error "barriers are required (and provided) on WIN32 platforms" +#endif +#include +#endif + +#ifdef PG_HAVE_MEMORY_BARRIER_EMULATION +void +pg_spinlock_barrier(void) +{ + /* + * NB: we have to be reentrant here, some barriers are placed in signal + * handlers. + * + * We use kill(0) for the fallback barrier as we assume that kernels on + * systems old enough to require fallback barrier support will include an + * appropriate barrier while checking the existence of the postmaster pid. + */ + (void) kill(PostmasterPid, 0); +} +#endif + +#ifdef PG_HAVE_COMPILER_BARRIER_EMULATION +void +pg_extern_compiler_barrier(void) +{ + /* do nothing */ +} +#endif + + +#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION + +void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t), + "size mismatch of atomic_flag vs slock_t"); + +#ifndef HAVE_SPINLOCKS + + /* + * NB: If we're using semaphore based TAS emulation, be careful to use a + * separate set of semaphores. Otherwise we'd get in trouble if an atomic + * var would be manipulated while spinlock is held. 
+ */ + s_init_lock_sema((slock_t *) &ptr->sema, true); +#else + SpinLockInit((slock_t *) &ptr->sema); +#endif + + ptr->value = false; +} + +bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + uint32 oldval; + + SpinLockAcquire((slock_t *) &ptr->sema); + oldval = ptr->value; + ptr->value = true; + SpinLockRelease((slock_t *) &ptr->sema); + + return oldval == 0; +} + +void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + SpinLockAcquire((slock_t *) &ptr->sema); + ptr->value = false; + SpinLockRelease((slock_t *) &ptr->sema); +} + +bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return ptr->value == 0; +} + +#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */ +#undef PG_HAVE_ATOMIC_U32_SIMULATION +#ifdef PG_HAVE_ATOMIC_U32_SIMULATION +void +pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_) +{ + StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t), + "size mismatch of atomic_uint32 vs slock_t"); + + /* + * If we're using semaphore based atomic flags, be careful about nested + * usage of atomics while a spinlock is held. + */ +#ifndef HAVE_SPINLOCKS + s_init_lock_sema((slock_t *) &ptr->sema, true); +#else + SpinLockInit((slock_t *) &ptr->sema); +#endif + ptr->value = val_; +} + +void +pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + /* + * One might think that an unlocked write doesn't need to acquire the + * spinlock, but one would be wrong. Even an unlocked write has to cause a + * concurrent pg_atomic_compare_exchange_u32() (et al) to fail. + */ + SpinLockAcquire((slock_t *) &ptr->sema); + ptr->value = val; + SpinLockRelease((slock_t *) &ptr->sema); +} + +bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + + /* + * Do atomic op under a spinlock. It might look like we could just skip + * the cmpxchg if the lock isn't available, but that'd just emulate a + * 'weak' compare and swap. I.e. one that allows spurious failures. Since + * several algorithms rely on a strong variant and that is efficiently + * implementable on most major architectures let's emulate it here as + * well. + */ + SpinLockAcquire((slock_t *) &ptr->sema); + + /* perform compare/exchange logic */ + ret = ptr->value == *expected; + *expected = ptr->value; + if (ret) + ptr->value = newval; + + /* and release lock */ + SpinLockRelease((slock_t *) &ptr->sema); + + return ret; +} + +uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 oldval; + + SpinLockAcquire((slock_t *) &ptr->sema); + oldval = ptr->value; + ptr->value += add_; + SpinLockRelease((slock_t *) &ptr->sema); + return oldval; +} + +#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */ + +#undef PG_HAVE_ATOMIC_U64_SIMULATION +#ifdef PG_HAVE_ATOMIC_U64_SIMULATION + +void +pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_) +{ + StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t), + "size mismatch of atomic_uint64 vs slock_t"); + + /* + * If we're using semaphore based atomic flags, be careful about nested + * usage of atomics while a spinlock is held. + */ +#ifndef HAVE_SPINLOCKS + s_init_lock_sema((slock_t *) &ptr->sema, true); +#else + SpinLockInit((slock_t *) &ptr->sema); +#endif + ptr->value = val_; +} + +bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + + /* + * Do atomic op under a spinlock. 
It might look like we could just skip + * the cmpxchg if the lock isn't available, but that'd just emulate a + * 'weak' compare and swap. I.e. one that allows spurious failures. Since + * several algorithms rely on a strong variant and that is efficiently + * implementable on most major architectures let's emulate it here as + * well. + */ + SpinLockAcquire((slock_t *) &ptr->sema); + + /* perform compare/exchange logic */ + ret = ptr->value == *expected; + *expected = ptr->value; + if (ret) + ptr->value = newval; + + /* and release lock */ + SpinLockRelease((slock_t *) &ptr->sema); + + return ret; +} + +uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 oldval; + + SpinLockAcquire((slock_t *) &ptr->sema); + oldval = ptr->value; + ptr->value += add_; + SpinLockRelease((slock_t *) &ptr->sema); + return oldval; +} + +#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */ diff --git a/src/bin/pg_probackup/atomics.h b/src/bin/pg_probackup/atomics.h new file mode 100644 index 000000000..c73e507ed --- /dev/null +++ b/src/bin/pg_probackup/atomics.h @@ -0,0 +1,528 @@ +/*------------------------------------------------------------------------- + * + * atomics.h + * Atomic operations. + * + * Hardware and compiler dependent functions for manipulating memory + * atomically and dealing with cache coherency. Used to implement locking + * facilities and lockless algorithms/data structures. + * + * To bring up postgres on a platform/compiler at the very least + * implementations for the following operations should be provided: + * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() + * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() + * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag() + * * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY should be defined if appropriate. + * + * There exist generic, hardware independent, implementations for several + * compilers which might be sufficient, although possibly not optimal, for a + * new platform. If no such generic implementation is available spinlocks (or + * even OS provided semaphores) will be used to implement the API. + * + * Implement _u64 atomics if and only if your platform can use them + * efficiently (and obviously correctly). + * + * Use higher level functionality (lwlocks, spinlocks, heavyweight locks) + * whenever possible. Writing correct code using these facilities is hard. + * + * For an introduction to using memory barriers within the PostgreSQL backend, + * see src/backend/storage/lmgr/README.barrier + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics.h + * + *------------------------------------------------------------------------- + */ +#ifndef ATOMICS_H +#define ATOMICS_H + +#ifdef FRONTEND +#error "atomics.h may not be included from frontend code" +#endif + +#define INSIDE_ATOMICS_H + +#include + +/* + * First a set of architecture specific files is included. + * + * These files can provide the full set of atomics or can do pretty much + * nothing if all the compilers commonly used on these platforms provide + * usable generics. + * + * Don't add an inline assembly of the actual atomic operations if all the + * common implementations of your platform provide intrinsics. Intrinsics are + * much easier to understand and potentially support more architectures. 
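The spinlock-backed fallbacks above stress that pg_atomic_compare_exchange_u32/u64 must behave as a *strong* compare-and-swap, one that never fails spuriously, because callers rely on that. A minimal standalone sketch of the weak-vs-strong distinction, using C++11 std::atomic rather than the patch's own API (the function names and the main() driver are illustrative only):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

/* Strong CAS: a single attempt is meaningful; it only fails when the value
 * really differs from 'expected'. This is the guarantee the fallback above
 * emulates by doing the whole operation under the spinlock. */
static bool claim_slot_strong(std::atomic<uint32_t> &slot, uint32_t owner)
{
    uint32_t expected = 0;                 /* a free slot is encoded as 0 */
    return slot.compare_exchange_strong(expected, owner);
}

/* Weak CAS: may fail spuriously even when the value matches, so it is only
 * correct inside a retry loop, e.g. for an atomic increment. */
static uint32_t increment_weak(std::atomic<uint32_t> &counter)
{
    uint32_t old = counter.load();
    while (!counter.compare_exchange_weak(old, old + 1))
        ;                                  /* 'old' is refreshed on each failure */
    return old;
}

int main()
{
    std::atomic<uint32_t> slot{0}, counter{0};
    printf("claimed: %d\n", claim_slot_strong(slot, 42) ? 1 : 0);
    printf("old counter: %u\n", increment_weak(counter));
    return 0;
}
```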
+ * + * It will often make sense to define memory barrier semantics here, since + * e.g. generic compiler intrinsics for x86 memory barriers can't know that + * postgres doesn't need x86 read/write barriers do anything more than a + * compiler barrier. + * + */ +#if defined(__arm__) || defined(__arm) || \ + defined(__aarch64__) || defined(__aarch64) +#include "atomics/arch-arm.h" +#elif defined(__i386__) || defined(__i386) || defined(__x86_64__) +#include "atomics/arch-x86.h" +#elif defined(__ia64__) || defined(__ia64) +#include "atomics/arch-ia64.h" +#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#include "atomics/arch-ppc.h" +#elif defined(__hppa) || defined(__hppa__) +#include "atomics/arch-hppa.h" +#endif + +/* + * Compiler specific, but architecture independent implementations. + * + * Provide architecture independent implementations of the atomic + * facilities. At the very least compiler barriers should be provided, but a + * full implementation of + * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() + * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() + * using compiler intrinsics are a good idea. + */ +/* + * Given a gcc-compatible xlc compiler, prefer the xlc implementation. The + * ppc64le "IBM XL C/C++ for Linux, V13.1.2" implements both interfaces, but + * __sync_lock_test_and_set() of one-byte types elicits SIGSEGV. + */ +#if defined(__IBMC__) || defined(__IBMCPP__) +#include "atomics/generic-xlc.h" +/* gcc or compatible, including clang and icc */ +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) +#include "atomics/generic-gcc.h" +#elif defined(_MSC_VER) +#include "atomics/generic-msvc.h" +#elif defined(__hpux) && defined(__ia64) && !defined(__GNUC__) +#include "atomics/generic-acc.h" +#elif defined(__SUNPRO_C) && !defined(__GNUC__) +#include "atomics/generic-sunpro.h" +#else +/* + * Unsupported compiler, we'll likely use slower fallbacks... At least + * compiler barriers should really be provided. + */ +#endif + +/* + * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and + * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics + * support. In the case of spinlock backed atomics the emulation is expected + * to be efficient, although less so than native atomics support. + */ +#include "atomics/fallback.h" + +/* + * Provide additional operations using supported infrastructure. These are + * expected to be efficient if the underlying atomic operations are efficient. + */ +#include "atomics/generic.h" + + +/* + * pg_compiler_barrier - prevent the compiler from moving code across + * + * A compiler barrier need not (and preferably should not) emit any actual + * machine code, but must act as an optimization fence: the compiler must not + * reorder loads or stores to main memory around the barrier. However, the + * CPU may still reorder loads or stores at runtime, if the architecture's + * memory model permits this. + */ +//#define pg_compiler_barrier() pg_compiler_barrier_impl() + +/* + * pg_memory_barrier - prevent the CPU from reordering memory access + * + * A memory barrier must act as a compiler barrier, and in addition must + * guarantee that all loads and stores issued prior to the barrier are + * completed before any loads or stores issued after the barrier. Unless + * loads and stores are totally ordered (which is not the case on most + * architectures) this requires issuing some sort of memory fencing + * instruction. 
+ */ +//#define pg_memory_barrier() pg_memory_barrier_impl() + +/* + * pg_(read|write)_barrier - prevent the CPU from reordering memory access + * + * A read barrier must act as a compiler barrier, and in addition must + * guarantee that any loads issued prior to the barrier are completed before + * any loads issued after the barrier. Similarly, a write barrier acts + * as a compiler barrier, and also orders stores. Read and write barriers + * are thus weaker than a full memory barrier, but stronger than a compiler + * barrier. In practice, on machines with strong memory ordering, read and + * write barriers may require nothing more than a compiler barrier. + */ +//#define pg_read_barrier() pg_read_barrier_impl() +//#define pg_write_barrier() pg_write_barrier_impl() + +/* + * Spinloop delay - Allow CPU to relax in busy loops + */ +#define pg_spin_delay() pg_spin_delay_impl() + +/* + * pg_atomic_init_flag - initialize atomic flag. + * + * No barrier semantics. + */ +static inline void +pg_atomic_init_flag(volatile pg_atomic_flag *ptr) +{ + pg_atomic_init_flag_impl(ptr); +} + +/* + * pg_atomic_test_and_set_flag - TAS() + * + * Returns true if the flag has successfully been set, false otherwise. + * + * Acquire (including read barrier) semantics. + */ +static inline bool +pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_test_set_flag_impl(ptr); +} + +/* + * pg_atomic_unlocked_test_flag - Check if the lock is free + * + * Returns true if the flag currently is not set, false otherwise. + * + * No barrier semantics. + */ +static inline bool +pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_unlocked_test_flag_impl(ptr); +} + +/* + * pg_atomic_clear_flag - release lock set by TAS() + * + * Release (including write barrier) semantics. + */ +static inline void +pg_atomic_clear_flag(volatile pg_atomic_flag *ptr) +{ + pg_atomic_clear_flag_impl(ptr); +} + +#ifdef USE_ATOMIC_32 +/* + * pg_atomic_init_u32 - initialize atomic variable + * + * Has to be done before any concurrent usage.. + * + * No barrier semantics. + */ +static inline void +pg_atomic_init_u32(volatile pg_atomic_uint32_local *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_init_u32_impl(ptr, val); +} + +/* + * pg_atomic_read_u32 - unlocked read from atomic variable. + * + * The read is guaranteed to return a value as it has been written by this or + * another process at some point in the past. There's however no cache + * coherency interaction guaranteeing the value hasn't since been written to + * again. + * + * No barrier semantics. + */ +static inline uint32 +pg_atomic_read_u32(volatile pg_atomic_uint32_local *ptr) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_read_u32_impl(ptr); +} + +/* + * pg_atomic_write_u32 - write to atomic variable. + * + * The write is guaranteed to succeed as a whole, i.e. it's not possible to + * observe a partial write for any reader. Note that this correctly interacts + * with pg_atomic_compare_exchange_u32, in contrast to + * pg_atomic_unlocked_write_u32(). + * + * No barrier semantics. + */ +static inline void +pg_atomic_write_u32(volatile pg_atomic_uint32_local *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_write_u32_impl(ptr, val); +} + +/* + * pg_atomic_unlocked_write_u32 - unlocked write to atomic variable. + * + * The write is guaranteed to succeed as a whole, i.e. it's not possible to + * observe a partial write for any reader. 
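The pg_write_barrier()/pg_read_barrier() contract described above (a write barrier orders earlier stores before later stores, a read barrier orders earlier loads before later loads) corresponds to a release/acquire fence pair. A standalone sketch using C++11 fences instead of the pg_* macros; payload, ready, producer and consumer are invented names for illustration:

```cpp
#include <atomic>
#include <thread>
#include <cstdio>

static int payload = 0;                 /* ordinary, non-atomic data */
static std::atomic<bool> ready{false};

static void producer()
{
    payload = 42;                                        /* store the data   */
    std::atomic_thread_fence(std::memory_order_release); /* "write barrier"  */
    ready.store(true, std::memory_order_relaxed);        /* publish the flag */
}

static void consumer()
{
    while (!ready.load(std::memory_order_relaxed))
        ;                                                /* wait for the flag */
    std::atomic_thread_fence(std::memory_order_acquire); /* "read barrier"    */
    printf("%d\n", payload);                             /* guaranteed to be 42 */
}

int main()
{
    std::thread c(consumer), p(producer);
    p.join();
    c.join();
    return 0;
}
```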
But note that writing this way is + * not guaranteed to correctly interact with read-modify-write operations like + * pg_atomic_compare_exchange_u32. This should only be used in cases where + * minor performance regressions due to atomics emulation are unacceptable. + * + * No barrier semantics. + */ +static inline void +pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32_local *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_unlocked_write_u32_impl(ptr, val); +} + +/* + * pg_atomic_exchange_u32 - exchange newval with current value + * + * Returns the old value of 'ptr' before the swap. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_exchange_u32(volatile pg_atomic_uint32_local *ptr, uint32 newval) +{ + AssertPointerAlignment(ptr, 4); + + return pg_atomic_exchange_u32_impl(ptr, newval); +} + +/* + * pg_atomic_compare_exchange_u32 - CAS operation + * + * Atomically compare the current value of ptr with *expected and store newval + * iff ptr and *expected have the same value. The current value of *ptr will + * always be stored in *expected. + * + * Return true if values have been exchanged, false otherwise. + * + * Full barrier semantics. + */ +static inline bool +pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32_local *ptr, + uint32 *expected, uint32 newval) +{ + AssertPointerAlignment(ptr, 4); + AssertPointerAlignment(expected, 4); + + return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval); +} + +/* + * pg_atomic_fetch_add_u32 - atomically add to variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_add_u32(volatile pg_atomic_uint32_local *ptr, int32 add_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_add_u32_impl(ptr, add_); +} + +/* + * pg_atomic_fetch_sub_u32 - atomically subtract from variable + * + * Returns the value of ptr before the arithmetic operation. Note that sub_ + * may not be INT_MIN due to platform limitations. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32_local *ptr, int32 sub_) +{ + AssertPointerAlignment(ptr, 4); + Assert(sub_ != INT_MIN); + return pg_atomic_fetch_sub_u32_impl(ptr, sub_); +} + +/* + * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_and_u32(volatile pg_atomic_uint32_local *ptr, uint32 and_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_and_u32_impl(ptr, and_); +} + +/* + * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_or_u32(volatile pg_atomic_uint32_local *ptr, uint32 or_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_or_u32_impl(ptr, or_); +} + +/* + * pg_atomic_add_fetch_u32 - atomically add to variable + * + * Returns the value of ptr after the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_add_fetch_u32(volatile pg_atomic_uint32_local *ptr, int32 add_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_add_fetch_u32_impl(ptr, add_); +} + +/* + * pg_atomic_sub_fetch_u32 - atomically subtract from variable + * + * Returns the value of ptr after the arithmetic operation. 
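Because pg_atomic_compare_exchange_u32() always stores the current value back into *expected, a retry loop never needs a separate re-read. A standalone sketch of that pattern, here an atomic "fetch max", using std::atomic, which has the same expected-updating semantics (fetch_max and high_water are illustrative names):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

/* Atomically raise 'target' to at least 'candidate', returning the old value.
 * On a failed CAS, 'old' is refreshed with the current value, exactly as
 * pg_atomic_compare_exchange_u32 stores the current value in *expected. */
static uint32_t fetch_max(std::atomic<uint32_t> &target, uint32_t candidate)
{
    uint32_t old = target.load();
    while (old < candidate &&
           !target.compare_exchange_strong(old, candidate))
        ;   /* 'old' now holds the latest value; the loop re-checks it */
    return old;
}

int main()
{
    std::atomic<uint32_t> high_water{10};
    printf("before: %u\n", fetch_max(high_water, 42));  /* prints 10 */
    printf("after:  %u\n", high_water.load());          /* prints 42 */
    return 0;
}
```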
Note that sub_ may + * not be INT_MIN due to platform limitations. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32_local *ptr, int32 sub_) +{ + AssertPointerAlignment(ptr, 4); + Assert(sub_ != INT_MIN); + return pg_atomic_sub_fetch_u32_impl(ptr, sub_); +} + +/* ---- + * The 64 bit operations have the same semantics as their 32bit counterparts + * if they are available. Check the corresponding 32bit function for + * documentation. + * ---- + */ +static inline void +pg_atomic_init_u64(volatile pg_atomic_uint64_local *ptr, uint64 val) +{ + /* + * Can't necessarily enforce alignment - and don't need it - when using + * the spinlock based fallback implementation. Therefore only assert when + * not using it. + */ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + pg_atomic_init_u64_impl(ptr, val); +} + +static inline uint64 +pg_atomic_read_u64(volatile pg_atomic_uint64_local *ptr) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_read_u64_impl(ptr); +} + +static inline void +pg_atomic_write_u64(volatile pg_atomic_uint64_local *ptr, uint64 val) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + pg_atomic_write_u64_impl(ptr, val); +} + +static inline uint64 +pg_atomic_exchange_u64(volatile pg_atomic_uint64_local *ptr, uint64 newval) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_exchange_u64_impl(ptr, newval); +} + +static inline bool +pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64_local *ptr, + uint64 *expected, uint64 newval) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); + AssertPointerAlignment(expected, 8); +#endif + return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval); +} + +static inline uint64 +pg_atomic_fetch_add_u64(volatile pg_atomic_uint64_local *ptr, int64 add_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_add_u64_impl(ptr, add_); +} + +static inline uint64 +pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64_local *ptr, int64 sub_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + Assert(sub_ != PG_INT64_MIN); + return pg_atomic_fetch_sub_u64_impl(ptr, sub_); +} + +static inline uint64 +pg_atomic_fetch_and_u64(volatile pg_atomic_uint64_local *ptr, uint64 and_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_and_u64_impl(ptr, and_); +} + +static inline uint64 +pg_atomic_fetch_or_u64(volatile pg_atomic_uint64_local *ptr, uint64 or_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_or_u64_impl(ptr, or_); +} + +static inline uint64 +pg_atomic_add_fetch_u64(volatile pg_atomic_uint64_local *ptr, int64 add_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_add_fetch_u64_impl(ptr, add_); +} + +static inline uint64 +pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64_local *ptr, int64 sub_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + Assert(sub_ != PG_INT64_MIN); + return pg_atomic_sub_fetch_u64_impl(ptr, sub_); +} +#endif +#undef INSIDE_ATOMICS_H + +#endif /* ATOMICS_H */ diff --git a/src/bin/pg_probackup/atomics/arch-arm.h b/src/bin/pg_probackup/atomics/arch-arm.h new file mode 100644 index 
000000000..ca78eb699 --- /dev/null +++ b/src/bin/pg_probackup/atomics/arch-arm.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * arch-arm.h + * Atomic operations considerations specific to ARM + * + * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group + * + * NOTES: + * + * src/include/port/atomics/arch-arm.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * 64 bit atomics on ARM32 are implemented using kernel fallbacks and thus + * might be slow, so disable entirely. On ARM64 that problem doesn't exist. + */ +#if !defined(__aarch64__) && !defined(__aarch64) +#define PG_DISABLE_64_BIT_ATOMICS +#endif /* __aarch64__ || __aarch64 */ diff --git a/src/bin/pg_probackup/atomics/arch-hppa.h b/src/bin/pg_probackup/atomics/arch-hppa.h new file mode 100644 index 000000000..43c2e56bf --- /dev/null +++ b/src/bin/pg_probackup/atomics/arch-hppa.h @@ -0,0 +1,17 @@ +/*------------------------------------------------------------------------- + * + * arch-hppa.h + * Atomic operations considerations specific to HPPA + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-hppa.h + * + *------------------------------------------------------------------------- + */ + +/* HPPA doesn't do either read or write reordering */ +#define pg_memory_barrier_impl() pg_compiler_barrier_impl() diff --git a/src/bin/pg_probackup/atomics/arch-ia64.h b/src/bin/pg_probackup/atomics/arch-ia64.h new file mode 100644 index 000000000..e1cf56f95 --- /dev/null +++ b/src/bin/pg_probackup/atomics/arch-ia64.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * arch-ia64.h + * Atomic operations considerations specific to intel itanium + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-ia64.h + * + *------------------------------------------------------------------------- + */ + +/* + * Itanium is weakly ordered, so read and write barriers require a full + * fence. 
+ */ +#if defined(__INTEL_COMPILER) +# define pg_memory_barrier_impl() __mf() +#elif defined(__GNUC__) +# define pg_memory_barrier_impl() __asm__ __volatile__ ("mf" : : : "memory") +#elif defined(__hpux) +# define pg_memory_barrier_impl() _Asm_mf() +#endif + +/* per architecture manual doubleword accesses have single copy atomicity */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY diff --git a/src/bin/pg_probackup/atomics/arch-ppc.h b/src/bin/pg_probackup/atomics/arch-ppc.h new file mode 100644 index 000000000..344b39449 --- /dev/null +++ b/src/bin/pg_probackup/atomics/arch-ppc.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * arch-ppc.h + * Atomic operations considerations specific to PowerPC + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-ppc.h + * + *------------------------------------------------------------------------- + */ + +#if defined(__GNUC__) + +/* + * lwsync orders loads with respect to each other, and similarly with stores. + * But a load can be performed before a subsequent store, so sync must be used + * for a full memory barrier. + */ +#define pg_memory_barrier_impl() __asm__ __volatile__ ("sync" : : : "memory") +#define pg_read_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#define pg_write_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#endif + +/* per architecture manual doubleword accesses have single copy atomicity */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY diff --git a/src/bin/pg_probackup/atomics/arch-x86.h b/src/bin/pg_probackup/atomics/arch-x86.h new file mode 100644 index 000000000..79f79a160 --- /dev/null +++ b/src/bin/pg_probackup/atomics/arch-x86.h @@ -0,0 +1,252 @@ +/*------------------------------------------------------------------------- + * + * arch-x86.h + * Atomic operations considerations specific to intel x86 + * + * Note that we actually require a 486 upwards because the 386 doesn't have + * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere + * anymore that's not much of a restriction luckily. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-x86.h + * + *------------------------------------------------------------------------- + */ + +/* + * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads, + * or stores to be reordered with other stores, but a load can be performed + * before a subsequent store. + * + * Technically, some x86-ish chips support uncached memory access and/or + * special instructions that are weakly ordered. In those cases we'd need + * the read and write barriers to be lfence and sfence. But since we don't + * do those things, a compiler barrier should be enough. + * + * "lock; addl" has worked for longer than "mfence". It's also rumored to be + * faster in many scenarios. 
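The point of the x86 note above is that even on this strongly ordered architecture a store can be delayed past a later load of a different location, which is why a full barrier ("lock; addl" here, mfence elsewhere) is still needed between them. A standalone sketch of the classic Dekker-style case, using C++11 sequentially consistent fences in place of pg_memory_barrier(); the thread functions and main() driver are illustrative only:

```cpp
#include <atomic>
#include <thread>
#include <cstdio>

static std::atomic<int> x{0}, y{0};
static int r1, r2;

static void t1()
{
    x.store(1, std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst); /* full barrier */
    r1 = y.load(std::memory_order_relaxed);
}

static void t2()
{
    y.store(1, std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst); /* full barrier */
    r2 = x.load(std::memory_order_relaxed);
}

int main()
{
    std::thread a(t1), b(t2);
    a.join();
    b.join();
    /* With the fences, r1 == 0 && r2 == 0 cannot happen; without them the
     * stores can sit in the store buffers past the loads and both can be 0. */
    printf("r1=%d r2=%d\n", r1, r2);
    return 0;
}
```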
+ */ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#if defined(__i386__) || defined(__i386) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc") +#elif defined(__x86_64__) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc") +#endif +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#define pg_read_barrier_impl() pg_compiler_barrier_impl() +#define pg_write_barrier_impl() pg_compiler_barrier_impl() + +/* + * Provide implementation for atomics using inline assembly on x86 gcc. It's + * nice to support older gcc's and the compare/exchange implementation here is + * actually more efficient than the * __sync variant. + */ +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + volatile char value; +} pg_atomic_flag; + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +/* + * It's too complicated to write inline asm for 64bit types on 32bit and the + * 486 can't do it anyway. + */ +#ifdef __x86_64__ +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* alignment guaranteed due to being on a 64bit platform */ + volatile uint64 value; +} pg_atomic_uint64; +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#endif /* defined(HAVE_ATOMICS) */ + +#if !defined(PG_HAVE_SPIN_DELAY) +/* + * This sequence is equivalent to the PAUSE instruction ("rep" is + * ignored by old IA32 processors if the following instruction is + * not a string operation); the IA-32 Architecture Software + * Developer's Manual, Vol. 3, Section 7.7.2 describes why using + * PAUSE in the inner loop of a spin lock is necessary for good + * performance: + * + * The PAUSE instruction improves the performance of IA-32 + * processors supporting Hyper-Threading Technology when + * executing spin-wait loops and other routines where one + * thread is accessing a shared lock or semaphore in a tight + * polling loop. When executing a spin-wait loop, the + * processor can suffer a severe performance penalty when + * exiting the loop because it detects a possible memory order + * violation and flushes the core processor's pipeline. The + * PAUSE instruction provides a hint to the processor that the + * code sequence is a spin-wait loop. The processor uses this + * hint to avoid the memory order violation and prevent the + * pipeline flush. In addition, the PAUSE instruction + * de-pipelines the spin-wait loop to prevent it from + * consuming execution resources excessively. + */ +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define PG_HAVE_SPIN_DELAY +static __inline__ void +pg_spin_delay_impl(void) +{ + __asm__ __volatile__(" rep; nop \n"); +} +#elif defined(_MSC_VER) && defined(__x86_64__) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + _mm_pause(); +} +#elif defined(_MSC_VER) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + /* See comment for gcc code. 
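To show where the PAUSE hint described above actually goes, here is a standalone test-and-test-and-set spinlock sketch: the inner wait loop only reads and relaxes the CPU with "rep; nop", the same encoding the code above emits. This assumes GCC/Clang on x86; the SpinLock class and the counter demo are illustrative only, not part of the patch:

```cpp
#include <atomic>
#include <thread>
#include <cstdio>

class SpinLock {
    std::atomic<bool> locked{false};

public:
    void lock()
    {
        for (;;) {
            /* attempt TAS(); acquire semantics on success */
            if (!locked.exchange(true, std::memory_order_acquire))
                return;
            /* lock is busy: spin on plain reads, hinting the CPU with PAUSE */
            while (locked.load(std::memory_order_relaxed))
                __asm__ __volatile__("rep; nop");
        }
    }
    void unlock() { locked.store(false, std::memory_order_release); }
};

int main()
{
    SpinLock lock;
    long counter = 0;
    auto work = [&] {
        for (int i = 0; i < 100000; i++) {
            lock.lock();
            counter++;
            lock.unlock();
        }
    };
    std::thread a(work), b(work);
    a.join();
    b.join();
    printf("%ld\n", counter);   /* always 200000 */
    return 0;
}
```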
Same code, MASM syntax */ + __asm rep nop; +} +#endif +#endif /* !defined(PG_HAVE_SPIN_DELAY) */ + + +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + register char _res = 1; + + __asm__ __volatile__( + " lock \n" + " xchgb %0,%1 \n" +: "+q"(_res), "+m"(ptr->value) +: +: "memory"); + return _res == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * On a TSO architecture like x86 it's sufficient to use a compiler + * barrier to achieve release semantics. + */ + __asm__ __volatile__("" ::: "memory"); + ptr->value = 0; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgl %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 res; + __asm__ __volatile__( + " lock \n" + " xaddl %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#ifdef __x86_64__ + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgq %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 res; + __asm__ __volatile__( + " lock \n" + " xaddq %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +/* + * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms + * since at least the 586. As well as on all x86-64 cpus. + */ +#if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ + (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ + defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY +#endif /* 8 byte single-copy atomicity */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/bin/pg_probackup/atomics/fallback.h b/src/bin/pg_probackup/atomics/fallback.h new file mode 100644 index 000000000..1240a23b8 --- /dev/null +++ b/src/bin/pg_probackup/atomics/fallback.h @@ -0,0 +1,169 @@ +/*------------------------------------------------------------------------- + * + * fallback.h + * Fallback for platforms without spinlock and/or atomics support. Slower + * than native atomics support, but not unusably slow. 
+ * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/fallback.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +#ifndef pg_memory_barrier_impl +/* + * If we have no memory barrier implementation for this architecture, we + * fall back to acquiring and releasing a spinlock. This might, in turn, + * fall back to the semaphore-based spinlock implementation, which will be + * amazingly slow. + * + * It's not self-evident that every possible legal implementation of a + * spinlock acquire-and-release would be equivalent to a full memory barrier. + * For example, I'm not sure that Itanium's acq and rel add up to a full + * fence. But all of our actual implementations seem OK in this regard. + */ +#define PG_HAVE_MEMORY_BARRIER_EMULATION + +extern void pg_spinlock_barrier(void); +#define pg_memory_barrier_impl pg_spinlock_barrier +#endif + +#ifndef pg_compiler_barrier_impl +/* + * If the compiler/arch combination does not provide compiler barriers, + * provide a fallback. The fallback simply consists of a function call into + * an externally defined function. That should guarantee compiler barrier + * semantics except for compilers that do inter translation unit/global + * optimization - those better provide an actual compiler barrier. + * + * A native compiler barrier for sure is a lot faster than this... + */ +#define PG_HAVE_COMPILER_BARRIER_EMULATION +extern void pg_extern_compiler_barrier(void); +#define pg_compiler_barrier_impl pg_extern_compiler_barrier +#endif + + +/* + * If we have atomics implementation for this platform, fall back to providing + * the atomics API using a spinlock to protect the internal state. Possibly + * the spinlock implementation uses semaphores internally... + * + * We have to be a bit careful here, as it's not guaranteed that atomic + * variables are mapped to the same address in every process (e.g. dynamic + * shared memory segments). We can't just hash the address and use that to map + * to a spinlock. Instead assign a spinlock on initialization of the atomic + * variable. + */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_FLAG_SIMULATION +#define PG_HAVE_ATOMIC_FLAG_SUPPORT + +typedef struct pg_atomic_flag +{ + /* + * To avoid circular includes we can't use s_lock as a type here. Instead + * just reserve enough space for all spinlock types. Some platforms would + * be content with just one byte instead of 4, but that's not too much + * waste. 
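The key design point above is that every emulated atomic embeds its own lock ("sema"), because an address-keyed lock table would break when the same variable is mapped at different addresses in different processes. A simplified standalone analogue of that layout, with std::atomic_flag standing in for slock_t; the type and function names are illustrative only:

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

/* Each emulated atomic carries its own lock, mirroring the 'sema' field above. */
struct emulated_atomic_u32 {
    std::atomic_flag lock;      /* stands in for slock_t / the semaphore */
    volatile uint32_t value;
};

static void emulated_init(emulated_atomic_u32 *ptr, uint32_t val)
{
    ptr->lock.clear();          /* SpinLockInit() */
    ptr->value = val;
}

static uint32_t emulated_fetch_add(emulated_atomic_u32 *ptr, int32_t add)
{
    while (ptr->lock.test_and_set(std::memory_order_acquire))
        ;                       /* SpinLockAcquire() */
    uint32_t old = ptr->value;
    ptr->value = old + add;
    ptr->lock.clear(std::memory_order_release); /* SpinLockRelease() */
    return old;
}

int main()
{
    emulated_atomic_u32 v;
    emulated_init(&v, 7);
    printf("%u %u\n", emulated_fetch_add(&v, 3), (unsigned) v.value); /* 7 10 */
    return 0;
}
```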
+ */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ + int sema[4]; +#else + int sema; +#endif + volatile bool value; +} pg_atomic_flag; + +#endif /* PG_HAVE_ATOMIC_FLAG_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32_local +{ +#if defined(__hppa) || defined(__hppa__) + int sema[4]; +#else + int sema; +#endif + volatile uint32 value; +} pg_atomic_uint32_local; + +#endif /* PG_HAVE_ATOMIC_U32_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) + +#define PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64_local +{ + /* Check pg_atomic_flag's definition above for an explanation */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ + int sema[4]; +#else + int sema; +#endif + volatile uint64 value; +} pg_atomic_uint64_local; + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_FLAG +extern void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +extern bool pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +extern void pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +extern bool pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr); + +#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */ + +#ifdef PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U32 +extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 val_); + +#define PG_HAVE_ATOMIC_WRITE_U32 +extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 val); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32_local *ptr, + uint32 *expected, uint32 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32_local *ptr, int32 add_); + +#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */ + + +#ifdef PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U64 +extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64_local *ptr, uint64 val_); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64_local *ptr, + uint64 *expected, uint64 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64_local *ptr, int64 add_); + +#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */ diff --git a/src/bin/pg_probackup/atomics/generic-acc.h b/src/bin/pg_probackup/atomics/generic-acc.h new file mode 100644 index 000000000..eec5063cb --- /dev/null +++ b/src/bin/pg_probackup/atomics/generic-acc.h @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------- + * + * generic-acc.h + * Atomic operations support when using HPs acc on HPUX + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * inline assembly for Itanium-based HP-UX: + * http://h21007.www2.hp.com/portal/download/files/unprot/Itanium/inline_assem_ERS.pdf + * * Implementing Spinlocks on the Intel (R) Itanium (R) Architecture and PA-RISC + * 
http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf + * + * Itanium only supports a small set of numbers (6, -8, -4, -1, 1, 4, 8, 16) + * for atomic add/sub, so we just implement everything but compare_exchange + * via the compare_exchange fallbacks in atomics/generic.h. + * + * src/include/port/atomics/generic-acc.h + * + * ------------------------------------------------------------------------- + */ + +#include + +#define pg_compiler_barrier_impl() _Asm_sched_fence() + +#if defined(HAVE_ATOMICS) + +/* IA64 always has 32/64 bit atomics */ + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* + * Alignment is guaranteed to be 64bit. Search for "Well-behaved + * application restrictions" => "Data alignment and data sharing" on HP's + * website. Unfortunately the URL doesn't seem to stable enough to + * include. + */ + volatile uint64 value; +} pg_atomic_uint64; + + +#define MINOR_FENCE (_Asm_fence) (_UP_CALL_FENCE | _UP_SYS_FENCE | \ + _DOWN_CALL_FENCE | _DOWN_SYS_FENCE ) + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + + _Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE); + /* + * We want a barrier, not just release/acquire semantics. + */ + _Asm_mf(); + /* + * Notes: + * DOWN_MEM_FENCE | _UP_MEM_FENCE prevents reordering by the compiler + */ + current = _Asm_cmpxchg(_SZ_W, /* word */ + _SEM_REL, + &ptr->value, + newval, _LDHINT_NONE, + _DOWN_MEM_FENCE | _UP_MEM_FENCE); + ret = current == *expected; + *expected = current; + return ret; +} + + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + + _Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE); + _Asm_mf(); + current = _Asm_cmpxchg(_SZ_D, /* doubleword */ + _SEM_REL, + &ptr->value, + newval, _LDHINT_NONE, + _DOWN_MEM_FENCE | _UP_MEM_FENCE); + ret = current == *expected; + *expected = current; + return ret; +} + +#undef MINOR_FENCE + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/bin/pg_probackup/atomics/generic-gcc.h b/src/bin/pg_probackup/atomics/generic-gcc.h new file mode 100644 index 000000000..ba3a5f5f6 --- /dev/null +++ b/src/bin/pg_probackup/atomics/generic-gcc.h @@ -0,0 +1,286 @@ +/*------------------------------------------------------------------------- + * + * generic-gcc.h + * Atomic operations, implemented using gcc (or compatible) intrinsics. 
+ * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Legacy __sync Built-in Functions for Atomic Memory Access + * http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html + * * Built-in functions for memory model aware atomic operations + * http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html + * + * src/include/port/atomics/generic-gcc.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * An empty asm block should be a sufficient compiler barrier. + */ +#define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory") + +/* + * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier + * out of this compiler built-in. But we prefer to rely on platform specific + * definitions where possible, and use this only as a fallback. + */ +#if !defined(pg_memory_barrier_impl) +# if defined(HAVE_GCC__ATOMIC_INT32_CAS) +# define pg_memory_barrier_impl() __atomic_thread_fence(__ATOMIC_SEQ_CST) +# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +# define pg_memory_barrier_impl() __sync_synchronize() +# endif +#endif /* !defined(pg_memory_barrier_impl) */ + +#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* acquire semantics include read barrier semantics */ +# define pg_read_barrier_impl() __atomic_thread_fence(__ATOMIC_ACQUIRE) +#endif + +#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* release semantics include write barrier semantics */ +# define pg_write_barrier_impl() __atomic_thread_fence(__ATOMIC_RELEASE) +#endif + + +#ifdef HAVE_ATOMICS + +/* generic gcc based atomic flag implementation */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \ + && (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS)) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + /* + * If we have a choice, use int-width TAS, because that is more efficient + * and/or more reliably implemented on most non-Intel platforms. (Note + * that this code isn't used on x86[_64]; see arch-x86.h for that.) 
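generic-gcc.h chooses between the legacy __sync builtins and the newer __atomic builtins based on configure checks such as HAVE_GCC__SYNC_INT32_CAS and HAVE_GCC__ATOMIC_INT32_CAS. A standalone sketch of what each intrinsic family looks like at the call site, assuming a GCC- or Clang-compatible compiler; the variable names and printed values are illustrative only:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t v = 5;

    /* Legacy __sync builtins (HAVE_GCC__SYNC_INT32_CAS family): */
    uint32_t before = __sync_fetch_and_add(&v, 3);            /* v becomes 8  */
    uint32_t seen   = __sync_val_compare_and_swap(&v, 8, 10); /* v becomes 10 */

    /* Newer __atomic builtins (HAVE_GCC__ATOMIC_INT32_CAS family): */
    uint32_t expected = 10;
    bool swapped = __atomic_compare_exchange_n(&v, &expected, 11, false,
                                               __ATOMIC_SEQ_CST,
                                               __ATOMIC_SEQ_CST);
    __atomic_thread_fence(__ATOMIC_SEQ_CST);                  /* full barrier */

    printf("%u %u %d %u\n", before, seen, (int) swapped, v);  /* 5 8 1 11 */
    return 0;
}
```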
+ */ +#ifdef HAVE_GCC__SYNC_INT32_TAS + volatile int value; +#else + volatile char value; +#endif +} pg_atomic_flag; + +#endif /* !ATOMIC_FLAG_SUPPORT && SYNC_INT32_TAS */ + +/* generic gcc based atomic uint32 implementation */ +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) \ + && (defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS)) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#endif /* defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS) */ + +/* generic gcc based atomic uint64 implementation */ +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) \ + && !defined(PG_DISABLE_64_BIT_ATOMICS) \ + && (defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)) + +#define PG_HAVE_ATOMIC_U64_SUPPORT + +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SUPPORT + +#if defined(HAVE_GCC__SYNC_CHAR_TAS) || defined(HAVE_GCC__SYNC_INT32_TAS) + +#ifndef PG_HAVE_ATOMIC_TEST_SET_FLAG +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* NB: only an acquire barrier, not a full one */ + /* some platform only support a 1 here */ + return __sync_lock_test_and_set(&ptr->value, 1) == 0; +} +#endif + +#endif /* defined(HAVE_GCC__SYNC_*_TAS) */ + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return ptr->value == 0; +} +#endif + +#ifndef PG_HAVE_ATOMIC_CLEAR_FLAG +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + __sync_lock_release(&ptr->value); +} +#endif + +#ifndef PG_HAVE_ATOMIC_INIT_FLAG +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_clear_flag_impl(ptr); +} +#endif + +#endif /* defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) */ + +/* prefer __atomic, it has a better API */ +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + /* FIXME: we can probably use a lower consistency model */ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 32-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define 
PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + + +#if !defined(PG_DISABLE_64_BIT_ATOMICS) + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__ATOMIC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 64-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + +#endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/bin/pg_probackup/atomics/generic-msvc.h b/src/bin/pg_probackup/atomics/generic-msvc.h new file mode 100644 index 000000000..b53f0eec4 --- /dev/null +++ b/src/bin/pg_probackup/atomics/generic-msvc.h @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * generic-msvc.h + * Atomic operations support when using MSVC + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Interlocked Variable Access + * 
http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + * + * src/include/port/atomics/generic-msvc.h + * + *------------------------------------------------------------------------- + */ +#include + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +#pragma intrinsic(_ReadWriteBarrier) +#define pg_compiler_barrier_impl() _ReadWriteBarrier() + +#ifndef pg_memory_barrier_impl +#define pg_memory_barrier_impl() MemoryBarrier() +#endif + +#if defined(HAVE_ATOMICS) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct __declspec(align(8)) pg_atomic_uint64 +{ + volatile uint64 value; +} pg_atomic_uint64; + + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = InterlockedCompareExchange(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return InterlockedExchangeAdd(&ptr->value, add_); +} + +/* + * The non-intrinsics versions are only available in vista upwards, so use the + * intrinsic version. Only supported on >486, but we require XP as a minimum + * baseline, which doesn't support the 486, so we don't need to add checks for + * that case. + */ +#pragma intrinsic(_InterlockedCompareExchange64) + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = _InterlockedCompareExchange64(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +/* Only implemented on itanium and 64bit builds */ +#ifdef _WIN64 +#pragma intrinsic(_InterlockedExchangeAdd64) + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return _InterlockedExchangeAdd64(&ptr->value, add_); +} +#endif /* _WIN64 */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/bin/pg_probackup/atomics/generic-sunpro.h b/src/bin/pg_probackup/atomics/generic-sunpro.h new file mode 100644 index 000000000..4b03c66ad --- /dev/null +++ b/src/bin/pg_probackup/atomics/generic-sunpro.h @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------- + * + * generic-sunpro.h + * Atomic operations for solaris' CC + * + * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group + * + * NOTES: + * + * Documentation: + * * manpage for atomic_cas(3C) + * http://www.unix.com/man-page/opensolaris/3c/atomic_cas/ + * http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html + * + * src/include/port/atomics/generic-sunpro.h + * + * ------------------------------------------------------------------------- + */ + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_MBARRIER_H +#include + +#define pg_compiler_barrier_impl() __compiler_barrier() + +#ifndef pg_memory_barrier_impl +/* + * Despite the name this is actually a full barrier. Expanding to mfence/ + * membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad on x86/sparc + * respectively. 
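Note that the fetch_add family (InterlockedExchangeAdd here, __sync_fetch_and_add in the gcc variant) returns the value held before the update and acts as a full barrier. A short usage sketch, not part of the patch, of the reference-counting idiom this enables; the refcounted struct and release() are invented, and the sketch assumes the surrounding atomics definitions plus <stdlib.h>:

#include <stdlib.h>

typedef struct refcounted
{
    pg_atomic_uint32 refcnt;
    /* ... payload ... */
} refcounted;

static void
release(refcounted *obj)
{
    /* fetch_sub returns the old value: if it was 1, we dropped the last reference */
    if (pg_atomic_fetch_sub_u32_impl(&obj->refcnt, 1) == 1)
        free(obj);
}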
+ */ +# define pg_memory_barrier_impl() __machine_rw_barrier() +#endif +#ifndef pg_read_barrier_impl +# define pg_read_barrier_impl() __machine_r_barrier() +#endif +#ifndef pg_write_barrier_impl +# define pg_write_barrier_impl() __machine_w_barrier() +#endif + +#endif /* HAVE_MBARRIER_H */ + +/* Older versions of the compiler don't have atomic.h... */ +#ifdef HAVE_ATOMIC_H + +#include + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* + * Syntax to enforce variable alignment should be supported by versions + * supporting atomic.h, but it's hard to find accurate documentation. If + * it proves to be a problem, we'll have to add more version checks for 64 + * bit support. + */ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ + + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_ATOMIC_H + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + + current = atomic_cas_32(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + + current = atomic_cas_64(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/bin/pg_probackup/atomics/generic-xlc.h b/src/bin/pg_probackup/atomics/generic-xlc.h new file mode 100644 index 000000000..8b5c73297 --- /dev/null +++ b/src/bin/pg_probackup/atomics/generic-xlc.h @@ -0,0 +1,142 @@ +/*------------------------------------------------------------------------- + * + * generic-xlc.h + * Atomic operations for IBM's CC + * + * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group + * + * NOTES: + * + * Documentation: + * * Synchronization and atomic built-in functions + * http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.2/com.ibm.xlcpp131.aix.doc/compiler_ref/bifs_sync_atomic.html + * + * src/include/port/atomics/generic-xlc.h + * + * ------------------------------------------------------------------------- + */ + +#if defined(HAVE_ATOMICS) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + + +/* 64bit atomics are only supported in 64bit mode */ +#ifdef __64BIT__ +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* __64BIT__ */ + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + + /* + * atomics.h specifies sequential consistency ("full barrier semantics") + * for this interface. Since "lwsync" provides acquire/release + * consistency only, do not use it here. GCC atomics observe the same + * restriction; see its rs6000_pre_atomic_barrier(). 
+ */ + __asm__ __volatile__ (" sync \n" ::: "memory"); + + /* + * XXX: __compare_and_swap is defined to take signed parameters, but that + * shouldn't matter since we don't perform any arithmetic operations. + */ + ret = __compare_and_swap((volatile int*)&ptr->value, + (int *)expected, (int)newval); + + /* + * xlc's documentation tells us: + * "If __compare_and_swap is used as a locking primitive, insert a call to + * the __isync built-in function at the start of any critical sections." + * + * The critical section begins immediately after __compare_and_swap(). + */ + __isync(); + + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 _t; + uint32 res; + + /* + * xlc has a no-longer-documented __fetch_and_add() intrinsic. In xlc + * 12.01.0000.0000, it emits a leading "sync" and trailing "isync". In + * xlc 13.01.0003.0004, it emits neither. Hence, using the intrinsic + * would add redundant syncs on xlc 12. + */ + __asm__ __volatile__( + " sync \n" + " lwarx %1,0,%4 \n" + " add %0,%1,%3 \n" + " stwcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to lwarx */ + " isync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#ifdef PG_HAVE_ATOMIC_U64_SUPPORT + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + + __asm__ __volatile__ (" sync \n" ::: "memory"); + + ret = __compare_and_swaplp((volatile long*)&ptr->value, + (long *)expected, (long)newval); + + __isync(); + + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 _t; + uint64 res; + + /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */ + __asm__ __volatile__( + " sync \n" + " ldarx %1,0,%4 \n" + " add %0,%1,%3 \n" + " stdcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to ldarx */ + " isync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/bin/pg_probackup/atomics/generic.h b/src/bin/pg_probackup/atomics/generic.h new file mode 100644 index 000000000..75aaf559c --- /dev/null +++ b/src/bin/pg_probackup/atomics/generic.h @@ -0,0 +1,401 @@ +/*------------------------------------------------------------------------- + * + * generic.h + * Implement higher level operations based on some lower level atomic + * operations. + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/generic.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +/* + * If read or write barriers are undefined, we upgrade them to full memory + * barriers. 
+ */ +#if !defined(pg_read_barrier_impl) +# define pg_read_barrier_impl pg_memory_barrier_impl +#endif +#if !defined(pg_write_barrier_impl) +# define pg_write_barrier_impl pg_memory_barrier_impl +#endif + +#ifndef PG_HAVE_SPIN_DELAY +#define PG_HAVE_SPIN_DELAY +#define pg_spin_delay_impl() ((void)0) +#endif + + +/* provide fallback */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && defined(PG_HAVE_ATOMIC_U32_SUPPORT) +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef pg_atomic_uint32_local pg_atomic_flag; +#endif + +#ifndef PG_HAVE_ATOMIC_READ_U32 +#define PG_HAVE_ATOMIC_READ_U32 +static inline uint32 +pg_atomic_read_u32_impl(volatile pg_atomic_uint32_local *ptr) +{ + return ptr->value; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U32 +#define PG_HAVE_ATOMIC_WRITE_U32 +static inline void +pg_atomic_write_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +static inline void +pg_atomic_unlocked_write_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +/* + * provide fallback for test_and_set using atomic_exchange if available + */ +#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_exchange_u32_impl(ptr, &value, 1) == 0; +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* XXX: release semantics suffice? */ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +} + +/* + * provide fallback for test_and_set using atomic_compare_exchange if + * available. + */ +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + uint32 value = 0; + return pg_atomic_compare_exchange_u32_impl(ptr, &value, 1); +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * Use a memory barrier + plain write if we have a native memory + * barrier. But don't do so if memory barriers use spinlocks - that'd lead + * to circularity if flags are used to implement spinlocks. + */ +#ifndef PG_HAVE_MEMORY_BARRIER_EMULATION + /* XXX: release semantics suffice? 
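The flag primitives defined above are exactly what is needed for a test-and-test-and-set spinlock, which is the use case the clear_flag comment alludes to. A minimal sketch, not part of the patch; the spinlock type and the spin_* functions are hypothetical:

typedef struct
{
    pg_atomic_flag locked;
} simple_spinlock;

static void
spin_init(simple_spinlock *lock)
{
    pg_atomic_init_flag_impl(&lock->locked);
}

static void
spin_lock(simple_spinlock *lock)
{
    /* test_set returns true only when we took the flag from clear to set */
    while (!pg_atomic_test_set_flag_impl(&lock->locked))
    {
        /* spin on the cheap unlocked test before retrying the atomic op */
        while (!pg_atomic_unlocked_test_flag_impl(&lock->locked))
            pg_spin_delay_impl();
    }
}

static void
spin_unlock(simple_spinlock *lock)
{
    pg_atomic_clear_flag_impl(&lock->locked);
}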
*/ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +#else + uint32 value = 1; + pg_atomic_compare_exchange_u32_impl(ptr, &value, 0); +#endif +} + +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) +# error "No pg_atomic_test_and_set provided" +#endif /* !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) */ + + +#ifndef PG_HAVE_ATOMIC_INIT_U32 +#define PG_HAVE_ATOMIC_INIT_U32 +static inline void +pg_atomic_init_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 val_) +{ + pg_atomic_write_u32_impl(ptr, val_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_EXCHANGE_U32 +static inline uint32 +pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 xchg_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32_local *ptr, int32 add_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32_local *ptr, int32 sub_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 and_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32_local *ptr, uint32 or_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) +#define PG_HAVE_ATOMIC_ADD_FETCH_U32 +static inline uint32 +pg_atomic_add_fetch_u32_impl(volatile pg_atomic_uint32_local *ptr, int32 add_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) +#define PG_HAVE_ATOMIC_SUB_FETCH_U32 +static inline uint32 +pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32_local *ptr, int32 sub_) +{ + return pg_atomic_fetch_sub_u32_impl(ptr, sub_) - sub_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_EXCHANGE_U64 +static inline uint64 +pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64_local *ptr, uint64 xchg_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U64 +#define PG_HAVE_ATOMIC_WRITE_U64 + +#if 
defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + !defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64_local *ptr, uint64 val) +{ + /* + * On this platform aligned 64bit writes are guaranteed to be atomic, + * except if using the fallback implementation, where can't guarantee the + * required alignment. + */ + AssertPointerAlignment(ptr, 8); + ptr->value = val; +} + +#else + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64_local *ptr, uint64 val) +{ + /* + * 64 bit writes aren't safe on all platforms. In the generic + * implementation implement them as an atomic exchange. + */ + pg_atomic_exchange_u64_impl(ptr, val); +} + +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_WRITE_U64 */ + +#ifndef PG_HAVE_ATOMIC_READ_U64 +#define PG_HAVE_ATOMIC_READ_U64 + +#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + !defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64_local *ptr) +{ + /* + * On this platform aligned 64-bit reads are guaranteed to be atomic. + */ + AssertPointerAlignment(ptr, 8); + return ptr->value; +} + +#else + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64_local *ptr) +{ + uint64 old = 0; + + /* + * 64-bit reads aren't atomic on all platforms. In the generic + * implementation implement them as a compare/exchange with 0. That'll + * fail or succeed, but always return the old value. Possibly might store + * a 0, but only if the previous value also was a 0 - i.e. harmless. + */ + pg_atomic_compare_exchange_u64_impl(ptr, &old, 0); + + return old; +} +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_READ_U64 */ + +#ifndef PG_HAVE_ATOMIC_INIT_U64 +#define PG_HAVE_ATOMIC_INIT_U64 +static inline void +pg_atomic_init_u64_impl(volatile pg_atomic_uint64_local *ptr, uint64 val_) +{ + pg_atomic_write_u64_impl(ptr, val_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64_local *ptr, int64 add_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64_local *ptr, int64 sub_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64_local *ptr, uint64 and_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64_local *ptr, uint64 or_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_)) + /* 
skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) +#define PG_HAVE_ATOMIC_ADD_FETCH_U64 +static inline uint64 +pg_atomic_add_fetch_u64_impl(volatile pg_atomic_uint64_local *ptr, int64 add_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) +#define PG_HAVE_ATOMIC_SUB_FETCH_U64 +static inline uint64 +pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64_local *ptr, int64 sub_) +{ + return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_; +} +#endif diff --git a/src/bin/pg_probackup/backup.cpp b/src/bin/pg_probackup/backup.cpp new file mode 100644 index 000000000..16c78a5a3 --- /dev/null +++ b/src/bin/pg_probackup/backup.cpp @@ -0,0 +1,2357 @@ +/*------------------------------------------------------------------------- + * + * backup.c: backup DB cluster, archived WAL + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#if PG_VERSION_NUM < 110000 +#include "catalog/catalog.h" +#endif +#include "catalog/pg_tablespace.h" +#include "pgtar.h" +#include "receivelog.h" +#include "streamutil.h" + +#include +#include +#include + +#include "thread.h" +#include "file.h" +#include "common/fe_memutils.h" + +static int standby_message_timeout_local = 10 * 1000; /* 10 sec = default */ +static XLogRecPtr stop_backup_lsn = InvalidXLogRecPtr; +static XLogRecPtr stop_stream_lsn = InvalidXLogRecPtr; + +/* + * How long we should wait for streaming end in seconds. + * Retrieved as checkpoint_timeout + checkpoint_timeout * 0.1 + */ +static uint32 stream_stop_timeout = 0; +/* Time in which we started to wait for streaming end */ +static time_t stream_stop_begin = 0; + +//const char *progname = "pg_probackup"; + +/* list of files contained in backup */ +static parray *backup_files_list = NULL; + +/* We need critical section for datapagemap_add() in case of using threads */ +static pthread_mutex_t backup_pagemap_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* + * We need to wait end of WAL streaming before execute pg_stop_backup(). + */ +typedef struct +{ + const char *basedir; + PGconn *conn; + + /* + * Return value from the thread. + * 0 means there is no error, 1 - there is an error. 
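The 'ret' convention described here (initialize to 1, the worker sets it to 0 only on success, the main thread inspects it after pthread_join) is used for every worker thread in this file. A condensed, self-contained sketch of that pattern, not part of the patch, with a made-up worker and a fixed thread count:

#include <pthread.h>
#include <stdbool.h>

#define NUM_WORKERS 4

typedef struct
{
    int ret;            /* 1 = error (the default), 0 = success */
} worker_arg;

static void *
worker(void *arg)
{
    worker_arg *wa = (worker_arg *) arg;

    /* ... do the real work here ... */

    wa->ret = 0;        /* reached only if everything above succeeded */
    return NULL;
}

static bool
run_workers(void)
{
    pthread_t  threads[NUM_WORKERS];
    worker_arg args[NUM_WORKERS];
    bool       ok = true;

    for (int i = 0; i < NUM_WORKERS; i++)
    {
        args[i].ret = 1;                        /* assume failure by default */
        pthread_create(&threads[i], NULL, worker, &args[i]);
    }

    for (int i = 0; i < NUM_WORKERS; i++)
    {
        pthread_join(threads[i], NULL);
        if (args[i].ret == 1)
            ok = false;
    }

    return ok;
}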
+ */ + int ret; + + XLogRecPtr startpos; + TimeLineID starttli; +} StreamThreadArg; + +static pthread_t stream_thread; +static StreamThreadArg stream_thread_arg = {"", NULL, 1}; + +bool exclusive_backup = false; + +/* Is pg_start_backup() was executed */ +static bool backup_in_progress = false; +/* Is pg_stop_backup() was sent */ +static bool pg_stop_backup_is_sent = false; + +/* + * Backup routines + */ +static void backup_cleanup(bool fatal, void *userdata); + +static void *backup_files(void *arg); + +static void do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool backup_logs); + +static void pg_start_backup(const char *label, bool smooth, pgBackup *backup, + PGNodeInfo *nodeInfo, PGconn *conn); +static void pg_switch_wal(PGconn *conn); +static void pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, PGNodeInfo *nodeInfo); +static int checkpoint_timeout(PGconn *backup_conn); + +static XLogRecPtr wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli, + bool in_prev_segment, bool segment_only, + int timeout_elevel, bool in_stream_dir); + +static void *StreamLog(void *arg); +static void IdentifySystem(StreamThreadArg *stream_thread_arg); + +static void check_external_for_tablespaces(parray *external_list, + PGconn *backup_conn); +static parray *get_database_map(PGconn *pg_startbackup_conn); + +/* pgpro specific functions */ +static bool pgpro_support(PGconn *conn); + +/* Check functions */ +static bool pg_checksum_enable(PGconn *conn); +static bool pg_is_in_recovery(PGconn *conn); +static bool pg_is_superuser(PGconn *conn); +static void confirm_block_size(PGconn *conn, const char *name, int blcksz); +static void set_cfs_datafiles(parray *files, const char *root, char *relative, size_t i); + +static void +backup_stopbackup_callback(bool fatal, void *userdata) +{ + PGconn *pg_startbackup_conn = (PGconn *) userdata; + /* + * If backup is in progress, notify stop of backup to PostgreSQL + */ + if (backup_in_progress) + { + elog(WARNING, "backup in progress, stop backup"); + pg_stop_backup(NULL, pg_startbackup_conn, NULL); /* don't care about stop_lsn in case of error */ + } +} + +/* + * Take a backup of a single postgresql instance. + * Move files from 'pgdata' to a subdirectory in 'backup_path'. + */ +static void +do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool backup_logs) +{ + int i; + char database_path[MAXPGPATH]; + char external_prefix[MAXPGPATH]; /* Temp value. 
Used as template */ + char dst_backup_path[MAXPGPATH]; + char label[1024]; + XLogRecPtr prev_backup_start_lsn = InvalidXLogRecPtr; + + /* arrays with meta info for multi threaded backup */ + pthread_t *threads; + backup_files_arg *threads_args; + bool backup_isok = true; + + pgBackup *prev_backup = NULL; + parray *prev_backup_filelist = NULL; + parray *backup_list = NULL; + parray *external_dirs = NULL; + parray *database_map = NULL; + + /* used for multitimeline incremental backup */ + parray *tli_list = NULL; + + /* for fancy reporting */ + time_t start_time, end_time; + char pretty_time[20]; + char pretty_bytes[20]; + + elog(LOG, "Database backup start"); + if(current.external_dir_str) + { + external_dirs = make_external_directory_list(current.external_dir_str, + false); + check_external_for_tablespaces(external_dirs, backup_conn); + } + + /* notify start of backup to PostgreSQL server */ + time2iso(label, lengthof(label), current.start_time); + strncat(label, " with pg_probackup", lengthof(label) - + strlen(" with pg_probackup")); + + /* Call pg_start_backup function in PostgreSQL connect */ + pg_start_backup(label, smooth_checkpoint, ¤t, nodeInfo, backup_conn); + + /* Obtain current timeline */ +#if PG_VERSION_NUM >= 90600 + current.tli = get_current_timeline(backup_conn); +#else + current.tli = get_current_timeline_from_control(false); +#endif + + /* + * In incremental backup mode ensure that already-validated + * backup on current timeline exists and get its filelist. + */ + if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) + { + /* get list of backups already taken */ + backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + prev_backup = catalog_get_last_data_backup(backup_list, current.tli, current.start_time); + if (prev_backup == NULL) + { + /* try to setup multi-timeline backup chain */ + elog(WARNING, "Valid backup on current timeline %u is not found, " + "trying to look up on previous timelines", + current.tli); + + /* TODO: use read_timeline_history */ + tli_list = catalog_get_timelines(&instance_config); + + if (parray_num(tli_list) == 0) + elog(WARNING, "Cannot find valid backup on previous timelines, " + "WAL archive is not available"); + else + { + prev_backup = get_multi_timeline_parent(backup_list, tli_list, current.tli, + current.start_time, &instance_config); + + if (prev_backup == NULL) + elog(WARNING, "Cannot find valid backup on previous timelines"); + } + + /* failed to find suitable parent, error out */ + if (!prev_backup) + elog(ERROR, "Create new full backup before an incremental one"); + } + } + + if (prev_backup) + { + if (parse_program_version(prev_backup->program_version) > parse_program_version(PROGRAM_VERSION)) + elog(ERROR, "pg_probackup binary version is %s, but backup %s version is %s. " + "pg_probackup do not guarantee to be forward compatible. " + "Please upgrade pg_probackup binary.", + PROGRAM_VERSION, base36enc(prev_backup->start_time), prev_backup->program_version); + + elog(INFO, "Parent backup: %s", base36enc(prev_backup->start_time)); + + /* Files of previous backup needed by DELTA backup */ + prev_backup_filelist = get_backup_filelist(prev_backup, true); + + /* If lsn is not NULL, only pages with higher lsn will be copied. 
*/ + prev_backup_start_lsn = prev_backup->start_lsn; + current.parent_backup = prev_backup->start_time; + + write_backup(¤t, true); + } + + /* For incremental backup check that start_lsn is not from the past + * Though it will not save us if PostgreSQL instance is actually + * restored STREAM backup. + */ + if (current.backup_mode != BACKUP_MODE_FULL && + prev_backup->start_lsn > current.start_lsn) + elog(ERROR, "Current START LSN %X/%X is lower than START LSN %X/%X of previous backup %s. " + "It may indicate that we are trying to backup PostgreSQL instance from the past.", + (uint32) (current.start_lsn >> 32), (uint32) (current.start_lsn), + (uint32) (prev_backup->start_lsn >> 32), (uint32) (prev_backup->start_lsn), + base36enc(prev_backup->start_time)); + + /* Update running backup meta with START LSN */ + write_backup(¤t, true); + + pgBackupGetPath(¤t, database_path, lengthof(database_path), + DATABASE_DIR); + pgBackupGetPath(¤t, external_prefix, lengthof(external_prefix), + EXTERNAL_DIR); + + /* start stream replication */ + if (stream_wal) + { + /* How long we should wait for streaming end after pg_stop_backup */ + stream_stop_timeout = checkpoint_timeout(backup_conn); + stream_stop_timeout = stream_stop_timeout + stream_stop_timeout * 0.1; + + join_path_components(dst_backup_path, database_path, PG_XLOG_DIR); + fio_mkdir(dst_backup_path, DIR_PERMISSION, FIO_BACKUP_HOST); + + stream_thread_arg.basedir = dst_backup_path; + + /* + * Connect in replication mode to the server. + */ + stream_thread_arg.conn = pgut_connect_replication(instance_config.conn_opt.pghost, + instance_config.conn_opt.pgport, + instance_config.conn_opt.pgdatabase, + instance_config.conn_opt.pguser); + /* sanity */ + IdentifySystem(&stream_thread_arg); + + /* By default there are some error */ + stream_thread_arg.ret = 1; + /* we must use startpos as start_lsn from start_backup */ + stream_thread_arg.startpos = current.start_lsn; + stream_thread_arg.starttli = current.tli; + + thread_interrupted = false; + pthread_create(&stream_thread, NULL, StreamLog, &stream_thread_arg); + } + + /* initialize backup list */ + backup_files_list = parray_new(); + + /* list files with the logical path. omit $PGDATA */ + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(backup_files_list, instance_config.pgdata, + true, true, false, backup_logs, true, 0); + else + dir_list_file(backup_files_list, instance_config.pgdata, + true, true, false, backup_logs, true, 0, FIO_LOCAL_HOST); + + /* + * Get database_map (name to oid) for use in partial restore feature. + * It's possible that we fail and database_map will be NULL. + */ + database_map = get_database_map(backup_conn); + + /* + * Append to backup list all files and directories + * from external directory option + */ + if (external_dirs) + { + for (i = 0; i < parray_num(external_dirs); i++) + { + /* External dirs numeration starts with 1. + * 0 value is not external dir */ + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(backup_files_list, (const char *)parray_get(external_dirs, i), + false, true, false, false, true, i+1); + else + dir_list_file(backup_files_list, (const char *)parray_get(external_dirs, i), + false, true, false, false, true, i+1, FIO_LOCAL_HOST); + } + } + + /* close ssh session in main thread */ + fio_disconnect(); + + /* Sanity check for backup_files_list, thank you, Windows: + * https://github.com/postgrespro/pg_probackup/issues/48 + */ + + if (parray_num(backup_files_list) < 100) + elog(ERROR, "PGDATA is almost empty. 
Either it was concurrently deleted or " + "pg_probackup do not possess sufficient permissions to list PGDATA content"); + + /* Calculate pgdata_bytes */ + for (i = 0; i < parray_num(backup_files_list); i++) + { + pgFile *file = (pgFile *) parray_get(backup_files_list, i); + + if (file->external_dir_num != 0) + continue; + + if (S_ISDIR(file->mode)) + { + current.pgdata_bytes += 4096; + continue; + } + + current.pgdata_bytes += file->size; + } + + pretty_size(current.pgdata_bytes, pretty_bytes, lengthof(pretty_bytes)); + elog(INFO, "PGDATA size: %s", pretty_bytes); + + /* + * Sort pathname ascending. It is necessary to create intermediate + * directories sequentially. + * + * For example: + * 1 - create 'base' + * 2 - create 'base/1' + * + * Sorted array is used at least in parse_filelist_filenames(), + * extractPageMap(), make_pagemap_from_ptrack(). + */ + parray_qsort(backup_files_list, pgFileCompareRelPathWithExternal); + + /* Extract information about files in backup_list parsing their names:*/ + parse_filelist_filenames(backup_files_list, instance_config.pgdata); + + if (current.backup_mode != BACKUP_MODE_FULL) + { + elog(LOG, "Current tli: %X", current.tli); + elog(LOG, "Parent start_lsn: %X/%X", + (uint32) (prev_backup->start_lsn >> 32), (uint32) (prev_backup->start_lsn)); + elog(LOG, "start_lsn: %X/%X", + (uint32) (current.start_lsn >> 32), (uint32) (current.start_lsn)); + } + + /* + * Build page mapping in incremental mode. + */ + + if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) + { + time(&start_time); + elog(INFO, "Extracting pagemap of changed blocks"); + + /* + * Build the page map from ptrack information. + */ + make_pagemap_from_ptrack(backup_files_list, + backup_conn, + prev_backup_start_lsn); + + time(&end_time); + elog(INFO, "Pagemap successfully extracted, time elapsed: %.0f sec", + difftime(end_time, start_time)); + } + + /* + * Make directories before backup and setup threads at the same time + */ + for (i = 0; i < parray_num(backup_files_list); i++) + { + pgFile *file = (pgFile *) parray_get(backup_files_list, i); + + /* if the entry was a directory, create it in the backup */ + if (S_ISDIR(file->mode)) + { + char dirpath[MAXPGPATH]; + + if (file->external_dir_num) + { + char temp[MAXPGPATH]; + snprintf(temp, MAXPGPATH, "%s%d", external_prefix, + file->external_dir_num); + join_path_components(dirpath, temp, file->rel_path); + } + else + join_path_components(dirpath, database_path, file->rel_path); + + elog(VERBOSE, "Create directory '%s'", dirpath); + fio_mkdir(dirpath, DIR_PERMISSION, FIO_BACKUP_HOST); + } + + /* setup threads */ + pg_atomic_clear_flag(&file->lock); + } + + /* Sort by size for load balancing */ + parray_qsort(backup_files_list, pgFileCompareSize); + /* Sort the array for binary search */ + if (prev_backup_filelist) + parray_qsort(prev_backup_filelist, pgFileCompareRelPathWithExternal); + + /* write initial backup_content.control file and update backup.control */ + write_backup_filelist(¤t, backup_files_list, + instance_config.pgdata, external_dirs, true); + write_backup(¤t, true); + + /* Init backup page header map */ + init_header_map(¤t); + + /* init thread args with own file lists */ + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (backup_files_arg *) palloc(sizeof(backup_files_arg)*num_threads); + + for (i = 0; i < num_threads; i++) + { + backup_files_arg *arg = &(threads_args[i]); + + arg->nodeInfo = nodeInfo; + arg->from_root = instance_config.pgdata; + arg->to_root = database_path; + 
arg->external_prefix = external_prefix; + arg->external_dirs = external_dirs; + arg->files_list = backup_files_list; + arg->prev_filelist = prev_backup_filelist; + arg->prev_start_lsn = prev_backup_start_lsn; + arg->conn_arg.conn = NULL; + arg->conn_arg.cancel_conn = NULL; + arg->hdr_map = &(current.hdr_map); + arg->thread_num = i+1; + /* By default there are some error */ + arg->ret = 1; + } + + /* Run threads */ + thread_interrupted = false; + elog(INFO, "Start transferring data files"); + time(&start_time); + for (i = 0; i < num_threads; i++) + { + backup_files_arg *arg = &(threads_args[i]); + + elog(VERBOSE, "Start thread num: %i", i); + pthread_create(&threads[i], NULL, backup_files, arg); + } + + /* Wait threads */ + for (i = 0; i < num_threads; i++) + { + pthread_join(threads[i], NULL); + if (threads_args[i].ret == 1) + backup_isok = false; + } + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + if (backup_isok) + elog(INFO, "Data files are transferred, time elapsed: %s", + pretty_time); + else + elog(ERROR, "Data files transferring failed, time elapsed: %s", + pretty_time); + + /* clean previous backup file list */ + if (prev_backup_filelist) + { + parray_walk(prev_backup_filelist, pgFileFree); + parray_free(prev_backup_filelist); + } + + /* Notify end of backup */ + pg_stop_backup(¤t, backup_conn, nodeInfo); + + /* close and sync page header map */ + if (current.hdr_map.fp) + { + cleanup_header_map(&(current.hdr_map)); + + if (fio_sync(current.hdr_map.path, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync file \"%s\": %s", current.hdr_map.path, strerror(errno)); + } + + /* close ssh session in main thread */ + fio_disconnect(); + + /* Add archived xlog files into the list of files of this backup */ + if (stream_wal) + { + parray *xlog_files_list; + char pg_xlog_path[MAXPGPATH]; + char wal_full_path[MAXPGPATH]; + + /* Scan backup PG_XLOG_DIR */ + xlog_files_list = parray_new(); + join_path_components(pg_xlog_path, database_path, PG_XLOG_DIR); + dir_list_file(xlog_files_list, pg_xlog_path, false, true, false, false, true, 0, + FIO_BACKUP_HOST); + + /* TODO: Drop streamed WAL segments greater than stop_lsn */ + for (i = 0; i < parray_num(xlog_files_list); i++) + { + pgFile *file = (pgFile *) parray_get(xlog_files_list, i); + + join_path_components(wal_full_path, pg_xlog_path, file->rel_path); + + if (!S_ISREG(file->mode)) + continue; + + file->crc = pgFileGetCRC(wal_full_path, true, false); + file->write_size = file->size; + + /* overwrite rel_path, because now it is relative to + * /backup_dir/backups/instance_name/backup_id/database/pg_xlog/ + */ + pg_free(file->rel_path); + + /* Now it is relative to /backup_dir/backups/instance_name/backup_id/database/ */ + file->rel_path = pgut_strdup(GetRelativePath(wal_full_path, database_path)); + + file->name = last_dir_separator(file->rel_path); + + if (file->name == NULL) // TODO: do it in pgFileInit + file->name = file->rel_path; + else + file->name++; + } + + /* Add xlog files into the list of backed up files */ + parray_concat(backup_files_list, xlog_files_list); + parray_free(xlog_files_list); + } + + /* write database map to file and add it to control file */ + if (database_map) + { + write_database_map(¤t, database_map, backup_files_list); + /* cleanup */ + parray_walk(database_map, db_map_entry_free); + parray_free(database_map); + } + + /* Print the list of files to backup catalog */ + write_backup_filelist(¤t, backup_files_list, instance_config.pgdata, + 
external_dirs, true); + /* update backup control file to update size info */ + write_backup(¤t, true); + + /* Sync all copied files unless '--no-sync' flag is used */ + if (no_sync) + elog(WARNING, "Backup files are not synced to disk"); + else + { + elog(INFO, "Syncing backup files to disk"); + time(&start_time); + + for (i = 0; i < parray_num(backup_files_list); i++) + { + char to_fullpath[MAXPGPATH]; + pgFile *file = (pgFile *) parray_get(backup_files_list, i); + + /* TODO: sync directory ? */ + if (S_ISDIR(file->mode)) + continue; + + if (file->write_size <= 0) + continue; + + /* construct fullpath */ + if (file->external_dir_num == 0) + join_path_components(to_fullpath, database_path, file->rel_path); + else + { + char external_dst[MAXPGPATH]; + + makeExternalDirPathByNum(external_dst, external_prefix, + file->external_dir_num); + join_path_components(to_fullpath, external_dst, file->rel_path); + } + + if (fio_sync(to_fullpath, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync file \"%s\": %s", to_fullpath, strerror(errno)); + } + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + elog(INFO, "Backup files are synced, time elapsed: %s", pretty_time); + } + + /* be paranoid about instance been from the past */ + if (current.backup_mode != BACKUP_MODE_FULL && + current.stop_lsn < prev_backup->stop_lsn) + elog(ERROR, "Current backup STOP LSN %X/%X is lower than STOP LSN %X/%X of previous backup %s. " + "It may indicate that we are trying to backup PostgreSQL instance from the past.", + (uint32) (current.stop_lsn >> 32), (uint32) (current.stop_lsn), + (uint32) (prev_backup->stop_lsn >> 32), (uint32) (prev_backup->stop_lsn), + base36enc(prev_backup->stop_lsn)); + + /* clean external directories list */ + if (external_dirs) + free_dir_list(external_dirs); + + /* Cleanup */ + if (backup_list) + { + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); + } + + if (tli_list) + { + parray_walk(tli_list, timelineInfoFree); + parray_free(tli_list); + } + + parray_walk(backup_files_list, pgFileFree); + parray_free(backup_files_list); + backup_files_list = NULL; +} + +/* + * Common code for BACKUP commands. + * Ensure that we're able to connect to the instance + * check compatibility and fill basic info. + * Also checking system ID in this case serves no purpose, because + * all work is done by server. + * + * Returns established connection + */ +PGconn * +pgdata_basic_setup(ConnectionOptions conn_opt, PGNodeInfo *nodeInfo) +{ + PGconn *cur_conn; + bool from_replica; + + /* Create connection for PostgreSQL */ + cur_conn = pgut_connect(conn_opt.pghost, conn_opt.pgport, + conn_opt.pgdatabase, + conn_opt.pguser); + + from_replica = pg_is_in_recovery(cur_conn); + if (from_replica) { + elog(ERROR, "gs_probackup is not supported on standby\n"); + } + + /* Confirm data block size and xlog block size are compatible */ + confirm_block_size(cur_conn, "block_size", BLCKSZ); + confirm_block_size(cur_conn, "wal_block_size", XLOG_BLCKSZ); + nodeInfo->block_size = BLCKSZ; + nodeInfo->wal_block_size = XLOG_BLCKSZ; + nodeInfo->pgpro_support = pgpro_support(cur_conn); + + + + nodeInfo->server_version = PQserverVersion(conn); + exclusive_backup = true; + + current.checksum_version = 0; + + nodeInfo->checksum_version = current.checksum_version; + + if (current.checksum_version) + elog(LOG, "This PostgreSQL instance was initialized with data block checksums. 
" + "Data block corruption will be detected"); + else + elog(WARNING, "This PostgreSQL instance was initialized without data block checksums. " + "pg_probackup have no way to detect data block corruption without them. " + "Reinitialize PGDATA with option '--data-checksums'."); + + if (nodeInfo->is_superuser) + elog(WARNING, "Current PostgreSQL role is superuser. " + "It is not recommended to run backup as superuser."); + + StrNCpy(current.server_version, "9.2", + sizeof(current.server_version)); + + return cur_conn; +} + +/* + * Entry point of pg_probackup BACKUP subcommand. + */ +int +do_backup(time_t start_time, pgSetBackupParams *set_backup_params, + bool no_validate, bool no_sync, bool backup_logs) +{ + PGconn *backup_conn = NULL; + PGNodeInfo nodeInfo; + char pretty_bytes[20]; + + /* Initialize PGInfonode */ + pgNodeInit(&nodeInfo); + + if (!instance_config.pgdata) + elog(ERROR, "required parameter not specified: PGDATA " + "(-D, --pgdata)"); + + /* Update backup status and other metainfo. */ + current.status = BACKUP_STATUS_RUNNING; + current.start_time = start_time; + + StrNCpy(current.program_version, PROGRAM_VERSION, + sizeof(current.program_version)); + + current.compress_alg = instance_config.compress_alg; + current.compress_level = instance_config.compress_level; + + /* Save list of external directories */ + if (instance_config.external_dir_str && + (pg_strcasecmp(instance_config.external_dir_str, "none") != 0)) + current.external_dir_str = instance_config.external_dir_str; + + elog(INFO, "Backup start, pg_probackup version: %s, instance: %s, backup ID: %s, backup mode: %s, " + "wal mode: %s, remote: %s, compress-algorithm: %s, compress-level: %i", + PROGRAM_VERSION, instance_name, base36enc(start_time), pgBackupGetBackupMode(¤t), + current.stream ? "STREAM" : "ARCHIVE", IsSshProtocol() ? "true" : "false", + deparse_compress_alg(current.compress_alg), current.compress_level); + + /* Create backup directory and BACKUP_CONTROL_FILE */ + if (pgBackupCreateDir(¤t)) + elog(ERROR, "Cannot create backup directory"); + if (!lock_backup(¤t, true)) + elog(ERROR, "Cannot lock backup %s directory", + base36enc(current.start_time)); + write_backup(¤t, true); + + /* set the error processing function for the backup process */ + pgut_atexit_push(backup_cleanup, NULL); + + elog(LOG, "Backup destination is initialized"); + + /* + * setup backup_conn, do some compatibility checks and + * fill basic info about instance + */ + backup_conn = pgdata_basic_setup(instance_config.conn_opt, &nodeInfo); + + /* + * Ensure that backup directory was initialized for the same PostgreSQL + * instance we opened connection to. And that target backup database PGDATA + * belogns to the same instance. 
+ */ + check_system_identifiers(backup_conn, instance_config.pgdata); + + /* below perform checks specific for backup command */ +#if PG_VERSION_NUM >= 110000 + if (!RetrieveWalSegSize(backup_conn)) + elog(ERROR, "Failed to retrieve wal_segment_size"); +#endif + + /* add note to backup if requested */ + if (set_backup_params && set_backup_params->note) + add_note(¤t, set_backup_params->note); + + /* backup data */ + do_backup_instance(backup_conn, &nodeInfo, no_sync, backup_logs); + pgut_atexit_pop(backup_cleanup, NULL); + + /* compute size of wal files of this backup stored in the archive */ + if (!current.stream) + { + XLogSegNo start_segno; + XLogSegNo stop_segno; + + GetXLogSegNo(current.start_lsn, start_segno, instance_config.xlog_seg_size); + GetXLogSegNo(current.stop_lsn, stop_segno, instance_config.xlog_seg_size); + current.wal_bytes = (stop_segno - start_segno) * instance_config.xlog_seg_size; + + /* + * If start_lsn and stop_lsn are located in the same segment, then + * set wal_bytes to the size of 1 segment. + */ + if (current.wal_bytes <= 0) + current.wal_bytes = instance_config.xlog_seg_size; + } + + /* Backup is done. Update backup status */ + current.end_time = time(NULL); + current.status = BACKUP_STATUS_DONE; + write_backup(¤t, true); + + /* Pin backup if requested */ + if (set_backup_params && + (set_backup_params->ttl > 0 || + set_backup_params->expire_time > 0)) + { + pin_backup(¤t, set_backup_params); + } + + if (!no_validate) + pgBackupValidate(¤t, NULL); + + /* Notify user about backup size */ + if (current.stream) + pretty_size(current.data_bytes + current.wal_bytes, pretty_bytes, lengthof(pretty_bytes)); + else + pretty_size(current.data_bytes, pretty_bytes, lengthof(pretty_bytes)); + elog(INFO, "Backup %s resident size: %s", base36enc(current.start_time), pretty_bytes); + + if (current.status == BACKUP_STATUS_OK || + current.status == BACKUP_STATUS_DONE) + elog(INFO, "Backup %s completed", base36enc(current.start_time)); + else + elog(ERROR, "Backup %s failed", base36enc(current.start_time)); + + /* + * After successful backup completion remove backups + * which are expired according to retention policies + */ + if (delete_expired || merge_expired || delete_wal) + do_retention(); + + return 0; +} + +/* + * Ensure that backup directory was initialized for the same PostgreSQL + * instance we opened connection to. And that target backup database PGDATA + * belogns to the same instance. + * All system identifiers must be equal. 
+ */ +void +check_system_identifiers(PGconn *conn, char *pgdata) +{ + uint64 system_id_conn; + uint64 system_id_pgdata; + + system_id_pgdata = get_system_identifier(pgdata); + system_id_conn = get_remote_system_identifier(conn); + + if (current.backup_mode == BACKUP_MODE_INVALID) + { + if (system_id_conn != system_id_pgdata) + { + elog(ERROR, "Data directory initialized with system id " UINT64_FORMAT ", " + "but connected instance system id is " UINT64_FORMAT, + system_id_pgdata, system_id_conn); + } + return; + } + + if (system_id_conn != instance_config.system_identifier) + elog(ERROR, "Backup data directory was initialized for system id " UINT64_FORMAT ", " + "but connected instance system id is " UINT64_FORMAT, + instance_config.system_identifier, system_id_conn); + + if (system_id_pgdata != instance_config.system_identifier) + elog(ERROR, "Backup data directory was initialized for system id " UINT64_FORMAT ", " + "but target backup directory system id is " UINT64_FORMAT, + instance_config.system_identifier, system_id_pgdata); +} + +/* + * Ensure that target backup database is initialized with + * compatible settings. Currently check BLCKSZ and XLOG_BLCKSZ. + */ +static void +confirm_block_size(PGconn *conn, const char *name, int blcksz) +{ + PGresult *res; + char *endp; + int block_size; + + res = pgut_execute(conn, "SELECT pg_catalog.current_setting($1)", 1, &name); + if (PQntuples(res) != 1 || PQnfields(res) != 1) + elog(ERROR, "cannot get %s: %s", name, PQerrorMessage(conn)); + + block_size = strtol(PQgetvalue(res, 0, 0), &endp, 10); + if ((endp && *endp) || block_size != blcksz) + elog(ERROR, + "%s(%d) is not compatible(%d expected)", + name, block_size, blcksz); + + PQclear(res); +} + +/* + * Notify start of backup to PostgreSQL server. + */ +static void +pg_start_backup(const char *label, bool smooth, pgBackup *backup, + PGNodeInfo *nodeInfo, PGconn *conn) +{ + PGresult *res; + const char *params[2]; + uint32 lsn_hi; + uint32 lsn_lo; + + params[0] = label; + + /* 2nd argument is 'fast'*/ + params[1] = smooth ? "false" : "true"; + if (!exclusive_backup) + res = pgut_execute(conn, + "SELECT pg_catalog.pg_start_backup($1, $2, false)", + 2, + params); + else + res = pgut_execute(conn, + "SELECT pg_catalog.pg_start_backup($1, $2)", + 2, + params); + + /* + * Set flag that pg_start_backup() was called. If an error will happen it + * is necessary to call pg_stop_backup() in backup_cleanup(). + */ + backup_in_progress = true; + pgut_atexit_push(backup_stopbackup_callback, conn); + + /* Extract timeline and LSN from results of pg_start_backup() */ + XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo); + /* Calculate LSN */ + backup->start_lsn = ((uint64) lsn_hi )<< 32 | lsn_lo; + + PQclear(res); +} + +/* + * Switch to a new WAL segment. It should be called only for master. + * For PG 9.5 it should be called only if pguser is superuser. + */ +static void +pg_switch_wal(PGconn *conn) +{ + PGresult *res; + + /* Remove annoying NOTICE messages generated by backend */ + res = pgut_execute(conn, "SET client_min_messages = warning;", 0, NULL); + PQclear(res); + +#if PG_VERSION_NUM >= 100000 + res = pgut_execute(conn, "SELECT pg_catalog.pg_switch_wal()", 0, NULL); +#else + res = pgut_execute(conn, "SELECT pg_catalog.pg_switch_xlog()", 0, NULL); +#endif + + PQclear(res); +} + +/* + * Check if the instance is PostgresPro fork. 
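pg_start_backup() above returns the start LSN as text in the usual "%X/%X" form; XLogDataFromLSN splits it into two 32-bit halves and the code recombines them with a shift. A self-contained sketch of the equivalent computation, not part of the patch (the helper name and the sample value are illustrative only):

#include <stdio.h>
#include <stdint.h>

static uint64_t
parse_lsn(const char *text)
{
    unsigned int hi;
    unsigned int lo;

    if (sscanf(text, "%X/%X", &hi, &lo) != 2)
        return 0;                       /* i.e. InvalidXLogRecPtr */

    return ((uint64_t) hi << 32) | lo;
}

/* parse_lsn("0/3000028") == 0x0000000003000028 */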
+ */ +static bool +pgpro_support(PGconn *conn) +{ + PGresult *res; + + res = pgut_execute(conn, + "SELECT proname FROM pg_proc WHERE proname='pgpro_edition'", + 0, NULL); + + if (PQresultStatus(res) == PGRES_TUPLES_OK && + (PQntuples(res) == 1) && + (strcmp(PQgetvalue(res, 0, 0), "pgpro_edition") == 0)) + { + PQclear(res); + return true; + } + + PQclear(res); + return false; +} + +/* + * Fill 'datname to Oid' map + * + * This function can fail to get the map for legal reasons, e.g. missing + * permissions on pg_database during `backup`. + * As long as user do not use partial restore feature it`s fine. + * + * To avoid breaking a backward compatibility don't throw an ERROR, + * throw a warning instead of an error and return NULL. + * Caller is responsible for checking the result. + */ +parray * +get_database_map(PGconn *conn) +{ + PGresult *res; + parray *database_map = NULL; + int i; + + /* + * Do not include template0 and template1 to the map + * as default databases that must always be restored. + */ + res = pgut_execute_extended(conn, + "SELECT oid, datname FROM pg_catalog.pg_database " + "WHERE datname NOT IN ('template1', 'template0')", + 0, NULL, true, true); + + /* Don't error out, simply return NULL. See comment above. */ + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + PQclear(res); + elog(WARNING, "Failed to get database map: %s", + PQerrorMessage(conn)); + + return NULL; + } + + /* Construct database map */ + for (i = 0; i < PQntuples(res); i++) + { + char *datname = NULL; + db_map_entry *db_entry = (db_map_entry *) pgut_malloc(sizeof(db_map_entry)); + + /* get Oid */ + db_entry->dbOid = atoi(PQgetvalue(res, i, 0)); + + /* get datname */ + datname = PQgetvalue(res, i, 1); + db_entry->datname = (char *)pgut_malloc(strlen(datname) + 1); + strcpy(db_entry->datname, datname); + + if (database_map == NULL) + database_map = parray_new(); + + parray_append(database_map, db_entry); + } + + return database_map; +} + +/* Check if ptrack is enabled in target instance */ +static bool +pg_checksum_enable(PGconn *conn) +{ + PGresult *res_db; + + res_db = pgut_execute(conn, "SHOW data_checksums", 0, NULL); + + if (strcmp(PQgetvalue(res_db, 0, 0), "on") == 0) + { + PQclear(res_db); + return true; + } + PQclear(res_db); + return false; +} + +/* Check if target instance is replica */ +static bool +pg_is_in_recovery(PGconn *conn) +{ + PGresult *res_db; + + res_db = pgut_execute(conn, "SELECT pg_catalog.pg_is_in_recovery()", 0, NULL); + + if (PQgetvalue(res_db, 0, 0)[0] == 't') + { + PQclear(res_db); + return true; + } + PQclear(res_db); + return false; +} + + +/* Check if current PostgreSQL role is superuser */ +static bool +pg_is_superuser(PGconn *conn) +{ + PGresult *res; + + res = pgut_execute(conn, "SELECT pg_catalog.current_setting('is_superuser')", 0, NULL); + + if (strcmp(PQgetvalue(res, 0, 0), "on") == 0) + { + PQclear(res); + return true; + } + PQclear(res); + return false; +} + +/* + * Wait for target LSN or WAL segment, containing target LSN. + * + * Depending on value of flag in_stream_dir wait for target LSN to archived or + * streamed in 'archive_dir' or 'pg_wal' directory. + * + * If flag 'is_start_lsn' is set then issue warning for first-time users. + * If flag 'in_prev_segment' is set, look for LSN in previous segment, + * with EndRecPtr >= Target LSN. It should be used only for solving + * invalid XRecOff problem. + * If flag 'segment_only' is set, then, instead of waiting for LSN, wait for segment, + * containing that LSN. 
+ * If flags 'in_prev_segment' and 'segment_only' are both set, then wait for + * previous segment. + * + * Flag 'in_stream_dir' determine whether we looking for WAL in 'pg_wal' directory or + * in archive. Do note, that we cannot rely sorely on global variable 'stream_wal' because, + * for example, PAGE backup must(!) look for start_lsn in archive regardless of wal_mode. + * + * 'timeout_elevel' determine the elevel for timeout elog message. If elevel lighter than + * ERROR is used, then return InvalidXLogRecPtr. TODO: return something more concrete, for example 1. + * + * Returns target LSN if such is found, failing that returns LSN of record prior to target LSN. + * Returns InvalidXLogRecPtr if 'segment_only' flag is used. + */ +static XLogRecPtr +wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli, + bool in_prev_segment, bool segment_only, + int timeout_elevel, bool in_stream_dir) +{ + XLogSegNo targetSegNo; + char pg_wal_dir[MAXPGPATH]; + char wal_segment_path[MAXPGPATH], + *wal_segment_dir, + wal_segment[MAXFNAMELEN]; + bool file_exists = false; + uint32 try_count = 0, + timeout; + char *wal_delivery_str = (char *)(in_stream_dir ? "streamed":"archived"); + +#ifdef HAVE_LIBZ + char gz_wal_segment_path[MAXPGPATH]; +#endif + + /* Compute the name of the WAL file containing requested LSN */ + GetXLogSegNo(target_lsn, targetSegNo, instance_config.xlog_seg_size); + if (in_prev_segment) + targetSegNo--; + GetXLogFileName(wal_segment, tli, targetSegNo, + instance_config.xlog_seg_size); + + /* + * In pg_start_backup we wait for 'target_lsn' in 'pg_wal' directory if it is + * stream and non-page backup. Page backup needs archived WAL files, so we + * wait for 'target_lsn' in archive 'wal' directory for page backups. + * + * In pg_stop_backup it depends only on stream_wal. + */ + if (in_stream_dir) + { + pgBackupGetPath2(¤t, pg_wal_dir, lengthof(pg_wal_dir), + DATABASE_DIR, PG_XLOG_DIR); + join_path_components(wal_segment_path, pg_wal_dir, wal_segment); + wal_segment_dir = pg_wal_dir; + } + else + { + join_path_components(wal_segment_path, arclog_path, wal_segment); + wal_segment_dir = arclog_path; + } + + /* TODO: remove this in 3.0 (it is a cludge against some old bug with archive_timeout) */ + if (instance_config.archive_timeout > 0) + timeout = instance_config.archive_timeout; + else + timeout = ARCHIVE_TIMEOUT_DEFAULT; + + if (segment_only) + elog(LOG, "Looking for segment: %s", wal_segment); + else + elog(LOG, "Looking for LSN %X/%X in segment: %s", + (uint32) (target_lsn >> 32), (uint32) target_lsn, wal_segment); + +#ifdef HAVE_LIBZ + snprintf(gz_wal_segment_path, sizeof(gz_wal_segment_path), "%s.gz", + wal_segment_path); +#endif + + /* Wait until target LSN is archived or streamed */ + while (true) + { + if (!file_exists) + { + file_exists = fileExists(wal_segment_path, FIO_BACKUP_HOST); + + /* Try to find compressed WAL file */ + if (!file_exists) + { +#ifdef HAVE_LIBZ + file_exists = fileExists(gz_wal_segment_path, FIO_BACKUP_HOST); + if (file_exists) + elog(LOG, "Found compressed WAL segment: %s", wal_segment_path); +#endif + } + else + elog(LOG, "Found WAL segment: %s", wal_segment_path); + } + + if (file_exists) + { + /* Do not check for target LSN */ + if (segment_only) + return InvalidXLogRecPtr; + + /* + * A WAL segment found. Look for target LSN in it. 
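+ * wal_contains_lsn() is expected to do the real work here by reading WAL
+ * records from the located segment; while the target LSN is not readable
+ * yet, control simply falls through to the sleep(1)/retry logic below,
+ * until 'timeout' (archive_timeout or ARCHIVE_TIMEOUT_DEFAULT) expires.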
+ */ + if (!XRecOffIsNull(target_lsn) && + wal_contains_lsn(wal_segment_dir, target_lsn, tli, + instance_config.xlog_seg_size)) + /* Target LSN was found */ + { + elog(LOG, "Found LSN: %X/%X", (uint32) (target_lsn >> 32), (uint32) target_lsn); + return target_lsn; + } + } + + sleep(1); + if (interrupted) + elog(ERROR, "Interrupted during waiting for WAL archiving"); + try_count++; + + /* Inform user if WAL segment is absent in first attempt */ + if (try_count == 1) + { + if (segment_only) + elog(INFO, "Wait for WAL segment %s to be %s", + wal_segment_path, wal_delivery_str); + else + elog(INFO, "Wait for LSN %X/%X in %s WAL segment %s", + (uint32) (target_lsn >> 32), (uint32) target_lsn, + wal_delivery_str, wal_segment_path); + } + + if (!stream_wal && is_start_lsn && try_count == 30) + elog(WARNING, "By default pg_probackup assume WAL delivery method to be ARCHIVE. " + "If continuous archiving is not set up, use '--stream' option to make autonomous backup. " + "Otherwise check that continuous archiving works correctly."); + + if (timeout > 0 && try_count > timeout) + { + if (file_exists) + elog(timeout_elevel, "WAL segment %s was %s, " + "but target LSN %X/%X could not be archived in %d seconds", + wal_segment, wal_delivery_str, + (uint32) (target_lsn >> 32), (uint32) target_lsn, timeout); + /* If WAL segment doesn't exist or we wait for previous segment */ + else + elog(timeout_elevel, + "WAL segment %s could not be %s in %d seconds", + wal_segment, wal_delivery_str, timeout); + + return InvalidXLogRecPtr; + } + } +} + +/* + * Notify end of backup to PostgreSQL server. + */ +static void +pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, + PGNodeInfo *nodeInfo) +{ + PGconn *conn; + PGresult *res; + PGresult *tablespace_map_content = NULL; + uint32 lsn_hi; + uint32 lsn_lo; + //XLogRecPtr restore_lsn = InvalidXLogRecPtr; + int pg_stop_backup_timeout = 0; + char path[MAXPGPATH]; + char backup_label[MAXPGPATH]; + FILE *fp; + pgFile *file; + size_t len; + char *val = NULL; + const char *stop_backup_query = NULL; + bool stop_lsn_exists = false; + XLogRecPtr stop_backup_lsn_tmp = InvalidXLogRecPtr; + + /* + * We will use this values if there are no transactions between start_lsn + * and stop_lsn. + */ + time_t recovery_time; + TransactionId recovery_xid; + + if (!backup_in_progress) + elog(ERROR, "backup is not in progress"); + + conn = pg_startbackup_conn; + + /* Remove annoying NOTICE messages generated by backend */ + res = pgut_execute(conn, "SET client_min_messages = warning;", + 0, NULL); + PQclear(res); + + /* Make proper timestamp format for parse_time() */ + res = pgut_execute(conn, "SET datestyle = 'ISO, DMY';", 0, NULL); + PQclear(res); + + /* Create restore point */ + if (backup != NULL) + { + const char *params[1]; + char name[1024]; + + snprintf(name, lengthof(name), "pg_probackup, backup_id %s", + base36enc(backup->start_time)); + params[0] = name; + + res = pgut_execute(conn, "SELECT pg_catalog.pg_create_restore_point($1)", + 1, params); + PQclear(res); + } + + /* + * send pg_stop_backup asynchronously because we could came + * here from backup_cleanup() after some error caused by + * postgres archive_command problem and in this case we will + * wait for pg_stop_backup() forever. 
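+ *
+ * The non-blocking pattern used below is roughly this (simplified sketch,
+ * the real code keeps its counter in pg_stop_backup_timeout):
+ *
+ *     pgut_send(conn, stop_backup_query, 0, NULL, WARNING);   send, return
+ *     for (;;)
+ *     {
+ *         PQconsumeInput(conn);
+ *         if (!PQisBusy(conn))
+ *             break;                                          result ready
+ *         sleep(1);
+ *         if (++waited > instance_config.archive_timeout)
+ *             pgut_cancel(conn);                              give up
+ *     }
+ *     res = PQgetResult(conn);
+ *
+ * so a pg_stop_backup() stuck behind a broken archive_command can be
+ * cancelled instead of blocking the client forever.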
+ */ + + if (!pg_stop_backup_is_sent) + { + bool sent = false; + + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," + " pg_catalog.pg_stop_backup() as lsn"; + + sent = pgut_send(conn, stop_backup_query, 0, NULL, WARNING); + pg_stop_backup_is_sent = true; + if (!sent) + elog(ERROR, "Failed to send pg_stop_backup query"); + } + + /* After we have sent pg_stop_backup, we don't need this callback anymore */ + pgut_atexit_pop(backup_stopbackup_callback, pg_startbackup_conn); + + /* + * Wait for the result of pg_stop_backup(), but no longer than + * archive_timeout seconds + */ + if (pg_stop_backup_is_sent && !in_cleanup) + { + res = NULL; + + while (1) + { + if (!PQconsumeInput(conn)) + elog(ERROR, "pg_stop backup() failed: %s", + PQerrorMessage(conn)); + + if (PQisBusy(conn)) + { + pg_stop_backup_timeout++; + sleep(1); + + if (interrupted) + { + pgut_cancel(conn); + elog(ERROR, "interrupted during waiting for pg_stop_backup"); + } + + if (pg_stop_backup_timeout == 1) + elog(INFO, "wait for pg_stop_backup()"); + + /* + * If postgres haven't answered in archive_timeout seconds, + * send an interrupt. + */ + if (pg_stop_backup_timeout > instance_config.archive_timeout) + { + pgut_cancel(conn); + elog(ERROR, "pg_stop_backup doesn't answer in %d seconds, cancel it", + instance_config.archive_timeout); + } + } + else + { + res = PQgetResult(conn); + break; + } + } + + /* Check successfull execution of pg_stop_backup() */ + if (!res) + elog(ERROR, "pg_stop backup() failed"); + else + { + switch (PQresultStatus(res)) + { + /* + * We should expect only PGRES_TUPLES_OK since pg_stop_backup + * returns tuples. + */ + case PGRES_TUPLES_OK: + break; + default: + elog(ERROR, "query failed: %s query was: %s", + PQerrorMessage(conn), stop_backup_query); + } + elog(INFO, "pg_stop backup() successfully executed"); + } + + backup_in_progress = false; + +// char *target_lsn = "2/F578A000"; +// XLogDataFromLSN(target_lsn, &lsn_hi, &lsn_lo); + + /* Extract timeline and LSN from results of pg_stop_backup() */ + XLogDataFromLSN(PQgetvalue(res, 0, 2), &lsn_hi, &lsn_lo); + /* Calculate LSN */ + stop_backup_lsn_tmp = ((uint64) lsn_hi) << 32 | lsn_lo; + + /* It is ok for replica to return invalid STOP LSN + * UPD: Apparently it is ok even for a master. + */ + if (!XRecOffIsValid(stop_backup_lsn_tmp)) + { + char *xlog_path, + stream_xlog_path[MAXPGPATH]; + XLogSegNo segno = 0; + XLogRecPtr lsn_tmp = InvalidXLogRecPtr; + + /* + * Even though the value is invalid, it's expected postgres behaviour + * and we're trying to fix it below. + */ + elog(LOG, "Invalid offset in stop_lsn value %X/%X, trying to fix", + (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); + + /* + * Note: even with gdb it is very hard to produce automated tests for + * contrecord + invalid LSN, so emulate it for manual testing. + */ + //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE; + //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X", + // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); + + if (stream_wal) + { + pgBackupGetPath2(backup, stream_xlog_path, + lengthof(stream_xlog_path), + DATABASE_DIR, PG_XLOG_DIR); + xlog_path = stream_xlog_path; + } + else + xlog_path = arclog_path; + + GetXLogSegNo(stop_backup_lsn_tmp, segno, instance_config.xlog_seg_size); + + /* + * Note, that there is no guarantee that corresponding WAL file even exists. 
+ * Replica may return LSN from future and keep staying in present. + * Or it can return invalid LSN. + * + * That's bad, since we want to get real LSN to save it in backup label file + * and to use it in WAL validation. + * + * So we try to do the following: + * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and + * look for the first valid record in it. + * It solves the problem of occasional invalid LSN on write-busy system. + * 2. Failing that, look for record in previous segment with endpoint + * equal or greater than stop_lsn. It may(!) solve the problem of invalid LSN + * on write-idle system. If that fails too, error out. + */ + + /* stop_lsn is pointing to a 0 byte of xlog segment */ + if (stop_backup_lsn_tmp % instance_config.xlog_seg_size == 0) + { + /* Wait for segment with current stop_lsn, it is ok for it to never arrive */ + wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + false, true, WARNING, stream_wal); + + /* Get the first record in segment with current stop_lsn */ + lsn_tmp = get_first_record_lsn(xlog_path, segno, backup->tli, + instance_config.xlog_seg_size, + instance_config.archive_timeout); + + /* Check that returned LSN is valid and greater than stop_lsn */ + if (XLogRecPtrIsInvalid(lsn_tmp) || + !XRecOffIsValid(lsn_tmp) || + lsn_tmp < stop_backup_lsn_tmp) + { + elog(ERROR, "Failed to get next WAL record after %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + + /* No luck, falling back to looking up for previous record */ + elog(WARNING, "Failed to get next WAL record after %X/%X, " + "looking for previous WAL record", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + + /* Despite looking for previous record there is not guarantee of success + * because previous record can be the contrecord. 
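+ * (A contrecord is a record continued across a page or segment boundary,
+ * so the previous segment may not contain any complete record ending at or
+ * after stop_lsn.) The call below is step 2 of the strategy above:
+ * wait_wal_lsn() with in_prev_segment = true searches the preceding segment
+ * for a record whose end point is >= stop_lsn and, with timeout_elevel set
+ * to ERROR, fails the backup if nothing suitable shows up in time.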
+ */ + lsn_tmp = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + true, false, ERROR, stream_wal); + + /* sanity */ + if (!XRecOffIsValid(lsn_tmp) || XLogRecPtrIsInvalid(lsn_tmp)) + elog(ERROR, "Failed to get WAL record prior to %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + } + } + /* stop lsn is aligned to xlog block size, just find next lsn */ + else if (stop_backup_lsn_tmp % XLOG_BLCKSZ == 0) + { + /* Wait for segment with current stop_lsn */ + wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + false, true, ERROR, stream_wal); + + /* Get the next closest record in segment with current stop_lsn */ + lsn_tmp = get_next_record_lsn(xlog_path, segno, backup->tli, + instance_config.xlog_seg_size, + instance_config.archive_timeout, + stop_backup_lsn_tmp); + + /* sanity */ + if (!XRecOffIsValid(lsn_tmp) || XLogRecPtrIsInvalid(lsn_tmp)) + elog(ERROR, "Failed to get WAL record next to %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + } + /* PostgreSQL returned something very illegal as STOP_LSN, error out */ + else + elog(ERROR, "Invalid stop_backup_lsn value %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); + + /* Setting stop_backup_lsn will set stop point for streaming */ + stop_backup_lsn = lsn_tmp; + stop_lsn_exists = true; + } + + elog(LOG, "stop_lsn: %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); + + /* Write backup_label and tablespace_map */ + if (!exclusive_backup) + { + Assert(PQnfields(res) >= 4); + pgBackupGetPath(backup, path, lengthof(path), DATABASE_DIR); + + /* Write backup_label */ + join_path_components(backup_label, path, PG_BACKUP_LABEL_FILE); + fp = fio_fopen(backup_label, PG_BINARY_W, FIO_BACKUP_HOST); + if (fp == NULL) + elog(ERROR, "can't open backup label file \"%s\": %s", + backup_label, strerror(errno)); + + len = strlen(PQgetvalue(res, 0, 3)); + if (fio_fwrite(fp, PQgetvalue(res, 0, 3), len) != len || + fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "can't write backup label file \"%s\": %s", + backup_label, strerror(errno)); + + /* + * It's vital to check if backup_files_list is initialized, + * because we could get here because the backup was interrupted + */ + if (backup_files_list) + { + file = pgFileNew(backup_label, PG_BACKUP_LABEL_FILE, true, 0, + FIO_BACKUP_HOST); + + file->crc = pgFileGetCRC(backup_label, true, false); + + file->write_size = file->size; + file->uncompressed_size = file->size; + parray_append(backup_files_list, file); + } + } + + if (sscanf(PQgetvalue(res, 0, 0), XID_FMT, &recovery_xid) != 1) + elog(ERROR, + "result of txid_snapshot_xmax() is invalid: %s", + PQgetvalue(res, 0, 0)); + if (!parse_time(PQgetvalue(res, 0, 1), &recovery_time, true)) + elog(ERROR, + "result of current_timestamp is invalid: %s", + PQgetvalue(res, 0, 1)); + + /* Get content for tablespace_map from stop_backup results + * in case of non-exclusive backup + */ + if (!exclusive_backup) + val = PQgetvalue(res, 0, 4); + + /* Write tablespace_map */ + if (!exclusive_backup && val && strlen(val) > 0) + { + char tablespace_map[MAXPGPATH]; + + join_path_components(tablespace_map, path, PG_TABLESPACE_MAP_FILE); + fp = fio_fopen(tablespace_map, PG_BINARY_W, FIO_BACKUP_HOST); + if (fp == NULL) + elog(ERROR, "can't open tablespace map file \"%s\": %s", + tablespace_map, strerror(errno)); + + len = strlen(val); + if (fio_fwrite(fp, val, len) != len || + fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "can't write 
tablespace map file \"%s\": %s", + tablespace_map, strerror(errno)); + + if (backup_files_list) + { + file = pgFileNew(tablespace_map, PG_TABLESPACE_MAP_FILE, true, 0, + FIO_BACKUP_HOST); + if (S_ISREG(file->mode)) + { + file->crc = pgFileGetCRC(tablespace_map, true, false); + file->write_size = file->size; + } + + parray_append(backup_files_list, file); + } + } + + if (tablespace_map_content) + PQclear(tablespace_map_content); + PQclear(res); + } + + /* Fill in fields if that is the correct end of backup. */ + if (backup != NULL) + { + char *xlog_path, + stream_xlog_path[MAXPGPATH]; + + /* + * Wait for stop_lsn to be archived or streamed. + * If replica returned valid STOP_LSN of not actually existing record, + * look for previous record with endpoint >= STOP_LSN. + */ + if (!stop_lsn_exists) + stop_backup_lsn = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + false, false, ERROR, stream_wal); + + if (stream_wal) + { + /* Wait for the completion of stream */ + pthread_join(stream_thread, NULL); + if (stream_thread_arg.ret == 1) + elog(ERROR, "WAL streaming failed"); + + pgBackupGetPath2(backup, stream_xlog_path, + lengthof(stream_xlog_path), + DATABASE_DIR, PG_XLOG_DIR); + xlog_path = stream_xlog_path; + } + else + xlog_path = arclog_path; + + backup->stop_lsn = stop_backup_lsn; + backup->recovery_xid = recovery_xid; + + elog(LOG, "Getting the Recovery Time from WAL"); + + /* iterate over WAL from stop_backup lsn to start_backup lsn */ + if (!read_recovery_info(xlog_path, backup->tli, + instance_config.xlog_seg_size, + backup->start_lsn, backup->stop_lsn, + &backup->recovery_time)) + { + elog(LOG, "Failed to find Recovery Time in WAL, forced to trust current_timestamp"); + backup->recovery_time = recovery_time; + } + } +} + +/* + * Retrieve checkpoint_timeout GUC value in seconds. + */ +static int +checkpoint_timeout(PGconn *backup_conn) +{ + PGresult *res; + const char *val; + const char *hintmsg; + int val_int; + + res = pgut_execute(backup_conn, "show checkpoint_timeout", 0, NULL); + val = PQgetvalue(res, 0, 0); + + if (!parse_int(val, &val_int, OPTION_UNIT_S, &hintmsg)) + { + PQclear(res); + if (hintmsg) + elog(ERROR, "Invalid value of checkout_timeout %s: %s", val, + hintmsg); + else + elog(ERROR, "Invalid value of checkout_timeout %s", val); + } + + PQclear(res); + + return val_int; +} + +/* + * Notify end of backup to server when "backup_label" is in the root directory + * of the DB cluster. + * Also update backup status to ERROR when the backup is not finished. + */ +static void +backup_cleanup(bool fatal, void *userdata) +{ + /* + * Update status of backup in BACKUP_CONTROL_FILE to ERROR. + * end_time != 0 means backup finished + */ + if (current.status == BACKUP_STATUS_RUNNING && current.end_time == 0) + { + elog(WARNING, "Backup %s is running, setting its status to ERROR", + base36enc(current.start_time)); + current.end_time = time(NULL); + current.status = BACKUP_STATUS_ERROR; + write_backup(¤t, true); + } +} + +/* + * Take a backup of the PGDATA at a file level. + * Copy all directories and files listed in backup_files_list. + * If the file is 'datafile' (regular relation's main fork), read it page by page, + * verify checksum and copy. + * In incremental backup mode, copy only files or datafiles' pages changed after + * previous backup. 
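+ *
+ * This is a thread worker: do_backup_instance() is expected to launch it
+ * once per thread, roughly like this (sketch only, names such as
+ * threads_args and backup_isok are illustrative, not the exact call site):
+ *
+ *     pthread_create(&threads[i], NULL, backup_files, &(threads_args[i]));
+ *     ...
+ *     pthread_join(threads[i], NULL);
+ *     if (threads_args[i].ret != 0)
+ *         backup_isok = false;
+ *
+ * Workers claim files with pg_atomic_test_set_flag(&file->lock), so one
+ * shared file list can be processed by all threads without extra locking.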
+ */ +static void * +backup_files(void *arg) +{ + int i; + char from_fullpath[MAXPGPATH]; + char to_fullpath[MAXPGPATH]; + static time_t prev_time; + + backup_files_arg *arguments = (backup_files_arg *) arg; + int n_backup_files_list = parray_num(arguments->files_list); + + prev_time = current.start_time; + + /* backup a file */ + for (i = 0; i < n_backup_files_list; i++) + { + pgFile *file = (pgFile *) parray_get(arguments->files_list, i); + pgFile *prev_file = NULL; + + /* We have already copied all directories */ + if (S_ISDIR(file->mode)) + continue; + + if (arguments->thread_num == 1) + { + /* update backup_content.control every 60 seconds */ + if ((difftime(time(NULL), prev_time)) > 60) + { + write_backup_filelist(¤t, arguments->files_list, arguments->from_root, + arguments->external_dirs, false); + /* update backup control file to update size info */ + write_backup(¤t, true); + + prev_time = time(NULL); + } + } + + if (!pg_atomic_test_set_flag(&file->lock)) + continue; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "interrupted during backup"); + + if (progress) + elog(INFO, "Progress: (%d/%d). Process file \"%s\"", + i + 1, n_backup_files_list, file->rel_path); + + /* Handle zero sized files */ + if (file->size == 0) + { + file->write_size = 0; + continue; + } + + /* construct destination filepath */ + if (file->external_dir_num == 0) + { + join_path_components(from_fullpath, arguments->from_root, file->rel_path); + join_path_components(to_fullpath, arguments->to_root, file->rel_path); + } + else + { + char external_dst[MAXPGPATH]; + char *external_path = (char *)parray_get(arguments->external_dirs, + file->external_dir_num - 1); + + makeExternalDirPathByNum(external_dst, + arguments->external_prefix, + file->external_dir_num); + + join_path_components(to_fullpath, external_dst, file->rel_path); + join_path_components(from_fullpath, external_path, file->rel_path); + } + + /* Encountered some strange beast */ + if (!S_ISREG(file->mode)) + elog(WARNING, "Unexpected type %d of file \"%s\", skipping", + file->mode, from_fullpath); + + /* Check that file exist in previous backup */ + if (current.backup_mode != BACKUP_MODE_FULL) + { + pgFile **prev_file_tmp = NULL; + prev_file_tmp = (pgFile **) parray_bsearch(arguments->prev_filelist, + file, pgFileCompareRelPathWithExternal); + if (prev_file_tmp) + { + /* File exists in previous backup */ + file->exists_in_prev = true; + prev_file = *prev_file_tmp; + } + } + + /* backup file */ + if (file->is_datafile && !file->is_cfs) + { + backup_data_file(&(arguments->conn_arg), file, from_fullpath, to_fullpath, + arguments->prev_start_lsn, + current.backup_mode, + instance_config.compress_alg, + instance_config.compress_level, + arguments->nodeInfo->checksum_version, + arguments->hdr_map, false); + } + else + { + backup_non_data_file(file, prev_file, from_fullpath, to_fullpath, + current.backup_mode, current.parent_backup, true); + } + + if (file->write_size == FILE_NOT_FOUND) + continue; + + if (file->write_size == BYTES_INVALID) + { + elog(VERBOSE, "Skipping the unchanged file: \"%s\"", from_fullpath); + continue; + } + + //elog(VERBOSE, "File \"%s\". 
Copied "INT64_FORMAT " bytes", + // from_fullpath, file->write_size); + } + + /* ssh connection to longer needed */ + fio_disconnect(); + + /* Close connection */ + if (arguments->conn_arg.conn) + pgut_disconnect(arguments->conn_arg.conn); + + /* Data files transferring is successful */ + arguments->ret = 0; + + return NULL; +} + +/* + * Extract information about files in backup_list parsing their names: + * - remove temp tables from the list + * - remove unlogged tables from the list (leave the _init fork) + * - set flags for database directories + * - set flags for datafiles + */ +void +parse_filelist_filenames(parray *files, const char *root) +{ + size_t i = 0; + Oid unlogged_file_reloid = 0; + + while (i < parray_num(files)) + { + pgFile *file = (pgFile *) parray_get(files, i); + int sscanf_result; + + if (S_ISREG(file->mode) && + path_is_prefix_of_path(PG_TBLSPC_DIR, file->rel_path)) + { + /* + * Found file in pg_tblspc/tblsOid/TABLESPACE_VERSION_DIRECTORY + * Legal only in case of 'pg_compression' + */ + if (strcmp(file->name, "pg_compression") == 0) + { + Oid tblspcOid; + Oid dbOid; + char tmp_rel_path[MAXPGPATH]; + /* + * Check that the file is located under + * TABLESPACE_VERSION_DIRECTORY + */ + sscanf_result = sscanf(file->rel_path, PG_TBLSPC_DIR "/%u/%s/%u", + &tblspcOid, tmp_rel_path, &dbOid); + + /* Yes, it is */ + if (sscanf_result == 2 && + strncmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY, + strlen(TABLESPACE_VERSION_DIRECTORY)) == 0) + set_cfs_datafiles(files, root, file->rel_path, i); + } + } + + if (S_ISREG(file->mode) && file->tblspcOid != 0 && + file->name && file->name[0]) + { + if (file->forkName == init) + { + /* + * Do not backup files of unlogged relations. + * scan filelist backward and exclude these files. + */ + int unlogged_file_num = i - 1; + pgFile *unlogged_file = (pgFile *) parray_get(files, + unlogged_file_num); + + unlogged_file_reloid = file->relOid; + + while (unlogged_file_num >= 0 && + (unlogged_file_reloid != 0) && + (unlogged_file->relOid == unlogged_file_reloid)) + { + pgFileFree(unlogged_file); + parray_remove(files, unlogged_file_num); + + unlogged_file_num--; + i--; + + unlogged_file = (pgFile *) parray_get(files, + unlogged_file_num); + } + } + } + + i++; + } +} + +/* If file is equal to pg_compression, then we consider this tablespace as + * cfs-compressed and should mark every file in this tablespace as cfs-file + * Setting is_cfs is done via going back through 'files' set every file + * that contain cfs_tablespace in his path as 'is_cfs' + * Goings back through array 'files' is valid option possible because of current + * sort rules: + * tblspcOid/TABLESPACE_VERSION_DIRECTORY + * tblspcOid/TABLESPACE_VERSION_DIRECTORY/dboid + * tblspcOid/TABLESPACE_VERSION_DIRECTORY/dboid/1 + * tblspcOid/TABLESPACE_VERSION_DIRECTORY/dboid/1.cfm + * tblspcOid/TABLESPACE_VERSION_DIRECTORY/pg_compression + */ +static void +set_cfs_datafiles(parray *files, const char *root, char *relative, size_t i) +{ + int len; + int p; + pgFile *prev_file; + char *cfs_tblspc_path; + + cfs_tblspc_path = strdup(relative); + if(!cfs_tblspc_path) + elog(ERROR, "Out of memory"); + len = strlen("/pg_compression"); + cfs_tblspc_path[strlen(cfs_tblspc_path) - len] = 0; + elog(VERBOSE, "CFS DIRECTORY %s, pg_compression path: %s", cfs_tblspc_path, relative); + + for (p = (int) i; p >= 0; p--) + { + prev_file = (pgFile *) parray_get(files, (size_t) p); + + elog(VERBOSE, "Checking file in cfs tablespace %s", prev_file->rel_path); + + if (strstr(prev_file->rel_path, cfs_tblspc_path) != 
NULL) + { + if (S_ISREG(prev_file->mode) && prev_file->is_datafile) + { + elog(VERBOSE, "Setting 'is_cfs' on file %s, name %s", + prev_file->rel_path, prev_file->name); + prev_file->is_cfs = true; + } + } + else + { + elog(VERBOSE, "Breaking on %s", prev_file->rel_path); + break; + } + } + free(cfs_tblspc_path); +} + +/* + * Find pgfile by given rnode in the backup_files_list + * and add given blkno to its pagemap. + */ +void +process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) +{ +// char *path; + char *rel_path; + BlockNumber blkno_inseg; + int segno; + pgFile **file_item; + pgFile f; + + segno = blkno / RELSEG_SIZE; + blkno_inseg = blkno % RELSEG_SIZE; + + rel_path = relpathperm(rnode, forknum); + if (segno > 0) + f.rel_path = rel_path;//psprintf("%s.%u", rel_path, segno); + else + f.rel_path = rel_path; + + f.external_dir_num = 0; + + /* backup_files_list should be sorted before */ + file_item = (pgFile **) parray_bsearch(backup_files_list, &f, + pgFileCompareRelPathWithExternal); + + /* + * If we don't have any record of this file in the file map, it means + * that it's a relation that did not have much activity since the last + * backup. We can safely ignore it. If it is a new relation file, the + * backup would simply copy it as-is. + */ + if (file_item) + { + /* We need critical section only we use more than one threads */ + if (num_threads > 1) + pthread_lock(&backup_pagemap_mutex); + + datapagemap_add(&(*file_item)->pagemap, blkno_inseg); + + if (num_threads > 1) + pthread_mutex_unlock(&backup_pagemap_mutex); + } + + pg_free(rel_path); +} + +/* + * Stop WAL streaming if current 'xlogpos' exceeds 'stop_backup_lsn', which is + * set by pg_stop_backup(). + */ +static bool +stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished) +{ + static uint32 prevtimeline = 0; + static XLogRecPtr prevpos = InvalidXLogRecPtr; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during WAL streaming"); + + /* we assume that we get called once at the end of each segment */ + if (segment_finished) + elog(VERBOSE, _("finished segment at %X/%X (timeline %u)"), + (uint32) (xlogpos >> 32), (uint32) xlogpos, timeline); + + /* + * Note that we report the previous, not current, position here. After a + * timeline switch, xlogpos points to the beginning of the segment because + * that's where we always begin streaming. Reporting the end of previous + * timeline isn't totally accurate, because the next timeline can begin + * slightly before the end of the WAL that we received on the previous + * timeline, but it's close enough for reporting purposes. 
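+ * For example (illustrative numbers): if streaming restarts at 0/6000000 on
+ * the new timeline while the last position received on the old timeline was
+ * 0/5FFE198, the message below reports 0/5FFE198 (prevpos), not 0/6000000.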
+ */ + if (prevtimeline != 0 && prevtimeline != timeline) + elog(LOG, _("switched to timeline %u at %X/%X\n"), + timeline, (uint32) (prevpos >> 32), (uint32) prevpos); + + if (!XLogRecPtrIsInvalid(stop_backup_lsn)) + { + if (xlogpos >= stop_backup_lsn) + { + stop_stream_lsn = xlogpos; + return true; + } + + /* pg_stop_backup() was executed, wait for the completion of stream */ + if (stream_stop_begin == 0) + { + elog(INFO, "Wait for LSN %X/%X to be streamed", + (uint32) (stop_backup_lsn >> 32), (uint32) stop_backup_lsn); + + stream_stop_begin = time(NULL); + } + + if (time(NULL) - stream_stop_begin > stream_stop_timeout) + elog(ERROR, "Target LSN %X/%X could not be streamed in %d seconds", + (uint32) (stop_backup_lsn >> 32), (uint32) stop_backup_lsn, + stream_stop_timeout); + } + + prevtimeline = timeline; + prevpos = xlogpos; + + return false; +} + +/* + * Start the log streaming + */ +static void * +StreamLog(void *arg) +{ + StreamThreadArg *stream_arg = (StreamThreadArg *) arg; + + /* + * Always start streaming at the beginning of a segment + */ + stream_arg->startpos -= stream_arg->startpos % instance_config.xlog_seg_size; + + /* Initialize timeout */ + stream_stop_begin = 0; + +#if PG_VERSION_NUM >= 100000 + /* if slot name was not provided for temp slot, use default slot name */ + if (!replication_slot && temp_slot) + replication_slot = "pg_probackup_slot"; +#endif + + +#if PG_VERSION_NUM >= 110000 + /* Create temp repslot */ + if (temp_slot) + CreateReplicationSlot(stream_arg->conn, replication_slot, + NULL, temp_slot, true, true, false); +#endif + + /* + * Start the replication + */ + elog(LOG, "started streaming WAL at %X/%X (timeline %u)", + (uint32) (stream_arg->startpos >> 32), (uint32) stream_arg->startpos, + stream_arg->starttli); + +#if PG_VERSION_NUM >= 90600 + { + StreamCtl ctl; + + MemSet(&ctl, 0, sizeof(ctl)); + + ctl.startpos = stream_arg->startpos; + ctl.timeline = stream_arg->starttli; + ctl.sysidentifier = NULL; + +#if PG_VERSION_NUM >= 100000 + ctl.walmethod = CreateWalDirectoryMethod(stream_arg->basedir, 0, true); + ctl.replication_slot = replication_slot; + ctl.stop_socket = PGINVALID_SOCKET; +#if PG_VERSION_NUM >= 100000 && PG_VERSION_NUM < 110000 + ctl.temp_slot = temp_slot; +#endif +#else + ctl.basedir = (char *) stream_arg->basedir; +#endif + + ctl.stream_stop = stop_streaming; + ctl.standby_message_timeout = standby_message_timeout_local; + ctl.partial_suffix = NULL; + ctl.synchronous = false; + ctl.mark_done = false; + + if(ReceiveXlogStream(stream_arg->conn, &ctl) == false) + elog(ERROR, "Problem in receivexlog"); + +#if PG_VERSION_NUM >= 100000 + if (!ctl.walmethod->finish()) + elog(ERROR, "Could not finish writing WAL files: %s", + strerror(errno)); +#endif + } +#else + if(ReceiveXlogStream(stream_arg->conn, stream_arg->startpos, stream_arg->starttli, + NULL, (char *) stream_arg->basedir, stop_streaming, + standby_message_timeout_local, false) == false) + elog(ERROR, "Problem in receivexlog"); +#endif + + elog(LOG, "finished streaming WAL at %X/%X (timeline %u)", + (uint32) (stop_stream_lsn >> 32), (uint32) stop_stream_lsn, stream_arg->starttli); + stream_arg->ret = 0; + + PQfinish(stream_arg->conn); + stream_arg->conn = NULL; + + return NULL; +} + +static void +check_external_for_tablespaces(parray *external_list, PGconn *backup_conn) +{ + PGresult *res; + int i = 0; + int j = 0; + char *tablespace_path = NULL; + char *query = (char *)"SELECT pg_catalog.pg_tablespace_location(oid) " + "FROM pg_catalog.pg_tablespace " + "WHERE 
pg_catalog.pg_tablespace_location(oid) <> '';"; + + res = pgut_execute(backup_conn, query, 0, NULL); + + /* Check successfull execution of query */ + if (!res) + elog(ERROR, "Failed to get list of tablespaces"); + + for (i = 0; i < res->ntups; i++) + { + tablespace_path = PQgetvalue(res, i, 0); + Assert (strlen(tablespace_path) > 0); + + canonicalize_path(tablespace_path); + + for (j = 0; j < parray_num(external_list); j++) + { + char *external_path = (char *)parray_get(external_list, j); + + if (path_is_prefix_of_path(external_path, tablespace_path)) + elog(ERROR, "External directory path (-E option) \"%s\" " + "contains tablespace \"%s\"", + external_path, tablespace_path); + if (path_is_prefix_of_path(tablespace_path, external_path)) + elog(WARNING, "External directory path (-E option) \"%s\" " + "is in tablespace directory \"%s\"", + tablespace_path, external_path); + } + } + PQclear(res); + + /* Check that external directories do not overlap */ + if (parray_num(external_list) < 2) + return; + + for (i = 0; i < parray_num(external_list); i++) + { + char *external_path = (char *)parray_get(external_list, i); + + for (j = 0; j < parray_num(external_list); j++) + { + char *tmp_external_path = (char *)parray_get(external_list, j); + + /* skip yourself */ + if (j == i) + continue; + + if (path_is_prefix_of_path(external_path, tmp_external_path)) + elog(ERROR, "External directory path (-E option) \"%s\" " + "contain another external directory \"%s\"", + external_path, tmp_external_path); + + } + } +} + +/* + * Run IDENTIFY_SYSTEM through a given connection and + * check system identifier and timeline are matching + */ +void +IdentifySystem(StreamThreadArg *stream_thread_arg) +{ + PGresult *res; + + uint64 stream_conn_sysidentifier = 0; + char *stream_conn_sysidentifier_str; + TimeLineID stream_conn_tli = 0; + + //if (!CheckServerVersionForStreaming(stream_thread_arg->conn)) + //{ + // PQfinish(stream_thread_arg->conn); + /* + * Error message already written in CheckServerVersionForStreaming(). + * There's no hope of recovering from a version mismatch, so don't + * retry. + */ + // elog(ERROR, "Cannot continue backup because stream connect has failed."); + //} + + /* + * Identify server, obtain server system identifier and timeline + */ + res = pgut_execute(stream_thread_arg->conn, "IDENTIFY_SYSTEM", 0, NULL); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + elog(WARNING,"Could not send replication command \"%s\": %s", + "IDENTIFY_SYSTEM", PQerrorMessage(stream_thread_arg->conn)); + PQfinish(stream_thread_arg->conn); + elog(ERROR, "Cannot continue backup because stream connect has failed."); + } + + stream_conn_sysidentifier_str = PQgetvalue(res, 0, 0); + stream_conn_tli = atoi(PQgetvalue(res, 0, 1)); + + /* Additional sanity, primary for PG 9.5, + * where system id can be obtained only via "IDENTIFY SYSTEM" + */ + if (!parse_uint64(stream_conn_sysidentifier_str, &stream_conn_sysidentifier, 0)) + elog(ERROR, "%s is not system_identifier", stream_conn_sysidentifier_str); + + if (stream_conn_sysidentifier != instance_config.system_identifier) + elog(ERROR, "System identifier mismatch. Connected PostgreSQL instance has system id: " + "" UINT64_FORMAT ". Expected: " UINT64_FORMAT ".", + stream_conn_sysidentifier, instance_config.system_identifier); + + if (stream_conn_tli != current.tli) + elog(ERROR, "Timeline identifier mismatch. " + "Connected PostgreSQL instance has timeline id: %X. 
Expected: %X.", + stream_conn_tli, current.tli); + + PQclear(res); +} diff --git a/src/bin/pg_probackup/catalog.cpp b/src/bin/pg_probackup/catalog.cpp new file mode 100644 index 000000000..3c78a4e50 --- /dev/null +++ b/src/bin/pg_probackup/catalog.cpp @@ -0,0 +1,2775 @@ +/*------------------------------------------------------------------------- + * + * catalog.c: backup catalog operation + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" +//#include "access/timeline.h" + +#include +#include +#include +#include + +#include "file.h" +#include "configuration.h" +#include "common/fe_memutils.h" + +static pgBackup* get_closest_backup(timelineInfo *tlinfo); +static pgBackup* get_oldest_backup(timelineInfo *tlinfo); +static const char *backupModes[] = {"", "PTRACK", "FULL"}; +static pgBackup *readBackupControlFile(const char *path); + +static bool exit_hook_registered = false; +static parray *lock_files = NULL; + +static timelineInfo * +timelineInfoNew(TimeLineID tli) +{ + timelineInfo *tlinfo = (timelineInfo *) pgut_malloc(sizeof(timelineInfo)); + MemSet(tlinfo, 0, sizeof(timelineInfo)); + tlinfo->tli = tli; + tlinfo->switchpoint = InvalidXLogRecPtr; + tlinfo->parent_link = NULL; + tlinfo->xlog_filelist = parray_new(); + tlinfo->anchor_lsn = InvalidXLogRecPtr; + tlinfo->anchor_tli = 0; + tlinfo->n_xlog_files = 0; + return tlinfo; +} + +/* free timelineInfo object */ +void +timelineInfoFree(void *tliInfo) +{ + timelineInfo *tli = (timelineInfo *) tliInfo; + + parray_walk(tli->xlog_filelist, pgFileFree); + parray_free(tli->xlog_filelist); + + if (tli->backups) + { + parray_walk(tli->backups, pgBackupFree); + parray_free(tli->backups); + } + + pfree(tliInfo); +} + +/* Iterate over locked backups and delete locks files */ +static void +unlink_lock_atexit(void) +{ + int i; + + if (lock_files == NULL) + return; + + for (i = 0; i < parray_num(lock_files); i++) + { + char *lock_file = (char *) parray_get(lock_files, i); + int res; + + res = fio_unlink(lock_file, FIO_BACKUP_HOST); + if (res != 0 && errno != ENOENT) + elog(WARNING, "%s: %s", lock_file, strerror(errno)); + } + + parray_walk(lock_files, pfree); + parray_free(lock_files); + lock_files = NULL; +} + +/* + * Read backup meta information from BACKUP_CONTROL_FILE. + * If no backup matches, return NULL. + */ +pgBackup * +read_backup(const char *root_dir) +{ + char conf_path[MAXPGPATH]; + + join_path_components(conf_path, root_dir, BACKUP_CONTROL_FILE); + + return readBackupControlFile(conf_path); +} + +/* + * Save the backup status into BACKUP_CONTROL_FILE. + * + * We need to reread the backup using its ID and save it changing only its + * status. + */ +void +write_backup_status(pgBackup *backup, BackupStatus status, + const char *instance_name, bool strict) +{ + pgBackup *tmp; + + tmp = read_backup(backup->root_dir); + if (!tmp) + { + /* + * Silently exit the function, since read_backup already logged the + * warning message. + */ + return; + } + + backup->status = status; + tmp->status = backup->status; + tmp->root_dir = pgut_strdup(backup->root_dir); + + write_backup(tmp, strict); + + pgBackupFree(tmp); +} + +/* + * Create exclusive lockfile in the backup's directory. 
+ */ +bool +lock_backup(pgBackup *backup, bool strict) +{ + char lock_file[MAXPGPATH]; + int fd; + char buffer[MAXPGPATH * 2 + 256]; + int ntries; + int len; + int encoded_pid; + pid_t my_pid, + my_p_pid; + + join_path_components(lock_file, backup->root_dir, BACKUP_CATALOG_PID); + + /* + * If the PID in the lockfile is our own PID or our parent's or + * grandparent's PID, then the file must be stale (probably left over from + * a previous system boot cycle). We need to check this because of the + * likelihood that a reboot will assign exactly the same PID as we had in + * the previous reboot, or one that's only one or two counts larger and + * hence the lockfile's PID now refers to an ancestor shell process. We + * allow pg_ctl to pass down its parent shell PID (our grandparent PID) + * via the environment variable PG_GRANDPARENT_PID; this is so that + * launching the postmaster via pg_ctl can be just as reliable as + * launching it directly. There is no provision for detecting + * further-removed ancestor processes, but if the init script is written + * carefully then all but the immediate parent shell will be root-owned + * processes and so the kill test will fail with EPERM. Note that we + * cannot get a false negative this way, because an existing postmaster + * would surely never launch a competing postmaster or pg_ctl process + * directly. + */ + my_pid = getpid(); +#ifndef WIN32 + my_p_pid = getppid(); +#else + + /* + * Windows hasn't got getppid(), but doesn't need it since it's not using + * real kill() either... + */ + my_p_pid = 0; +#endif + + /* + * We need a loop here because of race conditions. But don't loop forever + * (for example, a non-writable $backup_instance_path directory might cause a failure + * that won't go away). 100 tries seems like plenty. + */ + for (ntries = 0;; ntries++) + { + /* + * Try to create the lock file --- O_EXCL makes this atomic. + * + * Think not to make the file protection weaker than 0600. See + * comments below. + */ + fd = fio_open(lock_file, O_RDWR | O_CREAT | O_EXCL, FIO_BACKUP_HOST); + if (fd >= 0) + break; /* Success; exit the retry loop */ + + /* + * Couldn't create the pid file. Probably it already exists. + */ + if ((errno != EEXIST && errno != EACCES) || ntries > 100) + elog(ERROR, "Could not create lock file \"%s\": %s", + lock_file, strerror(errno)); + + /* + * Read the file to get the old owner's PID. Note race condition + * here: file might have been deleted since we tried to create it. + */ + fd = fio_open(lock_file, O_RDONLY, FIO_BACKUP_HOST); + if (fd < 0) + { + if (errno == ENOENT) + continue; /* race condition; try again */ + elog(ERROR, "Could not open lock file \"%s\": %s", + lock_file, strerror(errno)); + } + if ((len = fio_read(fd, buffer, sizeof(buffer) - 1)) < 0) + elog(ERROR, "Could not read lock file \"%s\": %s", + lock_file, strerror(errno)); + fio_close(fd); + + if (len == 0) + elog(ERROR, "Lock file \"%s\" is empty", lock_file); + + buffer[len] = '\0'; + encoded_pid = atoi(buffer); + + if (encoded_pid <= 0) + elog(ERROR, "Bogus data in lock file \"%s\": \"%s\"", + lock_file, buffer); + + /* + * Check to see if the other process still exists + * + * Per discussion above, my_pid, my_p_pid can be + * ignored as false matches. + * + * Normally kill() will fail with ESRCH if the given PID doesn't + * exist. 
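+ * Signal 0 delivers nothing: kill(pid, 0) only tests whether the PID exists
+ * and whether we may signal it. In the code below a return of 0 means the
+ * owner is alive (honour the lock and give up), ESRCH means it is gone
+ * (treat the lock file as stale, remove it and retry), and any other errno,
+ * e.g. EPERM, is escalated to ERROR.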
+ */ + if (encoded_pid != my_pid && encoded_pid != my_p_pid) + { + if (kill(encoded_pid, 0) == 0) + { + elog(WARNING, "Process %d is using backup %s and still is running", + encoded_pid, base36enc(backup->start_time)); + return false; + } + else + { + if (errno == ESRCH) + elog(WARNING, "Process %d which used backup %s no longer exists", + encoded_pid, base36enc(backup->start_time)); + else + elog(ERROR, "Failed to send signal 0 to a process %d: %s", + encoded_pid, strerror(errno)); + } + } + + /* + * Looks like nobody's home. Unlink the file and try again to create + * it. Need a loop because of possible race condition against other + * would-be creators. + */ + if (fio_unlink(lock_file, FIO_BACKUP_HOST) < 0) + elog(ERROR, "Could not remove old lock file \"%s\": %s", + lock_file, strerror(errno)); + } + + /* + * Successfully created the file, now fill it. + */ + snprintf(buffer, sizeof(buffer), "%d\n", my_pid); + + errno = 0; + if (fio_write(fd, buffer, strlen(buffer)) != strlen(buffer)) + { + int save_errno = errno; + + fio_close(fd); + fio_unlink(lock_file, FIO_BACKUP_HOST); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + /* In lax mode if we failed to grab lock because of 'out of space error', + * then treat backup as locked. + * Only delete command should be run in lax mode. + */ + if (!strict && errno == ENOSPC) + return true; + + elog(ERROR, "Could not write lock file \"%s\": %s", + lock_file, strerror(errno)); + } + if (fio_flush(fd) != 0) + { + int save_errno = errno; + + fio_close(fd); + fio_unlink(lock_file, FIO_BACKUP_HOST); + errno = save_errno; + elog(ERROR, "Could not write lock file \"%s\": %s", + lock_file, strerror(errno)); + } + if (fio_close(fd) != 0) + { + int save_errno = errno; + + fio_unlink(lock_file, FIO_BACKUP_HOST); + errno = save_errno; + elog(ERROR, "Could not write lock file \"%s\": %s", + lock_file, strerror(errno)); + } + + /* + * Arrange to unlink the lock file(s) at proc_exit. + */ + if (!exit_hook_registered) + { + atexit(unlink_lock_atexit); + exit_hook_registered = true; + } + + /* Use parray so that the lock files are unlinked in a loop */ + if (lock_files == NULL) + lock_files = parray_new(); + parray_append(lock_files, pgut_strdup(lock_file)); + + return true; +} + +/* + * Get backup_mode in string representation. + */ +const char * +pgBackupGetBackupMode(pgBackup *backup) +{ + return backupModes[backup->backup_mode]; +} + +static bool +IsDir(const char *dirpath, const char *entry, fio_location location) +{ + char path[MAXPGPATH]; + struct stat st; + + snprintf(path, MAXPGPATH, "%s/%s", dirpath, entry); + + return fio_stat(path, &st, false, location) == 0 && S_ISDIR(st.st_mode); +} + +/* + * Create list of instances in given backup catalog. + * + * Returns parray of "InstanceConfig" structures, filled with + * actual config of each instance. 
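+ * A caller is expected to walk the result with the parray helpers, e.g.:
+ *
+ *     parray *instances = catalog_get_instance_list();
+ *
+ *     for (i = 0; i < parray_num(instances); i++)
+ *     {
+ *         InstanceConfig *instance = (InstanceConfig *) parray_get(instances, i);
+ *         ... use the instance config ...
+ *     }
+ *
+ * Freeing the list is left to the caller; no particular InstanceConfig
+ * fields are assumed here.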
+ */ +parray * +catalog_get_instance_list(void) +{ + char path[MAXPGPATH]; + DIR *dir; + struct dirent *dent; + parray *instances; + + instances = parray_new(); + + /* open directory and list contents */ + join_path_components(path, backup_path, BACKUPS_DIR); + dir = opendir(path); + if (dir == NULL) + elog(ERROR, "Cannot open directory \"%s\": %s", + path, strerror(errno)); + + while (errno = 0, (dent = readdir(dir)) != NULL) + { + char child[MAXPGPATH]; + struct stat st; + InstanceConfig *instance; + + /* skip entries point current dir or parent dir */ + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0) + continue; + + join_path_components(child, path, dent->d_name); + + if (lstat(child, &st) == -1) + elog(ERROR, "Cannot stat file \"%s\": %s", + child, strerror(errno)); + + if (!S_ISDIR(st.st_mode)) + continue; + + instance = readInstanceConfigFile(dent->d_name); + + parray_append(instances, instance); + } + + /* TODO 3.0: switch to ERROR */ + if (parray_num(instances) == 0) + elog(WARNING, "This backup catalog contains no backup instances. Backup instance can be added via 'add-instance' command."); + + if (errno) + elog(ERROR, "Cannot read directory \"%s\": %s", + path, strerror(errno)); + + if (closedir(dir)) + elog(ERROR, "Cannot close directory \"%s\": %s", + path, strerror(errno)); + + return instances; +} + +/* + * Create list of backups. + * If 'requested_backup_id' is INVALID_BACKUP_ID, return list of all backups. + * The list is sorted in order of descending start time. + * If valid backup id is passed only matching backup will be added to the list. + */ +parray * +catalog_get_backup_list(const char *instance_name, time_t requested_backup_id) +{ + DIR *data_dir = NULL; + struct dirent *data_ent = NULL; + parray *backups = NULL; + int i; + char backup_instance_path[MAXPGPATH]; + + sprintf(backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + + /* open backup instance backups directory */ + data_dir = fio_opendir(backup_instance_path, FIO_BACKUP_HOST); + if (data_dir == NULL) + { + elog(WARNING, "cannot open directory \"%s\": %s", backup_instance_path, + strerror(errno)); + goto err_proc; + } + + /* scan the directory and list backups */ + backups = parray_new(); + for (; (data_ent = fio_readdir(data_dir)) != NULL; errno = 0) + { + char backup_conf_path[MAXPGPATH]; + char data_path[MAXPGPATH]; + pgBackup *backup = NULL; + + /* skip not-directory entries and hidden entries */ + if (!IsDir(backup_instance_path, data_ent->d_name, FIO_BACKUP_HOST) + || data_ent->d_name[0] == '.') + continue; + + /* open subdirectory of specific backup */ + join_path_components(data_path, backup_instance_path, data_ent->d_name); + + /* read backup information from BACKUP_CONTROL_FILE */ + snprintf(backup_conf_path, MAXPGPATH, "%s/%s", data_path, BACKUP_CONTROL_FILE); + backup = readBackupControlFile(backup_conf_path); + + if (!backup) + { + backup = pgut_new(pgBackup); + pgBackupInit(backup); + backup->start_time = base36dec(data_ent->d_name); + } + else if (strcmp(base36enc(backup->start_time), data_ent->d_name) != 0) + { + elog(WARNING, "backup ID in control file \"%s\" doesn't match name of the backup folder \"%s\"", + base36enc(backup->start_time), backup_conf_path); + } + + backup->root_dir = pgut_strdup(data_path); + + backup->database_dir = (char *)pgut_malloc(MAXPGPATH); + join_path_components(backup->database_dir, backup->root_dir, DATABASE_DIR); + + /* Initialize page header map */ + init_header_map(backup); + + /* TODO: save encoded backup id 
*/ + backup->backup_id = backup->start_time; + if (requested_backup_id != INVALID_BACKUP_ID + && requested_backup_id != backup->start_time) + { + pgBackupFree(backup); + continue; + } + parray_append(backups, backup); + + if (errno && errno != ENOENT) + { + elog(WARNING, "cannot read data directory \"%s\": %s", + data_ent->d_name, strerror(errno)); + goto err_proc; + } + } + if (errno) + { + elog(WARNING, "cannot read backup root directory \"%s\": %s", + backup_instance_path, strerror(errno)); + goto err_proc; + } + + fio_closedir(data_dir); + data_dir = NULL; + + parray_qsort(backups, pgBackupCompareIdDesc); + + /* Link incremental backups with their ancestors.*/ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *curr = (pgBackup *)parray_get(backups, i); + pgBackup **ancestor; + pgBackup key; + + if (curr->backup_mode == BACKUP_MODE_FULL) + continue; + + key.start_time = curr->parent_backup; + ancestor = (pgBackup **) parray_bsearch(backups, &key, + pgBackupCompareIdDesc); + if (ancestor) + curr->parent_backup_link = *ancestor; + } + + return backups; + +err_proc: + if (data_dir) + fio_closedir(data_dir); + if (backups) + parray_walk(backups, pgBackupFree); + parray_free(backups); + + elog(ERROR, "Failed to get backup list"); + + return NULL; +} + +/* + * Create list of backup datafiles. + * If 'requested_backup_id' is INVALID_BACKUP_ID, exit with error. + * If valid backup id is passed only matching backup will be added to the list. + * TODO this function only used once. Is it really needed? + */ +parray * +get_backup_filelist(pgBackup *backup, bool strict) +{ + parray *files = NULL; + char backup_filelist_path[MAXPGPATH]; + + join_path_components(backup_filelist_path, backup->root_dir, DATABASE_FILE_LIST); + files = dir_read_file_list(NULL, NULL, backup_filelist_path, FIO_BACKUP_HOST, backup->content_crc); + + /* redundant sanity? */ + if (!files) + elog(strict ? ERROR : WARNING, "Failed to get file list for backup %s", base36enc(backup->start_time)); + + return files; +} + +/* + * Lock list of backups. Function goes in backward direction. 
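+ * The two indexes may be passed in either order: locking runs from
+ * Max(from_idx, to_idx) down to Min(from_idx, to_idx). Because the list
+ * built by catalog_get_backup_list() is sorted newest-first, a call such as
+ *
+ *     catalog_lock_backup_list(backup_list, parray_num(backup_list) - 1, 0, true);
+ *
+ * locks every backup starting with the oldest one. Failure to take any
+ * single lock is reported as ERROR.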
+ */ +void +catalog_lock_backup_list(parray *backup_list, int from_idx, int to_idx, bool strict) +{ + int start_idx, + end_idx; + int i; + + if (parray_num(backup_list) == 0) + return; + + start_idx = Max(from_idx, to_idx); + end_idx = Min(from_idx, to_idx); + + for (i = start_idx; i >= end_idx; i--) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + if (!lock_backup(backup, strict)) + elog(ERROR, "Cannot lock backup %s directory", + base36enc(backup->start_time)); + } +} + +/* + * Find the latest valid child of latest valid FULL backup on given timeline + */ +pgBackup * +catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current_start_time) +{ + int i; + pgBackup *full_backup = NULL; + pgBackup *tmp_backup = NULL; + char *invalid_backup_id; + + /* backup_list is sorted in order of descending ID */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + if ((backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) && backup->tli == tli) + { + full_backup = backup; + break; + } + } + + /* Failed to find valid FULL backup to fulfill ancestor role */ + if (!full_backup) + return NULL; + + elog(LOG, "Latest valid FULL backup: %s", + base36enc(full_backup->start_time)); + + /* FULL backup is found, lets find his latest child */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* only valid descendants are acceptable for evaluation */ + if ((backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + switch (scan_parent_chain(backup, &tmp_backup)) + { + /* broken chain */ + case ChainIsBroken: + invalid_backup_id = base36enc_dup(tmp_backup->parent_backup); + + elog(WARNING, "Backup %s has missing parent: %s. Cannot be a parent", + base36enc(backup->start_time), invalid_backup_id); + pg_free(invalid_backup_id); + continue; + + /* chain is intact, but at least one parent is invalid */ + case ChainIsInvalid: + invalid_backup_id = base36enc_dup(tmp_backup->start_time); + + elog(WARNING, "Backup %s has invalid parent: %s. Cannot be a parent", + base36enc(backup->start_time), invalid_backup_id); + pg_free(invalid_backup_id); + continue; + + /* chain is ok */ + case ChainIsOk: + /* Yes, we could call is_parent() earlier - after choosing the ancestor, + * but this way we have an opportunity to detect and report all possible + * anomalies. + */ + if (is_parent(full_backup->start_time, backup, true)) + return backup; + } + } + /* skip yourself */ + else if (backup->start_time == current_start_time) + continue; + else + { + elog(WARNING, "Backup %s has status: %s. Cannot be a parent.", + base36enc(backup->start_time), status2str(backup->status)); + } + } + + return NULL; +} + +/* + * For multi-timeline chain, look up suitable parent for incremental backup. + * Multi-timeline chain has full backup and one or more descendants located + * on different timelines. 
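+ * A plausible call site (the real one lives in the backup code and may
+ * differ in detail) is the fallback path after the current timeline has
+ * been searched:
+ *
+ *     prev_backup = catalog_get_last_data_backup(backup_list, current.tli,
+ *                                                current.start_time);
+ *     if (prev_backup == NULL)
+ *         prev_backup = get_multi_timeline_parent(backup_list, tli_list,
+ *                                                 current.tli,
+ *                                                 current.start_time,
+ *                                                 &instance_config);
+ *
+ * where tli_list is the list of timelineInfo structures built from the WAL
+ * archive.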
+ */ +pgBackup * +get_multi_timeline_parent(parray *backup_list, parray *tli_list, + TimeLineID current_tli, time_t current_start_time, + InstanceConfig *instance) +{ + int i; + timelineInfo *my_tlinfo = NULL; + timelineInfo *tmp_tlinfo = NULL; + pgBackup *ancestor_backup = NULL; + + /* there are no timelines in the archive */ + if (parray_num(tli_list) == 0) + return NULL; + + /* look for current timelineInfo */ + for (i = 0; i < parray_num(tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i); + + if (tlinfo->tli == current_tli) + { + my_tlinfo = tlinfo; + break; + } + } + + if (my_tlinfo == NULL) + return NULL; + + /* Locate tlinfo of suitable full backup. + * Consider this example: + * t3 s2-------X <-! We are here + * / + * t2 s1----D---*----E---> + * / + * t1--A--B--*---C-------> + * + * A, E - full backups + * B, C, D - incremental backups + * + * We must find A. + */ + tmp_tlinfo = my_tlinfo; + while (tmp_tlinfo->parent_link) + { + /* if timeline has backups, iterate over them */ + if (tmp_tlinfo->parent_link->backups) + { + for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i); + + if (backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) && + backup->stop_lsn <= tmp_tlinfo->switchpoint) + { + ancestor_backup = backup; + break; + } + } + } + + if (ancestor_backup) + break; + + tmp_tlinfo = tmp_tlinfo->parent_link; + } + + /* failed to find valid FULL backup on parent timelines */ + if (!ancestor_backup) + return NULL; + else + elog(LOG, "Latest valid full backup: %s, tli: %i", + base36enc(ancestor_backup->start_time), ancestor_backup->tli); + + /* At this point we found suitable full backup, + * now we must find his latest child, suitable to be + * parent of current incremental backup. + * Consider this example: + * t3 s2-------X <-! We are here + * / + * t2 s1----D---*----E---> + * / + * t1--A--B--*---C-------> + * + * A, E - full backups + * B, C, D - incremental backups + * + * We found A, now we must find D. 
+ */ + + /* Optimistically, look on current timeline for valid incremental backup, child of ancestor */ + if (my_tlinfo->backups) + { + /* backups are sorted in descending order and we need latest valid */ + for (i = 0; i < parray_num(my_tlinfo->backups); i++) + { + pgBackup *tmp_backup = NULL; + pgBackup *backup = (pgBackup *) parray_get(my_tlinfo->backups, i); + + /* found suitable parent */ + if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk && + is_parent(ancestor_backup->start_time, backup, false)) + return backup; + } + } + + /* Iterate over parent timelines and look for a valid backup, child of ancestor */ + tmp_tlinfo = my_tlinfo; + while (tmp_tlinfo->parent_link) + { + + /* if timeline has backups, iterate over them */ + if (tmp_tlinfo->parent_link->backups) + { + for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++) + { + pgBackup *tmp_backup = NULL; + pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i); + + /* We are not interested in backups + * located outside of our timeline history + */ + if (backup->stop_lsn > tmp_tlinfo->switchpoint) + continue; + + if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk && + is_parent(ancestor_backup->start_time, backup, true)) + return backup; + } + } + + tmp_tlinfo = tmp_tlinfo->parent_link; + } + + return NULL; +} + +/* create backup directory in $BACKUP_PATH */ +int +pgBackupCreateDir(pgBackup *backup) +{ + int i; + char path[MAXPGPATH]; + parray *subdirs = parray_new(); + + parray_append(subdirs, pg_strdup(DATABASE_DIR)); + + /* Add external dirs containers */ + if (backup->external_dir_str) + { + parray *external_list; + + external_list = make_external_directory_list(backup->external_dir_str, + false); + for (i = 0; i < parray_num(external_list); i++) + { + char temp[MAXPGPATH]; + /* Numeration of externaldirs starts with 1 */ + makeExternalDirPathByNum(temp, EXTERNAL_DIR, i+1); + parray_append(subdirs, pg_strdup(temp)); + } + free_dir_list(external_list); + } + + pgBackupGetPath(backup, path, lengthof(path), NULL); + + if (!dir_is_empty(path, FIO_BACKUP_HOST)) + elog(ERROR, "backup destination is not empty \"%s\"", path); + + fio_mkdir(path, DIR_PERMISSION, FIO_BACKUP_HOST); + backup->root_dir = pgut_strdup(path); + + backup->database_dir = (char *)pgut_malloc(MAXPGPATH); + join_path_components(backup->database_dir, backup->root_dir, DATABASE_DIR); + + /* block header map */ + init_header_map(backup); + + /* create directories for actual backup files */ + for (i = 0; i < parray_num(subdirs); i++) + { + join_path_components(path, backup->root_dir, (const char *)parray_get(subdirs, i)); + fio_mkdir(path, DIR_PERMISSION, FIO_BACKUP_HOST); + } + + free_dir_list(subdirs); + return 0; +} + +/* + * Create list of timelines. + * TODO: '.partial' and '.part' segno information should be added to tlinfo. 
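+ *
+ * File names classified below (examples): plain segments such as
+ * "000000010000000000000002", gzip-compressed segments ending in ".gz",
+ * ".partial"/".part" and temp segments, backup history files, and
+ * (when SUPPORT_MULTI_TIMELINE is defined) "00000002.history" files;
+ * anything else is reported as an unexpected WAL file name.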
+ */ +parray * +catalog_get_timelines(InstanceConfig *instance) +{ + int i,j,k; + parray *xlog_files_list = parray_new(); + parray *timelineinfos; + parray *backups; + timelineInfo *tlinfo; + char arclog_path[MAXPGPATH]; + + /* for fancy reporting */ + char begin_segno_str[MAXFNAMELEN]; + char end_segno_str[MAXFNAMELEN]; + + /* read all xlog files that belong to this archive */ + sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance->name); + dir_list_file(xlog_files_list, arclog_path, false, false, false, false, true, 0, FIO_BACKUP_HOST); + parray_qsort(xlog_files_list, pgFileCompareName); + + timelineinfos = parray_new(); + tlinfo = NULL; + + /* walk through files and collect info about timelines */ + for (i = 0; i < parray_num(xlog_files_list); i++) + { + pgFile *file = (pgFile *) parray_get(xlog_files_list, i); + TimeLineID tli; + parray *timelines; + xlogFile *wal_file = NULL; + + /* + * Regular WAL file. + * IsXLogFileName() cannot be used here + */ + if (strspn(file->name, "0123456789ABCDEF") == XLOG_FNAME_LEN) + { + int result = 0; + uint32 log, seg; + XLogSegNo segno = 0; + char suffix[MAXFNAMELEN]; + + result = sscanf(file->name, "%08X%08X%08X.%s", + &tli, &log, &seg, (char *) &suffix); + + /* sanity */ + if (result < 3) + { + elog(WARNING, "unexpected WAL file name \"%s\"", file->name); + continue; + } + + /* get segno from log */ + GetXLogSegNoFromScrath(segno, log, seg, instance->xlog_seg_size); + + /* regular WAL file with suffix */ + if (result == 4) + { + /* backup history file. Currently we don't use them */ + if (IsBackupHistoryFileName(file->name)) + { + elog(VERBOSE, "backup history file \"%s\"", file->name); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + + /* append file to xlog file list */ + wal_file = (xlogFile *)palloc(sizeof(xlogFile)); + //wal_file->file = *file; + memcpy(&wal_file->file, file, sizeof(wal_file->file)); + wal_file->segno = segno; + wal_file->type = BACKUP_HISTORY_FILE; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + continue; + } + /* partial WAL segment */ + else if (IsPartialXLogFileName(file->name) || + IsPartialCompressXLogFileName(file->name)) + { + elog(VERBOSE, "partial WAL file \"%s\"", file->name); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + + /* append file to xlog file list */ + wal_file = (xlogFile *)palloc(sizeof(xlogFile)); + //wal_file->file = *file; + memcpy(&wal_file->file, file, sizeof(wal_file->file)); + wal_file->segno = segno; + wal_file->type = PARTIAL_SEGMENT; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + continue; + } + /* temp WAL segment */ + else if (IsTempXLogFileName(file->name) || + IsTempCompressXLogFileName(file->name)) + { + elog(VERBOSE, "temp WAL file \"%s\"", file->name); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + + /* append file to xlog file list */ + wal_file = (xlogFile *)palloc(sizeof(xlogFile)); + //wal_file->file = *file; + memcpy(&wal_file->file, file, sizeof(wal_file->file)); + wal_file->segno = segno; + wal_file->type = TEMP_SEGMENT; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + continue; + } + /* we only expect compressed wal files with .gz suffix */ + else if (strcmp(suffix, "gz") != 0) + { + elog(WARNING, "unexpected WAL file name \"%s\"", file->name); + continue; + } + } 
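+
+            /*
+             * Sketch of the segment-number math (assuming the standard
+             * PostgreSQL layout): GetXLogSegNoFromScrath() above is expected
+             * to be equivalent to
+             *
+             *     segno = (XLogSegNo) log * (0x100000000UL / xlog_seg_size) + seg;
+             *
+             * e.g. "000000010000000000000002" with 16MB segments gives
+             * tli = 1, log = 0, seg = 2 and therefore segno = 2.
+             */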
+ + /* new file belongs to new timeline */ + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + /* + * As it is impossible to detect if segments before segno are lost, + * or just do not exist, do not report them as lost. + */ + else if (tlinfo->n_xlog_files != 0) + { + /* check, if segments are consequent */ + XLogSegNo expected_segno = tlinfo->end_segno + 1; + + /* + * Some segments are missing. remember them in lost_segments to report. + * Normally we expect that segment numbers form an increasing sequence, + * though it's legal to find two files with equal segno in case there + * are both compressed and non-compessed versions. For example + * 000000010000000000000002 and 000000010000000000000002.gz + * + */ + if (segno != expected_segno && segno != tlinfo->end_segno) + { + xlogInterval *interval = (xlogInterval *)palloc(sizeof(xlogInterval));; + interval->begin_segno = expected_segno; + interval->end_segno = segno - 1; + + if (tlinfo->lost_segments == NULL) + tlinfo->lost_segments = parray_new(); + + parray_append(tlinfo->lost_segments, interval); + } + } + + if (tlinfo->begin_segno == 0) + tlinfo->begin_segno = segno; + + /* this file is the last for this timeline so far */ + tlinfo->end_segno = segno; + /* update counters */ + tlinfo->n_xlog_files++; + tlinfo->size += file->size; + + /* append file to xlog file list */ + wal_file = (xlogFile *)palloc(sizeof(xlogFile)); + //wal_file->file = *file; + memcpy(&wal_file->file, file, sizeof(wal_file->file)); + wal_file->segno = segno; + wal_file->type = SEGMENT; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + } +#ifdef SUPPORT_MULTI_TIMELINE + /* timeline history file */ + else if (IsTLHistoryFileName(file->name)) + { + TimeLineHistoryEntry *tln; + + sscanf(file->name, "%08X.history", &tli); + timelines = read_timeline_history(arclog_path, tli, true); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + /* + * 1 is the latest timeline in the timelines list. 
+ * 0 - is our timeline, which is of no interest here + */ + tln = (TimeLineHistoryEntry *) parray_get(timelines, 1); + tlinfo->switchpoint = tln->end; + tlinfo->parent_tli = tln->tli; + + /* find parent timeline to link it with this one */ + for (j = 0; j < parray_num(timelineinfos); j++) + { + timelineInfo *cur = (timelineInfo *) parray_get(timelineinfos, j); + if (cur->tli == tlinfo->parent_tli) + { + tlinfo->parent_link = cur; + break; + } + } + } + + parray_walk(timelines, pfree); + parray_free(timelines); + } +#endif + else + elog(WARNING, "unexpected WAL file name \"%s\"", file->name); + } + + /* save information about backups belonging to each timeline */ + backups = catalog_get_backup_list(instance->name, INVALID_BACKUP_ID); + + for (i = 0; i < parray_num(timelineinfos); i++) + { + timelineInfo *tlinfo = (timelineInfo *)parray_get(timelineinfos, i); + for (j = 0; j < parray_num(backups); j++) + { + pgBackup *backup = (pgBackup *)parray_get(backups, j); + if (tlinfo->tli == backup->tli) + { + if (tlinfo->backups == NULL) + tlinfo->backups = parray_new(); + + parray_append(tlinfo->backups, backup); + } + } + } + + /* determine oldest backup and closest backup for every timeline */ + for (i = 0; i < parray_num(timelineinfos); i++) + { + timelineInfo *tlinfo = (timelineInfo *)parray_get(timelineinfos, i); + + tlinfo->oldest_backup = get_oldest_backup(tlinfo); + tlinfo->closest_backup = get_closest_backup(tlinfo); + } + + /* determine which WAL segments must be kept because of wal retention */ + if (instance->wal_depth <= 0) + return timelineinfos; + + /* + * WAL retention for now is fairly simple. + * User can set only one parameter - 'wal-depth'. + * It determines how many latest valid(!) backups on timeline + * must have an ability to perform PITR: + * Consider the example: + * + * ---B1-------B2-------B3-------B4--------> WAL timeline1 + * + * If 'wal-depth' is set to 2, then WAL purge should produce the following result: + * + * B1 B2 B3-------B4--------> WAL timeline1 + * + * Only valid backup can satisfy 'wal-depth' condition, so if B3 is not OK or DONE, + * then WAL purge should produce the following result: + * B1 B2-------B3-------B4--------> WAL timeline1 + * + * Complicated cases, such as branched timelines are taken into account. + * wal-depth is applied to each timeline independently: + * + * |---------> WAL timeline2 + * ---B1---|---B2-------B3-------B4--------> WAL timeline1 + * + * after WAL purge with wal-depth=2: + * + * |---------> WAL timeline2 + * B1---| B2 B3-------B4--------> WAL timeline1 + * + * In this example WAL retention prevents purge of WAL required by tli2 + * to stay reachable from backup B on tli1. + * + * To protect WAL from purge we try to set 'anchor_lsn' and 'anchor_tli' in every timeline. + * They are usually comes from 'start-lsn' and 'tli' attributes of backup + * calculated by 'wal-depth' parameter. + * With 'wal-depth=2' anchor_backup in tli1 is B3. + + * If timeline has not enough valid backups to satisfy 'wal-depth' condition, + * then 'anchor_lsn' and 'anchor_tli' taken from from 'start-lsn' and 'tli + * attribute of closest_backup. + * The interval of WAL starting from closest_backup to switchpoint is + * saved into 'keep_segments' attribute. 
+ * If there is several intermediate timelines between timeline and its closest_backup + * then on every intermediate timeline WAL interval between switchpoint + * and starting segment is placed in 'keep_segments' attributes: + * + * |---------> WAL timeline3 + * |------| B5-----B6--> WAL timeline2 + * B1---| B2 B3-------B4------------> WAL timeline1 + * + * On timeline where closest_backup is located the WAL interval between + * closest_backup and switchpoint is placed into 'keep_segments'. + * If timeline has no 'closest_backup', then 'wal-depth' rules cannot be applied + * to this timeline and its WAL must be purged by following the basic rules of WAL purging. + * + * Third part is handling of ARCHIVE backups. + * If B1 and B2 have ARCHIVE wal-mode, then we must preserve WAL intervals + * between start_lsn and stop_lsn for each of them in 'keep_segments'. + */ + + /* determine anchor_lsn and keep_segments for every timeline */ + for (i = 0; i < parray_num(timelineinfos); i++) + { + int count = 0; + timelineInfo *tlinfo = (timelineInfo *)parray_get(timelineinfos, i); + + /* + * Iterate backward on backups belonging to this timeline to find + * anchor_backup. NOTE Here we rely on the fact that backups list + * is ordered by start_lsn DESC. + */ + if (tlinfo->backups) + { + for (j = 0; j < parray_num(tlinfo->backups); j++) + { + pgBackup *backup = (pgBackup *)parray_get(tlinfo->backups, j); + + /* sanity */ + if (XLogRecPtrIsInvalid(backup->start_lsn) || + backup->tli <= 0) + continue; + + /* skip invalid backups */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE) + continue; + + /* + * Pinned backups should be ignored for the + * purpose of retention fulfillment, so skip them. + */ + if (backup->expire_time > 0 && + backup->expire_time > current_time) + { + elog(LOG, "Pinned backup %s is ignored for the " + "purpose of WAL retention", + base36enc(backup->start_time)); + continue; + } + + count++; + + if (count == instance->wal_depth) + { + elog(LOG, "On timeline %i WAL is protected from purge at %X/%X", + tlinfo->tli, + (uint32) (backup->start_lsn >> 32), + (uint32) (backup->start_lsn)); + + tlinfo->anchor_lsn = backup->start_lsn; + tlinfo->anchor_tli = backup->tli; + break; + } + } + } + + /* + * Failed to find anchor backup for this timeline. + * We cannot just thrown it to the wolves, because by + * doing that we will violate our own guarantees. + * So check the existence of closest_backup for + * this timeline. If there is one, then + * set the 'anchor_lsn' and 'anchor_tli' to closest_backup + * 'start-lsn' and 'tli' respectively. + * |-------------B5----------> WAL timeline3 + * |-----|-------------------------> WAL timeline2 + * B1 B2---| B3 B4-------B6-----> WAL timeline1 + * + * wal-depth=2 + * + * If number of valid backups on timelines is less than 'wal-depth' + * then timeline must(!) stay reachable via parent timelines if any. + * If closest_backup is not available, then general WAL purge rules + * are applied. + */ + if (XLogRecPtrIsInvalid(tlinfo->anchor_lsn)) + { + /* + * Failed to find anchor_lsn in our own timeline. 
+ * Consider the case: + * -------------------------------------> tli5 + * ----------------------------B4-------> tli4 + * S3`--------------> tli3 + * S1`------------S3---B3-------B6-> tli2 + * B1---S1-------------B2--------B5-----> tli1 + * + * B* - backups + * S* - switchpoints + * wal-depth=2 + * + * Expected result: + * TLI5 will be purged entirely + * B4-------> tli4 + * S2`--------------> tli3 + * S1`------------S2 B3-------B6-> tli2 + * B1---S1 B2--------B5-----> tli1 + */ + pgBackup *closest_backup = NULL; + xlogInterval *interval = NULL; + TimeLineID tli = 0; + /* check if tli has closest_backup */ + if (!tlinfo->closest_backup) + /* timeline has no closest_backup, wal retention cannot be + * applied to this timeline. + * Timeline will be purged up to oldest_backup if any or + * purge entirely if there is none. + * In example above: tli5 and tli4. + */ + continue; + + /* sanity for closest_backup */ + if (XLogRecPtrIsInvalid(tlinfo->closest_backup->start_lsn) || + tlinfo->closest_backup->tli <= 0) + continue; + + /* + * Set anchor_lsn and anchor_tli to protect whole timeline from purge + * In the example above: tli3. + */ + tlinfo->anchor_lsn = tlinfo->closest_backup->start_lsn; + tlinfo->anchor_tli = tlinfo->closest_backup->tli; + + /* closest backup may be located not in parent timeline */ + closest_backup = tlinfo->closest_backup; + + tli = tlinfo->tli; + + /* + * Iterate over parent timeline chain and + * look for timeline where closest_backup belong + */ + while (tlinfo->parent_link) + { + /* In case of intermediate timeline save to keep_segments + * begin_segno and switchpoint segment. + * In case of final timelines save to keep_segments + * closest_backup start_lsn segment and switchpoint segment. + */ + XLogRecPtr switchpoint = tlinfo->switchpoint; + + tlinfo = tlinfo->parent_link; + + if (tlinfo->keep_segments == NULL) + tlinfo->keep_segments = parray_new(); + + /* in any case, switchpoint segment must be added to interval */ + interval = (xlogInterval *)palloc(sizeof(xlogInterval)); + GetXLogSegNo(switchpoint, interval->end_segno, instance->xlog_seg_size); + + /* Save [S1`, S2] to keep_segments */ + if (tlinfo->tli != closest_backup->tli) + interval->begin_segno = tlinfo->begin_segno; + /* Save [B1, S1] to keep_segments */ + else + GetXLogSegNo(closest_backup->start_lsn, interval->begin_segno, instance->xlog_seg_size); + + /* + * TODO: check, maybe this interval is already here or + * covered by other larger interval. + */ + + GetXLogFileName(begin_segno_str, tlinfo->tli, interval->begin_segno, instance->xlog_seg_size); + GetXLogFileName(end_segno_str, tlinfo->tli, interval->end_segno, instance->xlog_seg_size); + + elog(LOG, "Timeline %i to stay reachable from timeline %i " + "protect from purge WAL interval between " + "%s and %s on timeline %i", + tli, closest_backup->tli, begin_segno_str, + end_segno_str, tlinfo->tli); + + parray_append(tlinfo->keep_segments, interval); + continue; + } + continue; + } + + /* Iterate over backups left */ + for (j = count; j < parray_num(tlinfo->backups); j++) + { + XLogSegNo segno = 0; + xlogInterval *interval = NULL; + pgBackup *backup = (pgBackup *)parray_get(tlinfo->backups, j); + + /* + * We must calculate keep_segments intervals for ARCHIVE backups + * with start_lsn less than anchor_lsn. 
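+ * Such a backup still needs the WAL between its start_lsn and stop_lsn to
+ * stay consistent on restore, even though that range lies before anchor_lsn;
+ * STREAM backups are skipped below because they carry their own WAL.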
+ */ + + /* STREAM backups cannot contribute to keep_segments */ + if (backup->stream) + continue; + + /* sanity */ + if (XLogRecPtrIsInvalid(backup->start_lsn) || + backup->tli <= 0) + continue; + + /* no point in clogging keep_segments by backups protected by anchor_lsn */ + if (backup->start_lsn >= tlinfo->anchor_lsn) + continue; + + /* append interval to keep_segments */ + interval = (xlogInterval *)palloc(sizeof(xlogInterval)); + GetXLogSegNo(backup->start_lsn, segno, instance->xlog_seg_size); + interval->begin_segno = segno; + GetXLogSegNo(backup->stop_lsn, segno, instance->xlog_seg_size); + + interval->end_segno = segno; + + GetXLogFileName(begin_segno_str, tlinfo->tli, interval->begin_segno, instance->xlog_seg_size); + GetXLogFileName(end_segno_str, tlinfo->tli, interval->end_segno, instance->xlog_seg_size); + + elog(LOG, "Archive backup %s to stay consistent " + "protect from purge WAL interval " + "between %s and %s on timeline %i", + base36enc(backup->start_time), + begin_segno_str, end_segno_str, backup->tli); + + if (tlinfo->keep_segments == NULL) + tlinfo->keep_segments = parray_new(); + + parray_append(tlinfo->keep_segments, interval); + } + } + + /* + * Protect WAL segments from deletion by setting 'keep' flag. + * We must keep all WAL segments after anchor_lsn (including), and also segments + * required by ARCHIVE backups for consistency - WAL between [start_lsn, stop_lsn]. + */ + for (i = 0; i < parray_num(timelineinfos); i++) + { + XLogSegNo anchor_segno = 0; + timelineInfo *tlinfo = (timelineInfo *)parray_get(timelineinfos, i); + + /* + * At this point invalid anchor_lsn can be only in one case: + * timeline is going to be purged by regular WAL purge rules. + */ + if (XLogRecPtrIsInvalid(tlinfo->anchor_lsn)) + continue; + + /* + * anchor_lsn is located in another timeline, it means that the timeline + * will be protected from purge entirely. + */ + if (tlinfo->anchor_tli > 0 && tlinfo->anchor_tli != tlinfo->tli) + continue; + + GetXLogSegNo(tlinfo->anchor_lsn, anchor_segno, instance->xlog_seg_size); + + for (j = 0; j < parray_num(tlinfo->xlog_filelist); j++) + { + xlogFile *wal_file = (xlogFile *) parray_get(tlinfo->xlog_filelist, j); + + if (wal_file->segno >= anchor_segno) + { + wal_file->keep = true; + continue; + } + + /* no keep segments */ + if (!tlinfo->keep_segments) + continue; + + /* Protect segments belonging to one of the keep invervals */ + for (k = 0; k < parray_num(tlinfo->keep_segments); k++) + { + xlogInterval *keep_segments = (xlogInterval *) parray_get(tlinfo->keep_segments, k); + + if ((wal_file->segno >= keep_segments->begin_segno) && + wal_file->segno <= keep_segments->end_segno) + { + wal_file->keep = true; + break; + } + } + } + } + + return timelineinfos; +} + +/* + * Iterate over parent timelines and look for valid backup + * closest to given timeline switchpoint. + * + * If such backup doesn't exist, it means that + * timeline is unreachable. Return NULL. + */ +pgBackup* +get_closest_backup(timelineInfo *tlinfo) +{ + pgBackup *closest_backup = NULL; + int i; + + /* + * Iterate over backups belonging to parent timelines + * and look for candidates. + */ + while (tlinfo->parent_link && !closest_backup) + { + parray *backup_list = tlinfo->parent_link->backups; + if (backup_list != NULL) + { + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *)parray_get(backup_list, i); + + /* + * Only valid backups made before switchpoint + * should be considered. 
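+ * That is: stop_lsn must be valid and not beyond the timeline's switchpoint,
+ * and the backup status must be OK or DONE; among the candidates the one
+ * with the greatest stop_lsn wins.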
+ */ + if (!XLogRecPtrIsInvalid(backup->stop_lsn) && + XRecOffIsValid(backup->stop_lsn) && + backup->stop_lsn <= tlinfo->switchpoint && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + /* Check if backup is closer to switchpoint than current candidate */ + if (!closest_backup || backup->stop_lsn > closest_backup->stop_lsn) + closest_backup = backup; + } + } + } + + /* Continue with parent */ + tlinfo = tlinfo->parent_link; + } + + return closest_backup; +} + +/* + * Find oldest backup in given timeline + * to determine what WAL segments of this timeline + * are reachable from backups belonging to it. + * + * If such backup doesn't exist, it means that + * there is no backups on this timeline. Return NULL. + */ +pgBackup* +get_oldest_backup(timelineInfo *tlinfo) +{ + pgBackup *oldest_backup = NULL; + int i; + parray *backup_list = tlinfo->backups; + + if (backup_list != NULL) + { + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *)parray_get(backup_list, i); + + /* Backups with invalid START LSN can be safely skipped */ + if (XLogRecPtrIsInvalid(backup->start_lsn) || + !XRecOffIsValid(backup->start_lsn)) + continue; + + /* + * Check if backup is older than current candidate. + * Here we use start_lsn for comparison, because backup that + * started earlier needs more WAL. + */ + if (!oldest_backup || backup->start_lsn < oldest_backup->start_lsn) + oldest_backup = backup; + } + } + + return oldest_backup; +} + +/* + * Overwrite backup metadata. + */ +void +do_set_backup(const char *instance_name, time_t backup_id, + pgSetBackupParams *set_backup_params) +{ + pgBackup *target_backup = NULL; + parray *backup_list = NULL; + + if (!set_backup_params) + elog(ERROR, "Nothing to set by 'set-backup' command"); + + backup_list = catalog_get_backup_list(instance_name, backup_id); + if (parray_num(backup_list) != 1) + elog(ERROR, "Failed to find backup %s", base36enc(backup_id)); + + target_backup = (pgBackup *) parray_get(backup_list, 0); + + /* Pin or unpin backup if requested */ + if (set_backup_params->ttl >= 0 || set_backup_params->expire_time > 0) + pin_backup(target_backup, set_backup_params); + + if (set_backup_params->note) + add_note(target_backup, set_backup_params->note); +} + +/* + * Set 'expire-time' attribute based on set_backup_params, or unpin backup + * if ttl is equal to zero. + */ +void +pin_backup(pgBackup *target_backup, pgSetBackupParams *set_backup_params) +{ + + /* sanity, backup must have positive recovery-time */ + if (target_backup->recovery_time <= 0) + elog(ERROR, "Failed to set 'expire-time' for backup %s: invalid 'recovery-time'", + base36enc(target_backup->backup_id)); + + /* Pin comes from ttl */ + if (set_backup_params->ttl > 0) + target_backup->expire_time = target_backup->recovery_time + set_backup_params->ttl; + /* Unpin backup */ + else if (set_backup_params->ttl == 0) + { + /* If backup was not pinned in the first place, + * then there is nothing to unpin. 
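+ * (expire_time == 0 is used throughout as the "not pinned" marker.)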
+ */ + if (target_backup->expire_time == 0) + { + elog(WARNING, "Backup %s is not pinned, nothing to unpin", + base36enc(target_backup->start_time)); + return; + } + target_backup->expire_time = 0; + } + /* Pin comes from expire-time */ + else if (set_backup_params->expire_time > 0) + target_backup->expire_time = set_backup_params->expire_time; + else + /* nothing to do */ + return; + + /* Update backup.control */ + write_backup(target_backup, true); + + if (set_backup_params->ttl > 0 || set_backup_params->expire_time > 0) + { + char expire_timestamp[100]; + + time2iso(expire_timestamp, lengthof(expire_timestamp), target_backup->expire_time); + elog(INFO, "Backup %s is pinned until '%s'", base36enc(target_backup->start_time), + expire_timestamp); + } + else + elog(INFO, "Backup %s is unpinned", base36enc(target_backup->start_time)); + + return; +} + +/* + * Add note to backup metadata or unset already existing note. + * It is a job of the caller to make sure that note is not NULL. + */ +void +add_note(pgBackup *target_backup, char *note) +{ + + char *note_string; + + /* unset note */ + if (pg_strcasecmp(note, "none") == 0) + { + target_backup->note = NULL; + elog(INFO, "Removing note from backup %s", + base36enc(target_backup->start_time)); + } + else + { + /* Currently we do not allow string with newlines as note, + * because it will break parsing of backup.control. + * So if user provides string like this "aaa\nbbbbb", + * we save only "aaa" + * Example: tests.set_backup.SetBackupTest.test_add_note_newlines + */ + note_string = (char *)pgut_malloc(MAX_NOTE_SIZE); + sscanf(note, "%[^\n]", note_string); + + target_backup->note = note_string; + elog(INFO, "Adding note to backup %s: '%s'", + base36enc(target_backup->start_time), target_backup->note); + } + + /* Update backup.control */ + write_backup(target_backup, true); +} + +/* + * Write information about backup.in to stream "out". + */ +void +pgBackupWriteControl(FILE *out, pgBackup *backup) +{ + char timestamp[100]; + + fio_fprintf(out, "#Configuration\n"); + fio_fprintf(out, "backup-mode = %s\n", pgBackupGetBackupMode(backup)); + fio_fprintf(out, "stream = %s\n", backup->stream ? 
"true" : "false"); + fio_fprintf(out, "compress-alg = %s\n", + deparse_compress_alg(backup->compress_alg)); + fio_fprintf(out, "compress-level = %d\n", backup->compress_level); + + fio_fprintf(out, "\n#Compatibility\n"); + fio_fprintf(out, "block-size = %u\n", backup->block_size); + fio_fprintf(out, "xlog-block-size = %u\n", backup->wal_block_size); + fio_fprintf(out, "checksum-version = %u\n", backup->checksum_version); + if (backup->program_version[0] != '\0') + fio_fprintf(out, "program-version = %s\n", backup->program_version); + if (backup->server_version[0] != '\0') + fio_fprintf(out, "server-version = %s\n", backup->server_version); + + fio_fprintf(out, "\n#Result backup info\n"); + fio_fprintf(out, "timelineid = %d\n", backup->tli); + /* LSN returned by pg_start_backup */ + fio_fprintf(out, "start-lsn = %X/%X\n", + (uint32) (backup->start_lsn >> 32), + (uint32) backup->start_lsn); + /* LSN returned by pg_stop_backup */ + fio_fprintf(out, "stop-lsn = %X/%X\n", + (uint32) (backup->stop_lsn >> 32), + (uint32) backup->stop_lsn); + + time2iso(timestamp, lengthof(timestamp), backup->start_time); + fio_fprintf(out, "start-time = '%s'\n", timestamp); + if (backup->merge_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->merge_time); + fio_fprintf(out, "merge-time = '%s'\n", timestamp); + } + if (backup->end_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->end_time); + fio_fprintf(out, "end-time = '%s'\n", timestamp); + } + fio_fprintf(out, "recovery-xid = " XID_FMT "\n", backup->recovery_xid); + if (backup->recovery_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->recovery_time); + fio_fprintf(out, "recovery-time = '%s'\n", timestamp); + } + if (backup->expire_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->expire_time); + fio_fprintf(out, "expire-time = '%s'\n", timestamp); + } + + if (backup->merge_dest_backup != 0) + fio_fprintf(out, "merge-dest-id = '%s'\n", base36enc(backup->merge_dest_backup)); + + /* + * Size of PGDATA directory. The size does not include size of related + * WAL segments in archive 'wal' directory. + */ + if (backup->data_bytes != BYTES_INVALID) + fio_fprintf(out, "data-bytes = " INT64_FORMAT "\n", backup->data_bytes); + + if (backup->wal_bytes != BYTES_INVALID) + fio_fprintf(out, "wal-bytes = " INT64_FORMAT "\n", backup->wal_bytes); + + if (backup->uncompressed_bytes >= 0) + fio_fprintf(out, "uncompressed-bytes = " INT64_FORMAT "\n", backup->uncompressed_bytes); + + if (backup->pgdata_bytes >= 0) + fio_fprintf(out, "pgdata-bytes = " INT64_FORMAT "\n", backup->pgdata_bytes); + + fio_fprintf(out, "status = %s\n", status2str(backup->status)); + + /* 'parent_backup' is set if it is incremental backup */ + if (backup->parent_backup != 0) + fio_fprintf(out, "parent-backup-id = '%s'\n", base36enc(backup->parent_backup)); + + /* print external directories list */ + if (backup->external_dir_str) + fio_fprintf(out, "external-dirs = '%s'\n", backup->external_dir_str); + + if (backup->note) + fio_fprintf(out, "note = '%s'\n", backup->note); + + if (backup->content_crc != 0) + fio_fprintf(out, "content-crc = %u\n", backup->content_crc); + +} + +/* + * Save the backup content into BACKUP_CONTROL_FILE. 
+ * TODO: honor the strict flag + */ +void +write_backup(pgBackup *backup, bool strict) +{ + FILE *fp = NULL; + char path[MAXPGPATH]; + char path_temp[MAXPGPATH]; + char buf[4096]; + + join_path_components(path, backup->root_dir, BACKUP_CONTROL_FILE); + snprintf(path_temp, sizeof(path_temp), "%s.tmp", path); + + fp = fopen(path_temp, PG_BINARY_W); + if (fp == NULL) + elog(ERROR, "Cannot open control file \"%s\": %s", + path_temp, strerror(errno)); + + if (chmod(path_temp, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", path_temp, + strerror(errno)); + + setvbuf(fp, buf, _IOFBF, sizeof(buf)); + + pgBackupWriteControl(fp, backup); + + if (fflush(fp) != 0) + elog(ERROR, "Cannot flush control file \"%s\": %s", + path_temp, strerror(errno)); + + if (fsync(fileno(fp)) < 0) + elog(ERROR, "Cannot sync control file \"%s\": %s", + path_temp, strerror(errno)); + + if (fclose(fp) != 0) + elog(ERROR, "Cannot close control file \"%s\": %s", + path_temp, strerror(errno)); + + if (rename(path_temp, path) < 0) + elog(ERROR, "Cannot rename file \"%s\" to \"%s\": %s", + path_temp, path, strerror(errno)); +} + +/* + * Output the list of files to backup catalog DATABASE_FILE_LIST + */ +void +write_backup_filelist(pgBackup *backup, parray *files, const char *root, + parray *external_list, bool sync) +{ + FILE *out; + char control_path[MAXPGPATH]; + char control_path_temp[MAXPGPATH]; + size_t i = 0; + #define BUFFERSZ 1024*1024 + char *buf; + int64 backup_size_on_disk = 0; + int64 uncompressed_size_on_disk = 0; + int64 wal_size_on_disk = 0; + + join_path_components(control_path, backup->root_dir, DATABASE_FILE_LIST); + snprintf(control_path_temp, sizeof(control_path_temp), "%s.tmp", control_path); + + out = fopen(control_path_temp, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open file list \"%s\": %s", control_path_temp, + strerror(errno)); + + if (chmod(control_path_temp, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", control_path_temp, + strerror(errno)); + + buf = (char *)pgut_malloc(BUFFERSZ); + setvbuf(out, buf, _IOFBF, BUFFERSZ); + + if (sync) + INIT_FILE_CRC32(true, backup->content_crc); + + /* print each file in the list */ + for (i = 0; i < parray_num(files); i++) + { + int len = 0; + char line[BLCKSZ]; + pgFile *file = (pgFile *) parray_get(files, i); + + /* Ignore disappeared file */ + if (file->write_size == FILE_NOT_FOUND) + continue; + + if (S_ISDIR(file->mode)) + { + backup_size_on_disk += 4096; + uncompressed_size_on_disk += 4096; + } + + /* Count the amount of the data actually copied */ + if (S_ISREG(file->mode) && file->write_size > 0) + { + /* + * Size of WAL files in 'pg_wal' is counted separately + * TODO: in 3.0 add attribute is_walfile + */ + if (IsXLogFileName(file->name) && file->external_dir_num == 0) + wal_size_on_disk += file->write_size; + else + { + backup_size_on_disk += file->write_size; + uncompressed_size_on_disk += file->uncompressed_size; + } + } + + len = sprintf(line, "{\"path\":\"%s\", \"size\":\"" INT64_FORMAT "\", " + "\"mode\":\"%u\", \"is_datafile\":\"%u\", " + "\"is_cfs\":\"%u\", \"crc\":\"%u\", " + "\"compress_alg\":\"%s\", \"external_dir_num\":\"%d\", " + "\"dbOid\":\"%u\"", + file->rel_path, file->write_size, file->mode, + file->is_datafile ? 1 : 0, + file->is_cfs ? 
1 : 0, + file->crc, + deparse_compress_alg(file->compress_alg), + file->external_dir_num, + file->dbOid); + + if (file->is_datafile) + len += sprintf(line+len, ",\"segno\":\"%d\"", file->segno); + + if (file->linked) + len += sprintf(line+len, ",\"linked\":\"%s\"", file->linked); + + if (file->n_blocks > 0) + len += sprintf(line+len, ",\"n_blocks\":\"%i\"", file->n_blocks); + + if (file->n_headers > 0) + { + len += sprintf(line+len, ",\"n_headers\":\"%i\"", file->n_headers); + len += sprintf(line+len, ",\"hdr_crc\":\"%u\"", file->hdr_crc); + len += sprintf(line+len, ",\"hdr_off\":\"%li\"", file->hdr_off); + len += sprintf(line+len, ",\"hdr_size\":\"%i\"", file->hdr_size); + } + + sprintf(line+len, "}\n"); + + if (sync) + COMP_FILE_CRC32(true, backup->content_crc, line, strlen(line)); + + fprintf(out, "%s", line); + } + + if (sync) + FIN_FILE_CRC32(true, backup->content_crc); + + if (fflush(out) != 0) + elog(ERROR, "Cannot flush file list \"%s\": %s", + control_path_temp, strerror(errno)); + + if (sync && fsync(fileno(out)) < 0) + elog(ERROR, "Cannot sync file list \"%s\": %s", + control_path_temp, strerror(errno)); + + if (fclose(out) != 0) + elog(ERROR, "Cannot close file list \"%s\": %s", + control_path_temp, strerror(errno)); + + if (rename(control_path_temp, control_path) < 0) + elog(ERROR, "Cannot rename file \"%s\" to \"%s\": %s", + control_path_temp, control_path, strerror(errno)); + + /* use extra variable to avoid reset of previous data_bytes value in case of error */ + backup->data_bytes = backup_size_on_disk; + backup->uncompressed_bytes = uncompressed_size_on_disk; + + if (backup->stream) + backup->wal_bytes = wal_size_on_disk; + + free(buf); +} + +/* + * Read BACKUP_CONTROL_FILE and create pgBackup. + * - Comment starts with ';'. + * - Do not care section. 
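+ *
+ * The file is the key/value text produced by pgBackupWriteControl() above,
+ * for example (values are illustrative):
+ *
+ *     #Configuration
+ *     backup-mode = FULL
+ *     stream = false
+ *     compress-alg = none
+ *     compress-level = 1
+ *
+ *     #Result backup info
+ *     timelineid = 1
+ *     start-lsn = 0/2000028
+ *     stop-lsn = 0/2000130
+ *     status = OK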
+ */ +static pgBackup * +readBackupControlFile(const char *path) +{ + pgBackup *backup = pgut_new(pgBackup); + char *backup_mode = NULL; + char *start_lsn = NULL; + char *stop_lsn = NULL; + char *status = NULL; + char *parent_backup = NULL; + char *merge_dest_backup = NULL; + char *program_version = NULL; + char *server_version = NULL; + char *compress_alg = NULL; + int parsed_options; + + ConfigOption options[] = + { + {'s', 0, "backup-mode", &backup_mode, SOURCE_FILE_STRICT}, + {'u', 0, "timelineid", &backup->tli, SOURCE_FILE_STRICT}, + {'s', 0, "start-lsn", &start_lsn, SOURCE_FILE_STRICT}, + {'s', 0, "stop-lsn", &stop_lsn, SOURCE_FILE_STRICT}, + {'t', 0, "start-time", &backup->start_time, SOURCE_FILE_STRICT}, + {'t', 0, "merge-time", &backup->merge_time, SOURCE_FILE_STRICT}, + {'t', 0, "end-time", &backup->end_time, SOURCE_FILE_STRICT}, + {'U', 0, "recovery-xid", &backup->recovery_xid, SOURCE_FILE_STRICT}, + {'t', 0, "recovery-time", &backup->recovery_time, SOURCE_FILE_STRICT}, + {'t', 0, "expire-time", &backup->expire_time, SOURCE_FILE_STRICT}, + {'I', 0, "data-bytes", &backup->data_bytes, SOURCE_FILE_STRICT}, + {'I', 0, "wal-bytes", &backup->wal_bytes, SOURCE_FILE_STRICT}, + {'I', 0, "uncompressed-bytes", &backup->uncompressed_bytes, SOURCE_FILE_STRICT}, + {'I', 0, "pgdata-bytes", &backup->pgdata_bytes, SOURCE_FILE_STRICT}, + {'u', 0, "block-size", &backup->block_size, SOURCE_FILE_STRICT}, + {'u', 0, "xlog-block-size", &backup->wal_block_size, SOURCE_FILE_STRICT}, + {'u', 0, "checksum-version", &backup->checksum_version, SOURCE_FILE_STRICT}, + {'s', 0, "program-version", &program_version, SOURCE_FILE_STRICT}, + {'s', 0, "server-version", &server_version, SOURCE_FILE_STRICT}, + {'b', 0, "stream", &backup->stream, SOURCE_FILE_STRICT}, + {'s', 0, "status", &status, SOURCE_FILE_STRICT}, + {'s', 0, "parent-backup-id", &parent_backup, SOURCE_FILE_STRICT}, + {'s', 0, "merge-dest-id", &merge_dest_backup, SOURCE_FILE_STRICT}, + {'s', 0, "compress-alg", &compress_alg, SOURCE_FILE_STRICT}, + {'u', 0, "compress-level", &backup->compress_level, SOURCE_FILE_STRICT}, + {'s', 0, "external-dirs", &backup->external_dir_str, SOURCE_FILE_STRICT}, + {'s', 0, "note", &backup->note, SOURCE_FILE_STRICT}, + {'u', 0, "content-crc", &backup->content_crc, SOURCE_FILE_STRICT}, + {0} + }; + + pgBackupInit(backup); + if (fio_access(path, F_OK, FIO_BACKUP_HOST) != 0) + { + elog(WARNING, "Control file \"%s\" doesn't exist", path); + pgBackupFree(backup); + return NULL; + } + + parsed_options = config_read_opt(path, options, WARNING, true, true); + + if (parsed_options == 0) + { + elog(WARNING, "Control file \"%s\" is empty", path); + pgBackupFree(backup); + return NULL; + } + + if (backup->start_time == 0) + { + elog(WARNING, "Invalid ID/start-time, control file \"%s\" is corrupted", path); + pgBackupFree(backup); + return NULL; + } + + if (backup_mode) + { + backup->backup_mode = parse_backup_mode(backup_mode); + free(backup_mode); + } + + if (start_lsn) + { + uint32 xlogid; + uint32 xrecoff; + + if (sscanf(start_lsn, "%X/%X", &xlogid, &xrecoff) == 2) + backup->start_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; + else + elog(WARNING, "Invalid START_LSN \"%s\"", start_lsn); + free(start_lsn); + } + + if (stop_lsn) + { + uint32 xlogid; + uint32 xrecoff; + + if (sscanf(stop_lsn, "%X/%X", &xlogid, &xrecoff) == 2) + backup->stop_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; + else + elog(WARNING, "Invalid STOP_LSN \"%s\"", stop_lsn); + free(stop_lsn); + } + + if (status) + { + if (strcmp(status, "OK") == 
0) + backup->status = BACKUP_STATUS_OK; + else if (strcmp(status, "ERROR") == 0) + backup->status = BACKUP_STATUS_ERROR; + else if (strcmp(status, "RUNNING") == 0) + backup->status = BACKUP_STATUS_RUNNING; + else if (strcmp(status, "MERGING") == 0) + backup->status = BACKUP_STATUS_MERGING; + else if (strcmp(status, "MERGED") == 0) + backup->status = BACKUP_STATUS_MERGED; + else if (strcmp(status, "DELETING") == 0) + backup->status = BACKUP_STATUS_DELETING; + else if (strcmp(status, "DELETED") == 0) + backup->status = BACKUP_STATUS_DELETED; + else if (strcmp(status, "DONE") == 0) + backup->status = BACKUP_STATUS_DONE; + else if (strcmp(status, "ORPHAN") == 0) + backup->status = BACKUP_STATUS_ORPHAN; + else if (strcmp(status, "CORRUPT") == 0) + backup->status = BACKUP_STATUS_CORRUPT; + else + elog(WARNING, "Invalid STATUS \"%s\"", status); + free(status); + } + + if (parent_backup) + { + backup->parent_backup = base36dec(parent_backup); + free(parent_backup); + } + + if (merge_dest_backup) + { + backup->merge_dest_backup = base36dec(merge_dest_backup); + free(merge_dest_backup); + } + + if (program_version) + { + StrNCpy(backup->program_version, program_version, + sizeof(backup->program_version)); + pfree(program_version); + } + + if (server_version) + { + StrNCpy(backup->server_version, server_version, + sizeof(backup->server_version)); + pfree(server_version); + } + + if (compress_alg) + backup->compress_alg = parse_compress_alg(compress_alg); + + return backup; +} + +BackupMode +parse_backup_mode(const char *value) +{ + const char *v = value; + size_t len; + + /* Skip all spaces detected */ + while (IsSpace(*v)) + v++; + len = strlen(v); + + if (len > 0 && pg_strncasecmp("full", v, len) == 0) + return BACKUP_MODE_FULL; + else if (len > 0 && pg_strncasecmp("ptrack", v, len) == 0) + return BACKUP_MODE_DIFF_PTRACK; + + /* Backup mode is invalid, so leave with an error */ + elog(ERROR, "invalid backup-mode \"%s\"", value); + return BACKUP_MODE_INVALID; +} + +const char * +deparse_backup_mode(BackupMode mode) +{ + switch (mode) + { + case BACKUP_MODE_FULL: + return "full"; + case BACKUP_MODE_DIFF_PTRACK: + return "ptrack"; + case BACKUP_MODE_INVALID: + return "invalid"; + } + + return NULL; +} + +CompressAlg +parse_compress_alg(const char *arg) +{ + size_t len; + + /* Skip all spaces detected */ + while (isspace((unsigned char)*arg)) + arg++; + len = strlen(arg); + + if (len == 0) + elog(ERROR, "compress algorithm is empty"); + + if (pg_strncasecmp("zlib", arg, len) == 0) + return ZLIB_COMPRESS; + else if (pg_strncasecmp("pglz", arg, len) == 0) + return PGLZ_COMPRESS; + else if (pg_strncasecmp("none", arg, len) == 0) + return NONE_COMPRESS; + else + elog(ERROR, "invalid compress algorithm value \"%s\"", arg); + + return NOT_DEFINED_COMPRESS; +} + +const char* +deparse_compress_alg(int alg) +{ + switch (alg) + { + case NONE_COMPRESS: + case NOT_DEFINED_COMPRESS: + return "none"; + case ZLIB_COMPRESS: + return "zlib"; + case PGLZ_COMPRESS: + return "pglz"; + } + + return NULL; +} + +/* + * Fill PGNodeInfo struct with default values. + */ +void +pgNodeInit(PGNodeInfo *node) +{ + node->block_size = 0; + node->wal_block_size = 0; + node->checksum_version = 0; + + node->is_superuser = false; + node->pgpro_support = false; + + node->server_version = 0; + node->server_version_str[0] = '\0'; +} + +/* + * Fill pgBackup struct with default values. 
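+ * INVALID_BACKUP_ID and BYTES_INVALID serve as "not set" markers, which lets
+ * pgBackupWriteControl() skip absent fields and readBackupControlFile()
+ * detect them.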
+ */ +void +pgBackupInit(pgBackup *backup) +{ + backup->backup_id = INVALID_BACKUP_ID; + backup->backup_mode = BACKUP_MODE_INVALID; + backup->status = BACKUP_STATUS_INVALID; + backup->tli = 0; + backup->start_lsn = 0; + backup->stop_lsn = 0; + backup->start_time = (time_t) 0; + backup->merge_time = (time_t) 0; + backup->end_time = (time_t) 0; + backup->recovery_xid = 0; + backup->recovery_time = (time_t) 0; + backup->expire_time = (time_t) 0; + + backup->data_bytes = BYTES_INVALID; + backup->wal_bytes = BYTES_INVALID; + backup->uncompressed_bytes = 0; + backup->pgdata_bytes = 0; + + backup->compress_alg = COMPRESS_ALG_DEFAULT; + backup->compress_level = COMPRESS_LEVEL_DEFAULT; + + backup->block_size = BLCKSZ; + backup->wal_block_size = XLOG_BLCKSZ; + backup->checksum_version = 0; + + backup->stream = false; + backup->parent_backup = INVALID_BACKUP_ID; + backup->merge_dest_backup = INVALID_BACKUP_ID; + backup->parent_backup_link = NULL; + backup->program_version[0] = '\0'; + backup->server_version[0] = '\0'; + backup->external_dir_str = NULL; + backup->root_dir = NULL; + backup->database_dir = NULL; + backup->files = NULL; + backup->note = NULL; + backup->content_crc = 0; +} + +/* free pgBackup object */ +void +pgBackupFree(void *backup) +{ + pgBackup *b = (pgBackup *) backup; + + pg_free(b->external_dir_str); + pg_free(b->root_dir); + pg_free(b->database_dir); + pg_free(b->note); + pg_free(backup); +} + +/* Compare two pgBackup with their IDs (start time) in ascending order */ +int +pgBackupCompareId(const void *l, const void *r) +{ + pgBackup *lp = *(pgBackup **)l; + pgBackup *rp = *(pgBackup **)r; + + if (lp->start_time > rp->start_time) + return 1; + else if (lp->start_time < rp->start_time) + return -1; + else + return 0; +} + +/* Compare two pgBackup with their IDs in descending order */ +int +pgBackupCompareIdDesc(const void *l, const void *r) +{ + return -pgBackupCompareId(l, r); +} + +/* + * Construct absolute path of the backup directory. + * If subdir is not NULL, it will be appended after the path. + */ +void +pgBackupGetPath(const pgBackup *backup, char *path, size_t len, const char *subdir) +{ + pgBackupGetPath2(backup, path, len, subdir, NULL); +} + +/* + * Construct absolute path of the backup directory. + * Append "subdir1" and "subdir2" to the backup directory. 
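+ * The result looks like "<backup_instance_path>/<ID>[/<subdir1>[/<subdir2>]]",
+ * where <ID> is the base36-encoded start time, e.g. (illustrative)
+ * ".../backups/node1/QBFGD7/database".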
+ */ +void +pgBackupGetPath2(const pgBackup *backup, char *path, size_t len, + const char *subdir1, const char *subdir2) +{ + /* If "subdir1" is NULL do not check "subdir2" */ + if (!subdir1) + snprintf(path, len, "%s/%s", backup_instance_path, + base36enc(backup->start_time)); + else if (!subdir2) + snprintf(path, len, "%s/%s/%s", backup_instance_path, + base36enc(backup->start_time), subdir1); + /* "subdir1" and "subdir2" is not NULL */ + else + snprintf(path, len, "%s/%s/%s/%s", backup_instance_path, + base36enc(backup->start_time), subdir1, subdir2); +} + +/* + * independent from global variable backup_instance_path + * Still depends from backup_path + */ +void +pgBackupGetPathInInstance(const char *instance_name, + const pgBackup *backup, char *path, size_t len, + const char *subdir1, const char *subdir2) +{ + char backup_instance_path[MAXPGPATH]; + + sprintf(backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + + /* If "subdir1" is NULL do not check "subdir2" */ + if (!subdir1) + snprintf(path, len, "%s/%s", backup_instance_path, + base36enc(backup->start_time)); + else if (!subdir2) + snprintf(path, len, "%s/%s/%s", backup_instance_path, + base36enc(backup->start_time), subdir1); + /* "subdir1" and "subdir2" is not NULL */ + else + snprintf(path, len, "%s/%s/%s/%s", backup_instance_path, + base36enc(backup->start_time), subdir1, subdir2); +} + +/* + * Check if multiple backups consider target backup to be their direct parent + */ +bool +is_prolific(parray *backup_list, pgBackup *target_backup) +{ + int i; + int child_counter = 0; + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(backup_list, i); + + /* consider only OK and DONE backups */ + if (tmp_backup->parent_backup == target_backup->start_time && + (tmp_backup->status == BACKUP_STATUS_OK || + tmp_backup->status == BACKUP_STATUS_DONE)) + { + child_counter++; + if (child_counter > 1) + return true; + } + } + + return false; +} + +/* + * Find parent base FULL backup for current backup using parent_backup_link + */ +pgBackup* +find_parent_full_backup(pgBackup *current_backup) +{ + pgBackup *base_full_backup = NULL; + base_full_backup = current_backup; + + /* sanity */ + if (!current_backup) + elog(ERROR, "Target backup cannot be NULL"); + + while (base_full_backup->parent_backup_link != NULL) + { + base_full_backup = base_full_backup->parent_backup_link; + } + + if (base_full_backup->backup_mode != BACKUP_MODE_FULL) + { + if (base_full_backup->parent_backup) + elog(WARNING, "Backup %s is missing", + base36enc(base_full_backup->parent_backup)); + else + elog(WARNING, "Failed to find parent FULL backup for %s", + base36enc(current_backup->start_time)); + return NULL; + } + + return base_full_backup; +} + +/* + * Iterate over parent chain and look for any problems. + * Return 0 if chain is broken. + * result_backup must contain oldest existing backup after missing backup. + * we have no way to know if there are multiple missing backups. + * Return 1 if chain is intact, but at least one backup is !OK. + * result_backup must contain oldest !OK backup. + * Return 2 if chain is intact and all backups are OK. + * result_backup must contain FULL backup on which chain is based. 
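+ * (These numeric results correspond to the ChainIsBroken, ChainIsInvalid and
+ * ChainIsOk constants returned below.)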
+ */ +int +scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup) +{ + pgBackup *target_backup = NULL; + pgBackup *invalid_backup = NULL; + + if (!current_backup) + elog(ERROR, "Target backup cannot be NULL"); + + target_backup = current_backup; + + while (target_backup->parent_backup_link) + { + if (target_backup->status != BACKUP_STATUS_OK && + target_backup->status != BACKUP_STATUS_DONE) + /* oldest invalid backup in parent chain */ + invalid_backup = target_backup; + + + target_backup = target_backup->parent_backup_link; + } + + /* Previous loop will skip FULL backup because his parent_backup_link is NULL */ + if (target_backup->backup_mode == BACKUP_MODE_FULL && + (target_backup->status != BACKUP_STATUS_OK && + target_backup->status != BACKUP_STATUS_DONE)) + { + invalid_backup = target_backup; + } + + /* found chain end and oldest backup is not FULL */ + if (target_backup->backup_mode != BACKUP_MODE_FULL) + { + /* Set oldest child backup in chain */ + *result_backup = target_backup; + return ChainIsBroken; + } + + /* chain is ok, but some backups are invalid */ + if (invalid_backup) + { + *result_backup = invalid_backup; + return ChainIsInvalid; + } + + *result_backup = target_backup; + return ChainIsOk; +} + +/* + * Determine if child_backup descend from parent_backup + * This check DO NOT(!!!) guarantee that parent chain is intact, + * because parent_backup can be missing. + * If inclusive is true, then child_backup counts as a child of himself + * if parent_backup_time is start_time of child_backup. + */ +bool +is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive) +{ + if (!child_backup) + elog(ERROR, "Target backup cannot be NULL"); + + if (inclusive && child_backup->start_time == parent_backup_time) + return true; + + while (child_backup->parent_backup_link && + child_backup->parent_backup != parent_backup_time) + { + child_backup = child_backup->parent_backup_link; + } + + if (child_backup->parent_backup == parent_backup_time) + return true; + + //if (inclusive && child_backup->start_time == parent_backup_time) + // return true; + + return false; +} + +/* + * Return backup index number. + * Note: this index number holds true until new sorting of backup list + */ +int +get_backup_index_number(parray *backup_list, pgBackup *backup) +{ + int i; + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(backup_list, i); + + if (tmp_backup->start_time == backup->start_time) + return i; + } + elog(WARNING, "Failed to find backup %s", base36enc(backup->start_time)); + return -1; +} + +/* On backup_list lookup children of target_backup and append them to append_list */ +void +append_children(parray *backup_list, pgBackup *target_backup, parray *append_list) +{ + int i; + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* check if backup is descendant of target backup */ + if (is_parent(target_backup->start_time, backup, false)) + { + /* if backup is already in the list, then skip it */ + if (!parray_contains(append_list, backup)) + parray_append(append_list, backup); + } + } +} + +/* + * * relpathbackend - construct path to a relation's file + * * + * * Result is a palloc'd string. 
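+ * *
+ * * Typical results (illustrative): "global/1262" for shared relations,
+ * * "base/16384/16385" for the default tablespace, and
+ * * "pg_tblspc/<spcOid>/<version>_<nodename>/<dbOid>/<relOid>" for other
+ * * tablespaces (the node name part applies to the PGXC build). Note that
+ * * the body below is compiled only under #ifdef AAAAA; otherwise the
+ * * function returns NULL.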
+ * */ +char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) +{ + int pathlen; + char* path = NULL; +#ifdef AAAAA + if (IsValidColForkNum(forknum)) { + path = (char*)gs_palloc0(MAXPGPATH * sizeof(char)); + + CFileNode cFileNode(rnode, ColForkNum2ColumnId(forknum), MAIN_FORKNUM); + CUStorage cuStorage(cFileNode); + cuStorage.GetBcmFileName(path, 0); + cuStorage.Destroy(); + } else { + errno_t rc = EOK; + + if (rnode.spcNode == GLOBALTABLESPACE_OID) { + /* Shared system relations live in {datadir}/global */ + Assert(rnode.dbNode == 0); + Assert(rnode.bucketNode == InvalidBktId); + Assert(backend == InvalidBackendId); + pathlen = 7 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char*)palloc(pathlen); + if (forknum != MAIN_FORKNUM) + rc = snprintf_s(path, pathlen, pathlen - 1, "global/%u_%s", rnode.relNode, forkNames[forknum]); + else + rc = snprintf_s(path, pathlen, pathlen - 1, "global/%u", rnode.relNode); + securec_check_ss(rc, "\0", "\0"); + } else if (rnode.spcNode == DEFAULTTABLESPACE_OID) { + /* The default tablespace is {datadir}/base */ + if (backend == InvalidBackendId) { + pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS + 2; + path = (char*)palloc(pathlen); + if (forknum != MAIN_FORKNUM) { + if (rnode.bucketNode == InvalidBktId) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "base/%u/%u_%s", + rnode.dbNode, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "base/%u/%u_b%d_%s", + rnode.dbNode, + rnode.relNode, + rnode.bucketNode, + forkNames[forknum]); + } else { + if (rnode.bucketNode == InvalidBktId) + rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/%u", rnode.dbNode, rnode.relNode); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "base/%u/%u_b%d", + rnode.dbNode, + rnode.relNode, + rnode.bucketNode); + } + securec_check_ss(rc, "\0", "\0"); + } else { + /* OIDCHARS will suffice for an integer, too */ + Assert(rnode.bucketNode == InvalidBktId); + pathlen = 5 + OIDCHARS + 2 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char*)palloc(pathlen); + if (forknum != MAIN_FORKNUM) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "base/%u/t%d_%u_%s", + rnode.dbNode, + backend, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/t%d_%u", rnode.dbNode, backend, rnode.relNode); + securec_check_ss(rc, "\0", "\0"); + } + } else { + /* All other tablespaces are accessed via symlinks */ + if (backend == InvalidBackendId) { + pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + + 1 +#ifdef PGXC + /* Postgres-XC tablespaces include node name */ + + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 +#endif + + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS + 2; + path = (char*)palloc(pathlen); +#ifdef PGXC + if (forknum != MAIN_FORKNUM) { + if (rnode.bucketNode == InvalidBktId) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s_%s/%u/%u_%s", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + rnode.dbNode, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s_%s/%u/%u_b%d_%s", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + rnode.dbNode, + rnode.relNode, + rnode.bucketNode, + forkNames[forknum]); + } else { + if (rnode.bucketNode == InvalidBktId) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + 
"pg_tblspc/%u/%s_%s/%u/%u", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + rnode.dbNode, + rnode.relNode); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s_%s/%u/%u_b%d", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + rnode.dbNode, + rnode.relNode, + rnode.bucketNode); + } + securec_check_ss(rc, "\0", "\0"); +#else + if (forknum != MAIN_FORKNUM) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s/%u/%u_%s", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s/%u/%u", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + rnode.relNode); + securec_check_ss(rc, "\0", "\0"); +#endif + } else { + /* OIDCHARS will suffice for an integer, too */ + pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 2 +#ifdef PGXC + + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 +#endif + + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char*)palloc(pathlen); +#ifdef PGXC + if (forknum != MAIN_FORKNUM) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s_%s/%u/t%d_%u_%s", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + rnode.dbNode, + backend, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s_%s/%u/t%d_%u", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + rnode.dbNode, + backend, + rnode.relNode); + securec_check_ss(rc, "\0", "\0"); +#else + if (forknum != MAIN_FORKNUM) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s/%u/t%d_%u_%s", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + backend, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "pg_tblspc/%u/%s/%u/t%d_%u", + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + backend, + rnode.relNode); + securec_check_ss(rc, "\0", "\0"); +#endif + } + } + } +#endif + return path; +} + diff --git a/src/bin/pg_probackup/configuration.cpp b/src/bin/pg_probackup/configuration.cpp new file mode 100644 index 000000000..465b229e9 --- /dev/null +++ b/src/bin/pg_probackup/configuration.cpp @@ -0,0 +1,1492 @@ +/*------------------------------------------------------------------------- + * + * configuration.c: - function implementations to work with pg_probackup + * configurations. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" +#include "configuration.h" +#include "logger.h" +#include "pgut.h" +#include "file.h" + +#include "datatype/timestamp.h" + +#include "getopt_long.h" +#include "common/fe_memutils.h" +#include + +#define MAXPG_LSNCOMPONENT 8 + +/* + * Unit conversion tables. + * + * Copied from guc.c. + */ +#define MAX_UNIT_LEN 3 /* length of longest recognized unit string */ + +typedef struct +{ + char unit[MAX_UNIT_LEN + 1]; /* unit, as a string, like "kB" or + * "min" */ + int base_unit; /* OPTION_UNIT_XXX */ + int multiplier; /* If positive, multiply the value with this + * for unit -> base_unit conversion. 
If + * negative, divide (with the absolute value) */ +} unit_conversion; + +static const char *memory_units_hint = "Valid units for this parameter are \"kB\", \"MB\", \"GB\", and \"TB\"."; + +static const unit_conversion memory_unit_conversion_table[] = +{ + {"TB", OPTION_UNIT_KB, 1024 * 1024 * 1024}, + {"GB", OPTION_UNIT_KB, 1024 * 1024}, + {"MB", OPTION_UNIT_KB, 1024}, + {"KB", OPTION_UNIT_KB, 1}, + {"kB", OPTION_UNIT_KB, 1}, + + {"TB", OPTION_UNIT_BLOCKS, (1024 * 1024 * 1024) / (BLCKSZ / 1024)}, + {"GB", OPTION_UNIT_BLOCKS, (1024 * 1024) / (BLCKSZ / 1024)}, + {"MB", OPTION_UNIT_BLOCKS, 1024 / (BLCKSZ / 1024)}, + {"kB", OPTION_UNIT_BLOCKS, -(BLCKSZ / 1024)}, + + {"TB", OPTION_UNIT_XBLOCKS, (1024 * 1024 * 1024) / (XLOG_BLCKSZ / 1024)}, + {"GB", OPTION_UNIT_XBLOCKS, (1024 * 1024) / (XLOG_BLCKSZ / 1024)}, + {"MB", OPTION_UNIT_XBLOCKS, 1024 / (XLOG_BLCKSZ / 1024)}, + {"kB", OPTION_UNIT_XBLOCKS, -(XLOG_BLCKSZ / 1024)}, + + {""} /* end of table marker */ +}; + +static const char *time_units_hint = "Valid units for this parameter are \"ms\", \"s\", \"min\", \"h\", and \"d\"."; + +static const unit_conversion time_unit_conversion_table[] = +{ + {"d", OPTION_UNIT_MS, 1000 * 60 * 60 * 24}, + {"h", OPTION_UNIT_MS, 1000 * 60 * 60}, + {"min", OPTION_UNIT_MS, 1000 * 60}, + {"s", OPTION_UNIT_MS, 1000}, + {"ms", OPTION_UNIT_MS, 1}, + + {"d", OPTION_UNIT_S, 60 * 60 * 24}, + {"h", OPTION_UNIT_S, 60 * 60}, + {"min", OPTION_UNIT_S, 60}, + {"s", OPTION_UNIT_S, 1}, + {"ms", OPTION_UNIT_S, -1000}, + + {"d", OPTION_UNIT_MIN, 60 * 24}, + {"h", OPTION_UNIT_MIN, 60}, + {"min", OPTION_UNIT_MIN, 1}, + {"s", OPTION_UNIT_MIN, -60}, + {"ms", OPTION_UNIT_MIN, -1000 * 60}, + + {""} /* end of table marker */ +}; + +extern size_t pvsnprintf(char *buf, size_t len, const char *fmt, va_list args); +extern char *psprintf(const char *fmt,...); + +/* + * Reading functions. + */ + +static uint32 +option_length(const ConfigOption opts[]) +{ + uint32 len; + + for (len = 0; opts && opts[len].type; len++) { } + + return len; +} + +static int +option_has_arg(char type) +{ + switch (type) + { + case 'b': + case 'B': + return no_argument;//optional_argument; + default: + return required_argument; + } +} + +static void +option_copy(struct option dst[], const ConfigOption opts[], size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + { + dst[i].name = opts[i].lname; + dst[i].has_arg = option_has_arg(opts[i].type); + dst[i].flag = NULL; + dst[i].val = opts[i].sname; + } +} + +static ConfigOption * +option_find(int c, ConfigOption opts1[]) +{ + size_t i; + + for (i = 0; opts1 && opts1[i].type; i++) + if (opts1[i].sname == c) + return &opts1[i]; + + return NULL; /* not found */ +} + +static char * +longopts_to_optstring(const struct option opts[], const size_t len) +{ + size_t i; + char *result; + char *s; + + result = (char *)pgut_malloc(len * 2 + 1); + + s = result; + for (i = 0; i < len; i++) + { + if (!isprint(opts[i].val)) + continue; + *s++ = opts[i].val; + if (opts[i].has_arg != no_argument) + *s++ = ':'; + } + *s = '\0'; + + return result; +} + +/* + * Compare two strings ignore cases and ignore. 
+ */ +static bool +key_equals(const char *lhs, const char *rhs) +{ + for (; *lhs && *rhs; lhs++, rhs++) + { + if (strchr("-_ ", *lhs)) + { + if (!strchr("-_ ", *rhs)) + return false; + } + else if (ToLower(*lhs) != ToLower(*rhs)) + return false; + } + + return *lhs == '\0' && *rhs == '\0'; +} + +static void +assign_option(ConfigOption *opt, const char *optarg, OptionSource src) +{ + const char *message; + + if (opt == NULL) + elog(ERROR, "Option is not found. Try \"%s --help\" for more information.\n", + PROGRAM_NAME); + + if (opt->source > src) + { + /* high prior value has been set already. */ + return; + } + /* Allow duplicate entries for function option */ + else if (src >= SOURCE_CMD && opt->source >= src && opt->type != 'f') + { + message = "specified only once"; + } + else + { + OptionSource orig_source = opt->source; + + /* can be overwritten if non-command line source */ + opt->source = src; + + switch (opt->type) + { + case 'b': + case 'B': + if (optarg == NULL) + { + *((bool *) opt->var) = (opt->type == 'b'); + return; + } + else if (parse_bool(optarg, (bool *) opt->var)) + { + return; + } + message = "a boolean"; + break; + case 'f': + ((option_assign_fn) opt->var)(opt, optarg); + return; + case 'i': + if (parse_int32(optarg, (int32 *)opt->var, opt->flags)) + return; + message = "a 32bit signed integer"; + break; + case 'u': + if (parse_uint32(optarg, (uint32 *)opt->var, opt->flags)) + return; + message = "a 32bit unsigned integer"; + break; + case 'I': + if (parse_int64(optarg, (int64 *)opt->var, opt->flags)) + return; + message = "a 64bit signed integer"; + break; + case 'U': + if (parse_uint64(optarg, (uint64 *)opt->var, opt->flags)) + return; + message = "a 64bit unsigned integer"; + break; + case 's': + if (orig_source != SOURCE_DEFAULT) + free(*(char **) opt->var); + + /* 'none' and 'off' are always disable the string parameter */ + //if (optarg && (pg_strcasecmp(optarg, "none") == 0)) + //{ + // *(char **) opt->var = "none"; + // return; + //} + + *(char **) opt->var = pgut_strdup(optarg); + if (strcmp(optarg,"") != 0) + return; + message = "a valid string"; + break; + case 't': + if (parse_time(optarg, (time_t *)opt->var, + opt->source == SOURCE_FILE)) + return; + message = "a time"; + break; + default: + elog(ERROR, "Invalid option type: %c", opt->type); + return; /* keep compiler quiet */ + } + } + + if (optarg) + { + if (isprint(opt->sname)) + elog(ERROR, "Option -%c, --%s should be %s: '%s'", + opt->sname, opt->lname, message, optarg); + else + elog(ERROR, "Option --%s should be %s: '%s'", + opt->lname, message, optarg); + } + else + { + if (isprint(opt->sname)) + elog(ERROR, "Option -%c, --%s should be %s", + opt->sname, opt->lname, message); + else + elog(ERROR, "Option --%s should be %s", + opt->lname, message); + } +} + +static const char * +skip_space(const char *str, const char *line) +{ + while (IsSpace(*str)) { str++; } + return str; +} + +static const char * +get_next_token(const char *src, char *dst, const char *line) +{ + const char *s; + int i; + int j; + + if ((s = skip_space(src, line)) == NULL) + return NULL; + + /* parse quoted string */ + if (*s == '\'') + { + s++; + for (i = 0, j = 0; s[i] != '\0'; i++) + { + if (s[i] == '\'') + { + i++; + /* doubled quote becomes just one quote */ + if (s[i] == '\'') + dst[j] = s[i]; + else + break; + } + else + dst[j] = s[i]; + j++; + } + } + else + { + i = j = strcspn(s, "#\n\r\t\v"); + memcpy(dst, s, j); + } + + dst[j] = '\0'; + return s + i; +} + +static bool +parse_pair(const char buffer[], char key[], char 
value[]) +{ + const char *start; + const char *end; + + key[0] = value[0] = '\0'; + + /* + * parse key + */ + start = buffer; + if ((start = skip_space(start, buffer)) == NULL) + return false; + + end = start + strcspn(start, "=# \n\r\t\v"); + + /* skip blank buffer */ + if (end - start <= 0) + { + if (*start == '=') + elog(ERROR, "Syntax error in \"%s\"", buffer); + return false; + } + + /* key found */ + strncpy(key, start, end - start); + key[end - start] = '\0'; + + /* find key and value split char */ + if ((start = skip_space(end, buffer)) == NULL) + return false; + + if (*start != '=') + { + elog(ERROR, "Syntax error in \"%s\"", buffer); + return false; + } + + start++; + + /* + * parse value + */ + if ((end = get_next_token(start, value, buffer)) == NULL) + return false; + + if ((start = skip_space(end, buffer)) == NULL) + return false; + + if (*start != '\0' && *start != '#') + { + elog(ERROR, "Syntax error in \"%s\"", buffer); + return false; + } + + return true; +} + +/* + * Returns the current user name. + */ +static const char * +get_username(void) +{ + const char *ret; + +#ifndef WIN32 + struct passwd *pw; + + pw = getpwuid(geteuid()); + ret = (pw ? pw->pw_name : NULL); +#else + static char username[128]; /* remains after function exit */ + DWORD len = sizeof(username) - 1; + + if (GetUserName(username, &len)) + ret = username; + else + { + _dosmaperr(GetLastError()); + ret = NULL; + } +#endif + + if (ret == NULL) + elog(ERROR, "Could not get current user name: %s", strerror(errno)); + return ret; +} + +/* + * Process options passed from command line. + * TODO: currectly argument parsing treat missing argument for options + * as invalid option + */ +int +config_get_opt(int argc, char **argv, ConfigOption cmd_options[], + ConfigOption options[]) +{ + int c; + int optindex = 0; + char *optstring; + struct option *longopts; + uint32 cmd_len, + len; + + cmd_len = option_length(cmd_options); + len = option_length(options); + + longopts = pgut_newarray(struct option, + cmd_len + len + 1 /* zero/end option */); + + /* Concatenate two options */ + option_copy(longopts, cmd_options, cmd_len); + option_copy(longopts + cmd_len, options, len + 1); + + optstring = longopts_to_optstring(longopts, cmd_len + len); + + /* Assign named options */ + while ((c = getopt_long(argc, argv, optstring, longopts, &optindex)) != -1) + { + ConfigOption *opt; + + opt = option_find(c, cmd_options); + if (opt == NULL) + opt = option_find(c, options); + + if (opt + && !remote_agent + && opt->allowed < SOURCE_CMD && opt->allowed != SOURCE_CMD_STRICT) + elog(ERROR, "Option %s cannot be specified in command line", + opt->lname); + /* Check 'opt == NULL' is performed in assign_option() */ + assign_option(opt, optarg, SOURCE_CMD); + } + + return optind; +} + +/* + * Get configuration from configuration file. + * Return number of parsed options. 
+ */ +int +config_read_opt(const char *path, ConfigOption options[], int elevel, + bool strict, bool missing_ok) +{ + FILE *fp; + char buf[1024]; + char key[1024]; + char value[1024]; + int parsed_options = 0; + + if (!options) + return parsed_options; + + if ((fp = pgut_fopen(path, "rt", missing_ok)) == NULL) + return parsed_options; + + while (fgets(buf, lengthof(buf), fp)) + { + size_t i; + + for (i = strlen(buf); i > 0 && IsSpace(buf[i - 1]); i--) + buf[i - 1] = '\0'; + + if (parse_pair(buf, key, value)) + { + for (i = 0; options[i].type; i++) + { + ConfigOption *opt = &options[i]; + + if (key_equals(key, opt->lname)) + { + if (opt->allowed < SOURCE_FILE && + opt->allowed != SOURCE_FILE_STRICT) + elog(elevel, "Option %s cannot be specified in file", + opt->lname); + else if (opt->source <= SOURCE_FILE) + { + assign_option(opt, value, SOURCE_FILE); + parsed_options++; + } + break; + } + } + if (strict && !options[i].type) + elog(elevel, "Invalid option \"%s\" in file \"%s\"", key, path); + } + } + + if (ferror(fp)) + elog(ERROR, "Failed to read from file: \"%s\"", path); + + fio_close_stream(fp); + + return parsed_options; +} + +/* + * Process options passed as environment variables. + */ +void +config_get_opt_env(ConfigOption options[]) +{ + size_t i; + + for (i = 0; options && options[i].type; i++) + { + ConfigOption *opt = &options[i]; + const char *value = NULL; + + /* If option was already set do not check env */ + if (opt->source > SOURCE_ENV || opt->allowed < SOURCE_ENV) + continue; + + if (strcmp(opt->lname, "pgdata") == 0) + value = getenv("PGDATA"); + if (strcmp(opt->lname, "port") == 0) + value = getenv("PGPORT"); + if (strcmp(opt->lname, "host") == 0) + value = getenv("PGHOST"); + if (strcmp(opt->lname, "username") == 0) + value = getenv("PGUSER"); + if (strcmp(opt->lname, "pgdatabase") == 0) + { + value = getenv("PGDATABASE"); + if (value == NULL) + value = getenv("PGUSER"); + if (value == NULL) + value = get_username(); + } + + if (value) + assign_option(opt, value, SOURCE_ENV); + } +} + +/* + * Manually set source of the option. Find it by the pointer var. + */ +void +config_set_opt(ConfigOption options[], void *var, OptionSource source) +{ + int i; + + for (i = 0; options[i].type; i++) + { + ConfigOption *opt = &options[i]; + + if (opt->var == var) + { + if ((opt->allowed == SOURCE_FILE_STRICT && source != SOURCE_FILE) || + (opt->allowed == SOURCE_CMD_STRICT && source != SOURCE_CMD) || + (opt->allowed < source && opt->allowed >= SOURCE_ENV)) + elog(ERROR, "Invalid option source %d for %s", + source, opt->lname); + + opt->source = source; + break; + } + } +} + +/* + * Return value of the function in the string representation. Result is + * allocated string. + */ +char * +option_get_value(ConfigOption *opt) +{ + int64 value = 0; + uint64 value_u = 0; + const char *unit = NULL; + + /* + * If it is defined a unit for the option get readable value from base with + * unit name. 
+ */ + if (opt->flags & OPTION_UNIT) + { + if (opt->type == 'i') + convert_from_base_unit(*((int32 *) opt->var), + opt->flags & OPTION_UNIT, &value, &unit); + else if (opt->type == 'I') + convert_from_base_unit(*((int64 *) opt->var), + opt->flags & OPTION_UNIT, &value, &unit); + else if (opt->type == 'u') + convert_from_base_unit_u(*((uint32 *) opt->var), + opt->flags & OPTION_UNIT, &value_u, &unit); + else if (opt->type == 'U') + convert_from_base_unit_u(*((uint64 *) opt->var), + opt->flags & OPTION_UNIT, &value_u, &unit); + } + + /* Get string representation itself */ + switch (opt->type) + { + case 'b': + case 'B': + return psprintf("%s", *((bool *) opt->var) ? "true" : "false"); + case 'i': + if (opt->flags & OPTION_UNIT) + return psprintf(INT64_FORMAT "%s", value, unit); + else + return psprintf("%d", *((int32 *) opt->var)); + case 'u': + if (opt->flags & OPTION_UNIT) + return psprintf(UINT64_FORMAT "%s", value_u, unit); + else + return psprintf("%u", *((uint32 *) opt->var)); + case 'I': + if (opt->flags & OPTION_UNIT) + return psprintf(INT64_FORMAT "%s", value, unit); + else + return psprintf(INT64_FORMAT, *((int64 *) opt->var)); + case 'U': + if (opt->flags & OPTION_UNIT) + return psprintf(UINT64_FORMAT "%s", value_u, unit); + else + return psprintf(UINT64_FORMAT, *((uint64 *) opt->var)); + case 's': + if (*((char **) opt->var) == NULL) + return NULL; + /* 'none' and 'off' always disable the string parameter */ + //if ((pg_strcasecmp(*((char **) opt->var), "none") == 0) || + // (pg_strcasecmp(*((char **) opt->var), "off") == 0)) + // return NULL; + return gs_pstrdup(*((char **) opt->var)); + case 't': + { + char *timestamp; + time_t t = *((time_t *) opt->var); + + if (t > 0) + { + timestamp = (char *)palloc(100); + time2iso(timestamp, 100, t); + } + else + timestamp = (char *)palloc(1 /* just null termination */); + return timestamp; + } + default: + elog(ERROR, "Invalid option type: %c", opt->type); + return NULL; /* keep compiler quiet */ + } +} + +/* + * Parsing functions + */ + +/* + * Convert a value from one of the human-friendly units ("kB", "min" etc.) + * to the given base unit. 'value' and 'unit' are the input value and unit + * to convert from. The converted value is stored in *base_value. + * + * Returns true on success, false if the input unit is not recognized.
+ */ +static bool +convert_to_base_unit(int64 value, const char *unit, + int base_unit, int64 *base_value) +{ + const unit_conversion *table; + int i; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit && + strcmp(unit, table[i].unit) == 0) + { + if (table[i].multiplier < 0) + *base_value = value / (-table[i].multiplier); + else + { + /* Check for integer overflow first */ + if (value > PG_INT64_MAX / table[i].multiplier) + return false; + + *base_value = value * table[i].multiplier; + } + return true; + } + } + return false; +} + +/* + * Unsigned variant of convert_to_base_unit() + */ +static bool +convert_to_base_unit_u(uint64 value, const char *unit, + int base_unit, uint64 *base_value) +{ + const unit_conversion *table; + int i; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit && + strcmp(unit, table[i].unit) == 0) + { + if (table[i].multiplier < 0) + *base_value = value / (-table[i].multiplier); + else + { + /* Check for integer overflow first */ + if (value > PG_UINT64_MAX / table[i].multiplier) + return false; + + *base_value = value * table[i].multiplier; + } + return true; + } + } + return false; +} + +static bool +parse_unit(char *unit_str, int flags, int64 value, int64 *base_value) +{ + /* allow whitespace between integer and unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + /* Handle possible unit */ + if (*unit_str != '\0') + { + char unit[MAX_UNIT_LEN + 1]; + int unitlen; + bool converted = false; + + if ((flags & OPTION_UNIT) == 0) + return false; /* this setting does not accept a unit */ + + unitlen = 0; + while (*unit_str != '\0' && !isspace((unsigned char) *unit_str) && + unitlen < MAX_UNIT_LEN) + unit[unitlen++] = *(unit_str++); + unit[unitlen] = '\0'; + /* allow whitespace after unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + if (*unit_str == '\0') + converted = convert_to_base_unit(value, unit, (flags & OPTION_UNIT), + base_value); + if (!converted) + return false; + } + + return true; +} + +/* + * Unsigned variant of parse_unit() + */ +static bool +parse_unit_u(char *unit_str, int flags, uint64 value, uint64 *base_value) +{ + /* allow whitespace between integer and unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + /* Handle possible unit */ + if (*unit_str != '\0') + { + char unit[MAX_UNIT_LEN + 1]; + int unitlen; + bool converted = false; + + if ((flags & OPTION_UNIT) == 0) + return false; /* this setting does not accept a unit */ + + unitlen = 0; + while (*unit_str != '\0' && !isspace((unsigned char) *unit_str) && + unitlen < MAX_UNIT_LEN) + unit[unitlen++] = *(unit_str++); + unit[unitlen] = '\0'; + /* allow whitespace after unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + if (*unit_str == '\0') + converted = convert_to_base_unit_u(value, unit, + (flags & OPTION_UNIT), + base_value); + if (!converted) + return false; + } + + return true; +} + +/* + * Try to interpret value as boolean value. Valid values are: true, + * false, yes, no, on, off, 1, 0; as well as unique prefixes thereof. + * If the string parses okay, return true, else false. + * If okay and result is not NULL, return the value in *result. 
+ */ +bool +parse_bool(const char *value, bool *result) +{ + return parse_bool_with_len(value, strlen(value), result); +} + +bool +parse_bool_with_len(const char *value, size_t len, bool *result) +{ + switch (*value) + { + case 't': + case 'T': + if (pg_strncasecmp(value, "true", len) == 0) + { + if (result) + *result = true; + return true; + } + break; + case 'f': + case 'F': + if (pg_strncasecmp(value, "false", len) == 0) + { + if (result) + *result = false; + return true; + } + break; + case 'y': + case 'Y': + if (pg_strncasecmp(value, "yes", len) == 0) + { + if (result) + *result = true; + return true; + } + break; + case 'n': + case 'N': + if (pg_strncasecmp(value, "no", len) == 0) + { + if (result) + *result = false; + return true; + } + break; + case 'o': + case 'O': + /* 'o' is not unique enough */ + if (pg_strncasecmp(value, "on", (len > 2 ? len : 2)) == 0) + { + if (result) + *result = true; + return true; + } + else if (pg_strncasecmp(value, "off", (len > 2 ? len : 2)) == 0) + { + if (result) + *result = false; + return true; + } + break; + case '1': + if (len == 1) + { + if (result) + *result = true; + return true; + } + break; + case '0': + if (len == 1) + { + if (result) + *result = false; + return true; + } + break; + default: + break; + } + + if (result) + *result = false; /* suppress compiler warning */ + return false; +} + +/* + * Parse string as 32bit signed int. + * valid range: -2147483648 ~ 2147483647 + */ +bool +parse_int32(const char *value, int32 *result, int flags) +{ + int64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_INT32_MAX; + return true; + } + + errno = 0; + val = strtol(value, &endptr, 0); + if (endptr == value || (*endptr && flags == 0)) + return false; + + /* Check for integer overflow */ + if (errno == ERANGE || val != (int64) ((int32) val)) + return false; + + if (!parse_unit(endptr, flags, val, &val)) + return false; + + /* Check for integer overflow again */ + if (val != (int64) ((int32) val)) + return false; + + *result = val; + + return true; +} + +/* + * Parse string as 32bit unsigned int. 
+ * valid range: 0 ~ 4294967295 (2^32-1) + */ +bool +parse_uint32(const char *value, uint32 *result, int flags) +{ + uint64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_UINT32_MAX; + return true; + } + + errno = 0; + val = strtoul(value, &endptr, 0); + if (endptr == value || (*endptr && flags == 0)) + return false; + + /* Check for integer overflow */ + if (errno == ERANGE || val != (uint64) ((uint32) val)) + return false; + + if (!parse_unit_u(endptr, flags, val, &val)) + return false; + + /* Check for integer overflow again */ + if (val != (uint64) ((uint32) val)) + return false; + + *result = val; + + return true; +} + +/* + * Parse string as int64 + * valid range: -9223372036854775808 ~ 9223372036854775807 + */ +bool +parse_int64(const char *value, int64 *result, int flags) +{ + int64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_INT64_MAX; + return true; + } + + errno = 0; +#if defined(HAVE_LONG_INT_64) + val = strtol(value, &endptr, 0); +#elif defined(HAVE_LONG_LONG_INT_64) + val = strtoll(value, &endptr, 0); +#else + val = strtol(value, &endptr, 0); +#endif + if (endptr == value || (*endptr && flags == 0)) + return false; + + if (errno == ERANGE) + return false; + + if (!parse_unit(endptr, flags, val, &val)) + return false; + + *result = val; + + return true; +} + +/* + * Parse string as uint64 + * valid range: 0 ~ (2^64-1) + */ +bool +parse_uint64(const char *value, uint64 *result, int flags) +{ + uint64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_UINT64_MAX; + return true; + } + + errno = 0; +#if defined(HAVE_LONG_INT_64) + val = strtoul(value, &endptr, 0); +#elif defined(HAVE_LONG_LONG_INT_64) + val = strtoull(value, &endptr, 0); +#else + val = strtoul(value, &endptr, 0); +#endif + if (endptr == value || (*endptr && flags == 0)) + return false; + + if (errno == ERANGE) + return false; + + if (!parse_unit_u(endptr, flags, val, &val)) + return false; + + *result = val; + + return true; +} + +/* + * Convert ISO-8601 format string to time_t value. + * + * If utc_default is true, then if timezone offset isn't specified tz will be + * +00:00. + * + * TODO: '0' converted into '2000-01-01 00:00:00'. Example: set-backup --expire-time=0 + */ +bool +parse_time(const char *value, time_t *result, bool utc_default) +{ + size_t len; + int fields_num, + tz = 0, + i; + bool tz_set = false; + char *tmp; + struct tm tm; + char junk[2]; + + /* tmp = replace( value, !isalnum, ' ' ) */ + tmp = (char *)pgut_malloc(strlen(value) + + 1); + len = 0; + fields_num = 1; + + while (*value) + { + if (IsAlnum(*value)) + { + tmp[len++] = *value; + value++; + } + else if (fields_num < 6) + { + fields_num++; + tmp[len++] = ' '; + value++; + } + /* timezone field is 7th */ + else if ((*value == '-' || *value == '+') && fields_num == 6) + { + int hr, + min, + sec = 0; + char *cp; + + errno = 0; + hr = strtol(value + 1, &cp, 10); + if ((value + 1) == cp || errno == ERANGE) + return false; + + /* explicit delimiter? */ + if (*cp == ':') + { + errno = 0; + min = strtol(cp + 1, &cp, 10); + if (errno == ERANGE) + return false; + if (*cp == ':') + { + errno = 0; + sec = strtol(cp + 1, &cp, 10); + if (errno == ERANGE) + return false; + } + } + /* otherwise, might have run things together... 
*/ + else if (*cp == '\0' && strlen(value) > 3) + { + min = hr % 100; + hr = hr / 100; + /* we could, but don't, support a run-together hhmmss format */ + } + else + min = 0; + + /* Range-check the values; see notes in datatype/timestamp.h */ + if (hr < 0 || hr > MAX_TZDISP_HOUR) + return false; + if (min < 0 || min >= MINS_PER_HOUR) + return false; + if (sec < 0 || sec >= SECS_PER_MINUTE) + return false; + + tz = (hr * MINS_PER_HOUR + min) * SECS_PER_MINUTE + sec; + if (*value == '-') + tz = -tz; + + tz_set = true; + + fields_num++; + value = cp; + } + /* wrong format */ + else if (!IsSpace(*value)) + return false; + else + value++; + } + tmp[len] = '\0'; + + /* parse for "YYYY-MM-DD HH:MI:SS" */ + memset(&tm, 0, sizeof(tm)); + tm.tm_year = 0; /* tm_year is year - 1900 */ + tm.tm_mon = 0; /* tm_mon is 0 - 11 */ + tm.tm_mday = 1; /* tm_mday is 1 - 31 */ + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + i = sscanf(tmp, "%04d %02d %02d %02d %02d %02d%1s", + &tm.tm_year, &tm.tm_mon, &tm.tm_mday, + &tm.tm_hour, &tm.tm_min, &tm.tm_sec, junk); + free(tmp); + + if (i < 3 || i > 6) + return false; + + /* adjust year */ + if (tm.tm_year < 100) + tm.tm_year += 2000 - 1900; + else if (tm.tm_year >= 1900) + tm.tm_year -= 1900; + + /* adjust month */ + if (i > 1) + tm.tm_mon -= 1; + + /* determine whether Daylight Saving Time is in effect */ + tm.tm_isdst = -1; + + *result = mktime(&tm); + + /* adjust time zone */ + if (tz_set || utc_default) + { + time_t ltime = time(NULL); + struct tm *ptm = gmtime(<ime); + time_t gmt = mktime(ptm); + time_t offset; + + /* UTC time */ + *result -= tz; + + /* Get local time */ + ptm = localtime(<ime); + offset = ltime - gmt + (ptm->tm_isdst ? 3600 : 0); + + *result += offset; + } + + return true; +} + +/* + * Try to parse value as an integer. The accepted formats are the + * usual decimal, octal, or hexadecimal formats, optionally followed by + * a unit name if "flags" indicates a unit is allowed. + * + * If the string parses okay, return true, else false. + * If okay and result is not NULL, return the value in *result. + * If not okay and hintmsg is not NULL, *hintmsg is set to a suitable + * HINT message, or NULL if no hint provided. 
+ */ +bool +parse_int(const char *value, int *result, int flags, const char **hintmsg) +{ + int64 val; + char *endptr; + + /* To suppress compiler warnings, always set output params */ + if (result) + *result = 0; + if (hintmsg) + *hintmsg = NULL; + + /* We assume here that int64 is at least as wide as long */ + errno = 0; + val = strtol(value, &endptr, 0); + + if (endptr == value) + return false; /* no HINT for integer syntax error */ + + if (errno == ERANGE || val != (int64) ((int32) val)) + { + if (hintmsg) + *hintmsg = "Value exceeds integer range."; + return false; + } + + /* allow whitespace between integer and unit */ + while (isspace((unsigned char) *endptr)) + endptr++; + + /* Handle possible unit */ + if (*endptr != '\0') + { + char unit[MAX_UNIT_LEN + 1]; + int unitlen; + bool converted = false; + + if ((flags & OPTION_UNIT) == 0) + return false; /* this setting does not accept a unit */ + + unitlen = 0; + while (*endptr != '\0' && !isspace((unsigned char) *endptr) && + unitlen < MAX_UNIT_LEN) + unit[unitlen++] = *(endptr++); + unit[unitlen] = '\0'; + /* allow whitespace after unit */ + while (isspace((unsigned char) *endptr)) + endptr++; + + if (*endptr == '\0') + converted = convert_to_base_unit(val, unit, (flags & OPTION_UNIT), + &val); + if (!converted) + { + /* invalid unit, or garbage after the unit; set hint and fail. */ + if (hintmsg) + { + if (flags & OPTION_UNIT_MEMORY) + *hintmsg = memory_units_hint; + else + *hintmsg = time_units_hint; + } + return false; + } + + /* Check for overflow due to units conversion */ + if (val != (int64) ((int32) val)) + { + if (hintmsg) + *hintmsg = "Value exceeds integer range."; + return false; + } + } + + if (result) + *result = (int) val; + return true; +} + +bool +parse_lsn(const char *value, XLogRecPtr *result) +{ + uint32 xlogid; + uint32 xrecoff; + int len1; + int len2; + + len1 = strspn(value, "0123456789abcdefABCDEF"); + if (len1 < 1 || len1 > MAXPG_LSNCOMPONENT || value[len1] != '/') + elog(ERROR, "invalid LSN \"%s\"", value); + len2 = strspn(value + len1 + 1, "0123456789abcdefABCDEF"); + if (len2 < 1 || len2 > MAXPG_LSNCOMPONENT || value[len1 + 1 + len2] != '\0') + elog(ERROR, "invalid LSN \"%s\"", value); + + if (sscanf(value, "%X/%X", &xlogid, &xrecoff) == 2) + *result = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; + else + { + elog(ERROR, "invalid LSN \"%s\"", value); + return false; + } + + return true; +} + +/* + * Convert a value in some base unit to a human-friendly unit. The output + * unit is chosen so that it's the greatest unit that can represent the value + * without loss. For example, if the base unit is GUC_UNIT_KB, 1024 is + * converted to 1 MB, but 1025 is represented as 1025 kB. + */ +void +convert_from_base_unit(int64 base_value, int base_unit, + int64 *value, const char **unit) +{ + const unit_conversion *table; + int i; + + *unit = NULL; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit) + { + /* + * Accept the first conversion that divides the value evenly. We + * assume that the conversions for each base unit are ordered from + * greatest unit to the smallest! 
+ */ + if (table[i].multiplier < 0) + { + /* Check for integer overflow first */ + if (base_value > PG_INT64_MAX / (-table[i].multiplier)) + continue; + + *value = base_value * (-table[i].multiplier); + *unit = table[i].unit; + break; + } + else if (base_value % table[i].multiplier == 0) + { + *value = base_value / table[i].multiplier; + *unit = table[i].unit; + break; + } + } + } + + Assert(*unit != NULL); +} + +/* + * Unsigned variant of convert_from_base_unit() + */ +void +convert_from_base_unit_u(uint64 base_value, int base_unit, + uint64 *value, const char **unit) +{ + const unit_conversion *table; + int i; + + *unit = NULL; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit) + { + /* + * Accept the first conversion that divides the value evenly. We + * assume that the conversions for each base unit are ordered from + * greatest unit to the smallest! + */ + if (table[i].multiplier < 0) + { + /* Check for integer overflow first */ + if (base_value > PG_UINT64_MAX / (-table[i].multiplier)) + continue; + + *value = base_value * (-table[i].multiplier); + *unit = table[i].unit; + break; + } + else if (base_value % table[i].multiplier == 0) + { + *value = base_value / table[i].multiplier; + *unit = table[i].unit; + break; + } + } + } + + Assert(*unit != NULL); +} + +/* + * Convert time_t value to ISO-8601 format string. Always set timezone offset. + */ +void +time2iso(char *buf, size_t len, time_t time) +{ + struct tm *ptm = gmtime(&time); + time_t gmt = mktime(ptm); + time_t offset; + char *ptr = buf; + + ptm = localtime(&time); + offset = time - gmt + (ptm->tm_isdst ? 3600 : 0); + + strftime(ptr, len, "%Y-%m-%d %H:%M:%S", ptm); + + ptr += strlen(ptr); + snprintf(ptr, len - (ptr - buf), "%c%02d", + (offset >= 0) ? '+' : '-', + abs((int) offset) / SECS_PER_HOUR); + + if (abs((int) offset) % SECS_PER_HOUR != 0) + { + ptr += strlen(ptr); + snprintf(ptr, len - (ptr - buf), ":%02d", + abs((int) offset % SECS_PER_HOUR) / SECS_PER_MINUTE); + } +} diff --git a/src/bin/pg_probackup/configuration.h b/src/bin/pg_probackup/configuration.h new file mode 100644 index 000000000..44285e49b --- /dev/null +++ b/src/bin/pg_probackup/configuration.h @@ -0,0 +1,107 @@ +/*------------------------------------------------------------------------- + * + * configuration.h: - prototypes of functions and structures for + * configuration. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * Copyright (c) 2018-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef CONFIGURATION_H +#define CONFIGURATION_H + +#include "postgres_fe.h" +#include "access/xlogdefs.h" + +#define INFINITE_STR "INFINITE" + +typedef enum OptionSource +{ + SOURCE_DEFAULT, + SOURCE_FILE_STRICT, + SOURCE_CMD_STRICT, + SOURCE_ENV, + SOURCE_FILE, + SOURCE_CMD, + SOURCE_CONST +} OptionSource; + +typedef struct ConfigOption ConfigOption; + +typedef void (*option_assign_fn) (ConfigOption *opt, const char *arg); +/* Returns allocated string value */ +typedef char *(*option_get_fn) (ConfigOption *opt); + +/* + * type: + * b: bool (true) + * B: bool (false) + * f: option_fn + * i: 32bit signed integer + * u: 32bit unsigned integer + * I: 64bit signed integer + * U: 64bit unsigned integer + * s: string + * t: time_t + */ +struct ConfigOption +{ + char type; + uint8 sname; /* short name */ + const char *lname; /* long name */ + void *var; /* pointer to variable */ + OptionSource allowed; /* allowed source */ + OptionSource source; /* actual source */ + const char *group; /* option group name */ + int flags; /* option unit */ + option_get_fn get_value; /* function to get the value as a string, + should return allocated string*/ +}; + +/* + * bit values in "flags" of an option + */ +#define OPTION_UNIT_KB 0x1000 /* value is in kilobytes */ +#define OPTION_UNIT_BLOCKS 0x2000 /* value is in blocks */ +#define OPTION_UNIT_XBLOCKS 0x3000 /* value is in xlog blocks */ +#define OPTION_UNIT_XSEGS 0x4000 /* value is in xlog segments */ +#define OPTION_UNIT_MEMORY 0xF000 /* mask for size-related units */ + +#define OPTION_UNIT_MS 0x10000 /* value is in milliseconds */ +#define OPTION_UNIT_S 0x20000 /* value is in seconds */ +#define OPTION_UNIT_MIN 0x30000 /* value is in minutes */ +#define OPTION_UNIT_TIME 0xF0000 /* mask for time-related units */ + +#define OPTION_UNIT (OPTION_UNIT_MEMORY | OPTION_UNIT_TIME) + +extern int config_get_opt(int argc, char **argv, ConfigOption cmd_options[], + ConfigOption options[]); +extern int config_read_opt(const char *path, ConfigOption options[], int elevel, + bool strict, bool missing_ok); +extern void config_get_opt_env(ConfigOption options[]); +extern void config_set_opt(ConfigOption options[], void *var, + OptionSource source); + +extern char *option_get_value(ConfigOption *opt); + +extern bool parse_bool(const char *value, bool *result); +extern bool parse_bool_with_len(const char *value, size_t len, bool *result); +extern bool parse_int32(const char *value, int32 *result, int flags); +extern bool parse_uint32(const char *value, uint32 *result, int flags); +extern bool parse_int64(const char *value, int64 *result, int flags); +extern bool parse_uint64(const char *value, uint64 *result, int flags); +extern bool parse_time(const char *value, time_t *result, bool utc_default); +extern bool parse_int(const char *value, int *result, int flags, + const char **hintmsg); +extern bool parse_lsn(const char *value, XLogRecPtr *result); + +extern void time2iso(char *buf, size_t len, time_t time); + +extern void convert_from_base_unit(int64 base_value, int base_unit, + int64 *value, const char **unit); +extern void convert_from_base_unit_u(uint64 base_value, int base_unit, + uint64 *value, const char **unit); + +#endif /* CONFIGURATION_H */ diff --git a/src/bin/pg_probackup/configure.cpp b/src/bin/pg_probackup/configure.cpp new file mode 100644 index 000000000..220f58e9b --- /dev/null +++ b/src/bin/pg_probackup/configure.cpp 
@@ -0,0 +1,699 @@ +/*------------------------------------------------------------------------- + * + * configure.c: - manage backup catalog. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include + +#include "configuration.h" +#include "json.h" + + +static void assign_log_level_console(ConfigOption *opt, const char *arg); +static void assign_log_level_file(ConfigOption *opt, const char *arg); +static void assign_compress_alg(ConfigOption *opt, const char *arg); + +static char *get_log_level_console(ConfigOption *opt); +static char *get_log_level_file(ConfigOption *opt); +static char *get_compress_alg(ConfigOption *opt); + +static void show_configure_start(void); +static void show_configure_end(void); + +static void show_configure_plain(ConfigOption *opt); +static void show_configure_json(ConfigOption *opt); + +#define RETENTION_REDUNDANCY_DEFAULT 0 +#define RETENTION_WINDOW_DEFAULT 0 + +#define OPTION_INSTANCE_GROUP "Backup instance information" +#define OPTION_CONN_GROUP "Connection parameters" +#define OPTION_ARCHIVE_GROUP "Archive parameters" +#define OPTION_LOG_GROUP "Logging parameters" +#define OPTION_RETENTION_GROUP "Retention parameters" +#define OPTION_COMPRESS_GROUP "Compression parameters" +#define OPTION_REMOTE_GROUP "Remote access parameters" + +/* + * Short name should be non-printable ASCII character. + */ +ConfigOption instance_options[] = +{ + /* Instance options */ + { + 's', 'D', "pgdata", + &instance_config.pgdata, SOURCE_CMD, (OptionSource)0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + { + 'U', 200, "system-identifier", + &instance_config.system_identifier, SOURCE_FILE_STRICT, (OptionSource)0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, +#if PG_VERSION_NUM >= 110000 + { + 'u', 201, "xlog-seg-size", + &instance_config.xlog_seg_size, SOURCE_FILE_STRICT, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, +#endif + { + 's', 'E', "external-dirs", + &instance_config.external_dir_str, SOURCE_CMD, (OptionSource)0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + /* Connection options */ + { + 's', 'd', "pgdatabase", + &instance_config.conn_opt.pgdatabase, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'h', "pghost", + &instance_config.conn_opt.pghost, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'p', "pgport", + &instance_config.conn_opt.pgport, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'U', "pguser", + &instance_config.conn_opt.pguser, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + /* Archive options */ + { + 'u', 207, "archive-timeout", + &instance_config.archive_timeout, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_ARCHIVE_GROUP, OPTION_UNIT_S, option_get_value + }, + { + 's', 208, "archive-host", + &instance_config.archive.host, SOURCE_CMD, (OptionSource)0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 209, "archive-port", + &instance_config.archive.port, SOURCE_CMD, (OptionSource)0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 210, "archive-user", + &instance_config.archive.user, SOURCE_CMD, (OptionSource)0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 211, "restore-command", + &instance_config.restore_command, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_ARCHIVE_GROUP, 0, option_get_value 
+ }, + /* Logging options */ + { + 'f', 212, "log-level-console", + (void *)assign_log_level_console, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, get_log_level_console + }, + { + 'f', 213, "log-level-file", + (void *)assign_log_level_file, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, get_log_level_file + }, + { + 's', 214, "log-filename", + &instance_config.logger.log_filename, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 215, "error-log-filename", + &instance_config.logger.error_log_filename, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 216, "log-directory", + &instance_config.logger.log_directory, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 'U', 217, "log-rotation-size", + &instance_config.logger.log_rotation_size, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_KB, option_get_value + }, + { + 'U', 218, "log-rotation-age", + &instance_config.logger.log_rotation_age, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_MS, option_get_value + }, + /* Retention options */ + { + 'u', 219, "retention-redundancy", + &instance_config.retention_redundancy, SOURCE_CMD, (OptionSource)0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 220, "retention-window", + &instance_config.retention_window, SOURCE_CMD, (OptionSource)0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 221, "wal-depth", + &instance_config.wal_depth, SOURCE_CMD, (OptionSource)0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + /* Compression options */ + { + 'f', 222, "compress-algorithm", + (void *)assign_compress_alg, SOURCE_CMD, (OptionSource)0, + OPTION_COMPRESS_GROUP, 0, get_compress_alg + }, + { + 'u', 223, "compress-level", + &instance_config.compress_level, SOURCE_CMD, (OptionSource)0, + OPTION_COMPRESS_GROUP, 0, option_get_value + }, + /* Remote backup options */ + { + 's', 224, "remote-proto", + &instance_config.remote.proto, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 225, "remote-host", + &instance_config.remote.host, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 226, "remote-port", + &instance_config.remote.port, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 227, "remote-path", + &instance_config.remote.path, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 228, "remote-user", + &instance_config.remote.user, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 229, "ssh-options", + &instance_config.remote.ssh_options, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 230, "ssh-config", + &instance_config.remote.ssh_config, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { 0 } +}; + +/* An instance configuration with default options */ +InstanceConfig instance_config; + +static PQExpBufferData show_buf; +static int32 json_level = 0; +static const char *current_group = NULL; + +/* + * Show configure options including default values. 
+ */ +void +do_show_config(void) +{ + int i; + + show_configure_start(); + + for (i = 0; instance_options[i].type; i++) + { + if (show_format == SHOW_PLAIN) + show_configure_plain(&instance_options[i]); + else + show_configure_json(&instance_options[i]); + } + + show_configure_end(); +} + +/* + * Save configure options into BACKUP_CATALOG_CONF_FILE. Do not save default + * values into the file. + */ +void +do_set_config(bool missing_ok) +{ + char path[MAXPGPATH]; + char path_temp[MAXPGPATH]; + FILE *fp; + int i; + + join_path_components(path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); + snprintf(path_temp, sizeof(path_temp), "%s.tmp", path); + + if (!missing_ok && !fileExists(path, FIO_LOCAL_HOST)) + elog(ERROR, "Configuration file \"%s\" doesn't exist", path); + + fp = fopen(path_temp, "wt"); + if (fp == NULL) + elog(ERROR, "Cannot create configuration file \"%s\": %s", + BACKUP_CATALOG_CONF_FILE, strerror(errno)); + + current_group = NULL; + + for (i = 0; instance_options[i].type; i++) + { + ConfigOption *opt = &instance_options[i]; + char *value; + + /* Save only options from command line */ + if (opt->source != SOURCE_CMD && + /* ...or options from the previous configure file */ + opt->source != SOURCE_FILE && opt->source != SOURCE_FILE_STRICT) + continue; + + value = opt->get_value(opt); + if (value == NULL) + continue; + + if (current_group == NULL || strcmp(opt->group, current_group) != 0) + { + current_group = opt->group; + fprintf(fp, "# %s\n", current_group); + } + + if (strchr(value, ' ')) + fprintf(fp, "%s = '%s'\n", opt->lname, value); + else + fprintf(fp, "%s = %s\n", opt->lname, value); + pfree(value); + } + + fclose(fp); + + if (rename(path_temp, path) < 0) + { + int errno_temp = errno; + unlink(path_temp); + elog(ERROR, "Cannot rename configuration file \"%s\" to \"%s\": %s", + path_temp, path, strerror(errno_temp)); + } +} + +void +init_config(InstanceConfig *config, const char *instance_name) +{ + MemSet(config, 0, sizeof(InstanceConfig)); + + config->name = pgut_strdup(instance_name); + + /* + * Starting from PostgreSQL 11 WAL segment size may vary. Prior to + * PostgreSQL 10 xlog_seg_size is equal to XLOG_SEG_SIZE. 
+ */ +#if PG_VERSION_NUM >= 110000 + config->xlog_seg_size = 0; +#else + config->xlog_seg_size = XLOG_SEG_SIZE; +#endif + + config->archive_timeout = ARCHIVE_TIMEOUT_DEFAULT; + + /* Copy logger defaults */ + config->logger = logger_config; + + config->retention_redundancy = RETENTION_REDUNDANCY_DEFAULT; + config->retention_window = RETENTION_WINDOW_DEFAULT; + config->wal_depth = 0; + + config->compress_alg = COMPRESS_ALG_DEFAULT; + config->compress_level = COMPRESS_LEVEL_DEFAULT; + + config->remote.proto = (char*)"ssh"; +} + +/* + * read instance config from file + */ +InstanceConfig * +readInstanceConfigFile(const char *instance_name) +{ + char path[MAXPGPATH]; + InstanceConfig *instance = pgut_new(InstanceConfig); + char *log_level_console = NULL; + char *log_level_file = NULL; + char *compress_alg = NULL; + int parsed_options; + + ConfigOption instance_options[] = + { + /* Instance options */ + { + 's', 'D', "pgdata", + &instance->pgdata, SOURCE_CMD, (OptionSource)0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + { + 'U', 200, "system-identifier", + &instance->system_identifier, SOURCE_FILE_STRICT, (OptionSource)0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + #if PG_VERSION_NUM >= 110000 + { + 'u', 201, "xlog-seg-size", + &instance->xlog_seg_size, SOURCE_FILE_STRICT, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + #endif + { + 's', 'E', "external-dirs", + &instance->external_dir_str, SOURCE_CMD, (OptionSource)0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + /* Connection options */ + { + 's', 'd', "pgdatabase", + &instance->conn_opt.pgdatabase, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'h', "pghost", + &instance->conn_opt.pghost, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'p', "pgport", + &instance->conn_opt.pgport, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'U', "pguser", + &instance->conn_opt.pguser, SOURCE_CMD, (OptionSource)0, + OPTION_CONN_GROUP, 0, option_get_value + }, + /* Archive options */ + { + 'u', 207, "archive-timeout", + &instance->archive_timeout, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_ARCHIVE_GROUP, OPTION_UNIT_S, option_get_value + }, + { + 's', 208, "archive-host", + &instance_config.archive.host, SOURCE_CMD, (OptionSource)0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 209, "archive-port", + &instance_config.archive.port, SOURCE_CMD, (OptionSource)0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 210, "archive-user", + &instance_config.archive.user, SOURCE_CMD, (OptionSource)0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 211, "restore-command", + &instance->restore_command, SOURCE_CMD, (OptionSource)0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + + /* Instance options */ + { + 's', 'D', "pgdata", + &instance->pgdata, SOURCE_CMD, (OptionSource)0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + + /* Logging options */ + { + 's', 212, "log-level-console", + &log_level_console, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 213, "log-level-file", + &log_level_file, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 214, "log-filename", + &instance->logger.log_filename, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 215, "error-log-filename", + &instance->logger.error_log_filename, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 
's', 216, "log-directory", + &instance->logger.log_directory, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 'U', 217, "log-rotation-size", + &instance->logger.log_rotation_size, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_KB, option_get_value + }, + { + 'U', 218, "log-rotation-age", + &instance->logger.log_rotation_age, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_MS, option_get_value + }, + /* Retention options */ + { + 'u', 219, "retention-redundancy", + &instance->retention_redundancy, SOURCE_CMD, (OptionSource)0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 220, "retention-window", + &instance->retention_window, SOURCE_CMD, (OptionSource)0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 221, "wal-depth", + &instance->wal_depth, SOURCE_CMD, (OptionSource)0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + /* Compression options */ + { + 's', 222, "compress-algorithm", + &compress_alg, SOURCE_CMD, (OptionSource)0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 'u', 223, "compress-level", + &instance->compress_level, SOURCE_CMD, (OptionSource)0, + OPTION_COMPRESS_GROUP, 0, option_get_value + }, + /* Remote backup options */ + { + 's', 224, "remote-proto", + &instance->remote.proto, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 225, "remote-host", + &instance->remote.host, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 226, "remote-port", + &instance->remote.port, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 227, "remote-path", + &instance->remote.path, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 228, "remote-user", + &instance->remote.user, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 229, "ssh-options", + &instance->remote.ssh_options, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 230, "ssh-config", + &instance->remote.ssh_config, SOURCE_CMD, (OptionSource)0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { 0 } + }; + + + init_config(instance, instance_name); + + sprintf(instance->backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + canonicalize_path(instance->backup_instance_path); + + sprintf(instance->arclog_path, "%s/%s/%s", + backup_path, "wal", instance_name); + canonicalize_path(instance->arclog_path); + + join_path_components(path, instance->backup_instance_path, + BACKUP_CATALOG_CONF_FILE); + + if (fio_access(path, F_OK, FIO_BACKUP_HOST) != 0) + { + elog(WARNING, "Control file \"%s\" doesn't exist", path); + pfree(instance); + return NULL; + } + + parsed_options = config_read_opt(path, instance_options, WARNING, true, true); + + if (parsed_options == 0) + { + elog(WARNING, "Control file \"%s\" is empty", path); + pfree(instance); + return NULL; + } + + if (log_level_console) + instance->logger.log_level_console = parse_log_level(log_level_console); + + if (log_level_file) + instance->logger.log_level_file = parse_log_level(log_level_file); + + if (compress_alg) + instance->compress_alg = parse_compress_alg(compress_alg); + +#if PG_VERSION_NUM >= 110000 + /* If for some reason xlog-seg-size is missing, then set it to 16MB */ + if (!instance->xlog_seg_size) + instance->xlog_seg_size = DEFAULT_XLOG_SEG_SIZE; +#endif + + return instance; + +} + +static void +assign_log_level_console(ConfigOption 
*opt, const char *arg) +{ + instance_config.logger.log_level_console = parse_log_level(arg); +} + +static void +assign_log_level_file(ConfigOption *opt, const char *arg) +{ + instance_config.logger.log_level_file = parse_log_level(arg); +} + +static void +assign_compress_alg(ConfigOption *opt, const char *arg) +{ + instance_config.compress_alg = parse_compress_alg(arg); +} + +static char * +get_log_level_console(ConfigOption *opt) +{ + return gs_pstrdup(deparse_log_level(instance_config.logger.log_level_console)); +} + +static char * +get_log_level_file(ConfigOption *opt) +{ + return gs_pstrdup(deparse_log_level(instance_config.logger.log_level_file)); +} + +static char * +get_compress_alg(ConfigOption *opt) +{ + return gs_pstrdup(deparse_compress_alg(instance_config.compress_alg)); +} + +/* + * Initialize configure visualization. + */ +static void +show_configure_start(void) +{ + initPQExpBuffer(&show_buf); + + if (show_format == SHOW_PLAIN) + current_group = NULL; + else + { + json_level = 0; + json_add(&show_buf, JT_BEGIN_OBJECT, &json_level); + } +} + +/* + * Finalize configure visualization. + */ +static void +show_configure_end(void) +{ + if (show_format == SHOW_PLAIN) + current_group = NULL; + else + { + json_add(&show_buf, JT_END_OBJECT, &json_level); + appendPQExpBufferChar(&show_buf, '\n'); + } + + fputs(show_buf.data, stdout); + termPQExpBuffer(&show_buf); +} + +/* + * Plain output. + */ + +static void +show_configure_plain(ConfigOption *opt) +{ + char *value; + + value = opt->get_value(opt); + if (value == NULL) + return; + + if (current_group == NULL || strcmp(opt->group, current_group) != 0) + { + current_group = opt->group; + appendPQExpBuffer(&show_buf, "# %s\n", current_group); + } + + appendPQExpBuffer(&show_buf, "%s = %s\n", opt->lname, value); + pfree(value); +} + +/* + * Json output. + */ + +static void +show_configure_json(ConfigOption *opt) +{ + char *value; + + value = opt->get_value(opt); + if (value == NULL) + return; + + json_add_value(&show_buf, opt->lname, value, json_level, + true); + pfree(value); +} diff --git a/src/bin/pg_probackup/data.cpp b/src/bin/pg_probackup/data.cpp new file mode 100644 index 000000000..f1f46af3a --- /dev/null +++ b/src/bin/pg_probackup/data.cpp @@ -0,0 +1,2239 @@ +/*------------------------------------------------------------------------- + * + * data.c: utils to parse and backup data pages + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include "storage/checksum.h" +#include "storage/checksum_impl.h" +#include "pg_lzcompress.h" +#include "file.h" + +#include +#include + +#ifdef HAVE_LIBZ +#include +#endif + +#include "thread.h" +#include "common/fe_memutils.h" + +/* Union to ease operations on relation pages */ +typedef struct DataPage +{ + BackupPageHeader bph; + char data[BLCKSZ]; +} DataPage; + +static bool get_page_header(FILE *in, const char *fullpath, BackupPageHeader* bph, + pg_crc32 *crc, bool use_crc32c); + +#ifdef HAVE_LIBZ +/* Implementation of zlib compression method */ +static int32 +zlib_compress(void *dst, size_t dst_size, void const *src, size_t src_size, + int level) +{ + uLongf compressed_size = dst_size; + int rc = compress2((Bytef *)dst, &compressed_size, (const Bytef*)src, src_size, + level); + + return rc == Z_OK ? 
compressed_size : rc; +} + +/* Implementation of zlib compression method */ +static int32 +zlib_decompress(void *dst, size_t dst_size, void const *src, size_t src_size) +{ + uLongf dest_len = dst_size; + int rc = uncompress((Bytef *)dst, &dest_len, (const Bytef*)src, src_size); + + return rc == Z_OK ? dest_len : rc; +} +#endif + +/* + * Compresses source into dest using algorithm. Returns the number of bytes + * written in the destination buffer, or -1 if compression fails. + */ +int32 +do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, + CompressAlg alg, int level, const char **errormsg) +{ + switch (alg) + { + case NONE_COMPRESS: + case NOT_DEFINED_COMPRESS: + return -1; +#ifdef HAVE_LIBZ + case ZLIB_COMPRESS: + { + int32 ret; + ret = zlib_compress(dst, dst_size, (char*)src, src_size, level); + if (ret < Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } +#endif + case PGLZ_COMPRESS: + return pglz_compress((const char*)src, src_size, (char*)dst, PGLZ_strategy_always); + } + + return -1; +} + +/* + * Decompresses source into dest using algorithm. Returns the number of bytes + * decompressed in the destination buffer, or -1 if decompression fails. + */ +int32 +do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, + CompressAlg alg, const char **errormsg) +{ + switch (alg) + { + case NONE_COMPRESS: + case NOT_DEFINED_COMPRESS: + if (errormsg) + *errormsg = "Invalid compression algorithm"; + return -1; +#ifdef HAVE_LIBZ + case ZLIB_COMPRESS: + { + int32 ret; + ret = zlib_decompress(dst, dst_size, src, src_size); + if (ret < Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } +#endif + case PGLZ_COMPRESS: + return pglz_decompress((const char*)src, src_size, (char*)dst, dst_size, true); + } + + return -1; +} + + +#define ZLIB_MAGIC 0x78 + +/* + * Before version 2.0.23 there was a bug in pro_backup that pages which compressed + * size is exactly the same as original size are not treated as compressed. + * This check tries to detect and decompress such pages. + * There is no 100% criteria to determine whether page is compressed or not. + * But at least we will do this check only for pages which will no pass validation step. + */ +static bool +page_may_be_compressed(Page page, CompressAlg alg, uint32 backup_version) +{ + PageHeader phdr; + + phdr = (PageHeader) page; + + /* First check if page header is valid (it seems to be fast enough check) */ + if (!(PageGetPageSize(phdr) == BLCKSZ && + // PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION && + (phdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && + phdr->pd_lower >= SizeOfPageHeaderData && + phdr->pd_lower <= phdr->pd_upper && + phdr->pd_upper <= phdr->pd_special && + phdr->pd_special <= BLCKSZ && + phdr->pd_special == MAXALIGN(phdr->pd_special))) + { + /* ... 
end only if it is invalid, then do more checks */ + if (backup_version >= 20023) + { + /* Versions 2.0.23 and higher don't have such bug */ + return false; + } +#ifdef HAVE_LIBZ + /* For zlib we can check page magic: + * https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like + */ + if (alg == ZLIB_COMPRESS && *(char*)page != ZLIB_MAGIC) + { + return false; + } +#endif + /* otherwise let's try to decompress the page */ + return true; + } + return false; +} + +/* Verify page's header */ +bool +parse_page(Page page, XLogRecPtr *lsn) +{ + PageHeader phdr = (PageHeader) page; + + /* Get lsn from page header */ + *lsn = PageXLogRecPtrGet(phdr->pd_lsn); + + if (PageGetPageSize(phdr) == BLCKSZ && + // PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION && + (phdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && + phdr->pd_lower >= SizeOfPageHeaderData && + phdr->pd_lower <= phdr->pd_upper && + phdr->pd_upper <= phdr->pd_special && + phdr->pd_special <= BLCKSZ && + phdr->pd_special == MAXALIGN(phdr->pd_special)) + return true; + + return false; +} + +/* We know that header is invalid, store specific + * details in errormsg. + */ +void +get_header_errormsg(Page page, char **errormsg) +{ + PageHeader phdr = (PageHeader) page; + *errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + + if (PageGetPageSize(phdr) != BLCKSZ) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "page size %lu is not equal to block size %u", + PageGetPageSize(phdr), BLCKSZ); + + else if (phdr->pd_lower < SizeOfPageHeaderData) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_lower %i is less than page header size %lu", + phdr->pd_lower, SizeOfPageHeaderData); + + else if (phdr->pd_lower > phdr->pd_upper) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_lower %u is greater than pd_upper %u", + phdr->pd_lower, phdr->pd_upper); + + else if (phdr->pd_upper > phdr->pd_special) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_upper %u is greater than pd_special %u", + phdr->pd_upper, phdr->pd_special); + + else if (phdr->pd_special > BLCKSZ) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_special %u is greater than block size %u", + phdr->pd_special, BLCKSZ); + + else if (phdr->pd_special != MAXALIGN(phdr->pd_special)) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_special %i is misaligned, expected %lu", + phdr->pd_special, MAXALIGN(phdr->pd_special)); + + else if (phdr->pd_flags & ~PD_VALID_FLAG_BITS) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_flags mask contain illegal bits"); + + else + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid"); +} + +/* We know that checksumms are mismatched, store specific + * details in errormsg. + */ +void +get_checksum_errormsg(Page page, char **errormsg, BlockNumber absolute_blkno) +{ + PageHeader phdr = (PageHeader) page; + *errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + + snprintf(*errormsg, ERRMSG_MAX_LEN, + "page verification failed, " + "calculated checksum %u but expected %u", + phdr->pd_checksum, + /*pg_checksum_page(page, absolute_blkno)*/0); +} + +/* + * Retrieves a page taking the backup mode into account + * and writes it into argument "page". Argument "page" + * should be a pointer to allocated BLCKSZ of bytes. + * + * Prints appropriate warnings/errors/etc into log. 
+ * Returns: + * PageIsOk(0) if page was successfully retrieved + * PageIsTruncated(-1) if the page was truncated + * SkipCurrentPage(-2) if we need to skip this page, + * only used for DELTA backup + * PageIsCorrupted(-3) if the page checksum mismatch + * or header corruption, + * only used for checkdb + * TODO: probably we should always + * return it to the caller + */ +static int32 +prepare_page(ConnectionArgs *conn_arg, + pgFile *file, XLogRecPtr prev_backup_start_lsn, + BlockNumber blknum, FILE *in, + BackupMode backup_mode, + Page page, bool strict, + uint32 checksum_version, + const char *from_fullpath, + PageState *page_st) +{ + int try_again = PAGE_READ_ATTEMPTS; + bool page_is_valid = false; + BlockNumber absolute_blknum = file->segno * RELSEG_SIZE + blknum; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during page reading"); + + /* + * Read the page and verify its header and checksum. + * Under high write load it's possible that we've read partly + * flushed page, so try several times before throwing an error. + */ + int rc = 0; + while (!page_is_valid && try_again--) + { + /* read the block */ + int read_len = fio_pread(in, page, blknum * BLCKSZ); + + /* The block could have been truncated. It is fine. */ + if (read_len == 0) + { + elog(VERBOSE, "Cannot read block %u of \"%s\": " + "block truncated", blknum, from_fullpath); + return PageIsTruncated; + } + else if (read_len < 0) + elog(ERROR, "Cannot read block %u of \"%s\": %s", + blknum, from_fullpath, strerror(errno)); + else if (read_len != BLCKSZ) + elog(WARNING, "Cannot read block %u of \"%s\": " + "read %i of %d, try again", + blknum, from_fullpath, read_len, BLCKSZ); + else + { + /* We have BLCKSZ of raw data, validate it */ + rc = validate_one_page(page, absolute_blknum, + InvalidXLogRecPtr, page_st, + checksum_version); + switch (rc) + { + case PAGE_IS_ZEROED: + elog(VERBOSE, "File: \"%s\" blknum %u, empty page", from_fullpath, blknum); + return PageIsOk; + + case PAGE_IS_VALID: + return PageIsOk; + + case PAGE_HEADER_IS_INVALID: + elog(VERBOSE, "File: \"%s\" blknum %u have wrong page header, try again", + from_fullpath, blknum); + break; + + case PAGE_CHECKSUM_MISMATCH: + elog(VERBOSE, "File: \"%s\" blknum %u have wrong checksum, try again", + from_fullpath, blknum); + break; + default: + Assert(false); + } + } + } + + /* + * If page is not valid after 100 attempts to read it + * throw an error. 
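+     * (The retry limit actually used is the PAGE_READ_ATTEMPTS value assigned
+     * to try_again at the top of this function.)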
+ */ + if (!page_is_valid) + { + int elevel = ERROR; + char *errormsg = NULL; + + /* Get the details of corruption */ + if (rc == PAGE_HEADER_IS_INVALID) + get_header_errormsg(page, &errormsg); + else if (rc == PAGE_CHECKSUM_MISMATCH) + get_checksum_errormsg(page, &errormsg, + file->segno * RELSEG_SIZE + blknum); + + /* Error out in case of merge or backup without ptrack support; + * issue warning in case of backup with ptrack support + */ + if (!strict) + elevel = WARNING; + + if (errormsg) + elog(elevel, "Corruption detected in file \"%s\", block %u: %s", + from_fullpath, blknum, errormsg); + else + elog(elevel, "Corruption detected in file \"%s\", block %u", + from_fullpath, blknum); + + pg_free(errormsg); + return PageIsCorrupted; + } + + if (!strict) + return PageIsOk; + + return PageIsOk; +} + +/* split this function in two: compress() and backup() */ +static int +compress_and_backup_page(pgFile *file, BlockNumber blknum, + FILE *in, FILE *out, pg_crc32 *crc, + int page_state, Page page, + CompressAlg calg, int clevel, + const char *from_fullpath, const char *to_fullpath) +{ + int compressed_size = 0; + size_t write_buffer_size = 0; + char write_buffer[BLCKSZ*2]; /* compressed page may require more space than uncompressed */ + BackupPageHeader* bph = (BackupPageHeader*)write_buffer; + const char *errormsg = NULL; + + /* Compress the page */ + compressed_size = do_compress(write_buffer + sizeof(BackupPageHeader), + sizeof(write_buffer) - sizeof(BackupPageHeader), + page, BLCKSZ, calg, clevel, + &errormsg); + /* Something went wrong and errormsg was assigned, throw a warning */ + if (compressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occured during compressing block %u of file \"%s\": %s", + blknum, from_fullpath, errormsg); + + file->compress_alg = calg; /* TODO: wtf? why here? */ + + /* compression didn`t worked */ + if (compressed_size <= 0 || compressed_size >= BLCKSZ) + { + /* Do not compress page */ + memcpy(write_buffer + sizeof(BackupPageHeader), page, BLCKSZ); + compressed_size = BLCKSZ; + } + bph->block = blknum; + bph->compressed_size = compressed_size; + write_buffer_size = compressed_size + sizeof(BackupPageHeader); + + /* Update CRC */ + COMP_FILE_CRC32(true, *crc, write_buffer, write_buffer_size); + + /* write data page */ + if (fio_fwrite(out, write_buffer, write_buffer_size) != write_buffer_size) + elog(ERROR, "File: \"%s\", cannot write at block %u: %s", + to_fullpath, blknum, strerror(errno)); + + file->write_size += write_buffer_size; + file->uncompressed_size += BLCKSZ; + + return compressed_size; +} + +/* + * Backup data file in the from_root directory to the to_root directory with + * same relative path. If prev_backup_start_lsn is not NULL, only pages with + * higher lsn will be copied. + * Not just copy file, but read it block by block (use bitmap in case of + * incremental backup), validate checksum, optionally compress and write to + * backup with special header. 
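+ *
+ * Illustrative layout of one block as written by compress_and_backup_page()
+ * above (a sketch for orientation, not a formal format description):
+ *
+ *   [BackupPageHeader: block number, compressed_size][payload]
+ *
+ * where the payload is the compressed page, or the raw BLCKSZ page if
+ * compression failed or did not make the page smaller.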
+ */ +void +backup_data_file(ConnectionArgs* conn_arg, pgFile *file, + const char *from_fullpath, const char *to_fullpath, + XLogRecPtr prev_backup_start_lsn, BackupMode backup_mode, + CompressAlg calg, int clevel, uint32 checksum_version, + HeaderMap *hdr_map, bool is_merge) +{ + int rc; + bool use_pagemap; + char *errmsg = NULL; + BlockNumber err_blknum = 0; + /* page headers */ + BackupPageHeader2 *headers = NULL; + + /* sanity */ + if (file->size % BLCKSZ != 0) + elog(WARNING, "File: \"%s\", invalid file size %zu", from_fullpath, file->size); + + /* + * Compute expected number of blocks in the file. + * NOTE This is a normal situation, if the file size has changed + * since the moment we computed it. + */ + file->n_blocks = file->size/BLCKSZ; + + /* + * Skip unchanged file only if it exists in previous backup. + * This way we can correctly handle null-sized files which are + * not tracked by pagemap and thus always marked as unchanged. + */ + if (backup_mode == BACKUP_MODE_DIFF_PTRACK && + file->pagemap.bitmapsize == PageBitmapIsEmpty && + file->exists_in_prev && !file->pagemap_isabsent) + { + /* + * There are no changed blocks since last backup. We want to make + * incremental backup, so we should exit. + */ + file->write_size = BYTES_INVALID; + return; + } + + /* reset size summary */ + file->read_size = 0; + file->write_size = 0; + file->uncompressed_size = 0; + INIT_FILE_CRC32(true, file->crc); + + /* + * Read each page, verify checksum and write it to backup. + * If page map is empty or file is not present in previous backup + * backup all pages of the relation. + * + * In PTRACK 1.x there was a problem + * of data files with missing _ptrack map. + * Such files should be fully copied. + */ + + if (file->pagemap.bitmapsize == PageBitmapIsEmpty || + file->pagemap_isabsent || !file->exists_in_prev || + !file->pagemap.bitmap) + use_pagemap = false; + else + use_pagemap = true; + + /* Remote mode */ + if (fio_is_remote(FIO_DB_HOST)) + { + + rc = fio_send_pages(to_fullpath, from_fullpath, file, + InvalidXLogRecPtr, + calg, clevel, checksum_version, + /* send pagemap if any */ + use_pagemap, + /* variables for error reporting */ + &err_blknum, &errmsg, &headers); + } + else + { + /* TODO: stop handling errors internally */ + rc = send_pages(conn_arg, to_fullpath, from_fullpath, file, + /* send prev backup START_LSN */ + InvalidXLogRecPtr, + calg, clevel, checksum_version, use_pagemap, + &headers, backup_mode); + } + + /* check for errors */ + if (rc == FILE_MISSING) + { + elog(is_merge ? 
ERROR : LOG, "File not found: \"%s\"", from_fullpath); + file->write_size = FILE_NOT_FOUND; + goto cleanup; + } + + else if (rc == WRITE_FAILED) + elog(ERROR, "Cannot write block %u of \"%s\": %s", + err_blknum, to_fullpath, strerror(errno)); + + else if (rc == PAGE_CORRUPTION) + { + if (errmsg) + elog(ERROR, "Corruption detected in file \"%s\", block %u: %s", + from_fullpath, err_blknum, errmsg); + else + elog(ERROR, "Corruption detected in file \"%s\", block %u", + from_fullpath, err_blknum); + } + /* OPEN_FAILED and READ_FAILED */ + else if (rc == OPEN_FAILED) + { + if (errmsg) + elog(ERROR, "%s", errmsg); + else + elog(ERROR, "Cannot open file \"%s\"", from_fullpath); + } + else if (rc == READ_FAILED) + { + if (errmsg) + elog(ERROR, "%s", errmsg); + else + elog(ERROR, "Cannot read file \"%s\"", from_fullpath); + } + + file->read_size = rc * BLCKSZ; + + /* refresh n_blocks for FULL */ + if (backup_mode == BACKUP_MODE_FULL) + file->n_blocks = file->read_size / BLCKSZ; + + /* Determine that file didn`t changed in case of incremental backup */ + if (backup_mode != BACKUP_MODE_FULL && + file->exists_in_prev && + file->write_size == 0 && + file->n_blocks > 0) + { + file->write_size = BYTES_INVALID; + } + +cleanup: + + /* finish CRC calculation */ + FIN_FILE_CRC32(true, file->crc); + + /* dump page headers */ + write_page_headers(headers, file, hdr_map, is_merge); + + pg_free(errmsg); + pg_free(file->pagemap.bitmap); + pg_free(headers); +} + +/* + * Backup non data file + * We do not apply compression to this file. + * If file exists in previous backup, then compare checksums + * and make a decision about copying or skiping the file. + */ +void +backup_non_data_file(pgFile *file, pgFile *prev_file, + const char *from_fullpath, const char *to_fullpath, + BackupMode backup_mode, time_t parent_backup_time, + bool missing_ok) +{ + /* special treatment for global/pg_control */ + if (file->external_dir_num == 0 && strcmp(file->rel_path, XLOG_CONTROL_FILE) == 0) + { + copy_pgcontrol_file(from_fullpath, FIO_DB_HOST, + to_fullpath, FIO_BACKUP_HOST, file); + return; + } + + /* + * If nonedata file exists in previous backup + * and its mtime is less than parent backup start time ... */ + if (prev_file && file->exists_in_prev && + file->mtime <= parent_backup_time) + { + + file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST, false); + + /* ...and checksum is the same... */ + if (EQ_TRADITIONAL_CRC32(file->crc, prev_file->crc)) + { + file->write_size = BYTES_INVALID; + return; /* ...skip copying file. */ + } + } + + backup_non_data_file_internal(from_fullpath, FIO_DB_HOST, + to_fullpath, file, true); +} + +/* + * Iterate over parent backup chain and lookup given destination file in + * filelist of every chain member starting with FULL backup. + * Apply changed blocks to destination file from every backup in parent chain. 
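+ *
+ * Traversal order, for orientation (matches the use_bitmap logic below):
+ *   - with use_bitmap: walk DEST -> ... -> FULL; every block that is restored
+ *     is recorded in the destination file's page map, so the same block coming
+ *     from an older backup is skipped;
+ *   - without the bitmap: walk FULL -> ... -> DEST and let blocks from newer
+ *     backups simply overwrite what older backups wrote.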
+ */ +size_t +restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, + const char *to_fullpath, bool use_bitmap, PageState *checksum_map, + XLogRecPtr shift_lsn, datapagemap_t *lsn_map, bool use_headers) +{ + size_t total_write_len = 0; + char *in_buf = (char *)pgut_malloc(STDIO_BUFSIZE); + int backup_seq = 0; + + /* + * FULL -> INCR -> DEST + * 2 1 0 + * Restore of backups of older versions cannot be optimized with bitmap + * because of n_blocks + */ + if (use_bitmap) + /* start with dest backup */ + backup_seq = 0; + else + /* start with full backup */ + backup_seq = parray_num(parent_chain) - 1; + +// for (i = parray_num(parent_chain) - 1; i >= 0; i--) +// for (i = 0; i < parray_num(parent_chain); i++) + while (backup_seq >= 0 && backup_seq < parray_num(parent_chain)) + { + char from_root[MAXPGPATH]; + char from_fullpath[MAXPGPATH]; + FILE *in = NULL; + + pgFile **res_file = NULL; + pgFile *tmp_file = NULL; + + /* page headers */ + BackupPageHeader2 *headers = NULL; + + pgBackup *backup = (pgBackup *) parray_get(parent_chain, backup_seq); + + if (use_bitmap) + backup_seq++; + else + backup_seq--; + + /* lookup file in intermediate backup */ + res_file = (pgFile_t **)parray_bsearch(backup->files, dest_file, pgFileCompareRelPathWithExternal); + tmp_file = (res_file) ? *res_file : NULL; + + /* Destination file is not exists yet at this moment */ + if (tmp_file == NULL) + continue; + + /* + * Skip file if it haven't changed since previous backup + * and thus was not backed up. + */ + if (tmp_file->write_size == BYTES_INVALID) + continue; + + /* If file was truncated in intermediate backup, + * it is ok not to truncate it now, because old blocks will be + * overwritten by new blocks from next backup. + */ + if (tmp_file->write_size == 0) + continue; + + /* + * At this point we are sure, that something is going to be copied + * Open source file. + */ + join_path_components(from_root, backup->root_dir, DATABASE_DIR); + join_path_components(from_fullpath, from_root, tmp_file->rel_path); + + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open backup file \"%s\": %s", from_fullpath, + strerror(errno)); + + /* set stdio buffering for input data file */ + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + /* get headers for this file */ + if (use_headers && tmp_file->n_headers > 0) + headers = get_data_file_headers(&(backup->hdr_map), tmp_file, + parse_program_version(backup->program_version), + true); + + if (use_headers && !headers && tmp_file->n_headers > 0) + elog(ERROR, "Failed to get page headers for file \"%s\"", from_fullpath); + + /* + * Restore the file. + * Datafiles are backed up block by block and every block + * have BackupPageHeader with meta information, so we cannot just + * copy the file from backup. + */ + total_write_len += restore_data_file_internal(in, out, tmp_file, + parse_program_version(backup->program_version), + from_fullpath, to_fullpath, dest_file->n_blocks, + use_bitmap ? &(dest_file)->pagemap : NULL, + checksum_map, backup->checksum_version, + /* shiftmap can be used only if backup state precedes the shift */ + backup->stop_lsn <= shift_lsn ? lsn_map : NULL, + headers); + + if (fclose(in) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", from_fullpath, + strerror(errno)); + + pg_free(headers); + +// datapagemap_print_debug(&(dest_file)->pagemap); + } + pg_free(in_buf); + + return total_write_len; +} + +/* Restore block from "in" file to "out" file. 
+ * If "nblocks" is greater than zero, then skip restoring blocks, + * whose position if greater than "nblocks". + * If map is NULL, then page bitmap cannot be used for restore optimization + * Page bitmap optimize restore of incremental chains, consisting of more than one + * backup. We restoring from newest to oldest and page, once restored, marked in map. + * When the same page, but in older backup, encountered, we check the map, if it is + * marked as already restored, then page is skipped. + */ +size_t +restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint32 backup_version, + const char *from_fullpath, const char *to_fullpath, int nblocks, + datapagemap_t *map, PageState *checksum_map, int checksum_version, + datapagemap_t *lsn_map, BackupPageHeader2 *headers) +{ + BlockNumber blknum = 0; + int n_hdr = -1; + size_t write_len = 0; + off_t cur_pos_out = 0; + off_t cur_pos_in = 0; + + /* should not be possible */ + Assert(!(backup_version >= 20400 && file->n_headers <= 0)); + + /* + * We rely on stdio buffering of input and output. + * For buffering to be efficient, we try to minimize the + * number of lseek syscalls, because it forces buffer flush. + * For that, we track current write position in + * output file and issue fseek only when offset of block to be + * written not equal to current write position, which happens + * a lot when blocks from incremental backup are restored, + * but should never happen in case of blocks from FULL backup. + */ + if (fio_fseek(out, cur_pos_out) < 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, to_fullpath, strerror(errno)); + + for (;;) + { + off_t write_pos; + size_t len; + size_t read_len; + DataPage page; + int32 compressed_size = 0; + bool is_compressed = false; + + /* incremental restore vars */ + uint16 page_crc = 0; + XLogRecPtr page_lsn = InvalidXLogRecPtr; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during data file restore"); + + /* newer backups have headers in separate storage */ + if (headers) + { + n_hdr++; + if (n_hdr >= file->n_headers) + break; + + blknum = headers[n_hdr].block; + page_lsn = headers[n_hdr].lsn; + page_crc = headers[n_hdr].checksum; + /* calculate payload size by comparing current and next page positions, + * page header is not included */ + compressed_size = headers[n_hdr+1].pos - headers[n_hdr].pos - sizeof(BackupPageHeader); + + Assert(compressed_size > 0); + Assert(compressed_size <= BLCKSZ); + + read_len = compressed_size + sizeof(BackupPageHeader); + } + else + { + /* We get into this function either when restoring old backup + * or when merging something. Align read_len only when restoring + * or merging old backups. + */ + if (get_page_header(in, from_fullpath, &(page).bph, NULL, false)) + { + cur_pos_in += sizeof(BackupPageHeader); + + /* backward compatibility kludge TODO: remove in 3.0 */ + blknum = page.bph.block; + compressed_size = page.bph.compressed_size; + + /* this has a potential to backfire when retrying merge of old backups, + * so we just forbid the retrying of failed merges between versions >= 2.4.0 and + * version < 2.4.0 + */ + if (backup_version >= 20400) + read_len = compressed_size; + else + /* For some unknown and possibly dump reason I/O operations + * in versions < 2.4.0 were always aligned to 8 bytes. + * Now we have to deal with backward compatibility. 
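+			 * For example (assuming the usual 8-byte MAXALIGN): a page that
+			 * compressed to 13 bytes was stored as MAXALIGN(13) = 16 bytes,
+			 * so the padding has to be read and skipped here as well.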
+ */ + read_len = MAXALIGN(compressed_size); + + } + else + break; + } + + /* + * Backward compatibility kludge: in the good old days + * n_blocks attribute was available only in DELTA backups. + * File truncate in PAGE and PTRACK happened on the fly when + * special value PageIsTruncated is encountered. + * It was inefficient. + * + * Nowadays every backup type has n_blocks, so instead of + * writing and then truncating redundant data, writing + * is not happening in the first place. + * TODO: remove in 3.0.0 + */ + if (compressed_size == PageIsTruncated) + { + /* + * Block header contains information that this block was truncated. + * We need to truncate file to this length. + */ + + elog(VERBOSE, "Truncate file \"%s\" to block %u", to_fullpath, blknum); + + /* To correctly truncate file, we must first flush STDIO buffers */ + if (fio_fflush(out) != 0) + elog(ERROR, "Cannot flush file \"%s\": %s", to_fullpath, strerror(errno)); + + /* Set position to the start of file */ + if (fio_fseek(out, 0) < 0) + elog(ERROR, "Cannot seek to the start of file \"%s\": %s", to_fullpath, strerror(errno)); + + if (fio_ftruncate(out, blknum * BLCKSZ) != 0) + elog(ERROR, "Cannot truncate file \"%s\": %s", to_fullpath, strerror(errno)); + + break; + } + + Assert(compressed_size > 0); + Assert(compressed_size <= BLCKSZ); + + /* no point in writing redundant data */ + if (nblocks > 0 && blknum >= nblocks) + break; + + if (compressed_size > BLCKSZ) + elog(ERROR, "Size of a blknum %i exceed BLCKSZ: %i", blknum, compressed_size); + + /* Incremental restore in LSN mode */ + if (map && lsn_map && datapagemap_is_set(lsn_map, blknum)) + datapagemap_add(map, blknum); + + if (map && checksum_map && checksum_map[blknum].checksum != 0) + { + //elog(INFO, "HDR CRC: %u, MAP CRC: %u", page_crc, checksum_map[blknum].checksum); + /* + * The heart of incremental restore in CHECKSUM mode + * If page in backup has the same checksum and lsn as + * page in backup, then page can be skipped. + */ + if (page_crc == checksum_map[blknum].checksum && + page_lsn == checksum_map[blknum].lsn) + { + datapagemap_add(map, blknum); + } + } + + /* if this page is marked as already restored, then skip it */ + if (map && datapagemap_is_set(map, blknum)) + { + /* Backward compatibility kludge TODO: remove in 3.0 + * go to the next page. + */ + if (!headers && fseek(in, read_len, SEEK_CUR) != 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, from_fullpath, strerror(errno)); + continue; + } + + if (headers && + cur_pos_in != headers[n_hdr].pos) + { + if (fseek(in, headers[n_hdr].pos, SEEK_SET) != 0) + elog(ERROR, "Cannot seek to offset %u of \"%s\": %s", + headers[n_hdr].pos, from_fullpath, strerror(errno)); + + cur_pos_in = headers[n_hdr].pos; + } + + /* read a page from file */ + if (headers) + len = fread(&page, 1, read_len, in); + else + len = fread(page.data, 1, read_len, in); + + if (len != read_len) + elog(ERROR, "Cannot read block %u file \"%s\": %s", + blknum, from_fullpath, strerror(errno)); + + cur_pos_in += read_len; + + /* + * if page size is smaller than BLCKSZ, decompress the page. + * BUGFIX for versions < 2.0.23: if page size is equal to BLCKSZ. + * we have to check, whether it is compressed or not using + * page_may_be_compressed() function. + */ + if (compressed_size != BLCKSZ + || page_may_be_compressed(page.data, file->compress_alg, + backup_version)) + { + is_compressed = true; + } + + /* + * Seek and write the restored page. 
+ * When restoring file from FULL backup, pages are written sequentially, + * so there is no need to issue fseek for every page. + */ + write_pos = blknum * BLCKSZ; + + if (cur_pos_out != write_pos) + { + if (fio_fseek(out, write_pos) < 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, to_fullpath, strerror(errno)); + + cur_pos_out = write_pos; + } + + /* If page is compressed and restore is in remote mode, send compressed + * page to the remote side. + */ + if (is_compressed) + { + ssize_t rc; + rc = fio_fwrite_compressed(out, page.data, compressed_size, file->compress_alg); + + if (!fio_is_remote_file(out) && rc != BLCKSZ) + elog(ERROR, "Cannot write block %u of \"%s\": %s, size: %u", + blknum, to_fullpath, strerror(errno), compressed_size); + } + else + { + if (fio_fwrite(out, page.data, BLCKSZ) != BLCKSZ) + elog(ERROR, "Cannot write block %u of \"%s\": %s", + blknum, to_fullpath, strerror(errno)); + } + + write_len += BLCKSZ; + cur_pos_out += BLCKSZ; /* update current write position */ + + /* Mark page as restored to avoid reading this page when restoring parent backups */ + if (map) + datapagemap_add(map, blknum); + } + + //elog(VERBOSE, "Copied file \"%s\": %lu bytes", from_fullpath, write_len); + return write_len; +} + +/* + * Copy file to backup. + * We do not apply compression to these files, because + * it is either small control file or already compressed cfs file. + */ +void +restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file, + const char *from_fullpath, const char *to_fullpath) +{ + size_t read_len = 0; + char *buf = (char *)pgut_malloc(STDIO_BUFSIZE); /* 64kB buffer */ + + /* copy content */ + for (;;) + { + read_len = 0; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during nonedata file restore"); + + read_len = fread(buf, 1, STDIO_BUFSIZE, in); + + if (ferror(in)) + elog(ERROR, "Cannot read backup file \"%s\": %s", + from_fullpath, strerror(errno)); + + if (read_len > 0) + { + if (fio_fwrite(out, buf, read_len) != read_len) + elog(ERROR, "Cannot write to \"%s\": %s", to_fullpath, + strerror(errno)); + } + + if (feof(in)) + break; + } + + pg_free(buf); + + //elog(VERBOSE, "Copied file \"%s\": %lu bytes", from_fullpath, file->write_size); +} + +size_t +restore_non_data_file(parray *parent_chain, pgBackup *dest_backup, + pgFile *dest_file, FILE *out, const char *to_fullpath, + bool already_exists) +{ + char from_root[MAXPGPATH]; + char from_fullpath[MAXPGPATH]; + FILE *in = NULL; + + pgFile *tmp_file = NULL; + pgBackup *tmp_backup = NULL; + + /* Check if full copy of destination file is available in destination backup */ + if (dest_file->write_size > 0) + { + tmp_file = dest_file; + tmp_backup = dest_backup; + } + else + { + /* + * Iterate over parent chain starting from direct parent of destination + * backup to oldest backup in chain, and look for the first + * full copy of destination file. + * Full copy is latest possible destination file with size equal or + * greater than zero. + */ + tmp_backup = dest_backup->parent_backup_link; + while (tmp_backup) + { + pgFile **res_file = NULL; + + /* lookup file in intermediate backup */ + res_file = (pgFile_t **)parray_bsearch(tmp_backup->files, dest_file, pgFileCompareRelPathWithExternal); + tmp_file = (res_file) ? *res_file : NULL; + + /* + * It should not be possible not to find destination file in intermediate + * backup, without encountering full copy first. 
+ */ + if (!tmp_file) + { + elog(ERROR, "Failed to locate nonedata file \"%s\" in backup %s", + dest_file->rel_path, base36enc(tmp_backup->start_time)); + continue; + } + + /* Full copy is found and it is null sized, nothing to do here */ + if (tmp_file->write_size == 0) + { + /* In case of incremental restore truncate file just to be safe */ + if (already_exists && fio_ftruncate(out, 0)) + elog(ERROR, "Cannot truncate file \"%s\": %s", + to_fullpath, strerror(errno)); + return 0; + } + + /* Full copy is found */ + if (tmp_file->write_size > 0) + break; + + tmp_backup = tmp_backup->parent_backup_link; + } + } + + /* sanity */ + if (!tmp_backup) + elog(ERROR, "Failed to locate a backup containing full copy of nonedata file \"%s\"", + to_fullpath); + + if (!tmp_file) + elog(ERROR, "Failed to locate a full copy of nonedata file \"%s\"", to_fullpath); + + if (tmp_file->write_size <= 0) + elog(ERROR, "Full copy of nonedata file has invalid size: %li. " + "Metadata corruption in backup %s in file: \"%s\"", + tmp_file->write_size, base36enc(tmp_backup->start_time), + to_fullpath); + + /* incremental restore */ + if (already_exists) + { + /* compare checksums of already existing file and backup file */ + pg_crc32 file_crc = fio_get_crc32(to_fullpath, FIO_DB_HOST, false); + + if (file_crc == tmp_file->crc) + { + elog(VERBOSE, "Already existing nonedata file \"%s\" has the same checksum, skip restore", + to_fullpath); + return 0; + } + + /* Checksum mismatch, truncate file and overwrite it */ + if (fio_ftruncate(out, 0)) + elog(ERROR, "Cannot truncate file \"%s\": %s", + to_fullpath, strerror(errno)); + } + + if (tmp_file->external_dir_num == 0) + join_path_components(from_root, tmp_backup->root_dir, DATABASE_DIR); + else + { + char external_prefix[MAXPGPATH]; + + join_path_components(external_prefix, tmp_backup->root_dir, EXTERNAL_DIR); + makeExternalDirPathByNum(from_root, external_prefix, tmp_file->external_dir_num); + } + + join_path_components(from_fullpath, from_root, dest_file->rel_path); + + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open backup file \"%s\": %s", from_fullpath, + strerror(errno)); + + /* disable stdio buffering for nonedata files */ + setvbuf(in, NULL, _IONBF, BUFSIZ); + + /* do actual work */ + restore_non_data_file_internal(in, out, tmp_file, from_fullpath, to_fullpath); + + if (fclose(in) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", from_fullpath, + strerror(errno)); + + return tmp_file->write_size; +} + +/* + * Copy file to backup. + * We do not apply compression to these files, because + * it is either small control file or already compressed cfs file. 
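+ * The file's CRC32 is computed while copying and stored in pgFile, so a later
+ * backup can skip the file when its checksum is unchanged (see the comparison
+ * in backup_non_data_file() above).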
+ * TODO: optimize remote copying + */ +void +backup_non_data_file_internal(const char *from_fullpath, + fio_location from_location, + const char *to_fullpath, pgFile *file, + bool missing_ok) +{ + FILE *in = NULL; + FILE *out = NULL; + ssize_t read_len = 0; + char *buf = NULL; + + INIT_FILE_CRC32(true, file->crc); + + /* reset size summary */ + file->read_size = 0; + file->write_size = 0; + file->uncompressed_size = 0; + + /* open backup file for write */ + out = fopen(to_fullpath, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open destination file \"%s\": %s", + to_fullpath, strerror(errno)); + + /* update file permission */ + if (chmod(to_fullpath, file->mode) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); + + /* backup remote file */ + if (fio_is_remote(FIO_DB_HOST)) + { + char *errmsg = NULL; + int rc = fio_send_file(from_fullpath, to_fullpath, out, file, &errmsg); + + /* handle errors */ + if (rc == FILE_MISSING) + { + /* maybe deleted, it's not error in case of backup */ + if (missing_ok) + { + elog(LOG, "File \"%s\" is not found", from_fullpath); + file->write_size = FILE_NOT_FOUND; + goto cleanup; + } + else + elog(ERROR, "File \"%s\" is not found", from_fullpath); + } + else if (rc == WRITE_FAILED) + elog(ERROR, "Cannot write to \"%s\": %s", to_fullpath, strerror(errno)); + else if (rc != SEND_OK) + { + if (errmsg) + elog(ERROR, "%s", errmsg); + else + elog(ERROR, "Cannot access remote file \"%s\"", from_fullpath); + } + + pg_free(errmsg); + } + /* backup local file */ + else + { + /* open source file for read */ + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + { + /* maybe deleted, it's not error in case of backup */ + if (errno == ENOENT) + { + if (missing_ok) + { + elog(LOG, "File \"%s\" is not found", from_fullpath); + file->write_size = FILE_NOT_FOUND; + goto cleanup; + } + else + elog(ERROR, "File \"%s\" is not found", from_fullpath); + } + + elog(ERROR, "Cannot open file \"%s\": %s", from_fullpath, + strerror(errno)); + } + + /* disable stdio buffering for local input/output files to avoid triple buffering */ + setvbuf(in, NULL, _IONBF, BUFSIZ); + setvbuf(out, NULL, _IONBF, BUFSIZ); + + /* allocate 64kB buffer */ + buf = (char *)pgut_malloc(CHUNK_SIZE); + + /* copy content and calc CRC */ + for (;;) + { + read_len = fread(buf, 1, CHUNK_SIZE, in); + + if (ferror(in)) + elog(ERROR, "Cannot read from file \"%s\": %s", + from_fullpath, strerror(errno)); + + if (read_len > 0) + { + if (fwrite(buf, 1, read_len, out) != read_len) + elog(ERROR, "Cannot write to file \"%s\": %s", to_fullpath, + strerror(errno)); + + /* update CRC */ + COMP_FILE_CRC32(true, file->crc, buf, read_len); + file->read_size += read_len; + } + + if (feof(in)) + break; + } + } + + file->write_size = (int64) file->read_size; + + if (file->write_size > 0) + file->uncompressed_size = file->write_size; + +cleanup: + /* finish CRC calculation and store into pgFile */ + FIN_FILE_CRC32(true, file->crc); + + if (in && fclose(in)) + elog(ERROR, "Cannot close the file \"%s\": %s", from_fullpath, strerror(errno)); + + if (out && fclose(out)) + elog(ERROR, "Cannot close the file \"%s\": %s", to_fullpath, strerror(errno)); + + pg_free(buf); +} + +/* + * Create empty file, used for partial restore + */ +bool +create_empty_file(fio_location from_location, const char *to_root, + fio_location to_location, pgFile *file) +{ + char to_path[MAXPGPATH]; + FILE *out; + + /* open file for write */ + join_path_components(to_path, to_root, file->rel_path); + out = 
fio_fopen(to_path, PG_BINARY_W, to_location); + + if (out == NULL) + elog(ERROR, "Cannot open destination file \"%s\": %s", + to_path, strerror(errno)); + + /* update file permission */ + if (fio_chmod(to_path, file->mode, to_location) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_path, + strerror(errno)); + + if (fio_fclose(out)) + elog(ERROR, "Cannot close \"%s\": %s", to_path, strerror(errno)); + + return true; +} + +/* + * Validate given page. + * This function is expected to be executed multiple times, + * so avoid using elog within it. + * lsn from page is assigned to page_lsn pointer. + * TODO: switch to enum for return codes. + */ +int +validate_one_page(Page page, BlockNumber absolute_blkno, + XLogRecPtr stop_lsn, PageState *page_st, + uint32 checksum_version) +{ + page_st->lsn = InvalidXLogRecPtr; + page_st->checksum = 0; + + /* new level of paranoia */ + if (page == NULL) + return PAGE_IS_NOT_FOUND; + + /* check that page header is ok */ + if (!parse_page(page, &(page_st)->lsn)) + { + int i; + /* Check if the page is zeroed. */ + for (i = 0; i < BLCKSZ && page[i] == 0; i++); + + /* Page is zeroed. No need to verify checksums */ + if (i == BLCKSZ) + return PAGE_IS_ZEROED; + + /* Page does not looking good */ + return PAGE_HEADER_IS_INVALID; + } + + /* Verify checksum */ + page_st->checksum = 0;//pg_checksum_page(page, absolute_blkno); + + if (checksum_version) + { + /* Checksums are enabled, so check them. */ + if (page_st->checksum != ((PageHeader) page)->pd_checksum) + return PAGE_CHECKSUM_MISMATCH; + } + + /* At this point page header is sane, if checksums are enabled - the`re ok. + * Check that page is not from future. + * Note, this check should be used only by validate command. + */ + if (stop_lsn > 0) + { + /* Get lsn from page header. Ensure that page is from our time. */ + if (page_st->lsn > stop_lsn) + return PAGE_LSN_FROM_FUTURE; + } + + return PAGE_IS_VALID; +} + +/* + * Valiate pages of datafile in PGDATA one by one. + * + * returns true if the file is valid + * also returns true if the file was not found + */ +bool +check_data_file(ConnectionArgs *arguments, pgFile *file, + const char *from_fullpath, uint32 checksum_version) +{ + FILE *in; + BlockNumber blknum = 0; + BlockNumber nblocks = 0; + int page_state; + char curr_page[BLCKSZ]; + bool is_valid = true; + + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + { + /* + * If file is not found, this is not en error. + * It could have been deleted by concurrent postgres transaction. + */ + if (errno == ENOENT) + { + elog(LOG, "File \"%s\" is not found", from_fullpath); + return true; + } + + elog(WARNING, "Cannot open file \"%s\": %s", + from_fullpath, strerror(errno)); + return false; + } + + if (file->size % BLCKSZ != 0) + elog(WARNING, "File: \"%s\", invalid file size %zu", from_fullpath, file->size); + + /* + * Compute expected number of blocks in the file. + * NOTE This is a normal situation, if the file size has changed + * since the moment we computed it. 
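+     * For example, with the default 8kB BLCKSZ a 81920-byte segment gives
+     * nblocks = 10; if the file has grown since the size was recorded, the
+     * extra blocks are simply not checked in this pass.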
+ */ + nblocks = file->size/BLCKSZ; + + for (blknum = 0; blknum < nblocks; blknum++) + { + PageState page_st; + page_state = prepare_page(NULL, file, InvalidXLogRecPtr, + blknum, in, BACKUP_MODE_FULL, + curr_page, false, checksum_version, + from_fullpath, &page_st); + + if (page_state == PageIsTruncated) + break; + + if (page_state == PageIsCorrupted) + { + /* Page is corrupted, no need to elog about it, + * prepare_page() already done that + */ + is_valid = false; + continue; + } + } + + fclose(in); + return is_valid; +} + +/* Valiate pages of datafile in backup one by one */ +bool +validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version, HeaderMap *hdr_map) +{ + size_t read_len = 0; + bool is_valid = true; + FILE *in; + pg_crc32 crc; + bool use_crc32c = backup_version <= 20021 || backup_version >= 20025; + BackupPageHeader2 *headers = NULL; + int n_hdr = -1; + off_t cur_pos_in = 0; + + //elog(VERBOSE, "Validate relation blocks for file \"%s\"", fullpath); + + /* should not be possible */ + Assert(!(backup_version >= 20400 && file->n_headers <= 0)); + + in = fopen(fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open file \"%s\": %s", + fullpath, strerror(errno)); + + headers = get_data_file_headers(hdr_map, file, backup_version, false); + + if (!headers && file->n_headers > 0) + { + elog(WARNING, "Cannot get page headers for file \"%s\"", fullpath); + return false; + } + + /* calc CRC of backup file */ + INIT_FILE_CRC32(use_crc32c, crc); + + /* read and validate pages one by one */ + while (true) + { + int rc = 0; + size_t len = 0; + DataPage compressed_page; /* used as read buffer */ + int compressed_size = 0; + DataPage page; + BlockNumber blknum = 0; + PageState page_st; + + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during data file validation"); + + /* newer backups have page headers in separate storage */ + if (headers) + { + n_hdr++; + if (n_hdr >= file->n_headers) + break; + + blknum = headers[n_hdr].block; + /* calculate payload size by comparing current and next page positions, + * page header is not included. 
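+			 * For example, with the two-field (8-byte) BackupPageHeader,
+			 * consecutive positions 0 and 4121 give a payload of
+			 * 4121 - 0 - 8 = 4113 bytes.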
+ */ + compressed_size = headers[n_hdr+1].pos - headers[n_hdr].pos - sizeof(BackupPageHeader); + + Assert(compressed_size > 0); + Assert(compressed_size <= BLCKSZ); + + read_len = sizeof(BackupPageHeader) + compressed_size; + + if (cur_pos_in != headers[n_hdr].pos) + { + if (fio_fseek(in, headers[n_hdr].pos) < 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, fullpath, strerror(errno)); + else + elog(INFO, "Seek to %u", headers[n_hdr].pos); + + cur_pos_in = headers[n_hdr].pos; + } + } + /* old backups rely on header located directly in data file */ + else + { + if (get_page_header(in, fullpath, &(compressed_page).bph, &crc, use_crc32c)) + { + /* Backward compatibility kludge, TODO: remove in 3.0 + * for some reason we padded compressed pages in old versions + */ + blknum = compressed_page.bph.block; + compressed_size = compressed_page.bph.compressed_size; + read_len = MAXALIGN(compressed_size); + } + else + break; + } + + /* backward compatibility kludge TODO: remove in 3.0 */ + if (compressed_size == PageIsTruncated) + { + elog(INFO, "Block %u of \"%s\" is truncated", + blknum, fullpath); + continue; + } + + Assert(compressed_size <= BLCKSZ); + Assert(compressed_size > 0); + + if (headers) + len = fread(&compressed_page, 1, read_len, in); + else + len = fread(compressed_page.data, 1, read_len, in); + + if (len != read_len) + { + elog(WARNING, "Cannot read block %u file \"%s\": %s", + blknum, fullpath, strerror(errno)); + return false; + } + + /* update current position */ + cur_pos_in += read_len; + + if (headers) + COMP_FILE_CRC32(use_crc32c, crc, &compressed_page, read_len); + else + COMP_FILE_CRC32(use_crc32c, crc, compressed_page.data, read_len); + + if (compressed_size != BLCKSZ + || page_may_be_compressed(compressed_page.data, file->compress_alg, + backup_version)) + { + int32 uncompressed_size = 0; + const char *errormsg = NULL; + + uncompressed_size = do_decompress(page.data, BLCKSZ, + compressed_page.data, + compressed_size, + file->compress_alg, + &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + { + elog(WARNING, "An error occured during decompressing block %u of file \"%s\": %s", + blknum, fullpath, errormsg); + return false; + } + + if (uncompressed_size != BLCKSZ) + { + if (compressed_size == BLCKSZ) + { + is_valid = false; + continue; + } + elog(WARNING, "Page %u of file \"%s\" uncompressed to %d bytes. != BLCKSZ", + blknum, fullpath, uncompressed_size); + return false; + } + + rc = validate_one_page(page.data, + file->segno * RELSEG_SIZE + blknum, + stop_lsn, &page_st, checksum_version); + } + else + rc = validate_one_page(compressed_page.data, + file->segno * RELSEG_SIZE + blknum, + stop_lsn, &page_st, checksum_version); + + switch (rc) + { + case PAGE_IS_NOT_FOUND: + elog(LOG, "File \"%s\", block %u, page is NULL", file->rel_path, blknum); + break; + case PAGE_IS_ZEROED: + elog(LOG, "File: %s blknum %u, empty zeroed page", file->rel_path, blknum); + break; + case PAGE_HEADER_IS_INVALID: + elog(WARNING, "Page header is looking insane: %s, block %i", file->rel_path, blknum); + is_valid = false; + break; + case PAGE_CHECKSUM_MISMATCH: + elog(WARNING, "File: %s blknum %u have wrong checksum: %u", file->rel_path, blknum, page_st.checksum); + is_valid = false; + break; + case PAGE_LSN_FROM_FUTURE: + elog(WARNING, "File: %s, block %u, checksum is %s. " + "Page is from future: pageLSN %X/%X stopLSN %X/%X", + file->rel_path, blknum, + checksum_version ? 
"correct" : "not enabled", + (uint32) (page_st.lsn >> 32), (uint32) page_st.lsn, + (uint32) (stop_lsn >> 32), (uint32) stop_lsn); + break; + } + } + + FIN_FILE_CRC32(use_crc32c, crc); + fclose(in); + + if (crc != file->crc) + { + elog(WARNING, "Invalid CRC of backup file \"%s\": %X. Expected %X", + fullpath, crc, file->crc); + //is_valid = false; + } + + pg_free(headers); + + return is_valid; +} + +/* read local data file and construct map with block checksums */ +PageState* +get_checksum_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr dest_stop_lsn, BlockNumber segmentno) +{ + PageState *checksum_map = NULL; + FILE *in = NULL; + BlockNumber blknum = 0; + char read_buffer[BLCKSZ]; + char in_buf[STDIO_BUFSIZE]; + + /* open file */ + in = fopen(fullpath, "r+b"); + if (!in) + elog(ERROR, "Cannot open source file \"%s\": %s", fullpath, strerror(errno)); + + /* truncate up to blocks */ + if (ftruncate(fileno(in), n_blocks * BLCKSZ) != 0) + elog(ERROR, "Cannot truncate file to blknum %u \"%s\": %s", + n_blocks, fullpath, strerror(errno)); + + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + /* initialize array of checksums */ + checksum_map = (PageState *)pgut_malloc(n_blocks * sizeof(PageState)); + memset(checksum_map, 0, n_blocks * sizeof(PageState)); + + for (blknum = 0; blknum < n_blocks; blknum++) + { + size_t read_len = fread(read_buffer, 1, BLCKSZ, in); + PageState page_st; + + /* report error */ + if (ferror(in)) + elog(ERROR, "Cannot read block %u of \"%s\": %s", + blknum, fullpath, strerror(errno)); + + if (read_len == BLCKSZ) + { + int rc = validate_one_page(read_buffer, segmentno + blknum, + dest_stop_lsn, &page_st, + checksum_version); + + if (rc == PAGE_IS_VALID) + { +// if (checksum_version) +// checksum_map[blknum].checksum = ((PageHeader) read_buffer)->pd_checksum; +// else +// checksum_map[blknum].checksum = page_st.checksum; + checksum_map[blknum].checksum = page_st.checksum; + checksum_map[blknum].lsn = page_st.lsn; + } + } + else + elog(ERROR, "Failed to read blknum %u from file \"%s\"", blknum, fullpath); + + if (feof(in)) + break; + + if (interrupted) + elog(ERROR, "Interrupted during page reading"); + } + + if (in) + fclose(in); + + return checksum_map; +} + +/* return bitmap of valid blocks, bitmap is empty, then NULL is returned */ +datapagemap_t * +get_lsn_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr shift_lsn, BlockNumber segmentno) +{ + FILE *in = NULL; + BlockNumber blknum = 0; + char read_buffer[BLCKSZ]; + char in_buf[STDIO_BUFSIZE]; + datapagemap_t *lsn_map = NULL; + + Assert(shift_lsn > 0); + + /* open file */ + in = fopen(fullpath, "r+b"); + if (!in) + elog(ERROR, "Cannot open source file \"%s\": %s", fullpath, strerror(errno)); + + /* truncate up to blocks */ + if (ftruncate(fileno(in), n_blocks * BLCKSZ) != 0) + elog(ERROR, "Cannot truncate file to blknum %u \"%s\": %s", + n_blocks, fullpath, strerror(errno)); + + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + lsn_map = (datapagemap_t *)pgut_malloc(sizeof(datapagemap_t)); + memset(lsn_map, 0, sizeof(datapagemap_t)); + + for (blknum = 0; blknum < n_blocks; blknum++) + { + size_t read_len = fread(read_buffer, 1, BLCKSZ, in); + PageState page_st; + + /* report error */ + if (ferror(in)) + elog(ERROR, "Cannot read block %u of \"%s\": %s", + blknum, fullpath, strerror(errno)); + + if (read_len == BLCKSZ) + { + int rc = validate_one_page(read_buffer, segmentno + blknum, + shift_lsn, &page_st, checksum_version); + + if (rc == PAGE_IS_VALID) + 
datapagemap_add(lsn_map, blknum); + } + else + elog(ERROR, "Cannot read block %u from file \"%s\": %s", + blknum, fullpath, strerror(errno)); + + if (feof(in)) + break; + + if (interrupted) + elog(ERROR, "Interrupted during page reading"); + } + + if (in) + fclose(in); + + if (lsn_map->bitmapsize == 0) + { + pg_free(lsn_map); + lsn_map = NULL; + } + + return lsn_map; +} + +/* Every page in data file contains BackupPageHeader, extract it */ +bool +get_page_header(FILE *in, const char *fullpath, BackupPageHeader* bph, + pg_crc32 *crc, bool use_crc32c) +{ + /* read BackupPageHeader */ + size_t read_len = fread(bph, 1, sizeof(BackupPageHeader), in); + + if (ferror(in)) + elog(ERROR, "Cannot read file \"%s\": %s", + fullpath, strerror(errno)); + + if (read_len != sizeof(BackupPageHeader)) + { + if (read_len == 0 && feof(in)) + return false; /* EOF found */ + else if (read_len != 0 && feof(in)) + elog(ERROR, + "Odd size page found at offset %lu of \"%s\"", + ftell(in), fullpath); + else + elog(ERROR, "Cannot read header at offset %lu of \"%s\": %s", + ftell(in), fullpath, strerror(errno)); + } + + /* In older versions < 2.4.0, when crc for file was calculated, header was + * not included in crc calculations. Now it is. And now we have + * the problem of backward compatibility for backups of old versions + */ + if (crc) + COMP_FILE_CRC32(use_crc32c, *crc, bph, read_len); + + if (bph->block == 0 && bph->compressed_size == 0) + elog(ERROR, "Empty block in file \"%s\"", fullpath); + + Assert(bph->compressed_size != 0); + return true; +} + +/* Open local backup file for writing, set permissions and buffering */ +FILE* +open_local_file_rw(const char *to_fullpath, char **out_buf, uint32 buf_size) +{ + FILE *out = NULL; + /* open backup file for write */ + out = fopen(to_fullpath, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open backup file \"%s\": %s", + to_fullpath, strerror(errno)); + + /* update file permission */ + if (chmod(to_fullpath, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); + + /* enable stdio buffering for output file */ + *out_buf = (char *)pgut_malloc(buf_size); + setvbuf(out, *out_buf, _IOFBF, buf_size); + + return out; +} + +/* backup local file */ +int +send_pages(ConnectionArgs* conn_arg, const char *to_fullpath, const char *from_fullpath, + pgFile *file, XLogRecPtr prev_backup_start_lsn, CompressAlg calg, int clevel, + uint32 checksum_version, bool use_pagemap, BackupPageHeader2 **headers, + BackupMode backup_mode) +{ + FILE *in = NULL; + FILE *out = NULL; + int hdr_num = -1; + off_t cur_pos_out = 0; + char curr_page[BLCKSZ]; + int n_blocks_read = 0; + BlockNumber blknum = 0; + datapagemap_iterator_t *iter = NULL; + int compressed_size = 0; + + /* stdio buffers */ + char *in_buf = NULL; + char *out_buf = NULL; + + /* open source file for read */ + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + { + /* + * If file is not found, this is not en error. + * It could have been deleted by concurrent postgres transaction. + */ + if (errno == ENOENT) + return FILE_MISSING; + + elog(ERROR, "Cannot open file \"%s\": %s", from_fullpath, strerror(errno)); + } + + /* + * Enable stdio buffering for local input file, + * unless the pagemap is involved, which + * imply a lot of random access. 
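+     * (Each seek forces the stdio buffer to be refilled, so with a sparse
+     * pagemap unbuffered reads are cheaper than a large read-ahead buffer.)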
+ */ + + if (use_pagemap) + { + iter = datapagemap_iterate(&file->pagemap); + datapagemap_next(iter, &blknum); /* set first block */ + + setvbuf(in, NULL, _IONBF, BUFSIZ); + } + else + { + in_buf = (char *)pgut_malloc(STDIO_BUFSIZE); + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + } + + while (blknum < file->n_blocks) + { + PageState page_st; + int rc = prepare_page(conn_arg, file, prev_backup_start_lsn, + blknum, in, backup_mode, curr_page, + true, checksum_version, + from_fullpath, &page_st); + if (rc == PageIsTruncated) + break; + + else if (rc == PageIsOk) + { + /* lazily open backup file (useful for s3) */ + if (!out) + out = open_local_file_rw(to_fullpath, &out_buf, STDIO_BUFSIZE); + + hdr_num++; + + if (!*headers) + *headers = (BackupPageHeader2 *) pgut_malloc(sizeof(BackupPageHeader2)); + else + *headers = (BackupPageHeader2 *) pgut_realloc(*headers, (hdr_num+1) * sizeof(BackupPageHeader2)); + + (*headers)[hdr_num].block = blknum; + (*headers)[hdr_num].pos = cur_pos_out; + (*headers)[hdr_num].lsn = page_st.lsn; + (*headers)[hdr_num].checksum = page_st.checksum; + + compressed_size = compress_and_backup_page(file, blknum, in, out, &(file->crc), + rc, curr_page, calg, clevel, + from_fullpath, to_fullpath); + cur_pos_out += compressed_size + sizeof(BackupPageHeader); + } + + n_blocks_read++; + + /* next block */ + if (use_pagemap) + { + /* exit if pagemap is exhausted */ + if (!datapagemap_next(iter, &blknum)) + break; + } + else + blknum++; + } + + /* + * Add dummy header, so we can later extract the length of last header + * as difference between their offsets. + */ + if (*headers) + { + file->n_headers = hdr_num +1; + *headers = (BackupPageHeader2 *) pgut_realloc(*headers, (hdr_num+2) * sizeof(BackupPageHeader2)); + (*headers)[hdr_num+1].pos = cur_pos_out; + } + + /* cleanup */ + if (in && fclose(in)) + elog(ERROR, "Cannot close the source file \"%s\": %s", + to_fullpath, strerror(errno)); + + /* close local output file */ + if (out && fclose(out)) + elog(ERROR, "Cannot close the backup file \"%s\": %s", + to_fullpath, strerror(errno)); + + pg_free(iter); + pg_free(in_buf); + pg_free(out_buf); + + return n_blocks_read; +} + +/* + * Attempt to open header file, read content and return as + * array of headers. + * TODO: some access optimizations would be great here: + * less fseeks, buffering, descriptor sharing, etc. + */ +BackupPageHeader2* +get_data_file_headers(HeaderMap *hdr_map, pgFile *file, uint32 backup_version, bool strict) +{ + bool success = false; + FILE *in = NULL; + size_t read_len = 0; + pg_crc32 hdr_crc; + BackupPageHeader2 *headers = NULL; + /* header decompression */ + int z_len = 0; + char *zheaders = NULL; + const char *errormsg = NULL; + + if (backup_version < 20400) + return NULL; + + if (file->n_headers <= 0) + return NULL; + + /* TODO: consider to make this descriptor thread-specific */ + in = fopen(hdr_map->path, PG_BINARY_R); + + if (!in) + { + elog(strict ? ERROR : WARNING, "Cannot open header file \"%s\": %s", hdr_map->path, strerror(errno)); + return NULL; + } + /* disable buffering for header file */ + setvbuf(in, NULL, _IONBF, BUFSIZ); + + if (fseek(in, file->hdr_off, SEEK_SET)) + { + elog(strict ? ERROR : WARNING, "Cannot seek to position %lu in page header map \"%s\": %s", + file->hdr_off, hdr_map->path, strerror(errno)); + goto cleanup; + } + + /* + * The actual number of headers in header file is n+1, last one is a dummy header, + * used for calculation of read_len for actual last header. 
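+     * For example, a file with n_headers = 3 stores 4 BackupPageHeader2 entries;
+     * the payload length of entry i is later recovered as
+     * headers[i+1].pos - headers[i].pos - sizeof(BackupPageHeader).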
+ */ + read_len = (file->n_headers+1) * sizeof(BackupPageHeader2); + + /* allocate memory for compressed headers */ + zheaders = (char *)pgut_malloc(file->hdr_size); + memset(zheaders, 0, file->hdr_size); + + if (fread(zheaders, 1, file->hdr_size, in) != file->hdr_size) + { + elog(strict ? ERROR : WARNING, "Cannot read header file at offset: %li len: %i \"%s\": %s", + file->hdr_off, file->hdr_size, hdr_map->path, strerror(errno)); + goto cleanup; + } + + /* allocate memory for uncompressed headers */ + headers = (BackupPageHeader2 *)pgut_malloc(read_len); + memset(headers, 0, read_len); + + z_len = do_decompress(headers, read_len, zheaders, file->hdr_size, + ZLIB_COMPRESS, &errormsg); + if (z_len <= 0) + { + if (errormsg) + elog(strict ? ERROR : WARNING, "An error occured during metadata decompression for file \"%s\": %s", + file->rel_path, errormsg); + else + elog(strict ? ERROR : WARNING, "An error occured during metadata decompression for file \"%s\": %i", + file->rel_path, z_len); + + goto cleanup; + } + + /* validate checksum */ + INIT_FILE_CRC32(true, hdr_crc); + COMP_FILE_CRC32(true, hdr_crc, headers, read_len); + FIN_FILE_CRC32(true, hdr_crc); + + if (hdr_crc != file->hdr_crc) + { + elog(strict ? ERROR : WARNING, "Header map for file \"%s\" crc mismatch \"%s\" " + "offset: %lu, len: %lu, current: %u, expected: %u", + file->rel_path, hdr_map->path, file->hdr_off, read_len, hdr_crc, file->hdr_crc); + goto cleanup; + } + + success = true; + +cleanup: + + pg_free(zheaders); + if (in && fclose(in)) + elog(ERROR, "Cannot close file \"%s\"", hdr_map->path); + + if (!success) + { + pg_free(headers); + headers = NULL; + } + + return headers; +} + +/* write headers of all blocks belonging to file to header map and + * save its offset and size */ +void +write_page_headers(BackupPageHeader2 *headers, pgFile *file, HeaderMap *hdr_map, bool is_merge) +{ + size_t read_len = 0; + char *map_path = NULL; + /* header compression */ + int z_len = 0; + char *zheaders = NULL; + const char *errormsg = NULL; + + if (file->n_headers <= 0) + return; + + /* when running merge we must write headers into temp map */ + map_path = (is_merge) ? hdr_map->path_tmp : hdr_map->path; + read_len = (file->n_headers+1) * sizeof(BackupPageHeader2); + + /* calculate checksums */ + INIT_FILE_CRC32(true, file->hdr_crc); + COMP_FILE_CRC32(true, file->hdr_crc, headers, read_len); + FIN_FILE_CRC32(true, file->hdr_crc); + + zheaders = (char *)pgut_malloc(read_len*2); + memset(zheaders, 0, read_len*2); + + /* compress headers */ + z_len = do_compress(zheaders, read_len*2, headers, + read_len, ZLIB_COMPRESS, 1, &errormsg); + + /* writing to header map must be serialized */ + pthread_lock(&(hdr_map->mutex)); /* what if we crash while trying to obtain mutex? 
*/ + + if (!hdr_map->fp) + { + elog(LOG, "Creating page header map \"%s\"", map_path); + + hdr_map->fp = fopen(map_path, PG_BINARY_W); + if (hdr_map->fp == NULL) + elog(ERROR, "Cannot open header file \"%s\": %s", + map_path, strerror(errno)); + + /* enable buffering for header file */ + hdr_map->buf = (char *)pgut_malloc(LARGE_CHUNK_SIZE); + setvbuf(hdr_map->fp, hdr_map->buf, _IOFBF, LARGE_CHUNK_SIZE); + + /* update file permission */ + if (chmod(map_path, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", map_path, + strerror(errno)); + + file->hdr_off = 0; + } + else + file->hdr_off = hdr_map->offset; + + if (z_len <= 0) + { + if (errormsg) + elog(ERROR, "An error occured during compressing metadata for file \"%s\": %s", + file->rel_path, errormsg); + else + elog(ERROR, "An error occured during compressing metadata for file \"%s\": %i", + file->rel_path, z_len); + } + + //elog(VERBOSE, "Writing headers for file \"%s\" offset: %li, len: %i, crc: %u", + // file->rel_path, file->hdr_off, z_len, file->hdr_crc); + + if (fwrite(zheaders, 1, z_len, hdr_map->fp) != z_len) + elog(ERROR, "Cannot write to file \"%s\": %s", map_path, strerror(errno)); + + file->hdr_size = z_len; /* save the length of compressed headers */ + hdr_map->offset += z_len; /* update current offset in map */ + + /* End critical section */ + pthread_mutex_unlock(&(hdr_map->mutex)); + + pg_free(zheaders); +} + +void +init_header_map(pgBackup *backup) +{ + backup->hdr_map.fp = NULL; + backup->hdr_map.buf = NULL; + join_path_components(backup->hdr_map.path, backup->root_dir, HEADER_MAP); + join_path_components(backup->hdr_map.path_tmp, backup->root_dir, HEADER_MAP_TMP); + backup->hdr_map.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; +} + +void +cleanup_header_map(HeaderMap *hdr_map) +{ + /* cleanup descriptor */ + if (hdr_map->fp && fclose(hdr_map->fp)) + elog(ERROR, "Cannot close file \"%s\"", hdr_map->path); + hdr_map->fp = NULL; + hdr_map->offset = 0; + pg_free(hdr_map->buf); + hdr_map->buf = NULL; +} diff --git a/src/bin/pg_probackup/delete.cpp b/src/bin/pg_probackup/delete.cpp new file mode 100644 index 000000000..1d4327687 --- /dev/null +++ b/src/bin/pg_probackup/delete.cpp @@ -0,0 +1,1115 @@ +/*------------------------------------------------------------------------- + * + * delete.c: delete backup files. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include +#include +#include +#include "common/fe_memutils.h" + +static void delete_walfiles_in_tli(XLogRecPtr keep_lsn, timelineInfo *tli, + uint32 xlog_seg_size, bool dry_run); +static void do_retention_internal(parray *backup_list, parray *to_keep_list, + parray *to_purge_list); +static void do_retention_merge(parray *backup_list, parray *to_keep_list, + parray *to_purge_list); +static void do_retention_purge(parray *to_keep_list, parray *to_purge_list); +static void do_retention_wal(bool dry_run); + +// TODO: more useful messages for dry run. 
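+
+/*
+ * Rough call flow of do_retention() below, for orientation:
+ *   catalog_get_backup_list()
+ *     -> do_retention_internal()  (populate the keep and purge lists)
+ *     -> do_retention_merge()     (merge what can be merged)
+ *     -> do_retention_purge()     (delete expired backups)
+ *     -> do_retention_wal()       (clean up WAL segments)
+ */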
+static bool backup_deleted = false;   /* At least one backup was deleted */
+static bool backup_merged = false;    /* At least one merge was enacted */
+static bool wal_deleted = false;      /* At least one WAL segment was deleted */
+
+void
+do_delete(time_t backup_id)
+{
+    int i;
+    parray *backup_list,
+           *delete_list;
+    pgBackup *target_backup = NULL;
+    size_t size_to_delete = 0;
+    char size_to_delete_pretty[20];
+
+    /* Get complete list of backups */
+    backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID);
+
+    delete_list = parray_new();
+
+    /* Find the backup to be deleted and build the array of incremental backups to delete with it */
+    for (i = 0; i < parray_num(backup_list); i++)
+    {
+        pgBackup *backup = (pgBackup *) parray_get(backup_list, i);
+
+        if (backup->start_time == backup_id)
+        {
+            target_backup = backup;
+            break;
+        }
+    }
+
+    /* sanity */
+    if (!target_backup)
+        elog(ERROR, "Failed to find backup %s, cannot delete", base36enc(backup_id));
+
+    /* form delete list */
+    for (i = 0; i < parray_num(backup_list); i++)
+    {
+        pgBackup *backup = (pgBackup *) parray_get(backup_list, i);
+
+        /* check if backup is a descendant of the delete target */
+        if (is_parent(target_backup->start_time, backup, true))
+        {
+            parray_append(delete_list, backup);
+
+            elog(LOG, "Backup %s %s be deleted",
+                 base36enc(backup->start_time), dry_run? "can":"will");
+
+            size_to_delete += backup->data_bytes;
+            if (backup->stream)
+                size_to_delete += backup->wal_bytes;
+        }
+    }
+
+    /* Report the resident size to delete */
+    if (size_to_delete >= 0)
+    {
+        pretty_size(size_to_delete, size_to_delete_pretty, lengthof(size_to_delete_pretty));
+        elog(INFO, "Resident data size to free by delete of backup %s : %s",
+             base36enc(target_backup->start_time), size_to_delete_pretty);
+    }
+
+    if (!dry_run)
+    {
+        /* Lock backups marked for delete */
+        catalog_lock_backup_list(delete_list, parray_num(delete_list) - 1, 0, false);
+
+        /* Delete backups from the end of the list */
+        for (i = (int) parray_num(delete_list) - 1; i >= 0; i--)
+        {
+            pgBackup *backup = (pgBackup *) parray_get(delete_list, (size_t) i);
+
+            if (interrupted)
+                elog(ERROR, "interrupted during delete backup");
+
+            delete_backup_files(backup);
+        }
+    }
+
+    /* Clean WAL segments */
+    if (delete_wal)
+        do_retention_wal(dry_run);
+
+    /* cleanup */
+    parray_free(delete_list);
+    parray_walk(backup_list, pgBackupFree);
+    parray_free(backup_list);
+}
+
+/*
+ * Merge and purge backups by retention policy. Retention policy is configured by
+ * the retention_redundancy and retention_window variables.
+ *
+ * Invalid backups are handled in Oracle style: they are ignored for the purpose
+ * of retention fulfillment, i.e. a CORRUPT full backup is not taken into account
+ * when determining which FULL backups should be kept for the redundancy obligation
+ * (only valid ones are), but if an invalid backup is not guarded by retention,
+ * it is removed.
+ */
+void do_retention(void)
+{
+    parray *backup_list = NULL;
+    parray *to_keep_list = parray_new();
+    parray *to_purge_list = parray_new();
+
+    bool retention_is_set = false; /* At least one retention policy is set */
+    bool backup_list_is_empty = false;
+
+    backup_deleted = false;
+    backup_merged = false;
+
+    /* Get a complete list of backups.
*/ + backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + if (parray_num(backup_list) == 0) + backup_list_is_empty = true; + + if (delete_expired || merge_expired) + { + if (instance_config.retention_redundancy > 0) + elog(LOG, "REDUNDANCY=%u", instance_config.retention_redundancy); + if (instance_config.retention_window > 0) + elog(LOG, "WINDOW=%u", instance_config.retention_window); + + if (instance_config.retention_redundancy == 0 && + instance_config.retention_window == 0) + { + /* Retention is disabled but we still can cleanup wal */ + elog(WARNING, "Retention policy is not set"); + if (!delete_wal) + return; + } + else + /* At least one retention policy is active */ + retention_is_set = true; + } + + if (retention_is_set && backup_list_is_empty) + elog(WARNING, "Backup list is empty, retention purge and merge are problematic"); + + /* Populate purge and keep lists, and show retention state messages */ + if (retention_is_set && !backup_list_is_empty) + do_retention_internal(backup_list, to_keep_list, to_purge_list); + + if (merge_expired && !dry_run && !backup_list_is_empty) + do_retention_merge(backup_list, to_keep_list, to_purge_list); + + if (delete_expired && !dry_run && !backup_list_is_empty) + do_retention_purge(to_keep_list, to_purge_list); + + /* TODO: some sort of dry run for delete_wal */ + if (delete_wal) + do_retention_wal(dry_run); + + /* TODO: consider dry-run flag */ + + if (!backup_merged) + elog(INFO, "There are no backups to merge by retention policy"); + + if (backup_deleted) + elog(INFO, "Purging finished"); + else + elog(INFO, "There are no backups to delete by retention policy"); + + if (!wal_deleted) + elog(INFO, "There is no WAL to purge by retention policy"); + + /* Cleanup */ + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); + parray_free(to_keep_list); + parray_free(to_purge_list); +} + +/* Evaluate every backup by retention policies and populate purge and keep lists. + * Also for every backup print its status ('Active' or 'Expired') according + * to active retention policies. 
+ */ +static void +do_retention_internal(parray *backup_list, parray *to_keep_list, parray *to_purge_list) +{ + int i; + + parray *redundancy_full_backup_list = NULL; + + /* For retention calculation */ + uint32 n_full_backups = 0; + int cur_full_backup_num = 0; + time_t days_threshold = 0; + + /* For fancy reporting */ + uint32 actual_window = 0; + + /* Calculate n_full_backups and days_threshold */ + if (instance_config.retention_redundancy > 0) + { + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* Consider only valid FULL backups for Redundancy */ + if (instance_config.retention_redundancy > 0 && + backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + n_full_backups++; + + /* Add every FULL backup that satisfy Redundancy policy to separate list */ + if (n_full_backups <= instance_config.retention_redundancy) + { + if (!redundancy_full_backup_list) + redundancy_full_backup_list = parray_new(); + + parray_append(redundancy_full_backup_list, backup); + } + } + } + /* Sort list of full backups to keep */ + if (redundancy_full_backup_list) + parray_qsort(redundancy_full_backup_list, pgBackupCompareIdDesc); + } + + if (instance_config.retention_window > 0) + { + days_threshold = current_time - + (instance_config.retention_window * 60 * 60 * 24); + } + + elog(INFO, "Evaluate backups by retention"); + for (i = (int) parray_num(backup_list) - 1; i >= 0; i--) + { + + bool redundancy_keep = false; + time_t backup_time = 0; + pgBackup *backup = (pgBackup *) parray_get(backup_list, (size_t) i); + + /* check if backup`s FULL ancestor is in redundancy list */ + if (redundancy_full_backup_list) + { + pgBackup *full_backup = find_parent_full_backup(backup); + + if (full_backup && parray_bsearch(redundancy_full_backup_list, + full_backup, + pgBackupCompareIdDesc)) + redundancy_keep = true; + } + + /* Remember the serial number of latest valid FULL backup */ + if (backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + cur_full_backup_num++; + } + + /* Invalid and running backups most likely to have recovery_time == 0, + * so in this case use start_time instead. + */ + if (backup->recovery_time) + backup_time = backup->recovery_time; + else + backup_time = backup->start_time; + + /* Check if backup in needed by retention policy */ + if ((days_threshold == 0 || (days_threshold > backup_time)) && + (instance_config.retention_redundancy == 0 || !redundancy_keep)) + { + /* This backup is not guarded by retention + * + * Redundancy = 1 + * FULL CORRUPT in retention (not count toward redundancy limit) + * FULL in retention + * ------retention redundancy ------- + * PAGE3 in retention + * ------retention window ----------- + * PAGE2 out of retention + * PAGE1 out of retention + * FULL out of retention <- We are here + * FULL CORRUPT out of retention + */ + + /* Save backup from purge if backup is pinned and + * expire date is not yet due. 
+ */ + if ((backup->expire_time > 0) && + (backup->expire_time > current_time)) + { + char expire_timestamp[100]; + time2iso(expire_timestamp, lengthof(expire_timestamp), backup->expire_time); + + elog(LOG, "Backup %s is pinned until '%s', retain", + base36enc(backup->start_time), expire_timestamp); + continue; + } + + /* Add backup to purge_list */ + elog(VERBOSE, "Mark backup %s for purge.", base36enc(backup->start_time)); + parray_append(to_purge_list, backup); + continue; + } + } + + /* sort keep_list and purge list */ + parray_qsort(to_keep_list, pgBackupCompareIdDesc); + parray_qsort(to_purge_list, pgBackupCompareIdDesc); + + /* FULL + * PAGE + * PAGE <- Only such backups must go into keep list + ---------retention window ---- + * PAGE + * FULL + * PAGE + * FULL + */ + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* Do not keep invalid backups by retention + * Turns out it was not a very good idea - [Issue #114] + */ + //if (backup->status != BACKUP_STATUS_OK && + // backup->status != BACKUP_STATUS_DONE) + // continue; + + /* only incremental backups should be in keep list */ + if (backup->backup_mode == BACKUP_MODE_FULL) + continue; + + /* orphan backup cannot be in keep list */ + if (!backup->parent_backup_link) + continue; + + /* skip if backup already in purge list */ + if (parray_bsearch(to_purge_list, backup, pgBackupCompareIdDesc)) + continue; + + /* if parent in purge_list, add backup to keep list */ + if (parray_bsearch(to_purge_list, + backup->parent_backup_link, + pgBackupCompareIdDesc)) + { + /* make keep list a bit more compact */ + parray_append(to_keep_list, backup); + continue; + } + } + + /* Message about retention state of backups + * TODO: message is ugly, rewrite it to something like show table in stdout. + */ + + cur_full_backup_num = 1; + for (i = 0; i < parray_num(backup_list); i++) + { + char *action = (char *)"Active"; + uint32 pinning_window = 0; + + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + if (parray_bsearch(to_purge_list, backup, pgBackupCompareIdDesc)) + action = (char *)"Expired"; + + if (backup->recovery_time == 0) + actual_window = 0; + else + actual_window = (current_time - backup->recovery_time)/(3600 * 24); + + /* For pinned backups show expire date */ + if (backup->expire_time > 0 && backup->expire_time > backup->recovery_time) + pinning_window = (backup->expire_time - backup->recovery_time)/(3600 * 24); + + /* TODO: add ancestor(chain full backup) ID */ + elog(INFO, "Backup %s, mode: %s, status: %s. Redundancy: %i/%i, Time Window: %ud/%ud. %s", + base36enc(backup->start_time), + pgBackupGetBackupMode(backup), + status2str(backup->status), + cur_full_backup_num, + instance_config.retention_redundancy, + actual_window, + pinning_window ? pinning_window : instance_config.retention_window, + action); + + if (backup->backup_mode == BACKUP_MODE_FULL) + cur_full_backup_num++; + } +} + +/* Merge partially expired incremental chains */ +static void +do_retention_merge(parray *backup_list, parray *to_keep_list, parray *to_purge_list) +{ + int i; + int j; + + /* IMPORTANT: we can merge to only those FULL backup, that is NOT + * guarded by retention and final target of such merge must be + * an incremental backup that is guarded by retention !!! 
+ *
+ * PAGE4 E
+ * PAGE3 D
+ --------retention window ---
+ * PAGE2 C
+ * PAGE1 B
+ * FULL A
+ *
+ * after retention merge:
+ * PAGE4 E
+ * FULL D
+ */
+
+    /* Merging happens here */
+    for (i = 0; i < parray_num(to_keep_list); i++)
+    {
+        char *keep_backup_id = NULL;
+        pgBackup *full_backup = NULL;
+        parray *merge_list = NULL;
+
+        pgBackup *keep_backup = (pgBackup *) parray_get(to_keep_list, i);
+
+        /* keep list may shrink during merge */
+        if (!keep_backup)
+            continue;
+
+        elog(INFO, "Consider backup %s for merge", base36enc(keep_backup->start_time));
+
+        /* Got valid incremental backup, find its FULL ancestor */
+        full_backup = find_parent_full_backup(keep_backup);
+
+        /* Failed to find parent */
+        if (!full_backup)
+        {
+            elog(WARNING, "Failed to find FULL parent for %s", base36enc(keep_backup->start_time));
+            continue;
+        }
+
+        /* Check that ancestor is in purge_list */
+        if (!parray_bsearch(to_purge_list,
+                            full_backup,
+                            pgBackupCompareIdDesc))
+        {
+            elog(WARNING, "Skip backup %s for merging, "
+                 "because its FULL parent is not marked for purge", base36enc(keep_backup->start_time));
+            continue;
+        }
+
+        /* The FULL backup is in the purge list; thanks to the compacting of keep_list
+         * the current backup is the final target for merge, but there could be
+         * intermediate incremental backups from purge_list.
+         */
+
+        keep_backup_id = base36enc_dup(keep_backup->start_time);
+        elog(INFO, "Merge incremental chain between full backup %s and backup %s",
+             base36enc(full_backup->start_time), keep_backup_id);
+        pg_free(keep_backup_id);
+
+        merge_list = parray_new();
+
+        /* Form up a merge list */
+        while (keep_backup->parent_backup_link)
+        {
+            parray_append(merge_list, keep_backup);
+            keep_backup = keep_backup->parent_backup_link;
+        }
+
+        /* sanity */
+        if (!merge_list)
+            continue;
+
+        /* sanity */
+        if (parray_num(merge_list) == 0)
+        {
+            parray_free(merge_list);
+            continue;
+        }
+
+        /* In the end add the FULL backup for easy locking */
+        parray_append(merge_list, full_backup);
+
+        /* Remove FULL backup from purge list */
+        parray_rm(to_purge_list, full_backup, pgBackupCompareId);
+
+        /* Lock merge chain */
+        catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true);
+
+        /* Consider this extreme case */
+        //  PAGEa1  PAGEb1  both valid
+        //      \   /
+        //       FULL
+
+        /* Check that the FULL backup does not have multiple descendants;
+         * full_backup always points to the current full_backup after merge
+         */
+//        if (is_prolific(backup_list, full_backup))
+//        {
+//            elog(WARNING, "Backup %s has multiple valid descendants. "
+//                 "Automatic merge is not possible.", base36enc(full_backup->start_time));
+//        }
+
+        /* Merge list example:
+         * 0 PAGE3
+         * 1 PAGE2
+         * 2 PAGE1
+         * 3 FULL
+         *
+         * Merge incremental chain from PAGE3 into FULL.
+         */
+
+        keep_backup = (pgBackup *)parray_get(merge_list, 0);
+        merge_chain(merge_list, full_backup, keep_backup);
+        backup_merged = true;
+
+        for (j = parray_num(merge_list) - 2; j >= 0; j--)
+        {
+            pgBackup *tmp_backup = (pgBackup *) parray_get(merge_list, j);
+
+            /* Try to remove merged incremental backup from both keep and purge lists */
+            parray_rm(to_purge_list, tmp_backup, pgBackupCompareId);
+            parray_set(to_keep_list, i, NULL);
+        }
+
+        pgBackupValidate(full_backup, NULL);
+        if (full_backup->status == BACKUP_STATUS_CORRUPT)
+            elog(ERROR, "Merging of backup %s failed", base36enc(full_backup->start_time));
+
+        /* Cleanup */
+        parray_free(merge_list);
+    }
+
+    elog(INFO, "Retention merging finished");
+
+}
+
+/* Purge expired backups */
+static void
+do_retention_purge(parray *to_keep_list, parray *to_purge_list)
+{
+    int i;
+    int j;
+
+    /* Remove backups by retention policy. Retention policy is configured by
+     * retention_redundancy and retention_window.
+     * Remove only backups that do not have children guarded by retention.
+     *
+     * TODO: We do not consider the situation where a child is marked for purge
+     * but its parent isn`t. Maybe something bad happened with time on server?
+     */
+
+    for (j = 0; j < parray_num(to_purge_list); j++)
+    {
+        bool purge = true;
+
+        pgBackup *delete_backup = (pgBackup *) parray_get(to_purge_list, j);
+
+        elog(LOG, "Consider backup %s for purge",
+             base36enc(delete_backup->start_time));
+
+        /* Evaluate the backup marked for delete against every backup in the keep list.
+         * If the backup marked for delete is recognized as a parent of one of those,
+         * then it should not be deleted.
+         */
+        for (i = 0; i < parray_num(to_keep_list); i++)
+        {
+            char *keeped_backup_id;
+
+            pgBackup *keep_backup = (pgBackup *) parray_get(to_keep_list, i);
+
+            /* item could have been nullified in merge */
+            if (!keep_backup)
+                continue;
+
+            /* Full backup cannot be a descendant */
+            if (keep_backup->backup_mode == BACKUP_MODE_FULL)
+                continue;
+
+            keeped_backup_id = base36enc_dup(keep_backup->start_time);
+
+            elog(LOG, "Check if backup %s is parent of backup %s",
+                 base36enc(delete_backup->start_time), keeped_backup_id);
+
+            if (is_parent(delete_backup->start_time, keep_backup, true))
+            {
+
+                /* We must not delete this backup, evict it from purge list */
+                elog(LOG, "Retain backup %s because its "
+                     "descendant %s is guarded by retention",
+                     base36enc(delete_backup->start_time), keeped_backup_id);
+
+                purge = false;
+                pg_free(keeped_backup_id);
+                break;
+            }
+            pg_free(keeped_backup_id);
+        }
+
+        /* Retain backup */
+        if (!purge)
+            continue;
+
+        /* Actual purge */
+        if (!lock_backup(delete_backup, false))
+        {
+            /* If the backup is still in use, do not interrupt and go to the next one */
+            elog(WARNING, "Cannot lock backup %s directory, skip purging",
+                 base36enc(delete_backup->start_time));
+            continue;
+        }
+
+        /* Delete backup and update status to DELETED */
+        delete_backup_files(delete_backup);
+        backup_deleted = true;
+
+    }
+}
+
+/*
+ * Purge WAL
+ * Iterate over timelines
+ * Look for WAL segments not reachable from existing backups
+ * and delete them.
+ */
+static void
+do_retention_wal(bool dry_run)
+{
+    parray *tli_list;
+    int i;
+
+    tli_list = catalog_get_timelines(&instance_config);
+
+    for (i = 0; i < parray_num(tli_list); i++)
+    {
+        timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i);
+
+        /*
+         * An empty timeline (only mentioned in a timeline history file)
+         * has nothing to clean up.
+ */ + if (tlinfo->n_xlog_files == 0 && parray_num(tlinfo->xlog_filelist) == 0) + continue; + + /* + * If closest backup exists, then timeline is reachable from + * at least one backup and no file should be removed. + * Unless wal-depth is enabled. + */ + if ((tlinfo->closest_backup) && instance_config.wal_depth <= 0) + continue; + + /* WAL retention keeps this timeline from purge */ + if (instance_config.wal_depth >= 0 && tlinfo->anchor_tli > 0 && + tlinfo->anchor_tli != tlinfo->tli) + continue; + + /* + * Purge all WAL segments before START LSN of oldest backup. + * If timeline doesn't have a backup, then whole timeline + * can be safely purged. + * Note, that oldest_backup is not necessarily valid here, + * but still we keep wal for it. + * If wal-depth is enabled then use anchor_lsn instead + * of oldest_backup. + */ + if (tlinfo->oldest_backup) + { + if (instance_config.wal_depth >= 0 && !(XLogRecPtrIsInvalid(tlinfo->anchor_lsn))) + { + delete_walfiles_in_tli(tlinfo->anchor_lsn, + tlinfo, instance_config.xlog_seg_size, dry_run); + } + else + { + delete_walfiles_in_tli(tlinfo->oldest_backup->start_lsn, + tlinfo, instance_config.xlog_seg_size, dry_run); + } + } + else + { + if (instance_config.wal_depth >= 0 && !(XLogRecPtrIsInvalid(tlinfo->anchor_lsn))) + delete_walfiles_in_tli(tlinfo->anchor_lsn, + tlinfo, instance_config.xlog_seg_size, dry_run); + else + delete_walfiles_in_tli(InvalidXLogRecPtr, + tlinfo, instance_config.xlog_seg_size, dry_run); + } + } +} + +/* + * Delete backup files of the backup and update the status of the backup to + * BACKUP_STATUS_DELETED. + */ +void +delete_backup_files(pgBackup *backup) +{ + size_t i; + char timestamp[100]; + parray *files; + size_t num_files; + char full_path[MAXPGPATH]; + + /* + * If the backup was deleted already, there is nothing to do. + */ + if (backup->status == BACKUP_STATUS_DELETED) + { + elog(WARNING, "Backup %s already deleted", + base36enc(backup->start_time)); + return; + } + + time2iso(timestamp, lengthof(timestamp), backup->recovery_time); + + elog(INFO, "Delete: %s %s", + base36enc(backup->start_time), timestamp); + + /* + * Update STATUS to BACKUP_STATUS_DELETING in preparation for the case which + * the error occurs before deleting all backup files. + */ + write_backup_status(backup, BACKUP_STATUS_DELETING, instance_name, false); + + /* list files to be deleted */ + files = parray_new(); + dir_list_file(files, backup->root_dir, false, false, true, false, false, 0, FIO_BACKUP_HOST); + + /* delete leaf node first */ + parray_qsort(files, pgFileCompareRelPathWithExternalDesc); + num_files = parray_num(files); + for (i = 0; i < num_files; i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + + join_path_components(full_path, backup->root_dir, file->rel_path); + + if (interrupted) + elog(ERROR, "interrupted during delete backup"); + + if (progress) + elog(INFO, "Progress: (%zd/%zd). Delete file \"%s\"", + i + 1, num_files, full_path); + + pgFileDelete(file->mode, full_path); + } + + parray_walk(files, pgFileFree); + parray_free(files); + backup->status = BACKUP_STATUS_DELETED; + + return; +} + +/* + * Purge WAL archive. One timeline at a time. + * If 'keep_lsn' is InvalidXLogRecPtr, then whole timeline can be purged + * If 'keep_lsn' is valid LSN, then every lesser segment can be purged. + * If 'dry_run' is set, then don`t actually delete anything. + * + * Case 1: + * archive is not empty, 'keep_lsn' is valid and we can delete something. 
+ * Case 2:
+ *      archive is not empty, 'keep_lsn' is valid and preventing us from deleting anything.
+ * Case 3:
+ *      archive is not empty, 'keep_lsn' is invalid, drop all WAL files in archive,
+ *      belonging to the timeline.
+ * Case 4:
+ *      archive is empty, 'keep_lsn' is valid, assume corruption of WAL archive.
+ * Case 5:
+ *      archive is empty, 'keep_lsn' is invalid, drop backup history files
+ *      and partial WAL segments in archive.
+ *
+ * Q: Maybe we should stop treating partial WAL segments as second-class citizens?
+ */
+static void
+delete_walfiles_in_tli(XLogRecPtr keep_lsn, timelineInfo *tlinfo,
+                       uint32 xlog_seg_size, bool dry_run)
+{
+    XLogSegNo FirstToDeleteSegNo;
+    XLogSegNo OldestToKeepSegNo = 0;
+    char first_to_del_str[MAXFNAMELEN];
+    char oldest_to_keep_str[MAXFNAMELEN];
+    int i;
+    size_t wal_size_logical = 0;
+    size_t wal_size_actual = 0;
+    char wal_pretty_size[20];
+    bool purge_all = false;
+
+
+    /* Timeline is completely empty */
+    if (parray_num(tlinfo->xlog_filelist) == 0)
+    {
+        elog(INFO, "Timeline %i is empty, nothing to remove", tlinfo->tli);
+        return;
+    }
+
+    if (XLogRecPtrIsInvalid(keep_lsn))
+    {
+        /* Drop all files in timeline */
+        elog(INFO, "On timeline %i all files %s be removed",
+             tlinfo->tli, dry_run?"can":"will");
+        FirstToDeleteSegNo = tlinfo->begin_segno;
+        OldestToKeepSegNo = tlinfo->end_segno;
+        purge_all = true;
+    }
+    else
+    {
+        /* Drop all segments between begin_segno and the segment containing keep_lsn (excluding it) */
+        FirstToDeleteSegNo = tlinfo->begin_segno;
+        GetXLogSegNo(keep_lsn, OldestToKeepSegNo, xlog_seg_size);
+    }
+
+    if (OldestToKeepSegNo > 0 && OldestToKeepSegNo > FirstToDeleteSegNo)
+    {
+        /* translate segno number into human readable format */
+        GetXLogFileName(first_to_del_str, tlinfo->tli, FirstToDeleteSegNo, xlog_seg_size);
+        GetXLogFileName(oldest_to_keep_str, tlinfo->tli, OldestToKeepSegNo, xlog_seg_size);
+
+        elog(INFO, "On timeline %i WAL segments between %s and %s %s be removed",
+             tlinfo->tli, first_to_del_str,
+             oldest_to_keep_str, dry_run?"can":"will");
+    }
+
+    /* sanity */
+    if (OldestToKeepSegNo > FirstToDeleteSegNo)
+    {
+        wal_size_logical = (OldestToKeepSegNo - FirstToDeleteSegNo) * xlog_seg_size;
+
+        /* In case of 'purge all' scenario OldestToKeepSegNo will be deleted too */
+        if (purge_all)
+            wal_size_logical += xlog_seg_size;
+    }
+    else if (OldestToKeepSegNo < FirstToDeleteSegNo)
+    {
+        /* It is actually possible for OldestToKeepSegNo to be less than FirstToDeleteSegNo
+         * in case of:
+         *   1. WAL archive corruption.
+         *   2. There is no actual WAL archive to speak of and
+         *      'keep_lsn' is coming from a STREAM backup.
+         */
+
+        if (FirstToDeleteSegNo > 0 && OldestToKeepSegNo > 0)
+        {
+            GetXLogFileName(first_to_del_str, tlinfo->tli, FirstToDeleteSegNo, xlog_seg_size);
+            GetXLogFileName(oldest_to_keep_str, tlinfo->tli, OldestToKeepSegNo, xlog_seg_size);
+
+            elog(LOG, "On timeline %i first segment %s is greater than oldest segment to keep %s",
+                 tlinfo->tli, first_to_del_str, oldest_to_keep_str);
+        }
+    }
+    else if (OldestToKeepSegNo == FirstToDeleteSegNo && !purge_all)
+    {
+        /* 'Nothing to delete' scenario because of 'keep_lsn',
+         * with the possible exception of partial and backup history files.
+ */ + elog(INFO, "Nothing to remove on timeline %i", tlinfo->tli); + } + + /* Report the logical size to delete */ + if (wal_size_logical > 0) + { + pretty_size(wal_size_logical, wal_pretty_size, lengthof(wal_pretty_size)); + elog(INFO, "Logical WAL size to remove on timeline %i : %s", + tlinfo->tli, wal_pretty_size); + } + + /* Calculate the actual size to delete */ + for (i = 0; i < parray_num(tlinfo->xlog_filelist); i++) + { + xlogFile *wal_file = (xlogFile *) parray_get(tlinfo->xlog_filelist, i); + + if (purge_all || wal_file->segno < OldestToKeepSegNo) + wal_size_actual += wal_file->file.size; + } + + /* Report the actual size to delete */ + if (wal_size_actual > 0) + { + pretty_size(wal_size_actual, wal_pretty_size, lengthof(wal_pretty_size)); + elog(INFO, "Resident WAL size to free on timeline %i : %s", + tlinfo->tli, wal_pretty_size); + } + + if (dry_run) + return; + + for (i = 0; i < parray_num(tlinfo->xlog_filelist); i++) + { + xlogFile *wal_file = (xlogFile *) parray_get(tlinfo->xlog_filelist, i); + + if (interrupted) + elog(ERROR, "interrupted during WAL archive purge"); + + /* Any segment equal or greater than EndSegNo must be kept + * unless it`s a 'purge all' scenario. + */ + if (purge_all || wal_file->segno < OldestToKeepSegNo) + { + char wal_fullpath[MAXPGPATH]; + + join_path_components(wal_fullpath, instance_config.arclog_path, wal_file->file.name); + + /* save segment from purging */ + if (instance_config.wal_depth >= 0 && wal_file->keep) + { + elog(VERBOSE, "Retain WAL segment \"%s\"", wal_fullpath); + continue; + } + + /* unlink segment */ + if (fio_unlink(wal_fullpath, FIO_BACKUP_HOST) < 0) + { + /* Missing file is not considered as error condition */ + if (errno != ENOENT) + elog(ERROR, "Could not remove file \"%s\": %s", + wal_fullpath, strerror(errno)); + } + else + { + if (wal_file->type == SEGMENT) + elog(VERBOSE, "Removed WAL segment \"%s\"", wal_fullpath); + else if (wal_file->type == TEMP_SEGMENT) + elog(VERBOSE, "Removed temp WAL segment \"%s\"", wal_fullpath); + else if (wal_file->type == PARTIAL_SEGMENT) + elog(VERBOSE, "Removed partial WAL segment \"%s\"", wal_fullpath); + else if (wal_file->type == BACKUP_HISTORY_FILE) + elog(VERBOSE, "Removed backup history file \"%s\"", wal_fullpath); + } + + wal_deleted = true; + } + } +} + + +/* Delete all backup files and wal files of given instance. */ +int +do_delete_instance(void) +{ + parray *backup_list; + int i; + char instance_config_path[MAXPGPATH]; + + + /* Delete all backups. */ + backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + catalog_lock_backup_list(backup_list, 0, parray_num(backup_list) - 1, true); + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + delete_backup_files(backup); + } + + /* Cleanup */ + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); + + /* Delete all wal files. 
*/ + pgut_rmtree(arclog_path, false, true); + + /* Delete backup instance config file */ + join_path_components(instance_config_path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); + if (remove(instance_config_path)) + { + elog(ERROR, "Can't remove \"%s\": %s", instance_config_path, + strerror(errno)); + } + + /* Delete instance root directories */ + if (rmdir(backup_instance_path) != 0) + elog(ERROR, "Can't remove \"%s\": %s", backup_instance_path, + strerror(errno)); + + if (rmdir(arclog_path) != 0) + elog(ERROR, "Can't remove \"%s\": %s", arclog_path, + strerror(errno)); + + elog(INFO, "Instance '%s' successfully deleted", instance_name); + return 0; +} + +/* Delete all backups of given status in instance */ +void +do_delete_status(InstanceConfig *instance_config, const char *status) +{ + int i; + parray *backup_list, *delete_list; + const char *pretty_status; + int n_deleted = 0, n_found = 0; + size_t size_to_delete = 0; + char size_to_delete_pretty[20]; + pgBackup *backup; + + BackupStatus status_for_delete = str2status(status); + delete_list = parray_new(); + + if (status_for_delete == BACKUP_STATUS_INVALID) + elog(ERROR, "Unknown value for '--status' option: '%s'", status); + + /* + * User may have provided status string in lower case, but + * we should print backup statuses consistently with show command, + * so convert it. + */ + pretty_status = status2str(status_for_delete); + + backup_list = catalog_get_backup_list(instance_config->name, INVALID_BACKUP_ID); + + if (parray_num(backup_list) == 0) + { + elog(WARNING, "Instance '%s' has no backups", instance_config->name); + return; + } + + if (dry_run) + elog(INFO, "Deleting all backups with status '%s' in dry run mode", pretty_status); + else + elog(INFO, "Deleting all backups with status '%s'", pretty_status); + + /* Selects backups with specified status and their children into delete_list array. */ + for (i = 0; i < parray_num(backup_list); i++) + { + backup = (pgBackup *) parray_get(backup_list, i); + + if (backup->status == status_for_delete) + { + n_found++; + + /* incremental backup can be already in delete_list due to append_children() */ + if (parray_contains(delete_list, backup)) + continue; + parray_append(delete_list, backup); + + append_children(backup_list, backup, delete_list); + } + } + + parray_qsort(delete_list, pgBackupCompareIdDesc); + + /* delete and calculate free size from delete_list */ + for (i = 0; i < parray_num(delete_list); i++) + { + backup = (pgBackup *)parray_get(delete_list, i); + + elog(INFO, "Backup %s with status %s %s be deleted", + base36enc(backup->start_time), status2str(backup->status), dry_run ? "can" : "will"); + + size_to_delete += backup->data_bytes; + if (backup->stream) + size_to_delete += backup->wal_bytes; + + if (!dry_run && lock_backup(backup, false)) + delete_backup_files(backup); + + n_deleted++; + } + + /* Inform about data size to free */ + if (size_to_delete >= 0) + { + pretty_size(size_to_delete, size_to_delete_pretty, lengthof(size_to_delete_pretty)); + elog(INFO, "Resident data size to free by delete of %i backups: %s", + n_deleted, size_to_delete_pretty); + } + + /* delete selected backups */ + if (!dry_run && n_deleted > 0) + elog(INFO, "Successfully deleted %i %s from instance '%s'", + n_deleted, n_deleted == 1 ? 
"backup" : "backups", + instance_config->name); + + + if (n_found == 0) + elog(WARNING, "Instance '%s' has no backups with status '%s'", + instance_config->name, pretty_status); + + // we don`t do WAL purge here, because it is impossible to correctly handle + // dry-run case. + + /* Cleanup */ + parray_free(delete_list); + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); +} diff --git a/src/bin/pg_probackup/dir.cpp b/src/bin/pg_probackup/dir.cpp new file mode 100644 index 000000000..cf7943bfa --- /dev/null +++ b/src/bin/pg_probackup/dir.cpp @@ -0,0 +1,1856 @@ +/*------------------------------------------------------------------------- + * + * dir.c: directory operation utility. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" +#include "file.h" + + +#if PG_VERSION_NUM < 110000 +#include "catalog/catalog.h" +#endif +#include "catalog/pg_tablespace.h" + +#include +#include +#include + +#include "configuration.h" +#include "common/fe_memutils.h" + +/* + * The contents of these directories are removed or recreated during server + * start so they are not included in backups. The directories themselves are + * kept and included as empty to preserve access permissions. + */ +const char *pgdata_exclude_dir[] = +{ + PG_XLOG_DIR, + /* + * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even + * when stats_temp_directory is set because PGSS_TEXT_FILE is always created + * there. + */ + (char *)"pg_stat_tmp", + (char *)"pgsql_tmp", + + /* + * It is generally not useful to backup the contents of this directory even + * if the intention is to restore to another master. See backup.sgml for a + * more detailed description. + */ + (char *)"pg_replslot", + + /* Contents removed on startup, see dsm_cleanup_for_mmap(). */ + (char *)"pg_dynshmem", + + /* Contents removed on startup, see AsyncShmemInit(). */ + (char *)"pg_notify", + + /* + * Old contents are loaded for possible debugging but are not required for + * normal operation, see OldSerXidInit(). + */ + (char *)"pg_serial", + + /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */ + (char *)"pg_snapshots", + + /* Contents zeroed on startup, see StartupSUBTRANS(). */ + (char *)"pg_subtrans", + + /* end of list */ + NULL, /* pg_log will be set later */ + NULL +}; + +static char *pgdata_exclude_files[] = +{ + /* Skip auto conf temporary file. 
*/ + (char *)"postgresql.auto.conf.tmp", + + /* Skip current log file temporary file */ + (char *)"current_logfiles.tmp", + (char *)"recovery.conf", + (char *)"postmaster.pid", + (char *)"postmaster.opts", + (char *)"probackup_recovery.conf", + (char *)"recovery.signal", + (char *)"standby.signal", + NULL +}; + +static char *pgdata_exclude_files_non_exclusive[] = +{ + /*skip in non-exclusive backup */ + (char *)"backup_label", + (char *)"tablespace_map", + NULL +}; + +/* Tablespace mapping structures */ + +typedef struct TablespaceListCell +{ + struct TablespaceListCell *next; + char old_dir[MAXPGPATH]; + char new_dir[MAXPGPATH]; +} TablespaceListCell; + +typedef struct TablespaceList +{ + TablespaceListCell *head; + TablespaceListCell *tail; +} TablespaceList; + +typedef struct TablespaceCreatedListCell +{ + struct TablespaceCreatedListCell *next; + char link_name[MAXPGPATH]; + char linked_dir[MAXPGPATH]; +} TablespaceCreatedListCell; + +typedef struct TablespaceCreatedList +{ + TablespaceCreatedListCell *head; + TablespaceCreatedListCell *tail; +} TablespaceCreatedList; + +static int pgCompareString(const void *str1, const void *str2); + +static char dir_check_file(pgFile *file, bool backup_logs); + +static void dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, + bool exclude, bool follow_symlink, bool backup_logs, + bool skip_hidden, int external_dir_num, fio_location location); +static void opt_path_map(ConfigOption *opt, const char *arg, + TablespaceList *list, const char *type); + +/* Tablespace mapping */ +static TablespaceList tablespace_dirs = {NULL, NULL}; +/* Extra directories mapping */ +static TablespaceList external_remap_list = {NULL, NULL}; + +/* + * Create directory, also create parent directories if necessary. + */ +int +dir_create_dir(const char *dir, mode_t mode) +{ + char parent[MAXPGPATH]; + + strncpy(parent, dir, MAXPGPATH); + get_parent_directory(parent); + + /* Create parent first */ + if (access(parent, F_OK) == -1) + dir_create_dir(parent, mode); + + /* Create directory */ + if (mkdir(dir, mode) == -1) + { + if (errno == EEXIST) /* already exist */ + return 0; + elog(ERROR, "cannot create directory \"%s\": %s", dir, strerror(errno)); + } + + return 0; +} + +pgFile * +pgFileNew(const char *path, const char *rel_path, bool follow_symlink, + int external_dir_num, fio_location location) +{ + struct stat st; + pgFile *file; + + /* stat the file */ + if (fio_stat(path, &st, follow_symlink, location) < 0) + { + /* file not found is not an error case */ + if (errno == ENOENT) + return NULL; + elog(ERROR, "cannot stat file \"%s\": %s", path, + strerror(errno)); + } + + file = pgFileInit(rel_path); + file->size = st.st_size; + file->mode = st.st_mode; + file->mtime = st.st_mtime; + file->external_dir_num = external_dir_num; + + return file; +} + +pgFile * +pgFileInit(const char *rel_path) +{ + pgFile *file; + char *file_name = NULL; + + file = (pgFile *) pgut_malloc(sizeof(pgFile)); + MemSet(file, 0, sizeof(pgFile)); + + file->rel_path = pgut_strdup(rel_path); + canonicalize_path(file->rel_path); + + /* Get file name from the path */ + file_name = last_dir_separator(file->rel_path); + + if (file_name == NULL) + file->name = file->rel_path; + else + { + file_name++; + file->name = file_name; + } + + /* Number of blocks readed during backup */ + file->n_blocks = BLOCKNUM_INVALID; + + /* Number of blocks backed up during backup */ + file->n_headers = 0; + + return file; +} + +/* + * Delete file pointed by the pgFile. 
+ * If the pgFile points directory, the directory must be empty. + */ +void +pgFileDelete(mode_t mode, const char *full_path) +{ + if (S_ISDIR(mode)) + { + if (rmdir(full_path) == -1) + { + if (errno == ENOENT) + return; + else if (errno == ENOTDIR) /* could be symbolic link */ + goto delete_file; + + elog(ERROR, "Cannot remove directory \"%s\": %s", + full_path, strerror(errno)); + } + return; + } + +delete_file: + if (remove(full_path) == -1) + { + if (errno == ENOENT) + return; + elog(ERROR, "Cannot remove file \"%s\": %s", full_path, + strerror(errno)); + } +} + +/* + * Read the local file to compute its CRC. + * We cannot make decision about file decompression because + * user may ask to backup already compressed files and we should be + * obvious about it. + */ +pg_crc32 +pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok) +{ + FILE *fp; + pg_crc32 crc = 0; + char *buf; + size_t len = 0; + + INIT_FILE_CRC32(use_crc32c, crc); + + /* open file in binary read mode */ + fp = fopen(file_path, PG_BINARY_R); + if (fp == NULL) + { + if (errno == ENOENT) + { + if (missing_ok) + { + FIN_FILE_CRC32(use_crc32c, crc); + return crc; + } + } + + elog(ERROR, "Cannot open file \"%s\": %s", + file_path, strerror(errno)); + } + + /* disable stdio buffering */ + setvbuf(fp, NULL, _IONBF, BUFSIZ); + buf = (char *)pgut_malloc(STDIO_BUFSIZE); + + /* calc CRC of file */ + for (;;) + { + if (interrupted) + elog(ERROR, "interrupted during CRC calculation"); + + len = fread(buf, 1, STDIO_BUFSIZE, fp); + + if (ferror(fp)) + elog(ERROR, "Cannot read \"%s\": %s", file_path, strerror(errno)); + + /* update CRC */ + COMP_FILE_CRC32(use_crc32c, crc, buf, len); + + if (feof(fp)) + break; + } + + FIN_FILE_CRC32(use_crc32c, crc); + fclose(fp); + pg_free(buf); + + return crc; +} + +#ifdef HAVE_LIBZ +/* + * Read the local file to compute its CRC. + * We cannot make decision about file decompression because + * user may ask to backup already compressed files and we should be + * obvious about it. + */ +pg_crc32 +pgFileGetCRCgz(const char *file_path, bool use_crc32c, bool missing_ok) +{ + gzFile fp; + pg_crc32 crc = 0; + int len = 0; + int err; + char *buf; + + INIT_FILE_CRC32(use_crc32c, crc); + + /* open file in binary read mode */ + fp = gzopen(file_path, PG_BINARY_R); + if (fp == NULL) + { + if (errno == ENOENT) + { + if (missing_ok) + { + FIN_FILE_CRC32(use_crc32c, crc); + return crc; + } + } + + elog(ERROR, "Cannot open file \"%s\": %s", + file_path, strerror(errno)); + } + + buf = (char *)pgut_malloc(STDIO_BUFSIZE); + + /* calc CRC of file */ + for (;;) + { + if (interrupted) + elog(ERROR, "interrupted during CRC calculation"); + + len = gzread(fp, buf, STDIO_BUFSIZE); + + if (len <= 0) + { + /* we either run into eof or error */ + if (gzeof(fp)) + break; + else + { + const char *err_str = NULL; + + err_str = gzerror(fp, &err); + elog(ERROR, "Cannot read from compressed file %s", err_str); + } + } + + /* update CRC */ + COMP_FILE_CRC32(use_crc32c, crc, buf, len); + } + + FIN_FILE_CRC32(use_crc32c, crc); + gzclose(fp); + pg_free(buf); + + return crc; +} +#endif + +void +pgFileFree(void *file) +{ + pgFile *file_ptr; + + if (file == NULL) + return; + + file_ptr = (pgFile *) file; + + pfree(file_ptr->linked); + pfree(file_ptr->rel_path); + + pfree(file); +} + +/* Compare two pgFile with their path in ascending order of ASCII code. 
*/ +int +pgFileMapComparePath(const void *f1, const void *f2) +{ + page_map_entry *f1p = *(page_map_entry **)f1; + page_map_entry *f2p = *(page_map_entry **)f2; + + return strcmp(f1p->path, f2p->path); +} + +/* Compare two pgFile with their name in ascending order of ASCII code. */ +int +pgFileCompareName(const void *f1, const void *f2) +{ + pgFile *f1p = *(pgFile **)f1; + pgFile *f2p = *(pgFile **)f2; + + return strcmp(f1p->name, f2p->name); +} + +/* + * Compare two pgFile with their relative path and external_dir_num in ascending + * order of ASСII code. + */ +int +pgFileCompareRelPathWithExternal(const void *f1, const void *f2) +{ + pgFile *f1p = *(pgFile **)f1; + pgFile *f2p = *(pgFile **)f2; + int res; + + res = strcmp(f1p->rel_path, f2p->rel_path); + if (res == 0) + { + if (f1p->external_dir_num > f2p->external_dir_num) + return 1; + else if (f1p->external_dir_num < f2p->external_dir_num) + return -1; + else + return 0; + } + return res; +} + +/* + * Compare two pgFile with their rel_path and external_dir_num + * in descending order of ASCII code. + */ +int +pgFileCompareRelPathWithExternalDesc(const void *f1, const void *f2) +{ + return -pgFileCompareRelPathWithExternal(f1, f2); +} + +/* Compare two pgFile with their linked directory path. */ +int +pgFileCompareLinked(const void *f1, const void *f2) +{ + pgFile *f1p = *(pgFile **)f1; + pgFile *f2p = *(pgFile **)f2; + + return strcmp(f1p->linked, f2p->linked); +} + +/* Compare two pgFile with their size */ +int +pgFileCompareSize(const void *f1, const void *f2) +{ + pgFile *f1p = *(pgFile **)f1; + pgFile *f2p = *(pgFile **)f2; + + if (f1p->size > f2p->size) + return 1; + else if (f1p->size < f2p->size) + return -1; + else + return 0; +} + +static int +pgCompareString(const void *str1, const void *str2) +{ + return strcmp(*(char **) str1, *(char **) str2); +} + +/* Compare two Oids */ +int +pgCompareOid(const void *f1, const void *f2) +{ + Oid *v1 = *(Oid **) f1; + Oid *v2 = *(Oid **) f2; + + if (*v1 > *v2) + return 1; + else if (*v1 < *v2) + return -1; + else + return 0;} + + +void +db_map_entry_free(void *entry) +{ + db_map_entry *m = (db_map_entry *) entry; + + free(m->datname); + free(entry); +} + +/* + * List files, symbolic links and directories in the directory "root" and add + * pgFile objects to "files". We add "root" to "files" if add_root is true. + * + * When follow_symlink is true, symbolic link is ignored and only file or + * directory linked to will be listed. + */ +void +dir_list_file(parray *files, const char *root, bool exclude, bool follow_symlink, + bool add_root, bool backup_logs, bool skip_hidden, int external_dir_num, + fio_location location) +{ + pgFile *file; + + file = pgFileNew(root, "", follow_symlink, external_dir_num, location); + if (file == NULL) + { + /* For external directory this is not ok */ + if (external_dir_num > 0) + elog(ERROR, "External directory is not found: \"%s\"", root); + else + return; + } + + if (!S_ISDIR(file->mode)) + { + if (external_dir_num > 0) + elog(ERROR, " --external-dirs option \"%s\": directory or symbolic link expected", + root); + else + elog(WARNING, "Skip \"%s\": unexpected file format", root); + return; + } + if (add_root) + parray_append(files, file); + + dir_list_file_internal(files, file, root, exclude, follow_symlink, + backup_logs, skip_hidden, external_dir_num, location); + + if (!add_root) + pgFileFree(file); +} + +#define CHECK_FALSE 0 +#define CHECK_TRUE 1 +#define CHECK_EXCLUDE_FALSE 2 + +/* + * Check file or directory. + * + * Check for exclude. 
+ * Extract information about the file parsing its name. + * Skip files: + * - skip temp tables files + * - skip unlogged tables files + * Skip recursive tablespace content + * Set flags for: + * - database directories + * - datafiles + */ +static char +dir_check_file(pgFile *file, bool backup_logs) +{ + int i; + int sscanf_res; + bool in_tablespace = false; + + in_tablespace = path_is_prefix_of_path(PG_TBLSPC_DIR, file->rel_path); + + /* Check if we need to exclude file by name */ + if (S_ISREG(file->mode)) + { + if (!exclusive_backup) + { + for (i = 0; pgdata_exclude_files_non_exclusive[i]; i++) + if (strcmp(file->rel_path, + pgdata_exclude_files_non_exclusive[i]) == 0) + { + /* Skip */ + elog(VERBOSE, "Excluding file: %s", file->name); + return CHECK_FALSE; + } + } + + for (i = 0; pgdata_exclude_files[i]; i++) + if (strcmp(file->rel_path, pgdata_exclude_files[i]) == 0) + { + /* Skip */ + elog(VERBOSE, "Excluding file: %s", file->name); + return CHECK_FALSE; + } + } + /* + * If the directory name is in the exclude list, do not list the + * contents. + */ + else if (S_ISDIR(file->mode) && !in_tablespace && file->external_dir_num == 0) + { + /* + * If the item in the exclude list starts with '/', compare to + * the absolute path of the directory. Otherwise compare to the + * directory name portion. + */ + for (i = 0; pgdata_exclude_dir[i]; i++) + { + /* relative path exclude */ + if (strcmp(file->rel_path, pgdata_exclude_dir[i]) == 0) + { + elog(VERBOSE, "Excluding directory content: %s", file->rel_path); + return CHECK_EXCLUDE_FALSE; + } + } + + if (!backup_logs) + { + if (strcmp(file->rel_path, PG_LOG_DIR) == 0) + { + /* Skip */ + elog(VERBOSE, "Excluding directory content: %s", file->rel_path); + return CHECK_EXCLUDE_FALSE; + } + } + } + + /* + * Do not copy tablespaces twice. It may happen if the tablespace is located + * inside the PGDATA. + */ + if (S_ISDIR(file->mode) && + strcmp(file->name, TABLESPACE_VERSION_DIRECTORY) == 0) + { + Oid tblspcOid; + char tmp_rel_path[MAXPGPATH]; + + /* + * Valid path for the tablespace is + * pg_tblspc/tblsOid/TABLESPACE_VERSION_DIRECTORY + */ + if (!path_is_prefix_of_path(PG_TBLSPC_DIR, file->rel_path)) + return CHECK_FALSE; + sscanf_res = sscanf(file->rel_path, PG_TBLSPC_DIR "/%u/%s", + &tblspcOid, tmp_rel_path); + if (sscanf_res == 0) + return CHECK_FALSE; + } + + if (in_tablespace) + { + char tmp_rel_path[MAXPGPATH]; + + sscanf_res = sscanf(file->rel_path, PG_TBLSPC_DIR "/%u/%[^/]/%u/", + &(file->tblspcOid), tmp_rel_path, + &(file->dbOid)); + + /* + * We should skip other files and directories rather than + * TABLESPACE_VERSION_DIRECTORY, if this is recursive tablespace. 
+ */ + if (sscanf_res == 2 && strcmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY) != 0) + return CHECK_FALSE; + + if (sscanf_res == 3 && S_ISDIR(file->mode) && + strcmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY) == 0) + file->is_database = true; + } + else if (path_is_prefix_of_path("global", file->rel_path)) + { + file->tblspcOid = GLOBALTABLESPACE_OID; + + if (S_ISDIR(file->mode) && strcmp(file->name, "global") == 0) + file->is_database = true; + } + else if (path_is_prefix_of_path("base", file->rel_path)) + { + file->tblspcOid = DEFAULTTABLESPACE_OID; + + sscanf(file->rel_path, "base/%u/", &(file->dbOid)); + + if (S_ISDIR(file->mode) && strcmp(file->name, "base") != 0) + file->is_database = true; + } + + /* Do not backup ptrack_init files */ + if (S_ISREG(file->mode) && strcmp(file->name, "ptrack_init") == 0) + return CHECK_FALSE; + + /* + * Check files located inside database directories including directory + * 'global' + */ + if (S_ISREG(file->mode) && file->tblspcOid != 0 && + file->name && file->name[0]) + { + if (strcmp(file->name, "pg_internal.init") == 0) + return CHECK_FALSE; + /* Do not backup ptrack2.x map files */ + else if (strcmp(file->name, "ptrack.map") == 0) + return CHECK_FALSE; + else if (strcmp(file->name, "ptrack.map.mmap") == 0) + return CHECK_FALSE; + else if (strcmp(file->name, "ptrack.map.tmp") == 0) + return CHECK_FALSE; + /* Do not backup temp files */ + else if (file->name[0] == 't' && isdigit(file->name[1])) + return CHECK_FALSE; + else if (isdigit(file->name[0])) + { + char *fork_name; + int len; + char suffix[MAXPGPATH]; + + fork_name = strstr(file->name, "_"); + if (fork_name) + { + /* Auxiliary fork of the relfile */ + if (strcmp(fork_name, "vm") == 0) + file->forkName = vm; + + else if (strcmp(fork_name, "fsm") == 0) + file->forkName = fsm; + + else if (strcmp(fork_name, "cfm") == 0) + file->forkName = cfm; + + else if (strcmp(fork_name, "ptrack") == 0) + file->forkName = ptrack; + + else if (strcmp(fork_name, "init") == 0) + file->forkName = init; + + /* Do not backup ptrack files */ + if (file->forkName == ptrack) + return CHECK_FALSE; + } + else + { + /* + * snapfs files: + * RELFILENODE.BLOCKNO.snapmap.SNAPID + * RELFILENODE.BLOCKNO.snap.SNAPID + */ + if (strstr(file->name, "snap") != NULL) + return true; + + len = strlen(file->name); + /* reloid.cfm */ + if (len > 3 && strcmp(file->name + len - 3, "cfm") == 0) + return CHECK_TRUE; + + sscanf_res = sscanf(file->name, "%u.%d.%s", &(file->relOid), + &(file->segno), suffix); + if (sscanf_res == 0) + elog(ERROR, "Cannot parse file name \"%s\"", file->name); + else if (sscanf_res == 1 || sscanf_res == 2) + file->is_datafile = true; + } + } + } + + return CHECK_TRUE; +} + +/* + * List files in parent->path directory. If "exclude" is true do not add into + * "files" files from pgdata_exclude_files and directories from + * pgdata_exclude_dir. 
+ */ +static void +dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, + bool exclude, bool follow_symlink, bool backup_logs, + bool skip_hidden, int external_dir_num, fio_location location) +{ + DIR *dir; + struct dirent *dent; + + if (!S_ISDIR(parent->mode)) + elog(ERROR, "\"%s\" is not a directory", parent_dir); + + /* Open directory and list contents */ + dir = fio_opendir(parent_dir, location); + if (dir == NULL) + { + if (errno == ENOENT) + { + /* Maybe the directory was removed */ + return; + } + elog(ERROR, "Cannot open directory \"%s\": %s", + parent_dir, strerror(errno)); + } + + errno = 0; + while ((dent = fio_readdir(dir))) + { + pgFile *file; + char child[MAXPGPATH]; + char rel_child[MAXPGPATH]; + char check_res; + + join_path_components(child, parent_dir, dent->d_name); + join_path_components(rel_child, parent->rel_path, dent->d_name); + + file = pgFileNew(child, rel_child, follow_symlink, external_dir_num, + location); + if (file == NULL) + continue; + + /* Skip entries point current dir or parent dir */ + if (S_ISDIR(file->mode) && + (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0)) + { + pgFileFree(file); + continue; + } + + /* skip hidden files and directories */ + if (skip_hidden && file->name[0] == '.') + { + elog(WARNING, "Skip hidden file: '%s'", child); + pgFileFree(file); + continue; + } + + /* + * Add only files, directories and links. Skip sockets and other + * unexpected file formats. + */ + if (!S_ISDIR(file->mode) && !S_ISREG(file->mode)) + { + elog(WARNING, "Skip '%s': unexpected file format", child); + pgFileFree(file); + continue; + } + + if (exclude) + { + check_res = dir_check_file(file, backup_logs); + if (check_res == CHECK_FALSE) + { + /* Skip */ + pgFileFree(file); + continue; + } + else if (check_res == CHECK_EXCLUDE_FALSE) + { + /* We add the directory itself which content was excluded */ + parray_append(files, file); + continue; + } + } + + parray_append(files, file); + + /* + * If the entry is a directory call dir_list_file_internal() + * recursively. + */ + if (S_ISDIR(file->mode)) + dir_list_file_internal(files, file, child, exclude, follow_symlink, + backup_logs, skip_hidden, external_dir_num, location); + } + + if (errno && errno != ENOENT) + { + int errno_tmp = errno; + fio_closedir(dir); + elog(ERROR, "Cannot read directory \"%s\": %s", + parent_dir, strerror(errno_tmp)); + } + fio_closedir(dir); +} + +/* + * Retrieve tablespace path, either relocated or original depending on whether + * -T was passed or not. + * + * Copy of function get_tablespace_mapping() from pg_basebackup.c. + */ +static const char * +get_tablespace_mapping(const char *dir) +{ + TablespaceListCell *cell; + + for (cell = tablespace_dirs.head; cell; cell = cell->next) + if (strcmp(dir, cell->old_dir) == 0) + return cell->new_dir; + + return dir; +} + +/* + * Split argument into old_dir and new_dir and append to mapping + * list. + * + * Copy of function tablespace_list_append() from pg_basebackup.c. 
+ */ +static void +opt_path_map(ConfigOption *opt, const char *arg, TablespaceList *list, + const char *type) +{ + TablespaceListCell *cell = pgut_new(TablespaceListCell); + char *dst; + char *dst_ptr; + const char *arg_ptr; + + memset(cell, 0, sizeof(TablespaceListCell)); + dst_ptr = dst = cell->old_dir; + for (arg_ptr = arg; *arg_ptr; arg_ptr++) + { + if (dst_ptr - dst >= MAXPGPATH) + elog(ERROR, "directory name too long"); + + if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=') + ; /* skip backslash escaping = */ + else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\')) + { + if (*cell->new_dir) + elog(ERROR, "multiple \"=\" signs in %s mapping\n", type); + else + dst = dst_ptr = cell->new_dir; + } + else + *dst_ptr++ = *arg_ptr; + } + + if (!*cell->old_dir || !*cell->new_dir) + elog(ERROR, "invalid %s mapping format \"%s\", " + "must be \"OLDDIR=NEWDIR\"", type, arg); + canonicalize_path(cell->old_dir); + canonicalize_path(cell->new_dir); + + /* + * This check isn't absolutely necessary. But all tablespaces are created + * with absolute directories, so specifying a non-absolute path here would + * just never match, possibly confusing users. It's also good to be + * consistent with the new_dir check. + */ + if (!is_absolute_path(cell->old_dir)) + elog(ERROR, "old directory is not an absolute path in %s mapping: %s\n", + type, cell->old_dir); + + if (!is_absolute_path(cell->new_dir)) + elog(ERROR, "new directory is not an absolute path in %s mapping: %s\n", + type, cell->new_dir); + + if (list->tail) + list->tail->next = cell; + else + list->head = cell; + list->tail = cell; +} + +/* Parse tablespace mapping */ +void +opt_tablespace_map(ConfigOption *opt, const char *arg) +{ + opt_path_map(opt, arg, &tablespace_dirs, "tablespace"); +} + +/* Parse external directories mapping */ +void +opt_externaldir_map(ConfigOption *opt, const char *arg) +{ + opt_path_map(opt, arg, &external_remap_list, "external directory"); +} + +/* + * Create directories from **dest_files** in **data_dir**. + * + * If **extract_tablespaces** is true then try to extract tablespace data + * directories into their initial path using tablespace_map file. + * Use **backup_dir** for tablespace_map extracting. + * + * Enforce permissions from backup_content.control. The only + * problem now is with PGDATA itself. + * TODO: we must preserve PGDATA permissions somewhere. Is it actually a problem? + * Shouldn`t starting postgres force correct permissions on PGDATA? + * + * TODO: symlink handling. If user located symlink in PG_TBLSPC_DIR, it will + * be restored as directory. + */ +void +create_data_directories(parray *dest_files, const char *data_dir, const char *backup_dir, + bool extract_tablespaces, bool incremental, fio_location location) +{ + int i; + parray *links = NULL; + mode_t pg_tablespace_mode = DIR_PERMISSION; + char to_path[MAXPGPATH]; + + /* get tablespace map */ + if (extract_tablespaces) + { + links = parray_new(); + read_tablespace_map(links, backup_dir); + /* Sort links by a link name */ + parray_qsort(links, pgFileCompareName); + } + + /* + * We have no idea about tablespace permission + * For PG < 11 we can just force default permissions. + */ +#if PG_VERSION_NUM >= 110000 + if (links) + { + /* For PG>=11 we use temp kludge: trust permissions on 'pg_tblspc' + * and force them on every tablespace. + * TODO: remove kludge and ask data_directory_mode + * at the start of backup. 
+ */ + for (i = 0; i < parray_num(dest_files); i++) + { + pgFile *file = (pgFile *) parray_get(dest_files, i); + + if (!S_ISDIR(file->mode)) + continue; + + /* skip external directory content */ + if (file->external_dir_num != 0) + continue; + + /* look for 'pg_tblspc' directory */ + if (strcmp(file->rel_path, PG_TBLSPC_DIR) == 0) + { + pg_tablespace_mode = file->mode; + break; + } + } + } +#endif + + /* + * We iterate over dest_files and for every directory with parent 'pg_tblspc' + * we must lookup this directory name in tablespace map. + * If we got a match, we treat this directory as tablespace. + * It means that we create directory specified in tablespace_map and + * original directory created as symlink to it. + */ + + elog(LOG, "Restore directories and symlinks..."); + + /* create directories */ + for (i = 0; i < parray_num(dest_files); i++) + { + char parent_dir[MAXPGPATH]; + pgFile *dir = (pgFile *) parray_get(dest_files, i); + + if (!S_ISDIR(dir->mode)) + continue; + + /* skip external directory content */ + if (dir->external_dir_num != 0) + continue; + + /* tablespace_map exists */ + if (links) + { + /* get parent dir of rel_path */ + strncpy(parent_dir, dir->rel_path, MAXPGPATH); + get_parent_directory(parent_dir); + + /* check if directory is actually link to tablespace */ + if (strcmp(parent_dir, PG_TBLSPC_DIR) == 0) + { + /* this directory located in pg_tblspc + * check it against tablespace map + */ + pgFile **link = (pgFile **) parray_bsearch(links, dir, pgFileCompareName); + + /* got match */ + if (link) + { + const char *linked_path = get_tablespace_mapping((*link)->linked); + + if (!is_absolute_path(linked_path)) + elog(ERROR, "Tablespace directory is not an absolute path: %s\n", + linked_path); + + join_path_components(to_path, data_dir, dir->rel_path); + + elog(VERBOSE, "Create directory \"%s\" and symbolic link \"%s\"", + linked_path, to_path); + + /* create tablespace directory */ + fio_mkdir(linked_path, pg_tablespace_mode, location); + + /* create link to linked_path */ + if (fio_symlink(linked_path, to_path, incremental, location) < 0) + elog(ERROR, "Could not create symbolic link \"%s\": %s", + to_path, strerror(errno)); + + continue; + } + } + } + + /* This is not symlink, create directory */ + elog(VERBOSE, "Create directory \"%s\"", dir->rel_path); + + join_path_components(to_path, data_dir, dir->rel_path); + fio_mkdir(to_path, dir->mode, location); + } + + if (extract_tablespaces) + { + parray_walk(links, pgFileFree); + parray_free(links); + } +} + +/* + * Read names of symbolic names of tablespaces with links to directories from + * tablespace_map or tablespace_map.txt. 
+ */ +void +read_tablespace_map(parray *files, const char *backup_dir) +{ + FILE *fp; + char db_path[MAXPGPATH], + map_path[MAXPGPATH]; + char buf[MAXPGPATH * 2]; + + join_path_components(db_path, backup_dir, DATABASE_DIR); + join_path_components(map_path, db_path, PG_TABLESPACE_MAP_FILE); + + /* Exit if database/tablespace_map doesn't exist */ + if (!fileExists(map_path, FIO_BACKUP_HOST)) + { + elog(LOG, "there is no file tablespace_map"); + return; + } + + fp = fio_open_stream(map_path, FIO_BACKUP_HOST); + if (fp == NULL) + elog(ERROR, "cannot open \"%s\": %s", map_path, strerror(errno)); + + while (fgets(buf, lengthof(buf), fp)) + { + char link_name[MAXPGPATH], + path[MAXPGPATH]; + pgFile *file; + + if (sscanf(buf, "%1023s %1023s", link_name, path) != 2) + elog(ERROR, "invalid format found in \"%s\"", map_path); + + file = pgut_new(pgFile); + memset(file, 0, sizeof(pgFile)); + + /* follow the convention for pgFileFree */ + file->name = pgut_strdup(link_name); + file->linked = pgut_strdup(path); + canonicalize_path(file->linked); + + parray_append(files, file); + } + + if (ferror(fp)) + elog(ERROR, "Failed to read from file: \"%s\"", map_path); + + fio_close_stream(fp); +} + +/* + * Check that all tablespace mapping entries have correct linked directory + * paths. Linked directories must be empty or do not exist, unless + * we are running incremental restore, then linked directories can be nonempty. + * + * If tablespace-mapping option is supplied, all OLDDIR entries must have + * entries in tablespace_map file. + * + * + * TODO: maybe when running incremental restore with tablespace remapping, then + * new tablespace directory MUST be empty? because there is no way + * we can be sure, that files laying there belong to our instance. + */ +void +check_tablespace_mapping(pgBackup *backup, bool incremental, bool *tblspaces_are_empty) +{ +// char this_backup_path[MAXPGPATH]; + parray *links; + size_t i; + TablespaceListCell *cell; + pgFile *tmp_file = pgut_new(pgFile); + + links = parray_new(); + +// pgBackupGetPath(backup, this_backup_path, lengthof(this_backup_path), NULL); + read_tablespace_map(links, backup->root_dir); + /* Sort links by the path of a linked file*/ + parray_qsort(links, pgFileCompareLinked); + + elog(LOG, "check tablespace directories of backup %s", + base36enc(backup->start_time)); + + /* 1 - each OLDDIR must have an entry in tablespace_map file (links) */ + for (cell = tablespace_dirs.head; cell; cell = cell->next) + { + tmp_file->linked = cell->old_dir; + + if (parray_bsearch(links, tmp_file, pgFileCompareLinked) == NULL) + elog(ERROR, "--tablespace-mapping option's old directory " + "doesn't have an entry in tablespace_map file: \"%s\"", + cell->old_dir); + + /* For incremental restore, check that new directory is empty */ +// if (incremental) +// { +// if (!is_absolute_path(cell->new_dir)) +// elog(ERROR, "tablespace directory is not an absolute path: %s\n", +// cell->new_dir); +// +// if (!dir_is_empty(cell->new_dir, FIO_DB_HOST)) +// elog(ERROR, "restore tablespace destination is not empty: \"%s\"", +// cell->new_dir); +// } + } + + /* 2 - all linked directories must be empty */ + for (i = 0; i < parray_num(links); i++) + { + pgFile *link = (pgFile *) parray_get(links, i); + const char *linked_path = link->linked; + TablespaceListCell *cell; + + for (cell = tablespace_dirs.head; cell; cell = cell->next) + if (strcmp(link->linked, cell->old_dir) == 0) + { + linked_path = cell->new_dir; + break; + } + + if (!is_absolute_path(linked_path)) + elog(ERROR, "tablespace 
directory is not an absolute path: %s\n", + linked_path); + + if (!dir_is_empty(linked_path, FIO_DB_HOST)) + { + if (!incremental) + elog(ERROR, "restore tablespace destination is not empty: \"%s\"", + linked_path); + *tblspaces_are_empty = false; + } + } + + free(tmp_file); + parray_walk(links, pgFileFree); + parray_free(links); +} + +void +check_external_dir_mapping(pgBackup *backup, bool incremental) +{ + TablespaceListCell *cell; + parray *external_dirs_to_restore; + int i; + + elog(LOG, "check external directories of backup %s", + base36enc(backup->start_time)); + + if (!backup->external_dir_str) + { + if (external_remap_list.head) + elog(ERROR, "--external-mapping option's old directory doesn't " + "have an entry in list of external directories of current " + "backup: \"%s\"", external_remap_list.head->old_dir); + return; + } + + external_dirs_to_restore = make_external_directory_list( + backup->external_dir_str, + false); + /* 1 - each OLDDIR must have an entry in external_dirs_to_restore */ + for (cell = external_remap_list.head; cell; cell = cell->next) + { + bool found = false; + + for (i = 0; i < parray_num(external_dirs_to_restore); i++) + { + char *external_dir = (char *)parray_get(external_dirs_to_restore, i); + + if (strcmp(cell->old_dir, external_dir) == 0) + { + /* Swap new dir name with old one, it is used by 2-nd step */ + parray_set(external_dirs_to_restore, i, + pgut_strdup(cell->new_dir)); + pfree(external_dir); + + found = true; + break; + } + } + if (!found) + elog(ERROR, "--external-mapping option's old directory doesn't " + "have an entry in list of external directories of current " + "backup: \"%s\"", cell->old_dir); + } + + /* 2 - all linked directories must be empty */ + for (i = 0; i < parray_num(external_dirs_to_restore); i++) + { + char *external_dir = (char *) parray_get(external_dirs_to_restore, + i); + + if (!incremental && !dir_is_empty(external_dir, FIO_DB_HOST)) + elog(ERROR, "External directory is not empty: \"%s\"", + external_dir); + } + + free_dir_list(external_dirs_to_restore); +} + +char * +get_external_remap(char *current_dir) +{ + TablespaceListCell *cell; + + for (cell = external_remap_list.head; cell; cell = cell->next) + { + char *old_dir = cell->old_dir; + + if (strcmp(old_dir, current_dir) == 0) + return cell->new_dir; + } + return current_dir; +} + +/* Parsing states for get_control_value() */ +#define CONTROL_WAIT_NAME 1 +#define CONTROL_INNAME 2 +#define CONTROL_WAIT_COLON 3 +#define CONTROL_WAIT_VALUE 4 +#define CONTROL_INVALUE 5 +#define CONTROL_WAIT_NEXT_NAME 6 + +/* + * Get value from json-like line "str" of backup_content.control file. + * + * The line has the following format: + * {"name1":"value1", "name2":"value2"} + * + * The value will be returned to "value_str" as string if it is not NULL. If it + * is NULL the value will be returned to "value_int64" as int64. + * + * Returns true if the value was found in the line. 
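+ *
+ * For example, given a (purely illustrative) line
+ *   {"path":"base/1/1259", "size":"8192", "crc":"0"}
+ * get_control_value(str, "size", NULL, &value_int64, true) stores 8192 in
+ * value_int64, while asking for an absent optional field such as "linked"
+ * simply returns false.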
+ */ +static bool +get_control_value(const char *str, const char *name, + char *value_str, int64 *value_int64, bool is_mandatory) +{ + int state = CONTROL_WAIT_NAME; + char *name_ptr = (char *) name; + char *buf = (char *) str; + char buf_int64[32], /* Buffer for "value_int64" */ + *buf_int64_ptr = buf_int64; + + /* Set default values */ + if (value_str) + *value_str = '\0'; + else if (value_int64) + *value_int64 = 0; + + while (*buf) + { + switch (state) + { + case CONTROL_WAIT_NAME: + if (*buf == '"') + state = CONTROL_INNAME; + else if (IsAlpha(*buf)) + goto bad_format; + break; + case CONTROL_INNAME: + /* Found target field. Parse value. */ + if (*buf == '"') + state = CONTROL_WAIT_COLON; + /* Check next field */ + else if (*buf != *name_ptr) + { + name_ptr = (char *) name; + state = CONTROL_WAIT_NEXT_NAME; + } + else + name_ptr++; + break; + case CONTROL_WAIT_COLON: + if (*buf == ':') + state = CONTROL_WAIT_VALUE; + else if (!IsSpace(*buf)) + goto bad_format; + break; + case CONTROL_WAIT_VALUE: + if (*buf == '"') + { + state = CONTROL_INVALUE; + buf_int64_ptr = buf_int64; + } + else if (IsAlpha(*buf)) + goto bad_format; + break; + case CONTROL_INVALUE: + /* Value was parsed, exit */ + if (*buf == '"') + { + if (value_str) + { + *value_str = '\0'; + } + else if (value_int64) + { + /* Length of buf_uint64 should not be greater than 31 */ + if (buf_int64_ptr - buf_int64 >= 32) + elog(ERROR, "field \"%s\" is out of range in the line %s of the file %s", + name, str, DATABASE_FILE_LIST); + + *buf_int64_ptr = '\0'; + if (!parse_int64(buf_int64, value_int64, 0)) + { + /* We assume that too big value is -1 */ + if (errno == ERANGE) + *value_int64 = BYTES_INVALID; + else + goto bad_format; + } + } + + return true; + } + else + { + if (value_str) + { + *value_str = *buf; + value_str++; + } + else + { + *buf_int64_ptr = *buf; + buf_int64_ptr++; + } + } + break; + case CONTROL_WAIT_NEXT_NAME: + if (*buf == ',') + state = CONTROL_WAIT_NAME; + break; + default: + /* Should not happen */ + break; + } + + buf++; + } + + /* There is no close quotes */ + if (state == CONTROL_INNAME || state == CONTROL_INVALUE) + goto bad_format; + + /* Did not find target field */ + if (is_mandatory) + elog(ERROR, "field \"%s\" is not found in the line %s of the file %s", + name, str, DATABASE_FILE_LIST); + return false; + +bad_format: + elog(ERROR, "%s file has invalid format in line %s", + DATABASE_FILE_LIST, str); + return false; /* Make compiler happy */ +} + +/* + * Construct parray of pgFile from the backup content list. + * If root is not NULL, path will be absolute path. 
+ */ +parray * +dir_read_file_list(const char *root, const char *external_prefix, + const char *file_txt, fio_location location, pg_crc32 expected_crc) +{ + FILE *fp; + parray *files; + char buf[BLCKSZ]; + char stdio_buf[STDIO_BUFSIZE]; + pg_crc32 content_crc = 0; + + fp = fio_open_stream(file_txt, location); + if (fp == NULL) + elog(ERROR, "cannot open \"%s\": %s", file_txt, strerror(errno)); + + /* enable stdio buffering for local file */ + if (!fio_is_remote(location)) + setvbuf(fp, stdio_buf, _IOFBF, STDIO_BUFSIZE); + + files = parray_new(); + + INIT_FILE_CRC32(true, content_crc); + + while (fgets(buf, lengthof(buf), fp)) + { + char path[MAXPGPATH]; + char linked[MAXPGPATH]; + char compress_alg_string[MAXPGPATH]; + int64 write_size, + mode, /* bit length of mode_t depends on platforms */ + is_datafile, + is_cfs, + external_dir_num, + crc, + segno, + n_blocks, + n_headers, + dbOid, /* used for partial restore */ + hdr_crc, + hdr_off, + hdr_size; + pgFile *file; + + COMP_FILE_CRC32(true, content_crc, buf, strlen(buf)); + + get_control_value(buf, "path", path, NULL, true); + get_control_value(buf, "size", NULL, &write_size, true); + get_control_value(buf, "mode", NULL, &mode, true); + get_control_value(buf, "is_datafile", NULL, &is_datafile, true); + get_control_value(buf, "is_cfs", NULL, &is_cfs, false); + get_control_value(buf, "crc", NULL, &crc, true); + get_control_value(buf, "compress_alg", compress_alg_string, NULL, false); + get_control_value(buf, "external_dir_num", NULL, &external_dir_num, false); + get_control_value(buf, "dbOid", NULL, &dbOid, false); + + file = pgFileInit(path); + file->write_size = (int64) write_size; + file->mode = (mode_t) mode; + file->is_datafile = is_datafile ? true : false; + file->is_cfs = is_cfs ? true : false; + file->crc = (pg_crc32) crc; + file->compress_alg = parse_compress_alg(compress_alg_string); + file->external_dir_num = external_dir_num; + file->dbOid = dbOid ? dbOid : 0; + + /* + * Optional fields + */ + + if (get_control_value(buf, "linked", linked, NULL, false) && linked[0]) + { + file->linked = pgut_strdup(linked); + canonicalize_path(file->linked); + } + + if (get_control_value(buf, "segno", NULL, &segno, false)) + file->segno = (int) segno; + + if (get_control_value(buf, "n_blocks", NULL, &n_blocks, false)) + file->n_blocks = (int) n_blocks; + + if (get_control_value(buf, "n_headers", NULL, &n_headers, false)) + file->n_headers = (int) n_headers; + + if (get_control_value(buf, "hdr_crc", NULL, &hdr_crc, false)) + file->hdr_crc = (pg_crc32) hdr_crc; + + if (get_control_value(buf, "hdr_off", NULL, &hdr_off, false)) + file->hdr_off = hdr_off; + + if (get_control_value(buf, "hdr_size", NULL, &hdr_size, false)) + file->hdr_size = (int) hdr_size; + + parray_append(files, file); + } + + FIN_FILE_CRC32(true, content_crc); + + if (ferror(fp)) + elog(ERROR, "Failed to read from file: \"%s\"", file_txt); + + fio_close_stream(fp); + + if (expected_crc != 0 && + expected_crc != content_crc) + { + elog(WARNING, "Invalid CRC of backup control file '%s': %u. Expected: %u", + file_txt, content_crc, expected_crc); + return NULL; + } + + return files; +} + +/* + * Check if directory empty. 
+ */ +bool +dir_is_empty(const char *path, fio_location location) +{ + DIR *dir; + struct dirent *dir_ent; + + dir = fio_opendir(path, location); + if (dir == NULL) + { + /* Directory in path doesn't exist */ + if (errno == ENOENT) + return true; + elog(ERROR, "cannot open directory \"%s\": %s", path, strerror(errno)); + } + + errno = 0; + while ((dir_ent = fio_readdir(dir))) + { + /* Skip entries point current dir or parent dir */ + if (strcmp(dir_ent->d_name, ".") == 0 || + strcmp(dir_ent->d_name, "..") == 0) + continue; + + /* Directory is not empty */ + fio_closedir(dir); + return false; + } + if (errno) + elog(ERROR, "cannot read directory \"%s\": %s", path, strerror(errno)); + + fio_closedir(dir); + + return true; +} + +/* + * Return true if the path is a existing regular file. + */ +bool +fileExists(const char *path, fio_location location) +{ + struct stat buf; + + if (fio_stat(path, &buf, true, location) == -1 && errno == ENOENT) + return false; + else if (!S_ISREG(buf.st_mode)) + return false; + else + return true; +} + +size_t +pgFileSize(const char *path) +{ + struct stat buf; + + if (stat(path, &buf) == -1) + elog(ERROR, "Cannot stat file \"%s\": %s", path, strerror(errno)); + + return buf.st_size; +} + +/* + * Construct parray containing remapped external directories paths + * from string like /path1:/path2 + */ +parray * +make_external_directory_list(const char *colon_separated_dirs, bool remap) +{ + char *p; + parray *list = parray_new(); + char *tmp = pg_strdup(colon_separated_dirs); + +#ifndef WIN32 +#define EXTERNAL_DIRECTORY_DELIMITER ":" +#else +#define EXTERNAL_DIRECTORY_DELIMITER ";" +#endif + + p = strtok(tmp, EXTERNAL_DIRECTORY_DELIMITER); + while(p!=NULL) + { + char *external_path = pg_strdup(p); + + canonicalize_path(external_path); + if (is_absolute_path(external_path)) + { + if (remap) + { + char *full_path = get_external_remap(external_path); + + if (full_path != external_path) + { + full_path = pg_strdup(full_path); + pfree(external_path); + external_path = full_path; + } + } + parray_append(list, external_path); + } + else + elog(ERROR, "External directory \"%s\" is not an absolute path", + external_path); + + p = strtok(NULL, EXTERNAL_DIRECTORY_DELIMITER); + } + pfree(tmp); + parray_qsort(list, pgCompareString); + return list; +} + +/* Free memory of parray containing strings */ +void +free_dir_list(parray *list) +{ + parray_walk(list, pfree); + parray_free(list); +} + +/* Append to string "path_prefix" int "dir_num" */ +void +makeExternalDirPathByNum(char *ret_path, const char *path_prefix, const int dir_num) +{ + sprintf(ret_path, "%s%d", path_prefix, dir_num); +} + +/* Check if "dir" presents in "dirs_list" */ +bool +backup_contains_external(const char *dir, parray *dirs_list) +{ + void *search_result; + + if (!dirs_list) /* There is no external dirs in backup */ + return false; + search_result = parray_bsearch(dirs_list, dir, pgCompareString); + return search_result != NULL; +} + +/* + * Print database_map + */ +void +print_database_map(FILE *out, parray *database_map) +{ + int i; + + for (i = 0; i < parray_num(database_map); i++) + { + db_map_entry *db_entry = (db_map_entry *) parray_get(database_map, i); + + fio_fprintf(out, "{\"dbOid\":\"%u\", \"datname\":\"%s\"}\n", + db_entry->dbOid, db_entry->datname); + } + +} + +/* + * Create file 'database_map' and add its meta to backup_files_list + * NULL check for database_map must be done by the caller. 
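+ *
+ * Each line written by print_database_map() above is a json-like pair, e.g.
+ *   {"dbOid":"16384", "datname":"postgres"}
+ * (values purely illustrative); read_database_map() below parses these lines
+ * back with get_control_value().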
+ */
+void
+write_database_map(pgBackup *backup, parray *database_map, parray *backup_files_list)
+{
+	FILE		*fp;
+	pgFile		*file;
+	char		database_dir[MAXPGPATH];
+	char		database_map_path[MAXPGPATH];
+
+	join_path_components(database_dir, backup->root_dir, DATABASE_DIR);
+	join_path_components(database_map_path, database_dir, DATABASE_MAP);
+
+	fp = fio_fopen(database_map_path, PG_BINARY_W, FIO_BACKUP_HOST);
+	if (fp == NULL)
+		elog(ERROR, "Cannot open database map \"%s\": %s", database_map_path,
+			 strerror(errno));
+
+	print_database_map(fp, database_map);
+	if (fio_fflush(fp) || fio_fclose(fp))
+	{
+		fio_unlink(database_map_path, FIO_BACKUP_HOST);
+		elog(ERROR, "Cannot write database map \"%s\": %s",
+			 database_map_path, strerror(errno));
+	}
+
+	/* Add metadata to backup_content.control */
+	file = pgFileNew(database_map_path, DATABASE_MAP, true, 0,
+					 FIO_BACKUP_HOST);
+	file->crc = pgFileGetCRC(database_map_path, true, false);
+	file->write_size = file->size;
+	file->uncompressed_size = file->read_size;
+
+	parray_append(backup_files_list, file);
+}
+
+/*
+ * Read database map, return NULL if database_map is empty or missing
+ */
+parray *
+read_database_map(pgBackup *backup)
+{
+	FILE		*fp;
+	parray		*database_map;
+	char		buf[MAXPGPATH];
+	char		path[MAXPGPATH];
+	char		database_map_path[MAXPGPATH];
+
+//	pgBackupGetPath(backup, path, lengthof(path), DATABASE_DIR);
+	join_path_components(path, backup->root_dir, DATABASE_DIR);
+	join_path_components(database_map_path, path, DATABASE_MAP);
+
+	fp = fio_open_stream(database_map_path, FIO_BACKUP_HOST);
+	if (fp == NULL)
+	{
+		/* It is NOT ok for database_map to be missing at this point, so
+		 * we should error here.
+		 * It's a job of the caller to error if database_map is not empty.
+		 */
+		elog(ERROR, "Cannot open \"%s\": %s", database_map_path, strerror(errno));
+	}
+
+	database_map = parray_new();
+
+	while (fgets(buf, lengthof(buf), fp))
+	{
+		char datname[MAXPGPATH];
+		int64 dbOid;
+
+		db_map_entry *db_entry = (db_map_entry *) pgut_malloc(sizeof(db_map_entry));
+
+		get_control_value(buf, "dbOid", NULL, &dbOid, true);
+		get_control_value(buf, "datname", datname, NULL, true);
+
+		db_entry->dbOid = dbOid;
+		db_entry->datname = pgut_strdup(datname);
+
+		parray_append(database_map, db_entry);
+	}
+
+	if (ferror(fp))
+		elog(ERROR, "Failed to read from file: \"%s\"", database_map_path);
+
+	fio_close_stream(fp);
+
+	/* Return NULL if file is empty */
+	if (parray_num(database_map) == 0)
+	{
+		parray_free(database_map);
+		return NULL;
+	}
+
+	return database_map;
+}
diff --git a/src/bin/pg_probackup/fetch.cpp b/src/bin/pg_probackup/fetch.cpp
new file mode 100644
index 000000000..ffc8da72f
--- /dev/null
+++ b/src/bin/pg_probackup/fetch.cpp
@@ -0,0 +1,110 @@
+/*-------------------------------------------------------------------------
+ *
+ * fetch.c
+ *	  Functions for fetching files from PostgreSQL data directory
+ *
+ * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "pg_probackup.h"
+
+#include
+#include
+#include "common/fe_memutils.h"
+
+/*
+ * Read a file into memory. The file to be read is <datadir>/<path>.
+ * The file contents are returned in a malloc'd buffer, and *filesize
+ * is set to the length of the file.
+ *
+ * The returned buffer is always zero-terminated; the size of the returned
+ * buffer is actually *filesize + 1. That's handy when reading a text file.
+ * This function can be used to read binary files as well, you can just + * ignore the zero-terminator in that case. + * + */ +char * +slurpFile(const char *datadir, const char *path, size_t *filesize, bool safe, fio_location location) +{ + int fd; + char *buffer; + struct stat statbuf; + char fullpath[MAXPGPATH]; + int len; + + join_path_components(fullpath, datadir, path); + + if ((fd = fio_open(fullpath, O_RDONLY | PG_BINARY, location)) == -1) + { + if (safe) + return NULL; + else + elog(ERROR, "Could not open file \"%s\" for reading: %s", + fullpath, strerror(errno)); + } + + if (fio_stat(fullpath, &statbuf, true, location) < 0) + { + if (safe) + return NULL; + else + elog(ERROR, "Could not stat file \"%s\": %s", + fullpath, strerror(errno)); + } + + len = statbuf.st_size; + buffer = (char *)pg_malloc(len + 1); + + if (fio_read(fd, buffer, len) != len) + { + if (safe) + return NULL; + else + elog(ERROR, "Could not read file \"%s\": %s\n", + fullpath, strerror(errno)); + } + + fio_close(fd); + + /* Zero-terminate the buffer. */ + buffer[len] = '\0'; + + if (filesize) + *filesize = len; + return buffer; +} + +/* + * Receive a single file as a malloc'd buffer. + */ +char * +fetchFile(PGconn *conn, const char *filename, size_t *filesize) +{ + PGresult *res; + char *result; + const char *params[1]; + int len; + + params[0] = filename; + res = pgut_execute_extended(conn, "SELECT pg_catalog.pg_read_binary_file($1)", + 1, params, false, false); + + /* sanity check the result set */ + if (PQntuples(res) != 1 || PQgetisnull(res, 0, 0)) + elog(ERROR, "unexpected result set while fetching remote file \"%s\"", + filename); + + /* Read result to local variables */ + len = PQgetlength(res, 0, 0); + result = (char *)pg_malloc(len + 1); + memcpy(result, PQgetvalue(res, 0, 0), len); + result[len] = '\0'; + + PQclear(res); + *filesize = len; + + return result; +} diff --git a/src/bin/pg_probackup/file.cpp b/src/bin/pg_probackup/file.cpp new file mode 100644 index 000000000..a221b3a4e --- /dev/null +++ b/src/bin/pg_probackup/file.cpp @@ -0,0 +1,2743 @@ +/*------------------------------------------------------------------------- + * + * file.c + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" +#include +#include +#include + +#include "file.h" +#include "storage/checksum.h" +#include "common/fe_memutils.h" + +#define PRINTF_BUF_SIZE 1024 +#define FILE_PERMISSIONS 0600 + +static __thread unsigned long fio_fdset = 0; +static __thread void* fio_stdin_buffer; +static __thread int fio_stdout = 0; +static __thread int fio_stdin = 0; +static __thread int fio_stderr = 0; + +fio_location MyLocation; + +typedef struct +{ + BlockNumber nblocks; + BlockNumber segmentno; + XLogRecPtr horizonLsn; + uint32 checksumVersion; + int calg; + int clevel; + int bitmapsize; + int path_len; +} fio_send_request; + + +typedef struct +{ + char path[MAXPGPATH]; + bool exclude; + bool follow_symlink; + bool add_root; + bool backup_logs; + bool exclusive_backup; + bool skip_hidden; + int external_dir_num; +} fio_list_dir_request; + +typedef struct +{ + mode_t mode; + size_t size; + time_t mtime; + bool is_datafile; + bool is_database; + Oid tblspcOid; + Oid dbOid; + Oid relOid; + ForkName forkName; + int segno; + int external_dir_num; + int linked_len; +} fio_pgFile; + +typedef struct +{ + BlockNumber n_blocks; + BlockNumber segmentno; + XLogRecPtr stop_lsn; + uint32 checksumVersion; +} fio_checksum_map_request; + +typedef struct +{ + BlockNumber n_blocks; + BlockNumber segmentno; + XLogRecPtr shift_lsn; + uint32 checksumVersion; +} fio_lsn_map_request; + + +/* Convert FIO pseudo handle to index in file descriptor array */ +#define fio_fileno(f) (((size_t)f - 1) | FIO_PIPE_MARKER) + +#if defined(WIN32) +#undef open(a, b, c) +#undef fopen(a, b) +#endif + +/* Use specified file descriptors as stdin/stdout for FIO functions */ +void fio_redirect(int in, int out, int err) +{ + fio_stdin = in; + fio_stdout = out; + fio_stderr = err; +} + +void fio_error(int rc, int size, char const* file, int line) +{ + if (remote_agent) + { + fprintf(stderr, "%s:%d: processed %d bytes instead of %d: %s\n", file, line, rc, size, rc >= 0 ? "end of data" : strerror(errno)); + exit(EXIT_FAILURE); + } + else + { + char buf[PRINTF_BUF_SIZE+1]; +// Assert(false); + int err_size = read(fio_stderr, buf, PRINTF_BUF_SIZE); + if (err_size > 0) + { + buf[err_size] = '\0'; + elog(ERROR, "Agent error: %s", buf); + } + else + elog(ERROR, "Communication error: %s", rc >= 0 ? "end of data" : strerror(errno)); + } +} + +/* Check if file descriptor is local or remote (created by FIO) */ +static bool fio_is_remote_fd(int fd) +{ + return (fd & FIO_PIPE_MARKER) != 0; +} + +#ifdef WIN32 + +#undef stat + +/* + * The stat() function in win32 is not guaranteed to update the st_size + * field when run. So we define our own version that uses the Win32 API + * to update this field. + */ +static int +fio_safestat(const char *path, struct stat *buf) +{ + int r; + WIN32_FILE_ATTRIBUTE_DATA attr; + + r = stat(path, buf); + if (r < 0) + return r; + + if (!GetFileAttributesEx(path, GetFileExInfoStandard, &attr)) + { + errno = ENOENT; + return -1; + } + + /* + * XXX no support for large files here, but we don't do that in general on + * Win32 yet. 
+ */ + buf->st_size = attr.nFileSizeLow; + + return 0; +} + +#define stat(x, y) fio_safestat(x, y) + +/* TODO: use real pread on Linux */ +static ssize_t pread(int fd, void* buf, size_t size, off_t off) +{ + off_t rc = lseek(fd, off, SEEK_SET); + if (rc != off) + return -1; + return read(fd, buf, size); +} +static int remove_file_or_dir(char const* path) +{ + int rc = remove(path); +#ifdef WIN32 + if (rc < 0 && errno == EACCESS) + rc = rmdir(path); +#endif + return rc; +} +#else +#define remove_file_or_dir(path) remove(path) +#endif + +/* Check if specified location is local for current node */ +bool fio_is_remote(fio_location location) +{ + bool is_remote = MyLocation != FIO_LOCAL_HOST + && location != FIO_LOCAL_HOST + && location != MyLocation; + if (is_remote && !fio_stdin && !launch_agent()) + elog(ERROR, "Failed to establish SSH connection: %s", strerror(errno)); + return is_remote; +} + +/* Check if specified location is local for current node */ +bool fio_is_remote_simple(fio_location location) +{ + bool is_remote = MyLocation != FIO_LOCAL_HOST + && location != FIO_LOCAL_HOST + && location != MyLocation; + return is_remote; +} + +/* Try to read specified amount of bytes unless error or EOF are encountered */ +static ssize_t fio_read_all(int fd, void* buf, size_t size) +{ + size_t offs = 0; + while (offs < size) + { + ssize_t rc = read(fd, (char*)buf + offs, size - offs); + if (rc < 0) + { + if (errno == EINTR) + continue; + elog(ERROR, "fio_read_all error, fd %i: %s", fd, strerror(errno)); + return rc; + } + else if (rc == 0) + break; + + offs += rc; + } + return offs; +} + +/* Try to write specified amount of bytes unless error is encountered */ +static ssize_t fio_write_all(int fd, void const* buf, size_t size) +{ + size_t offs = 0; + while (offs < size) + { + ssize_t rc = write(fd, (char*)buf + offs, size - offs); + if (rc <= 0) + { + if (errno == EINTR) + continue; + + elog(ERROR, "fio_write_all error, fd %i: %s", fd, strerror(errno)); + + return rc; + } + offs += rc; + } + return offs; +} + +/* Get version of remote agent */ +int fio_get_agent_version(void) +{ + fio_header hdr; + hdr.cop = FIO_AGENT_VERSION; + hdr.size = 0; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + return hdr.arg; +} + +/* Open input stream. 
Remote file is fetched to the in-memory buffer and then accessed through Linux fmemopen */
+FILE* fio_open_stream(char const* path, fio_location location)
+{
+	FILE* f;
+	if (fio_is_remote(location))
+	{
+		fio_header hdr;
+		hdr.cop = FIO_LOAD;
+		hdr.size = strlen(path) + 1;
+
+		IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
+		IO_CHECK(fio_write_all(fio_stdout, path, hdr.size), hdr.size);
+
+		IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
+		Assert(hdr.cop == FIO_SEND);
+		if (hdr.size > 0)
+		{
+			Assert(fio_stdin_buffer == NULL);
+			fio_stdin_buffer = pgut_malloc(hdr.size);
+			IO_CHECK(fio_read_all(fio_stdin, fio_stdin_buffer, hdr.size), hdr.size);
+#ifdef WIN32
+			f = tmpfile();
+			IO_CHECK(fwrite(fio_stdin_buffer, 1, hdr.size, f), hdr.size);
+			SYS_CHECK(fseek(f, 0, SEEK_SET));
+#else
+			f = fmemopen(fio_stdin_buffer, hdr.size, "r");
+#endif
+		}
+		else
+		{
+			f = NULL;
+		}
+	}
+	else
+	{
+		f = fopen(path, "rt");
+	}
+	return f;
+}
+
+/* Close input stream */
+int fio_close_stream(FILE* f)
+{
+	if (fio_stdin_buffer)
+	{
+		free(fio_stdin_buffer);
+		fio_stdin_buffer = NULL;
+	}
+	return fclose(f);
+}
+
+/* Open directory */
+DIR* fio_opendir(char const* path, fio_location location)
+{
+	DIR* dir;
+	if (fio_is_remote(location))
+	{
+		int i;
+		fio_header hdr;
+		unsigned long mask;
+
+		mask = fio_fdset;
+		for (i = 0; (mask & 1) != 0; i++, mask >>= 1);
+		if (i == FIO_FDMAX) {
+			elog(ERROR, "Descriptor pool for remote files is exhausted, "
+				"probably too many remote directories are opened");
+		}
+		hdr.cop = FIO_OPENDIR;
+		hdr.handle = i;
+		hdr.size = strlen(path) + 1;
+		fio_fdset |= 1 << i;
+
+		IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
+		IO_CHECK(fio_write_all(fio_stdout, path, hdr.size), hdr.size);
+
+		IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
+
+		if (hdr.arg != 0)
+		{
+			errno = hdr.arg;
+			fio_fdset &= ~(1 << hdr.handle);
+			return NULL;
+		}
+		dir = (DIR*)(size_t)(i + 1);
+	}
+	else
+	{
+		dir = opendir(path);
+	}
+	return dir;
+}
+
+/* Get next directory entry */
+struct dirent* fio_readdir(DIR *dir)
+{
+	if (fio_is_remote_file((FILE*)dir))
+	{
+		fio_header hdr;
+		static __thread struct dirent entry;
+
+		hdr.cop = FIO_READDIR;
+		hdr.handle = (size_t)dir - 1;
+		hdr.size = 0;
+		IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
+
+		IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
+		Assert(hdr.cop == FIO_SEND);
+		if (hdr.size) {
+			Assert(hdr.size == sizeof(entry));
+			IO_CHECK(fio_read_all(fio_stdin, &entry, sizeof(entry)), sizeof(entry));
+		}
+
+		return hdr.size ?
&entry : NULL; + } + else + { + return readdir(dir); + } +} + +/* Close directory */ +int fio_closedir(DIR *dir) +{ + if (fio_is_remote_file((FILE*)dir)) + { + fio_header hdr; + hdr.cop = FIO_CLOSEDIR; + hdr.handle = (size_t)dir - 1; + hdr.size = 0; + fio_fdset &= ~(1 << hdr.handle); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + return 0; + } + else + { + return closedir(dir); + } +} + +/* Open file */ +int fio_open(char const* path, int mode, fio_location location) +{ + int fd; + if (fio_is_remote(location)) + { + int i; + fio_header hdr; + unsigned long mask; + + mask = fio_fdset; + for (i = 0; (mask & 1) != 0; i++, mask >>= 1); + if (i == FIO_FDMAX) + elog(ERROR, "Descriptor pool for remote files is exhausted, " + "probably too many remote files are opened"); + + hdr.cop = FIO_OPEN; + hdr.handle = i; + hdr.size = strlen(path) + 1; + hdr.arg = mode; +// hdr.arg = mode & ~O_EXCL; +// elog(INFO, "PATH: %s MODE: %i, %i", path, mode, O_EXCL); +// elog(INFO, "MODE: %i", hdr.arg); + fio_fdset |= 1 << i; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, hdr.size), hdr.size); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.arg != 0) + { + errno = hdr.arg; + fio_fdset &= ~(1 << hdr.handle); + return -1; + } + fd = i | FIO_PIPE_MARKER; + } + else + { + fd = open(path, mode, FILE_PERMISSIONS); + } + return fd; +} + + +/* Close ssh session */ +void +fio_disconnect(void) +{ + if (fio_stdin) + { + fio_header hdr; + hdr.cop = FIO_DISCONNECT; + hdr.size = 0; + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_DISCONNECTED); + SYS_CHECK(close(fio_stdin)); + SYS_CHECK(close(fio_stdout)); + fio_stdin = 0; + fio_stdout = 0; + wait_ssh(); + } +} + +/* Open stdio file */ +FILE* fio_fopen(char const* path, char const* mode, fio_location location) +{ + FILE *f = NULL; + + if (fio_is_remote(location)) + { + int flags = 0; + int fd; + if (strcmp(mode, PG_BINARY_W) == 0) { + flags = O_TRUNC|PG_BINARY|O_RDWR|O_CREAT; + } else if (strcmp(mode, "w") == 0) { + flags = O_TRUNC|O_RDWR|O_CREAT; + } else if (strcmp(mode, PG_BINARY_R) == 0) { + flags = O_RDONLY|PG_BINARY; + } else if (strcmp(mode, "r") == 0) { + flags = O_RDONLY; + } else if (strcmp(mode, PG_BINARY_R "+") == 0) { + /* stdio fopen("rb+") actually doesn't create unexisted file, but probackup frequently + * needs to open existed file or create new one if not exists. + * In stdio it can be done using two fopen calls: fopen("r+") and if failed then fopen("w"). + * But to eliminate extra call which especially critical in case of remote connection + * we change r+ semantic to create file if not exists. + */ + flags = O_RDWR|O_CREAT|PG_BINARY; + } else if (strcmp(mode, "r+") == 0) { /* see comment above */ + flags |= O_RDWR|O_CREAT; + } else if (strcmp(mode, "a") == 0) { + flags |= O_CREAT|O_RDWR|O_APPEND; + } else { + Assert(false); + } + fd = fio_open(path, flags, location); + if (fd >= 0) + f = (FILE*)(size_t)((fd + 1) & ~FIO_PIPE_MARKER); + } + else + { + f = fopen(path, mode); + if (f == NULL && strcmp(mode, PG_BINARY_R "+") == 0) + f = fopen(path, PG_BINARY_W); + } + return f; +} + +/* Format output to file stream */ +int fio_fprintf(FILE* f, char const* format, ...) 
+{ + int rc; + va_list args; + va_start (args, format); + if (fio_is_remote_file(f)) + { + char buf[PRINTF_BUF_SIZE]; +#ifdef HAS_VSNPRINTF + rc = vsnprintf(buf, sizeof(buf), format, args); +#else + rc = vsprintf(buf, format, args); +#endif + if (rc > 0) { + fio_fwrite(f, buf, rc); + } + } + else + { + rc = vfprintf(f, format, args); + } + va_end (args); + return rc; +} + +/* Flush stream data (does nothing for remote file) */ +int fio_fflush(FILE* f) +{ + int rc = 0; + if (!fio_is_remote_file(f)) + rc = fflush(f); + return rc; +} + +/* Sync file to the disk (does nothing for remote file) */ +int fio_flush(int fd) +{ + return fio_is_remote_fd(fd) ? 0 : fsync(fd); +} + +/* Close output stream */ +int fio_fclose(FILE* f) +{ + return fio_is_remote_file(f) + ? fio_close(fio_fileno(f)) + : fclose(f); +} + +/* Close file */ +int fio_close(int fd) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_CLOSE; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + fio_fdset &= ~(1 << hdr.handle); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + /* Note, that file is closed without waiting for confirmation */ + + return 0; + } + else + { + return close(fd); + } +} + +/* Truncate stdio file */ +int fio_ftruncate(FILE* f, off_t size) +{ + return fio_is_remote_file(f) + ? fio_truncate(fio_fileno(f), size) + : ftruncate(fileno(f), size); +} + +/* Truncate file */ +int fio_truncate(int fd, off_t size) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_TRUNCATE; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = size; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + return 0; + } + else + { + return ftruncate(fd, size); + } +} + + +/* + * Read file from specified location. + */ +int fio_pread(FILE* f, void* buf, off_t offs) +{ + if (fio_is_remote_file(f)) + { + int fd = fio_fileno(f); + fio_header hdr; + + hdr.cop = FIO_PREAD; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = offs; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_SEND); + if (hdr.size != 0) + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + /* TODO: error handling */ + + return hdr.arg; + } + else + { + /* For local file, opened by fopen, we should use stdio functions */ + int rc = fseek(f, offs, SEEK_SET); + + if (rc < 0) + return rc; + + return fread(buf, 1, BLCKSZ, f); + } +} + +/* Set position in stdio file */ +int fio_fseek(FILE* f, off_t offs) +{ + return fio_is_remote_file(f) + ? fio_seek(fio_fileno(f), offs) + : fseek(f, offs, SEEK_SET); +} + +/* Set position in file */ +int fio_seek(int fd, off_t offs) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_SEEK; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = offs; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + return 0; + } + else + { + return lseek(fd, offs, SEEK_SET); + } +} + +/* Write data to stdio file */ +size_t fio_fwrite(FILE* f, void const* buf, size_t size) +{ + return fio_is_remote_file(f) + ? 
fio_write(fio_fileno(f), buf, size) + : fwrite(buf, 1, size, f); +} + +/* Write data to the file */ +ssize_t fio_write(int fd, void const* buf, size_t size) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_WRITE; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = size; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, buf, size), size); + + return size; + } + else + { + return write(fd, buf, size); + } +} + +int32 +fio_decompress(void* dst, void const* src, size_t size, int compress_alg) +{ + const char *errormsg = NULL; + int32 uncompressed_size = do_decompress(dst, BLCKSZ, + src, + size, + (CompressAlg)compress_alg, &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + { + elog(WARNING, "An error occured during decompressing block: %s", errormsg); + return -1; + } + + if (uncompressed_size != BLCKSZ) + { + elog(ERROR, "Page uncompressed to %d bytes != BLCKSZ", + uncompressed_size); + return -1; + } + return uncompressed_size; +} + +/* Write data to the file */ +ssize_t fio_fwrite_compressed(FILE* f, void const* buf, size_t size, int compress_alg) +{ + if (fio_is_remote_file(f)) + { + fio_header hdr; + + hdr.cop = FIO_WRITE_COMPRESSED; + hdr.handle = fio_fileno(f) & ~FIO_PIPE_MARKER; + hdr.size = size; + hdr.arg = compress_alg; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, buf, size), size); + + return size; + } + else + { + char uncompressed_buf[BLCKSZ]; + int32 uncompressed_size = fio_decompress(uncompressed_buf, buf, size, compress_alg); + + return (uncompressed_size < 0) + ? uncompressed_size + : fwrite(uncompressed_buf, 1, uncompressed_size, f); + } +} + +static ssize_t +fio_write_compressed_impl(int fd, void const* buf, size_t size, int compress_alg) +{ + char uncompressed_buf[BLCKSZ]; + int32 uncompressed_size = fio_decompress(uncompressed_buf, buf, size, compress_alg); + return fio_write_all(fd, uncompressed_buf, uncompressed_size); +} + +/* Read data from stdio file */ +ssize_t fio_fread(FILE* f, void* buf, size_t size) +{ + size_t rc; + if (fio_is_remote_file(f)) + return fio_read(fio_fileno(f), buf, size); + rc = fread(buf, 1, size, f); + return rc == 0 && !feof(f) ? 
-1 : rc; +} + +/* Read data from file */ +ssize_t fio_read(int fd, void* buf, size_t size) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_READ; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = size; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_SEND); + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + return hdr.size; + } + else + { + return read(fd, buf, size); + } +} + +/* Get information about file */ +int fio_stat(char const* path, struct stat* st, bool follow_symlink, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + + hdr.cop = FIO_STAT; + hdr.handle = -1; + hdr.arg = follow_symlink; + hdr.size = path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_STAT); + IO_CHECK(fio_read_all(fio_stdin, st, sizeof(*st)), sizeof(*st)); + + if (hdr.arg != 0) + { + errno = hdr.arg; + return -1; + } + return 0; + } + else + { + return follow_symlink ? stat(path, st) : lstat(path, st); + } +} + +/* Check presence of the file */ +int fio_access(char const* path, int mode, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_ACCESS; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_ACCESS); + + if (hdr.arg != 0) + { + errno = hdr.arg; + return -1; + } + return 0; + } + else + { + return access(path, mode); + } +} + +/* Create symbolic link */ +int fio_symlink(char const* target, char const* link_path, bool overwrite, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t target_len = strlen(target) + 1; + size_t link_path_len = strlen(link_path) + 1; + hdr.cop = FIO_SYMLINK; + hdr.handle = -1; + hdr.size = target_len + link_path_len; + hdr.arg = overwrite ? 
1 : 0; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, target, target_len), target_len); + IO_CHECK(fio_write_all(fio_stdout, link_path, link_path_len), link_path_len); + + return 0; + } + else + { + if (overwrite) + remove_file_or_dir(link_path); + + return symlink(target, link_path); + } +} + +static void fio_symlink_impl(int out, char *buf, bool overwrite) +{ + char *linked_path = buf; + char *link_path = buf + strlen(buf) + 1; + + if (overwrite) + remove_file_or_dir(link_path); + + if (symlink(linked_path, link_path)) + elog(ERROR, "Could not create symbolic link \"%s\": %s", + link_path, strerror(errno)); +} + +/* Rename file */ +int fio_rename(char const* old_path, char const* new_path, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t old_path_len = strlen(old_path) + 1; + size_t new_path_len = strlen(new_path) + 1; + hdr.cop = FIO_RENAME; + hdr.handle = -1; + hdr.size = old_path_len + new_path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, old_path, old_path_len), old_path_len); + IO_CHECK(fio_write_all(fio_stdout, new_path, new_path_len), new_path_len); + + //TODO: wait for confirmation. + + return 0; + } + else + { + return rename(old_path, new_path); + } +} + +/* Sync file to disk */ +int fio_sync(char const* path, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_SYNC; + hdr.handle = -1; + hdr.size = path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.arg != 0) + { + errno = hdr.arg; + return -1; + } + + return 0; + } + else + { + int fd; + + fd = open(path, O_WRONLY | PG_BINARY, FILE_PERMISSIONS); + if (fd < 0) + return -1; + + if (fsync(fd) < 0) + { + close(fd); + return -1; + } + close(fd); + + return 0; + } +} + +/* Get crc32 of file */ +pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(file_path) + 1; + pg_crc32 crc = 0; + hdr.cop = FIO_GET_CRC32; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = 0; + + if (decompress) + hdr.arg = 1; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, file_path, path_len), path_len); + IO_CHECK(fio_read_all(fio_stdin, &crc, sizeof(crc)), sizeof(crc)); + + return crc; + } + else + { +#ifdef HAVE_LIBZ + if (decompress) + return pgFileGetCRCgz(file_path, true, true); + else +#endif + return pgFileGetCRC(file_path, true, true); + } +} + +/* Remove file */ +int fio_unlink(char const* path, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_UNLINK; + hdr.handle = -1; + hdr.size = path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + // TODO: error is swallowed ? 
+ return 0; + } + else + { + return remove_file_or_dir(path); + } +} + +/* Create directory */ +int fio_mkdir(const char* path, int mode, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_MKDIR; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_MKDIR); + + return hdr.arg; + } + else + { + return dir_create_dir(path, mode); + } +} + +/* Change file mode */ +int fio_chmod(char const* path, int mode, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_CHMOD; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + return 0; + } + else + { + return chmod(path, mode); + } +} + +#ifdef HAVE_LIBZ + +#define ZLIB_BUFFER_SIZE (64*1024) +#define MAX_WBITS 15 /* 32K LZ77 window */ +#define DEF_MEM_LEVEL 8 +#define FIO_GZ_REMOTE_MARKER 1 + +typedef struct fioGZFile +{ + z_stream strm; + int fd; + int errnum; + bool compress; + bool eof; + Bytef buf[ZLIB_BUFFER_SIZE]; +} fioGZFile; + +/* On error returns NULL and errno should be checked */ +gzFile +fio_gzopen(char const* path, char const* mode, int level, fio_location location) +{ + int rc; + if (fio_is_remote(location)) + { + fioGZFile* gz = (fioGZFile*) pgut_malloc(sizeof(fioGZFile)); + memset(&gz->strm, 0, sizeof(gz->strm)); + gz->eof = 0; + gz->errnum = Z_OK; + /* check if file opened for writing */ + if (strcmp(mode, PG_BINARY_W) == 0) /* compress */ + { + gz->strm.next_out = gz->buf; + gz->strm.avail_out = ZLIB_BUFFER_SIZE; + rc = deflateInit2(&gz->strm, + level, + Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY); + if (rc == Z_OK) + { + gz->compress = 1; + gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location); + if (gz->fd < 0) + { + free(gz); + return NULL; + } + } + } + else + { + gz->strm.next_in = gz->buf; + gz->strm.avail_in = ZLIB_BUFFER_SIZE; + rc = inflateInit2(&gz->strm, 15 + 16); + gz->strm.avail_in = 0; + if (rc == Z_OK) + { + gz->compress = 0; + gz->fd = fio_open(path, O_RDONLY | PG_BINARY, location); + if (gz->fd < 0) + { + free(gz); + return NULL; + } + } + } + if (rc != Z_OK) + { + elog(ERROR, "zlib internal error when opening file %s: %s", + path, gz->strm.msg); + } + return (gzFile)((size_t)gz + FIO_GZ_REMOTE_MARKER); + } + else + { + gzFile file; + /* check if file opened for writing */ + if (strcmp(mode, PG_BINARY_W) == 0) + { + int fd = open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS); + if (fd < 0) + return NULL; + file = gzdopen(fd, mode); + } + else + file = gzopen(path, mode); + if (file != NULL && level != Z_DEFAULT_COMPRESSION) + { + if (gzsetparams(file, level, Z_DEFAULT_STRATEGY) != Z_OK) + elog(ERROR, "Cannot set compression level %d: %s", + level, strerror(errno)); + } + return file; + } +} + +int +fio_gzread(gzFile f, void *buf, unsigned size) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + int rc; + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + + if (gz->eof) + { + return 0; + } + + gz->strm.next_out = (Bytef *)buf; + gz->strm.avail_out = size; + + while (1) + { + if (gz->strm.avail_in != 0) 
/* If there is some data in receiver buffer, then decompress it */ + { + rc = inflate(&gz->strm, Z_NO_FLUSH); + if (rc == Z_STREAM_END) + { + gz->eof = 1; + } + else if (rc != Z_OK) + { + gz->errnum = rc; + return -1; + } + if (gz->strm.avail_out != size) + { + return size - gz->strm.avail_out; + } + if (gz->strm.avail_in == 0) + { + gz->strm.next_in = gz->buf; + } + } + else + { + gz->strm.next_in = gz->buf; + } + rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in, + gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in); + if (rc > 0) + { + gz->strm.avail_in += rc; + } + else + { + if (rc == 0) + { + gz->eof = 1; + } + return rc; + } + } + } + else + { + return gzread(f, buf, size); + } +} + +int +fio_gzwrite(gzFile f, void const* buf, unsigned size) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + int rc; + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + + gz->strm.next_in = (Bytef *)buf; + gz->strm.avail_in = size; + + do + { + if (gz->strm.avail_out == ZLIB_BUFFER_SIZE) /* Compress buffer is empty */ + { + gz->strm.next_out = gz->buf; /* Reset pointer to the beginning of buffer */ + + if (gz->strm.avail_in != 0) /* Has something in input buffer */ + { + rc = deflate(&gz->strm, Z_NO_FLUSH); + Assert(rc == Z_OK); + gz->strm.next_out = gz->buf; /* Reset pointer to the beginning of buffer */ + } + else + { + break; + } + } + rc = fio_write(gz->fd, gz->strm.next_out, ZLIB_BUFFER_SIZE - gz->strm.avail_out); + if (rc >= 0) + { + gz->strm.next_out += rc; + gz->strm.avail_out += rc; + } + else + { + return rc; + } + } while (gz->strm.avail_out != ZLIB_BUFFER_SIZE || gz->strm.avail_in != 0); + + return size; + } + else + { + return gzwrite(f, buf, size); + } +} + +int +fio_gzclose(gzFile f) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + int rc; + if (gz->compress) + { + gz->strm.next_out = gz->buf; + rc = deflate(&gz->strm, Z_FINISH); + Assert(rc == Z_STREAM_END && gz->strm.avail_out != ZLIB_BUFFER_SIZE); + deflateEnd(&gz->strm); + rc = fio_write(gz->fd, gz->buf, ZLIB_BUFFER_SIZE - gz->strm.avail_out); + if (rc != ZLIB_BUFFER_SIZE - gz->strm.avail_out) + { + return -1; + } + } + else + { + inflateEnd(&gz->strm); + } + rc = fio_close(gz->fd); + free(gz); + return rc; + } + else + { + return gzclose(f); + } +} + +int fio_gzeof(gzFile f) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + return gz->eof; + } + else + { + return gzeof(f); + } +} + +const char* fio_gzerror(gzFile f, int *errnum) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + if (errnum) + *errnum = gz->errnum; + return gz->strm.msg; + } + else + { + return gzerror(f, errnum); + } +} + +z_off_t fio_gzseek(gzFile f, z_off_t offset, int whence) +{ + Assert(!((size_t)f & FIO_GZ_REMOTE_MARKER)); + return gzseek(f, offset, whence); +} + + +#endif + +/* Send file content + * Note: it should not be used for large files. 
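+ * (the whole file is slurped into a malloc'd buffer and pushed to the peer as a
+ * single FIO_SEND message, so memory usage is proportional to the file size)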
+ */ +static void fio_load_file(int out, char const* path) +{ + int fd = open(path, O_RDONLY); + fio_header hdr; + void* buf = NULL; + + hdr.cop = FIO_SEND; + hdr.size = 0; + + if (fd >= 0) + { + off_t size = lseek(fd, 0, SEEK_END); + buf = pgut_malloc(size); + lseek(fd, 0, SEEK_SET); + IO_CHECK(fio_read_all(fd, buf, size), size); + hdr.size = size; + SYS_CHECK(close(fd)); + } + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (buf) + { + IO_CHECK(fio_write_all(out, buf, hdr.size), hdr.size); + free(buf); + } +} + +/* + * Return number of actually(!) readed blocks, attempts or + * half-readed block are not counted. + * Return values in case of error: + * FILE_MISSING + * OPEN_FAILED + * READ_ERROR + * PAGE_CORRUPTION + * WRITE_FAILED + * + * If none of the above, this function return number of blocks + * readed by remote agent. + * + * In case of DELTA mode horizonLsn must be a valid lsn, + * otherwise it should be set to InvalidXLogRecPtr. + */ +int fio_send_pages(const char *to_fullpath, const char *from_fullpath, pgFile *file, + XLogRecPtr horizonLsn, int calg, int clevel, uint32 checksum_version, + bool use_pagemap, BlockNumber* err_blknum, char **errormsg, + BackupPageHeader2 **headers) +{ + FILE *out = NULL; + char *out_buf = NULL; + struct { + fio_header hdr; + fio_send_request arg; + } req; + BlockNumber n_blocks_read = 0; + BlockNumber blknum = 0; + + /* send message with header + + 8bytes 24bytes var var + -------------------------------------------------------------- + | fio_header | fio_send_request | FILE PATH | BITMAP(if any) | + -------------------------------------------------------------- + */ + + req.hdr.cop = FIO_SEND_PAGES; + + if (use_pagemap) + { + req.hdr.size = sizeof(fio_send_request) + (*file).pagemap.bitmapsize + strlen(from_fullpath) + 1; + req.arg.bitmapsize = (*file).pagemap.bitmapsize; + + /* TODO: add optimization for the case of pagemap + * containing small number of blocks with big serial numbers: + * https://github.com/postgrespro/pg_probackup/blob/remote_page_backup/src/utils/file.c#L1211 + */ + } + else + { + req.hdr.size = sizeof(fio_send_request) + strlen(from_fullpath) + 1; + req.arg.bitmapsize = 0; + } + + req.arg.nblocks = file->size/BLCKSZ; + req.arg.segmentno = file->segno * RELSEG_SIZE; + req.arg.horizonLsn = horizonLsn; + req.arg.checksumVersion = checksum_version; + req.arg.calg = calg; + req.arg.clevel = clevel; + req.arg.path_len = strlen(from_fullpath) + 1; + + file->compress_alg = (CompressAlg)calg; /* TODO: wtf? why here? 
*/ + +//<----- +// datapagemap_iterator_t *iter; +// BlockNumber blkno; +// iter = datapagemap_iterate(pagemap); +// while (datapagemap_next(iter, &blkno)) +// elog(INFO, "block %u", blkno); +// pg_free(iter); +//<----- + + /* send header */ + IO_CHECK(fio_write_all(fio_stdout, &req, sizeof(req)), sizeof(req)); + + /* send file path */ + IO_CHECK(fio_write_all(fio_stdout, from_fullpath, req.arg.path_len), req.arg.path_len); + + /* send pagemap if any */ + if (use_pagemap) + IO_CHECK(fio_write_all(fio_stdout, (*file).pagemap.bitmap, (*file).pagemap.bitmapsize), (*file).pagemap.bitmapsize); + + while (true) + { + fio_header hdr; + char buf[BLCKSZ + sizeof(BackupPageHeader)]; + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (interrupted) + elog(ERROR, "Interrupted during page reading"); + + if (hdr.cop == FIO_ERROR) + { + /* FILE_MISSING, OPEN_FAILED and READ_FAILED */ + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + *errormsg = (char *)pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", buf); + } + + return hdr.arg; + } + else if (hdr.cop == FIO_SEND_FILE_CORRUPTION) + { + *err_blknum = hdr.arg; + + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + *errormsg = (char *)pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", buf); + } + return PAGE_CORRUPTION; + } + else if (hdr.cop == FIO_SEND_FILE_EOF) + { + /* n_blocks_read reported by EOF */ + n_blocks_read = hdr.arg; + + /* receive headers if any */ + if (hdr.size > 0) + { + *headers = (BackupPageHeader2 *)pgut_malloc(hdr.size); + IO_CHECK(fio_read_all(fio_stdin, *headers, hdr.size), hdr.size); + file->n_headers = (hdr.size / sizeof(BackupPageHeader2)) -1; + } + + break; + } + else if (hdr.cop == FIO_PAGE) + { + blknum = hdr.arg; + + Assert(hdr.size <= sizeof(buf)); + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + COMP_FILE_CRC32(true, file->crc, buf, hdr.size); + + /* lazily open backup file */ + if (!out) + out = open_local_file_rw(to_fullpath, &out_buf, STDIO_BUFSIZE); + + if (fio_fwrite(out, buf, hdr.size) != hdr.size) + { + fio_fclose(out); + *err_blknum = blknum; + return WRITE_FAILED; + } + file->write_size += hdr.size; + file->uncompressed_size += BLCKSZ; + } + else + elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop); + } + + if (out) + fclose(out); + pg_free(out_buf); + + return n_blocks_read; +} + +/* TODO: read file using large buffer + * Return codes: + * FIO_ERROR: + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + + * FIO_SEND_FILE_CORRUPTION + * FIO_SEND_FILE_EOF + */ +static void fio_send_pages_impl(int out, char* buf) +{ + FILE *in = NULL; + BlockNumber blknum = 0; + int current_pos = 0; + BlockNumber n_blocks_read = 0; + PageState page_st; + char read_buffer[BLCKSZ+1]; + char in_buf[STDIO_BUFSIZE]; + fio_header hdr; + fio_send_request *req = (fio_send_request*) buf; + char *from_fullpath = (char*) buf + sizeof(fio_send_request); + bool with_pagemap = req->bitmapsize > 0 ? 
true : false; + /* error reporting */ + char *errormsg = NULL; + /* parse buffer */ + datapagemap_t *map = NULL; + datapagemap_iterator_t *iter = NULL; + /* page headers */ + int32 hdr_num = -1; + int32 cur_pos_out = 0; + BackupPageHeader2 *headers = NULL; + + /* open source file */ + in = fopen(from_fullpath, PG_BINARY_R); + if (!in) + { + hdr.cop = FIO_ERROR; + + /* do not send exact wording of ENOENT error message + * because it is a very common error in our case, so + * error code is enough. + */ + if (errno == ENOENT) + { + hdr.arg = FILE_MISSING; + hdr.size = 0; + } + else + { + hdr.arg = OPEN_FAILED; + errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot open file \"%s\": %s", + from_fullpath, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + } + + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (errormsg) + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + + if (with_pagemap) + { + map = (datapagemap_t *)pgut_malloc(sizeof(datapagemap_t)); + map->bitmapsize = req->bitmapsize; + map->bitmap = (unsigned char*) buf + sizeof(fio_send_request) + req->path_len; + + /* get first block */ + iter = datapagemap_iterate(map); + datapagemap_next(iter, &blknum); + + setvbuf(in, NULL, _IONBF, BUFSIZ); + } + else + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + /* TODO: what is this barrier for? */ + read_buffer[BLCKSZ] = 1; /* barrier */ + + while (blknum < req->nblocks) + { + int rc = 0; + size_t read_len = 0; + int retry_attempts = PAGE_READ_ATTEMPTS; + + /* TODO: handle signals on the agent */ + if (interrupted) + elog(ERROR, "Interrupted during remote page reading"); + + /* read page, check header and validate checksumms */ + for (;;) + { + /* + * Optimize stdio buffer usage, fseek only when current position + * does not match the position of requested block. + */ + if (current_pos != blknum*BLCKSZ) + { + current_pos = blknum*BLCKSZ; + if (fseek(in, current_pos, SEEK_SET) != 0) + elog(ERROR, "fseek to position %u is failed on remote file '%s': %s", + current_pos, from_fullpath, strerror(errno)); + } + + read_len = fread(read_buffer, 1, BLCKSZ, in); + + current_pos += read_len; + + /* report error */ + if (ferror(in)) + { + hdr.cop = FIO_ERROR; + hdr.arg = READ_FAILED; + + errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot read block %u of '%s': %s", + blknum, from_fullpath, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + goto cleanup; + } + + if (read_len == BLCKSZ) + { + rc = validate_one_page(read_buffer, req->segmentno + blknum, + InvalidXLogRecPtr, &page_st, + req->checksumVersion); + + /* TODO: optimize copy of zeroed page */ + if (rc == PAGE_IS_ZEROED) + break; + else if (rc == PAGE_IS_VALID) + break; + } + + if (feof(in)) + goto eof; +// else /* readed less than BLKSZ bytes, retry */ + + /* File is either has insane header or invalid checksum, + * retry. If retry attempts are exhausted, report corruption. 
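+ * Up to PAGE_READ_ATTEMPTS re-reads of the same block are performed before it
+ * is reported as corrupted.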
+ */ + if (--retry_attempts == 0) + { + hdr.cop = FIO_SEND_FILE_CORRUPTION; + hdr.arg = blknum; + + /* Construct the error message */ + if (rc == PAGE_HEADER_IS_INVALID) + get_header_errormsg(read_buffer, &errormsg); + else if (rc == PAGE_CHECKSUM_MISMATCH) + get_checksum_errormsg(read_buffer, &errormsg, + req->segmentno + blknum); + + /* if error message is not empty, set payload size to its length */ + hdr.size = errormsg ? strlen(errormsg) + 1 : 0; + + /* send header */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + + /* send error message if any */ + if (errormsg) + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + } + + n_blocks_read++; + + /* + * horizonLsn is not 0 only in case of delta backup. + * As far as unsigned number are always greater or equal than zero, + * there is no sense to add more checks. + */ + if ((req->horizonLsn == InvalidXLogRecPtr) || /* full, page, ptrack */ + (page_st.lsn == InvalidXLogRecPtr) || /* zeroed page */ + (req->horizonLsn > 0 && page_st.lsn > req->horizonLsn)) /* delta */ + { + int compressed_size = 0; + char write_buffer[BLCKSZ*2]; + BackupPageHeader* bph = (BackupPageHeader*)write_buffer; + + /* compress page */ + hdr.cop = FIO_PAGE; + hdr.arg = blknum; + + compressed_size = do_compress(write_buffer + sizeof(BackupPageHeader), + sizeof(write_buffer) - sizeof(BackupPageHeader), + read_buffer, BLCKSZ, (CompressAlg)req->calg, req->clevel, + NULL); + + if (compressed_size <= 0 || compressed_size >= BLCKSZ) + { + /* Do not compress page */ + memcpy(write_buffer + sizeof(BackupPageHeader), read_buffer, BLCKSZ); + compressed_size = BLCKSZ; + } + bph->block = blknum; + bph->compressed_size = compressed_size; + + hdr.size = compressed_size + sizeof(BackupPageHeader); + + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, write_buffer, hdr.size), hdr.size); + + /* set page header for this file */ + hdr_num++; + if (!headers) + headers = (BackupPageHeader2 *) pgut_malloc(sizeof(BackupPageHeader2)); + else + headers = (BackupPageHeader2 *) pgut_realloc(headers, (hdr_num+1) * sizeof(BackupPageHeader2)); + + headers[hdr_num].block = blknum; + headers[hdr_num].lsn = page_st.lsn; + headers[hdr_num].checksum = page_st.checksum; + headers[hdr_num].pos = cur_pos_out; + + cur_pos_out += hdr.size; + } + + /* next block */ + if (with_pagemap) + { + /* exit if pagemap is exhausted */ + if (!datapagemap_next(iter, &blknum)) + break; + } + else + blknum++; + } + +eof: + /* We are done, send eof */ + hdr.cop = FIO_SEND_FILE_EOF; + hdr.arg = n_blocks_read; + hdr.size = 0; + + if (headers) + { + hdr.size = (hdr_num+2) * sizeof(BackupPageHeader2); + + /* add dummy header */ + headers = (BackupPageHeader2 *) pgut_realloc(headers, (hdr_num+2) * sizeof(BackupPageHeader2)); + headers[hdr_num+1].pos = cur_pos_out; + } + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (headers) + IO_CHECK(fio_write_all(out, headers, hdr.size), hdr.size); + +cleanup: + pg_free(map); + pg_free(iter); + pg_free(errormsg); + pg_free(headers); + if (in) + fclose(in); + return; +} +#ifdef HAVE_LIBZ +/* Receive chunks of compressed data, decompress them and write to + * destination file. 
+ * Return codes: + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + * WRITE_FAILED (-4) + * ZLIB_ERROR (-5) + * REMOTE_ERROR (-6) + */ +int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, char **errormsg) +{ + fio_header hdr; + int exit_code = SEND_OK; + char *in_buf = (char *)pgut_malloc(CHUNK_SIZE); /* buffer for compressed data */ + char *out_buf = (char *)pgut_malloc(OUT_BUF_SIZE); /* 1MB buffer for decompressed data */ + size_t path_len = strlen(from_fullpath) + 1; + /* decompressor */ + z_stream *strm = NULL; + + hdr.cop = FIO_SEND_FILE; + hdr.size = path_len; + +// elog(VERBOSE, "Thread [%d]: Attempting to open remote compressed WAL file '%s'", +// thread_num, from_fullpath); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len); + + for (;;) + { + fio_header hdr; + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.cop == FIO_SEND_FILE_EOF) + { + break; + } + else if (hdr.cop == FIO_ERROR) + { + /* handle error, reported by the agent */ + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size); + *errormsg = (char *)pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", in_buf); + } + exit_code = hdr.arg; + goto cleanup; + } + else if (hdr.cop == FIO_PAGE) + { + int rc; + Assert(hdr.size <= CHUNK_SIZE); + IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size); + + /* We have received a chunk of compressed data, lets decompress it */ + if (strm == NULL) + { + /* Initialize decompressor */ + strm = (z_stream *)pgut_malloc(sizeof(z_stream)); + memset(strm, 0, sizeof(z_stream)); + + /* The fields next_in, avail_in initialized before init */ + strm->next_in = (Bytef *)in_buf; + strm->avail_in = hdr.size; + + rc = inflateInit2(strm, 15 + 16); + + if (rc != Z_OK) + { + *errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + snprintf(*errormsg, ERRMSG_MAX_LEN, + "Failed to initialize decompression stream for file '%s': %i: %s", + from_fullpath, rc, strm->msg); + exit_code = ZLIB_ERROR; + goto cleanup; + } + } + else + { + strm->next_in = (Bytef *)in_buf; + strm->avail_in = hdr.size; + } + + strm->next_out = (Bytef *)out_buf; /* output buffer */ + strm->avail_out = OUT_BUF_SIZE; /* free space in output buffer */ + + /* + * From zlib documentation: + * The application must update next_in and avail_in when avail_in + * has dropped to zero. It must update next_out and avail_out when + * avail_out has dropped to zero. 
+ */ + while (strm->avail_in != 0) /* while there is data in input buffer, decompress it */ + { + /* decompress until there is no data to decompress, + * or buffer with uncompressed data is full + */ + rc = inflate(strm, Z_NO_FLUSH); + if (rc == Z_STREAM_END) + /* end of stream */ + break; + else if (rc != Z_OK) + { + /* got an error */ + *errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + snprintf(*errormsg, ERRMSG_MAX_LEN, + "Decompression failed for file '%s': %i: %s", + from_fullpath, rc, strm->msg); + exit_code = ZLIB_ERROR; + goto cleanup; + } + + if (strm->avail_out == 0) + { + /* Output buffer is full, write it out */ + if (fwrite(out_buf, 1, OUT_BUF_SIZE, out) != OUT_BUF_SIZE) + { + exit_code = WRITE_FAILED; + goto cleanup; + } + + strm->next_out = (Bytef *)out_buf; /* output buffer */ + strm->avail_out = OUT_BUF_SIZE; + } + } + + /* write out leftovers if any */ + if (strm->avail_out != OUT_BUF_SIZE) + { + int len = OUT_BUF_SIZE - strm->avail_out; + + if (fwrite(out_buf, 1, len, out) != len) + { + exit_code = WRITE_FAILED; + goto cleanup; + } + } + } + else + elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop); + } + +cleanup: + if (exit_code < OPEN_FAILED) + fio_disconnect(); /* discard possible pending data in pipe */ + + if (strm) + { + inflateEnd(strm); + pg_free(strm); + } + + pg_free(in_buf); + pg_free(out_buf); + return exit_code; +} +#endif + +/* Receive chunks of data and write them to destination file. + * Return codes: + * SEND_OK (0) + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + * WRITE_FAILED (-4) + * + * OPEN_FAILED and READ_FAIL should also set errormsg. + * If pgFile is not NULL then we must calculate crc and read_size for it. + */ +int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, + pgFile *file, char **errormsg) +{ + fio_header hdr; + int exit_code = SEND_OK; + size_t path_len = strlen(from_fullpath) + 1; + char *buf = (char *)pgut_malloc(CHUNK_SIZE); /* buffer */ + + hdr.cop = FIO_SEND_FILE; + hdr.size = path_len; + +// elog(VERBOSE, "Thread [%d]: Attempting to open remote WAL file '%s'", +// thread_num, from_fullpath); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len); + + for (;;) + { + /* receive data */ + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.cop == FIO_SEND_FILE_EOF) + { + break; + } + else if (hdr.cop == FIO_ERROR) + { + /* handle error, reported by the agent */ + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + *errormsg = (char *)pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", buf); + } + exit_code = hdr.arg; + break; + } + else if (hdr.cop == FIO_PAGE) + { + Assert(hdr.size <= CHUNK_SIZE); + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + /* We have received a chunk of data data, lets write it out */ + if (fwrite(buf, 1, hdr.size, out) != hdr.size) + { + exit_code = WRITE_FAILED; + break; + } + + if (file) + { + file->read_size += hdr.size; + COMP_FILE_CRC32(true, file->crc, buf, hdr.size); + } + } + else + { + /* TODO: fio_disconnect may get assert fail when running after this */ + elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop); + } + } + + if (exit_code < OPEN_FAILED) + fio_disconnect(); /* discard possible pending data in pipe */ + + pg_free(buf); + return exit_code; +} + +/* Send file content + * On error we return FIO_ERROR message with 
following codes + * FIO_ERROR: + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + * + * FIO_PAGE + * FIO_SEND_FILE_EOF + * + */ +static void fio_send_file_impl(int out, char const* path) +{ + FILE *fp; + fio_header hdr; + char *buf = (char *)pgut_malloc(CHUNK_SIZE); + size_t read_len = 0; + char *errormsg = NULL; + + /* open source file for read */ + /* TODO: check that file is regular file */ + fp = fopen(path, PG_BINARY_R); + if (!fp) + { + hdr.cop = FIO_ERROR; + + /* do not send exact wording of ENOENT error message + * because it is a very common error in our case, so + * error code is enough. + */ + if (errno == ENOENT) + { + hdr.arg = FILE_MISSING; + hdr.size = 0; + } + else + { + hdr.arg = OPEN_FAILED; + errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot open file '%s': %s", path, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + } + + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (errormsg) + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + + /* disable stdio buffering */ + setvbuf(fp, NULL, _IONBF, BUFSIZ); + + /* copy content */ + for (;;) + { + read_len = fread(buf, 1, CHUNK_SIZE, fp); + + /* report error */ + if (ferror(fp)) + { + hdr.cop = FIO_ERROR; + errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + hdr.arg = READ_FAILED; + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot read from file '%s': %s", path, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + + if (read_len > 0) + { + /* send chunk */ + hdr.cop = FIO_PAGE; + hdr.size = read_len; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, buf, read_len), read_len); + } + + if (feof(fp)) + break; + } + + /* we are done, send eof */ + hdr.cop = FIO_SEND_FILE_EOF; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + +cleanup: + if (fp) + fclose(fp); + pg_free(buf); + pg_free(errormsg); + return; +} + +/* Compile the array of files located on remote machine in directory root */ +void fio_list_dir(parray *files, const char *root, bool exclude, + bool follow_symlink, bool add_root, bool backup_logs, + bool skip_hidden, int external_dir_num) +{ + fio_header hdr; + fio_list_dir_request req; + char *buf = (char *)pgut_malloc(CHUNK_SIZE); + + /* Send to the agent message with parameters for directory listing */ + snprintf(req.path, MAXPGPATH, "%s", root); + req.exclude = exclude; + req.follow_symlink = follow_symlink; + req.add_root = add_root; + req.backup_logs = backup_logs; + req.exclusive_backup = exclusive_backup; + req.skip_hidden = skip_hidden; + req.external_dir_num = external_dir_num; + + hdr.cop = FIO_LIST_DIR; + hdr.size = sizeof(req); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, &req, hdr.size), hdr.size); + + for (;;) + { + /* receive data */ + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.cop == FIO_SEND_FILE_EOF) + { + /* the work is done */ + break; + } + else if (hdr.cop == FIO_SEND_FILE) + { + pgFile *file = NULL; + fio_pgFile fio_file; + + /* receive rel_path */ + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + file = pgFileInit(buf); + + /* receive 
metainformation */
+        IO_CHECK(fio_read_all(fio_stdin, &fio_file, sizeof(fio_file)), sizeof(fio_file));
+
+        file->mode = fio_file.mode;
+        file->size = fio_file.size;
+        file->mtime = fio_file.mtime;
+        file->is_datafile = fio_file.is_datafile;
+        file->is_database = fio_file.is_database;
+        file->tblspcOid = fio_file.tblspcOid;
+        file->dbOid = fio_file.dbOid;
+        file->relOid = fio_file.relOid;
+        file->forkName = fio_file.forkName;
+        file->segno = fio_file.segno;
+        file->external_dir_num = fio_file.external_dir_num;
+
+        if (fio_file.linked_len > 0)
+        {
+            IO_CHECK(fio_read_all(fio_stdin, buf, fio_file.linked_len), fio_file.linked_len);
+
+            file->linked = (char *)pgut_malloc(fio_file.linked_len);
+            snprintf(file->linked, fio_file.linked_len, "%s", buf);
+        }
+
+//      elog(INFO, "Received file: %s, mode: %u, size: %lu, mtime: %lu",
+//          file->rel_path, file->mode, file->size, file->mtime);
+
+        parray_append(files, file);
+    }
+    else
+    {
+        /* TODO: fio_disconnect may get assert fail when running after this */
+        elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop);
+    }
+ }
+
+ pg_free(buf);
+}
+
+
+/*
+ * To get the arrays of files we use the same function dir_list_file()
+ * that is used for local backup.
+ * After that we iterate over the arrays and for every file send at least
+ * two messages to the main process:
+ * 1. rel_path
+ * 2. metainformation (size, mtime, etc)
+ * 3. link path (optional)
+ *
+ * TODO: replace FIO_SEND_FILE and FIO_SEND_FILE_EOF with dedicated messages
+ */
+static void fio_list_dir_impl(int out, char* buf)
+{
+    int i;
+    fio_header hdr;
+    fio_list_dir_request *req = (fio_list_dir_request*) buf;
+    parray *file_files = parray_new();
+
+    /*
+     * Disable console logging of any messages except ERROR messages,
+     * because currently we have no mechanism to notify the main process
+     * about the messages being sent.
+     * TODO: correctly send elog messages from agent to main process.
+ */ + instance_config.logger.log_level_console = ERROR; + exclusive_backup = req->exclusive_backup; + + dir_list_file(file_files, req->path, req->exclude, req->follow_symlink, + req->add_root, req->backup_logs, req->skip_hidden, + req->external_dir_num, FIO_LOCAL_HOST); + + /* send information about files to the main process */ + for (i = 0; i < parray_num(file_files); i++) + { + fio_pgFile fio_file; + pgFile *file = (pgFile *) parray_get(file_files, i); + + fio_file.mode = file->mode; + fio_file.size = file->size; + fio_file.mtime = file->mtime; + fio_file.is_datafile = file->is_datafile; + fio_file.is_database = file->is_database; + fio_file.tblspcOid = file->tblspcOid; + fio_file.dbOid = file->dbOid; + fio_file.relOid = file->relOid; + fio_file.forkName = file->forkName; + fio_file.segno = file->segno; + fio_file.external_dir_num = file->external_dir_num; + + if (file->linked) + fio_file.linked_len = strlen(file->linked) + 1; + else + fio_file.linked_len = 0; + + hdr.cop = FIO_SEND_FILE; + hdr.size = strlen(file->rel_path) + 1; + + /* send rel_path first */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, file->rel_path, hdr.size), hdr.size); + + /* now send file metainformation */ + IO_CHECK(fio_write_all(out, &fio_file, sizeof(fio_file)), sizeof(fio_file)); + + /* If file is a symlink, then send link path */ + if (file->linked) + IO_CHECK(fio_write_all(out, file->linked, fio_file.linked_len), fio_file.linked_len); + + pgFileFree(file); + } + + parray_free(file_files); + hdr.cop = FIO_SEND_FILE_EOF; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); +} + +PageState * +fio_get_checksum_map(const char *fullpath, uint32 checksum_version, int n_blocks, + XLogRecPtr dest_stop_lsn, BlockNumber segmentno, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + fio_checksum_map_request req_hdr; + PageState *checksum_map = NULL; + size_t path_len = strlen(fullpath) + 1; + + req_hdr.n_blocks = n_blocks; + req_hdr.segmentno = segmentno; + req_hdr.stop_lsn = dest_stop_lsn; + req_hdr.checksumVersion = checksum_version; + + hdr.cop = FIO_GET_CHECKSUM_MAP; + hdr.size = sizeof(req_hdr) + path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, &req_hdr, sizeof(req_hdr)), sizeof(req_hdr)); + IO_CHECK(fio_write_all(fio_stdout, fullpath, path_len), path_len); + + /* receive data */ + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.size > 0) + { + checksum_map = (PageState *)pgut_malloc(n_blocks * sizeof(PageState)); + memset(checksum_map, 0, n_blocks * sizeof(PageState)); + IO_CHECK(fio_read_all(fio_stdin, checksum_map, hdr.size * sizeof(PageState)), hdr.size * sizeof(PageState)); + } + + return checksum_map; + } + else + { + + return get_checksum_map(fullpath, checksum_version, + n_blocks, dest_stop_lsn, segmentno); + } +} + +static void fio_get_checksum_map_impl(int out, char *buf) +{ + fio_header hdr; + PageState *checksum_map = NULL; + char *fullpath = (char*) buf + sizeof(fio_checksum_map_request); + fio_checksum_map_request *req = (fio_checksum_map_request*) buf; + + checksum_map = get_checksum_map(fullpath, req->checksumVersion, + req->n_blocks, req->stop_lsn, req->segmentno); + hdr.size = req->n_blocks; + + /* send array of PageState`s to main process */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (hdr.size > 0) + IO_CHECK(fio_write_all(out, checksum_map, hdr.size * sizeof(PageState)), hdr.size * 
sizeof(PageState));
+
+    pg_free(checksum_map);
+}
+
+datapagemap_t *
+fio_get_lsn_map(const char *fullpath, uint32 checksum_version,
+                int n_blocks, XLogRecPtr shift_lsn, BlockNumber segmentno,
+                fio_location location)
+{
+    datapagemap_t* lsn_map = NULL;
+
+    if (fio_is_remote(location))
+    {
+        fio_header hdr;
+        fio_lsn_map_request req_hdr;
+        size_t path_len = strlen(fullpath) + 1;
+
+        req_hdr.n_blocks = n_blocks;
+        req_hdr.segmentno = segmentno;
+        req_hdr.shift_lsn = shift_lsn;
+        req_hdr.checksumVersion = checksum_version;
+
+        hdr.cop = FIO_GET_LSN_MAP;
+        hdr.size = sizeof(req_hdr) + path_len;
+
+        IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
+        IO_CHECK(fio_write_all(fio_stdout, &req_hdr, sizeof(req_hdr)), sizeof(req_hdr));
+        IO_CHECK(fio_write_all(fio_stdout, fullpath, path_len), path_len);
+
+        /* receive data */
+        IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
+
+        if (hdr.size > 0)
+        {
+            lsn_map = (datapagemap_t *)pgut_malloc(sizeof(datapagemap_t));
+            memset(lsn_map, 0, sizeof(datapagemap_t));
+
+            lsn_map->bitmap = (unsigned char *)pgut_malloc(hdr.size);
+            lsn_map->bitmapsize = hdr.size;
+
+            IO_CHECK(fio_read_all(fio_stdin, lsn_map->bitmap, hdr.size), hdr.size);
+        }
+    }
+    else
+    {
+        lsn_map = get_lsn_map(fullpath, checksum_version, n_blocks,
+                              shift_lsn, segmentno);
+    }
+
+    return lsn_map;
+}
+
+static void fio_get_lsn_map_impl(int out, char *buf)
+{
+    fio_header hdr;
+    datapagemap_t *lsn_map = NULL;
+    char *fullpath = (char*) buf + sizeof(fio_lsn_map_request);
+    fio_lsn_map_request *req = (fio_lsn_map_request*) buf;
+
+    lsn_map = get_lsn_map(fullpath, req->checksumVersion, req->n_blocks,
+                          req->shift_lsn, req->segmentno);
+    if (lsn_map)
+        hdr.size = lsn_map->bitmapsize;
+    else
+        hdr.size = 0;
+
+    /* send bitmap to main process */
+    IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+    if (hdr.size > 0)
+        IO_CHECK(fio_write_all(out, lsn_map->bitmap, hdr.size), hdr.size);
+
+    if (lsn_map)
+    {
+        pg_free(lsn_map->bitmap);
+        pg_free(lsn_map);
+    }
+}
+
+/*
+ * Go to the remote host, read the postmaster pid from the postmaster.pid file
+ * and check that the process is running; if it is running, return its pid.
+ */
+pid_t fio_check_postmaster(const char *pgdata, fio_location location)
+{
+    if (fio_is_remote(location))
+    {
+        fio_header hdr;
+
+        hdr.cop = FIO_CHECK_POSTMASTER;
+        hdr.size = strlen(pgdata) + 1;
+
+        IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
+        IO_CHECK(fio_write_all(fio_stdout, pgdata, hdr.size), hdr.size);
+
+        /* receive result */
+        IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
+        return hdr.arg;
+    }
+    else
+        return check_postmaster(pgdata);
+}
+
+static void fio_check_postmaster_impl(int out, char *buf)
+{
+    fio_header hdr;
+    pid_t postmaster_pid;
+    char *pgdata = (char*) buf;
+
+    postmaster_pid = check_postmaster(pgdata);
+
+    /* send the postmaster pid to the main process */
+    hdr.arg = postmaster_pid;
+    IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+}
+
+/*
+ * Delete the file pointed to by the pgFile.
+ * If the pgFile points to a directory, the directory must be empty.
+ */ +void +fio_delete(mode_t mode, const char *fullpath, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + + hdr.cop = FIO_DELETE; + hdr.size = strlen(fullpath) + 1; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, fullpath, hdr.size), hdr.size); + + } + else + pgFileDelete(mode, fullpath); +} + +static void +fio_delete_impl(mode_t mode, char *buf) +{ + char *fullpath = (char*) buf; + + pgFileDelete(mode, fullpath); +} + +/* Execute commands at remote host */ +void fio_communicate(int in, int out) +{ + /* + * Map of file and directory descriptors. + * The same mapping is used in agent and master process, so we + * can use the same index at both sides. + */ + int fd[FIO_FDMAX]; + DIR* dir[FIO_FDMAX]; + struct dirent* entry; + size_t buf_size = 128*1024; + char* buf = (char*)pgut_malloc(buf_size); + fio_header hdr; + struct stat st; + int rc; + int tmp_fd; + pg_crc32 crc; + +#ifdef WIN32 + SYS_CHECK(setmode(in, _O_BINARY)); + SYS_CHECK(setmode(out, _O_BINARY)); +#endif + + /* Main loop until end of processing all master commands */ + while ((rc = fio_read_all(in, &hdr, sizeof hdr)) == sizeof(hdr)) { + if (hdr.size != 0) { + if (hdr.size > buf_size) { + /* Extend buffer on demand */ + buf_size = hdr.size; + buf = (char*)realloc(buf, buf_size); + } + IO_CHECK(fio_read_all(in, buf, hdr.size), hdr.size); + } + switch (hdr.cop) { + case FIO_LOAD: /* Send file content */ + fio_load_file(out, buf); + break; + case FIO_OPENDIR: /* Open directory for traversal */ + dir[hdr.handle] = opendir(buf); + hdr.arg = dir[hdr.handle] == NULL ? errno : 0; + hdr.size = 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_READDIR: /* Get next directory entry */ + hdr.cop = FIO_SEND; + entry = readdir(dir[hdr.handle]); + if (entry != NULL) + { + hdr.size = sizeof(*entry); + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, entry, hdr.size), hdr.size); + } + else + { + hdr.size = 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + } + break; + case FIO_CLOSEDIR: /* Finish directory traversal */ + SYS_CHECK(closedir(dir[hdr.handle])); + break; + case FIO_OPEN: /* Open file */ + fd[hdr.handle] = open(buf, hdr.arg, FILE_PERMISSIONS); + hdr.arg = fd[hdr.handle] < 0 ? errno : 0; + hdr.size = 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_CLOSE: /* Close file */ + SYS_CHECK(close(fd[hdr.handle])); + break; + case FIO_WRITE: /* Write to the current position in file */ + IO_CHECK(fio_write_all(fd[hdr.handle], buf, hdr.size), hdr.size); + break; + case FIO_WRITE_COMPRESSED: /* Write to the current position in file */ + IO_CHECK(fio_write_compressed_impl(fd[hdr.handle], buf, hdr.size, hdr.arg), BLCKSZ); + break; + case FIO_READ: /* Read from the current position in file */ + if ((size_t)hdr.arg > buf_size) { + buf_size = hdr.arg; + buf = (char*)realloc(buf, buf_size); + } + rc = read(fd[hdr.handle], buf, hdr.arg); + hdr.cop = FIO_SEND; + hdr.size = rc > 0 ? rc : 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (hdr.size != 0) + IO_CHECK(fio_write_all(out, buf, hdr.size), hdr.size); + break; + case FIO_PREAD: /* Read from specified position in file, ignoring pages beyond horizon of delta backup */ + rc = pread(fd[hdr.handle], buf, BLCKSZ, hdr.arg); + hdr.cop = FIO_SEND; + hdr.arg = rc; + hdr.size = rc >= 0 ? 
rc : 0;
+            IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+            if (hdr.size != 0)
+                IO_CHECK(fio_write_all(out, buf, hdr.size), hdr.size);
+            break;
+        case FIO_AGENT_VERSION:
+            hdr.arg = AGENT_PROTOCOL_VERSION;
+            IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+            break;
+        case FIO_STAT: /* Get information about file with specified path */
+            hdr.size = sizeof(st);
+            rc = hdr.arg ? stat(buf, &st) : lstat(buf, &st);
+            hdr.arg = rc < 0 ? errno : 0;
+            IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+            IO_CHECK(fio_write_all(out, &st, sizeof(st)), sizeof(st));
+            break;
+        case FIO_ACCESS: /* Check presence of file with specified name */
+            hdr.size = 0;
+            hdr.arg = access(buf, hdr.arg) < 0 ? errno : 0;
+            IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+            break;
+        case FIO_RENAME: /* Rename file */
+            SYS_CHECK(rename(buf, buf + strlen(buf) + 1));
+            break;
+        case FIO_SYMLINK: /* Create symbolic link */
+            fio_symlink_impl(out, buf, hdr.arg > 0 ? true : false);
+            break;
+        case FIO_UNLINK: /* Remove file or directory (TODO: Win32) */
+            SYS_CHECK(remove_file_or_dir(buf));
+            break;
+        case FIO_MKDIR: /* Create directory */
+            hdr.size = 0;
+            hdr.arg = dir_create_dir(buf, hdr.arg);
+            IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+            break;
+        case FIO_CHMOD: /* Change file mode */
+            SYS_CHECK(chmod(buf, hdr.arg));
+            break;
+        case FIO_SEEK: /* Set current position in file */
+            SYS_CHECK(lseek(fd[hdr.handle], hdr.arg, SEEK_SET));
+            break;
+        case FIO_TRUNCATE: /* Truncate file */
+            SYS_CHECK(ftruncate(fd[hdr.handle], hdr.arg));
+            break;
+        case FIO_LIST_DIR:
+            fio_list_dir_impl(out, buf);
+            break;
+        case FIO_SEND_PAGES:
+            // buf contains the fio_send_request header and the bitmap.
+            fio_send_pages_impl(out, buf);
+            break;
+        case FIO_SEND_FILE:
+            fio_send_file_impl(out, buf);
+            break;
+        case FIO_SYNC:
+            /* open file and fsync it */
+            tmp_fd = open(buf, O_WRONLY | PG_BINARY, FILE_PERMISSIONS);
+            if (tmp_fd < 0)
+                hdr.arg = errno;
+            else
+            {
+                if (fsync(tmp_fd) == 0)
+                    hdr.arg = 0;
+                else
+                    hdr.arg = errno;
+                close(tmp_fd);
+            }
+
+            IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+            break;
+        case FIO_GET_CRC32:
+            /* calculate crc32 for a file */
+#ifdef HAVE_LIBZ
+            if (hdr.arg == 1)
+                crc = pgFileGetCRCgz(buf, true, true);
+            else
+#endif
+                crc = pgFileGetCRC(buf, true, true);
+            IO_CHECK(fio_write_all(out, &crc, sizeof(crc)), sizeof(crc));
+            break;
+        case FIO_GET_CHECKSUM_MAP:
+            /* build a checksum map for a file */
+            fio_get_checksum_map_impl(out, buf);
+            break;
+        case FIO_GET_LSN_MAP:
+            /* build an LSN map for a file */
+            fio_get_lsn_map_impl(out, buf);
+            break;
+        case FIO_CHECK_POSTMASTER:
+            /* check whether the postmaster is running */
+            fio_check_postmaster_impl(out, buf);
+            break;
+        case FIO_DELETE:
+            /* delete file */
+            fio_delete_impl(hdr.arg, buf);
+            break;
+        case FIO_DISCONNECT:
+            hdr.cop = FIO_DISCONNECTED;
+            IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+            break;
+        default:
+            Assert(false);
+        }
+    }
+    free(buf);
+    if (rc != 0) { /* rc == 0 means EOF (normal pipe close); anything else is a read error */
+        perror("read");
+        exit(EXIT_FAILURE);
+    }
+}
+
diff --git a/src/bin/pg_probackup/file.h b/src/bin/pg_probackup/file.h
new file mode 100644
index 000000000..b16add768
--- /dev/null
+++ b/src/bin/pg_probackup/file.h
@@ -0,0 +1,151 @@
+/*-------------------------------------------------------------------------
+ *
+ * file.h
+ *
+ * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef __FILE__H__
+#define __FILE__H__
+
+#include "storage/bufpage.h"
+#include <limits.h>
+#include <sys/stat.h>
+#include <dirent.h>
+
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+#endif
+
+typedef enum
+{
+    /* message for compatibility check */
+    FIO_AGENT_VERSION, /* never move this */
+    FIO_OPEN,
+    FIO_CLOSE,
+    FIO_WRITE,
+    FIO_SYNC,
+    FIO_RENAME,
+    FIO_SYMLINK,
+    FIO_UNLINK,
+    FIO_MKDIR,
+    FIO_CHMOD,
+    FIO_SEEK,
+    FIO_TRUNCATE,
+    FIO_DELETE,
+    FIO_PREAD,
+    FIO_READ,
+    FIO_LOAD,
+    FIO_STAT,
+    FIO_SEND,
+    FIO_ACCESS,
+    FIO_OPENDIR,
+    FIO_READDIR,
+    FIO_CLOSEDIR,
+    FIO_PAGE,
+    FIO_WRITE_COMPRESSED,
+    FIO_GET_CRC32,
+    /* used for incremental restore */
+    FIO_GET_CHECKSUM_MAP,
+    FIO_GET_LSN_MAP,
+    /* used in fio_send_pages */
+    FIO_SEND_PAGES,
+    FIO_ERROR,
+    FIO_SEND_FILE,
+//  FIO_CHUNK,
+    FIO_SEND_FILE_EOF,
+    FIO_SEND_FILE_CORRUPTION,
+    FIO_SEND_FILE_HEADERS,
+    /* messages for closing connection */
+    FIO_DISCONNECT,
+    FIO_DISCONNECTED,
+    FIO_LIST_DIR,
+    FIO_CHECK_POSTMASTER
+} fio_operations;
+
+typedef enum
+{
+    FIO_LOCAL_HOST,  /* data is located at local host */
+    FIO_DB_HOST,     /* data is located at Postgres server host */
+    FIO_BACKUP_HOST, /* data is located at backup host */
+    FIO_REMOTE_HOST  /* data is located at remote host */
+} fio_location;
+
+#define FIO_FDMAX 64
+#define FIO_PIPE_MARKER 0x40000000
+
+#define SYS_CHECK(cmd) do if ((cmd) < 0) { fprintf(stderr, "%s:%d: (%s) %s\n", __FILE__, __LINE__, #cmd, strerror(errno)); exit(EXIT_FAILURE); } while (0)
+#define IO_CHECK(cmd, size) do { int _rc = (cmd); if (_rc != (size)) fio_error(_rc, size, __FILE__, __LINE__); } while (0)
+
+typedef struct
+{
+//  fio_operations cop;
+//  16
+    unsigned cop : 32;
+    unsigned handle : 32;
+    unsigned size : 32;
+    unsigned arg;
+} fio_header;
+
+extern fio_location MyLocation;
+
+/* Check if FILE handle is local or remote (created by FIO) */
+#define fio_is_remote_file(file) ((size_t)(file) <= FIO_FDMAX)
+
+extern void fio_redirect(int in, int out, int err);
+extern void fio_communicate(int in, int out);
+
+extern int fio_get_agent_version(void);
+extern FILE* fio_fopen(char const* name, char const* mode, fio_location location);
+extern size_t fio_fwrite(FILE* f, void const* buf, size_t size);
+extern ssize_t fio_fwrite_compressed(FILE* f, void const* buf, size_t size, int compress_alg);
+extern ssize_t fio_fread(FILE* f, void* buf, size_t size);
+extern int fio_pread(FILE* f, void* buf, off_t offs);
+extern int fio_fprintf(FILE* f, char const* arg, ...);// pg_attribute_printf(2, 3);
+extern int fio_fflush(FILE* f);
+extern int fio_fseek(FILE* f, off_t offs);
+extern int fio_ftruncate(FILE* f, off_t size);
+extern int fio_fclose(FILE* f);
+extern int fio_ffstat(FILE* f, struct stat* st);
+extern void fio_error(int rc, int size, char const* file, int line);
+
+extern int fio_open(char const* name, int mode, fio_location location);
+extern ssize_t fio_write(int fd, void const* buf, size_t size);
+extern ssize_t fio_read(int fd, void* buf, size_t size);
+extern int fio_flush(int fd);
+extern int fio_seek(int fd, off_t offs);
+extern int fio_fstat(int fd, struct stat* st);
+extern int fio_truncate(int fd, off_t size);
+extern int fio_close(int fd);
+extern void fio_disconnect(void);
+extern int fio_sync(char const* path, fio_location location);
+extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress);
+
+extern int fio_rename(char const* old_path, char
const* new_path, fio_location location); +extern int fio_symlink(char const* target, char const* link_path, bool overwrite, fio_location location); +extern int fio_unlink(char const* path, fio_location location); +extern int fio_mkdir(char const* path, int mode, fio_location location); +extern int fio_chmod(char const* path, int mode, fio_location location); +extern int fio_access(char const* path, int mode, fio_location location); +extern int fio_stat(char const* path, struct stat* st, bool follow_symlinks, fio_location location); +extern DIR* fio_opendir(char const* path, fio_location location); +extern struct dirent * fio_readdir(DIR *dirp); +extern int fio_closedir(DIR *dirp); +extern FILE* fio_open_stream(char const* name, fio_location location); +extern int fio_close_stream(FILE* f); + +#ifdef HAVE_LIBZ +extern gzFile fio_gzopen(char const* path, char const* mode, int level, fio_location location); +extern int fio_gzclose(gzFile file); +extern int fio_gzread(gzFile f, void *buf, unsigned size); +extern int fio_gzwrite(gzFile f, void const* buf, unsigned size); +extern int fio_gzeof(gzFile f); +extern z_off_t fio_gzseek(gzFile f, z_off_t offset, int whence); +extern const char* fio_gzerror(gzFile file, int *errnum); +#endif + +#endif + diff --git a/src/bin/pg_probackup/help.cpp b/src/bin/pg_probackup/help.cpp new file mode 100644 index 000000000..b5559cccc --- /dev/null +++ b/src/bin/pg_probackup/help.cpp @@ -0,0 +1,754 @@ +/*------------------------------------------------------------------------- + * + * help.c + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +static void help_init(void); +static void help_backup(void); +static void help_restore(void); +static void help_validate(void); +static void help_show(void); +static void help_delete(void); +static void help_merge(void); +static void help_set_backup(void); +static void help_set_config(void); +static void help_show_config(void); +static void help_add_instance(void); +static void help_del_instance(void); + +void +help_command(char *command) +{ + if (strcmp(command, "init") == 0) + help_init(); + else if (strcmp(command, "backup") == 0) + help_backup(); + else if (strcmp(command, "restore") == 0) + help_restore(); + else if (strcmp(command, "validate") == 0) + help_validate(); + else if (strcmp(command, "show") == 0) + help_show(); + else if (strcmp(command, "delete") == 0) + help_delete(); + else if (strcmp(command, "merge") == 0) + help_merge(); + else if (strcmp(command, "set-backup") == 0) + help_set_backup(); + else if (strcmp(command, "set-config") == 0) + help_set_config(); + else if (strcmp(command, "show-config") == 0) + help_show_config(); + else if (strcmp(command, "add-instance") == 0) + help_add_instance(); + else if (strcmp(command, "del-instance") == 0) + help_del_instance(); + else if (strcmp(command, "--help") == 0 + || strcmp(command, "help") == 0 + || strcmp(command, "-?") == 0 + || strcmp(command, "--version") == 0 + || strcmp(command, "version") == 0 + || strcmp(command, "-V") == 0) + printf(_("No help page for \"%s\" command. Try pg_probackup help\n"), command); + else + printf(_("Unknown command \"%s\". 
Try pg_probackup help\n"), command); + exit(0); +} + +void +help_pg_probackup(void) +{ + printf(_("\n%s - utility to manage backup/recovery of PostgreSQL database.\n\n"), PROGRAM_NAME); + + printf(_(" %s help [COMMAND]\n"), PROGRAM_NAME); + + printf(_("\n %s version\n"), PROGRAM_NAME); + + printf(_("\n %s init -B backup-path\n"), PROGRAM_NAME); + + printf(_("\n %s set-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path]\n")); + printf(_(" [--external-dirs=external-directories-paths]\n")); + printf(_(" [--log-level-console=log-level-console]\n")); + printf(_(" [--log-level-file=log-level-file]\n")); + printf(_(" [--log-filename=log-filename]\n")); + printf(_(" [--error-log-filename=error-log-filename]\n")); + printf(_(" [--log-directory=log-directory]\n")); + printf(_(" [--log-rotation-size=log-rotation-size]\n")); + printf(_(" [--log-rotation-age=log-rotation-age]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); + printf(_(" [--compress-algorithm=compress-algorithm]\n")); + printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--archive-timeout=timeout]\n")); + printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--restore-command=cmdline] [--archive-host=destination]\n")); + printf(_(" [--archive-port=port] [--archive-user=username]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s set-backup -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" -i backup-id [--ttl=interval] [--expire-time=timestamp]\n")); + printf(_(" [--note=text]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s show-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [--format=format]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s backup -B backup-path -b backup-mode --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-C]\n")); + printf(_(" [--stream [-S slot-name]] [--temp-slot]\n")); + printf(_(" [--backup-pg-log] [-j num-threads] [--progress]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [--external-dirs=external-directories-paths]\n")); + printf(_(" [--no-sync]\n")); + printf(_(" [--log-level-console=log-level-console]\n")); + printf(_(" [--log-level-file=log-level-file]\n")); + printf(_(" [--log-filename=log-filename]\n")); + printf(_(" [--error-log-filename=error-log-filename]\n")); + printf(_(" [--log-directory=log-directory]\n")); + printf(_(" [--log-rotation-size=log-rotation-size]\n")); + printf(_(" [--log-rotation-age=log-rotation-age]\n")); + printf(_(" [--delete-expired] [--delete-wal] [--merge-expired]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); + printf(_(" [--compress]\n")); + printf(_(" [--compress-algorithm=compress-algorithm]\n")); + printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--archive-timeout=archive-timeout]\n")); + printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); + printf(_(" [-w --no-password] [-W --password]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + 
printf(_(" [--ttl=interval] [--expire-time=timestamp] [--note=text]\n")); + printf(_(" [--help]\n")); + + + printf(_("\n %s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-i backup-id] [-j num-threads]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); + printf(_(" [--recovery-target=immediate|latest]\n")); + printf(_(" [--recovery-target-name=target-name]\n")); + printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); + printf(_(" [--restore-command=cmdline] [--force]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [-T OLDDIR=NEWDIR] [--progress]\n")); + printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n")); + printf(_(" [--skip-external-dirs] [--no-sync]\n")); + printf(_(" [-I | --incremental-mode=none|checksum|lsn]\n")); + printf(_(" [--db-include | --db-exclude]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--archive-host=hostname]\n")); + printf(_(" [--archive-port=port] [--archive-user=username]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s validate -B backup-path [--instance=instance_name]\n"), PROGRAM_NAME); + printf(_(" [-i backup-id] [--progress] [-j num-threads]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); + printf(_(" [--recovery-target-name=target-name]\n")); + printf(_(" [--skip-block-validation]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s show -B backup-path\n"), PROGRAM_NAME); + printf(_(" [--instance=instance_name [-i backup-id]]\n")); + printf(_(" [--format=format] [--archive]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s delete -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-j num-threads] [--progress]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); + printf(_(" [-i backup-id | --delete-expired | --merge-expired | --status=backup_status]\n")); + printf(_(" [--delete-wal]\n")); + printf(_(" [--dry-run]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s merge -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" -i backup-id [--progress] [-j num-threads]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s add-instance -B backup-path -D pgdata-path\n"), PROGRAM_NAME); + printf(_(" --instance=instance_name\n")); + printf(_(" [--external-dirs=external-directories-paths]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s del-instance -B backup-path\n"), PROGRAM_NAME); + printf(_(" --instance=instance_name\n")); + printf(_(" [--help]\n")); + + if ((PROGRAM_URL || PROGRAM_EMAIL)) + { + printf("\n"); + if (PROGRAM_URL) + printf("Read the website for details. 
<%s>\n", PROGRAM_URL); + if (PROGRAM_EMAIL) + printf("Report bugs to <%s>.\n", PROGRAM_EMAIL); + } + exit(0); +} + +static void +help_init(void) +{ + printf(_("\n%s init -B backup-path\n\n"), PROGRAM_NAME); + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n\n")); +} + +static void +help_backup(void) +{ + printf(_("\n%s backup -B backup-path -b backup-mode --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-C]\n")); + printf(_(" [--stream [-S slot-name] [--temp-slot]\n")); + printf(_(" [--backup-pg-log] [-j num-threads] [--progress]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [-E external-directories-paths]\n")); + printf(_(" [--no-sync]\n")); + printf(_(" [--log-level-console=log-level-console]\n")); + printf(_(" [--log-level-file=log-level-file]\n")); + printf(_(" [--log-filename=log-filename]\n")); + printf(_(" [--error-log-filename=error-log-filename]\n")); + printf(_(" [--log-directory=log-directory]\n")); + printf(_(" [--log-rotation-size=log-rotation-size]\n")); + printf(_(" [--log-rotation-age=log-rotation-age]\n")); + printf(_(" [--delete-expired] [--delete-wal] [--merge-expired]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); + printf(_(" [--compress]\n")); + printf(_(" [--compress-algorithm=compress-algorithm]\n")); + printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--archive-timeout=archive-timeout]\n")); + printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); + printf(_(" [-w --no-password] [-W --password]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--ttl=interval] [--expire-time=timestamp] [--note=text]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" -b, --backup-mode=backup-mode backup mode=FULL|PAGE|DELTA|PTRACK\n")); + printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); + printf(_(" -C, --smooth-checkpoint do smooth checkpoint before backup\n")); + printf(_(" --stream stream the transaction log and include it in the backup\n")); + printf(_(" -S, --slot=SLOTNAME replication slot to use\n")); + printf(_(" --temp-slot use temporary replication slot\n")); + printf(_(" --backup-pg-log backup of '%s' directory\n"), PG_LOG_DIR); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --progress show progress\n")); + printf(_(" --no-validate disable validation after backup\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + printf(_(" -E --external-dirs=external-directories-paths\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); + printf(_(" --no-sync do not sync backed up files to disk\n")); + printf(_(" --note=text add note to backup\n")); + printf(_(" (example: --note='backup before app update to v13.1')\n")); + + printf(_("\n Logging options:\n")); + printf(_(" --log-level-console=log-level-console\n")); + printf(_(" level for console logging (default: info)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-level-file=log-level-file\n")); + printf(_(" level for 
file logging (default: off)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-filename=log-filename\n")); + printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); + printf(_(" --error-log-filename=error-log-filename\n")); + printf(_(" filename for error logging (default: none)\n")); + printf(_(" --log-directory=log-directory\n")); + printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); + printf(_(" --log-rotation-size=log-rotation-size\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); + printf(_(" --log-rotation-age=log-rotation-age\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); + + printf(_("\n Retention options:\n")); + printf(_(" --delete-expired delete backups expired according to current\n")); + printf(_(" retention policy after successful backup completion\n")); + printf(_(" --merge-expired merge backups expired according to current\n")); + printf(_(" retention policy after successful backup completion\n")); + printf(_(" --delete-wal remove redundant files in WAL archive\n")); + printf(_(" --retention-redundancy=retention-redundancy\n")); + printf(_(" number of full backups to keep; 0 disables; (default: 0)\n")); + printf(_(" --retention-window=retention-window\n")); + printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); + printf(_(" --wal-depth=wal-depth number of latest valid backups per timeline that must\n")); + printf(_(" retain the ability to perform PITR; 0 disables; (default: 0)\n")); + printf(_(" --dry-run perform a trial run without any changes\n")); + + printf(_("\n Pinning options:\n")); + printf(_(" --ttl=interval pin backup for specified amount of time; 0 unpin\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: s)\n")); + printf(_(" (example: --ttl=20d)\n")); + printf(_(" --expire-time=time pin backup until specified time stamp\n")); + printf(_(" (example: --expire-time='2024-01-01 00:00:00+03')\n")); + + printf(_("\n Compression options:\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); + printf(_(" --compress-algorithm=compress-algorithm\n")); + printf(_(" available options: 'zlib', 'pglz', 'none' (default: none)\n")); + printf(_(" --compress-level=compress-level\n")); + printf(_(" level of compression [0-9] (default: 1)\n")); + + printf(_("\n Archive options:\n")); + printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); + + printf(_("\n Connection options:\n")); + printf(_(" -U, --pguser=USERNAME user name to connect as (default: current local user)\n")); + printf(_(" -d, --pgdatabase=DBNAME database to connect (default: username)\n")); + printf(_(" -h, --pghost=HOSTNAME database server host or socket directory(default: 'local socket')\n")); + printf(_(" -p, --pgport=PORT database server port (default: 5432)\n")); + printf(_(" -w, --no-password never prompt for password\n")); + printf(_(" -W, --password force password prompt\n")); + + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + 
printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); +} + +static void +help_restore(void) +{ + printf(_("\n%s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-i backup-id] [-j num-threads]\n")); + printf(_(" [--progress] [--force] [--no-sync]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [-T OLDDIR=NEWDIR]\n")); + printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n")); + printf(_(" [--skip-external-dirs]\n")); + printf(_(" [-I | --incremental-mode=none|checksum|lsn]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); + printf(_(" [--recovery-target=immediate|latest]\n")); + printf(_(" [--recovery-target-name=target-name]\n")); + printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); + printf(_(" [--restore-command=cmdline]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--archive-host=hostname] [--archive-port=port]\n")); + printf(_(" [--archive-user=username]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the instance\n")); + + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); + printf(_(" -i, --backup-id=backup-id backup to restore\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + + printf(_(" --progress show progress\n")); + printf(_(" --force ignore invalid status of the restored backup\n")); + printf(_(" --no-sync do not sync restored files to disk\n")); + printf(_(" --no-validate disable backup validation during restore\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + + printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n")); + printf(_(" relocate the tablespace from directory OLDDIR to NEWDIR\n")); + printf(_(" --external-mapping=OLDDIR=NEWDIR\n")); + printf(_(" relocate the external directory from OLDDIR to NEWDIR\n")); + printf(_(" --skip-external-dirs do not restore all external directories\n")); + + printf(_("\n Incremental restore options:\n")); + printf(_(" -I, --incremental-mode=none|checksum|lsn\n")); + printf(_(" reuse valid pages available in PGDATA if they have not changed\n")); + printf(_(" (default: none)\n")); + + printf(_("\n Recovery options:\n")); + printf(_(" --recovery-target-time=time time stamp up to which recovery will proceed\n")); + printf(_(" --recovery-target-xid=xid transaction ID up to which recovery will proceed\n")); + printf(_(" --recovery-target-lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n")); + printf(_(" --recovery-target-inclusive=boolean\n")); + printf(_(" whether we stop just after the recovery target\n")); + printf(_(" --recovery-target-timeline=timeline\n")); + 
printf(_(" recovering into a particular timeline\n")); + printf(_(" --recovery-target=immediate|latest\n")); + printf(_(" end recovery as soon as a consistent state is reached or as late as possible\n")); + printf(_(" --recovery-target-name=target-name\n")); + printf(_(" the named restore point to which recovery will proceed\n")); + printf(_(" --recovery-target-action=pause|promote|shutdown\n")); + printf(_(" action the server should take once the recovery target is reached\n")); + printf(_(" (default: pause)\n")); + printf(_(" --restore-command=cmdline command to use as 'restore_command' in recovery.conf; 'none' disables\n")); + + printf(_("\n Logging options:\n")); + printf(_(" --log-level-console=log-level-console\n")); + printf(_(" level for console logging (default: info)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-level-file=log-level-file\n")); + printf(_(" level for file logging (default: off)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-filename=log-filename\n")); + printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); + printf(_(" --error-log-filename=error-log-filename\n")); + printf(_(" filename for error logging (default: none)\n")); + printf(_(" --log-directory=log-directory\n")); + printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); + printf(_(" --log-rotation-size=log-rotation-size\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); + printf(_(" --log-rotation-age=log-rotation-age\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); + + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); +} + +static void +help_validate(void) +{ + printf(_("\n%s validate -B backup-path [--instance=instance_name]\n"), PROGRAM_NAME); + printf(_(" [-i backup-id] [--progress] [-j num-threads]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); + printf(_(" [--recovery-target-name=target-name]\n")); + printf(_(" [--skip-block-validation]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -i, --backup-id=backup-id backup to validate\n")); + + printf(_(" --progress show progress\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" 
--recovery-target-time=time time stamp up to which recovery will proceed\n")); + printf(_(" --recovery-target-xid=xid transaction ID up to which recovery will proceed\n")); + printf(_(" --recovery-target-lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n")); + printf(_(" --recovery-target-inclusive=boolean\n")); + printf(_(" whether we stop just after the recovery target\n")); + printf(_(" --recovery-target-timeline=timeline\n")); + printf(_(" recovering into a particular timeline\n")); + printf(_(" --recovery-target-name=target-name\n")); + printf(_(" the named restore point to which recovery will proceed\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + + printf(_("\n Logging options:\n")); + printf(_(" --log-level-console=log-level-console\n")); + printf(_(" level for console logging (default: info)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-level-file=log-level-file\n")); + printf(_(" level for file logging (default: off)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-filename=log-filename\n")); + printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); + printf(_(" --error-log-filename=error-log-filename\n")); + printf(_(" filename for error logging (default: none)\n")); + printf(_(" --log-directory=log-directory\n")); + printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); + printf(_(" --log-rotation-size=log-rotation-size\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); + printf(_(" --log-rotation-age=log-rotation-age\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n\n")); +} + +static void +help_show(void) +{ + printf(_("\n%s show -B backup-path\n"), PROGRAM_NAME); + printf(_(" [--instance=instance_name [-i backup-id]]\n")); + printf(_(" [--format=format] [--archive]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name show info about specific instance\n")); + printf(_(" -i, --backup-id=backup-id show info about specific backups\n")); + printf(_(" --archive show WAL archive information\n")); + printf(_(" --format=format show format=PLAIN|JSON\n\n")); +} + +static void +help_delete(void) +{ + printf(_("\n%s delete -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-i backup-id | --delete-expired | --merge-expired] [--delete-wal]\n")); + printf(_(" [-j num-threads] [--progress]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -i, --backup-id=backup-id backup to delete\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --progress show progress\n")); + + printf(_("\n Retention options:\n")); + printf(_(" --delete-expired delete backups expired according to current\n")); + printf(_(" retention 
policy\n")); + printf(_(" --merge-expired merge backups expired according to current\n")); + printf(_(" retention policy\n")); + printf(_(" --delete-wal remove redundant files in WAL archive\n")); + printf(_(" --retention-redundancy=retention-redundancy\n")); + printf(_(" number of full backups to keep; 0 disables; (default: 0)\n")); + printf(_(" --retention-window=retention-window\n")); + printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); + printf(_(" --wal-depth=wal-depth number of latest valid backups per timeline that must\n")); + printf(_(" retain the ability to perform PITR; 0 disables; (default: 0)\n")); + printf(_(" --dry-run perform a trial run without any changes\n")); + printf(_(" --status=backup_status delete all backups with specified status\n")); + + printf(_("\n Logging options:\n")); + printf(_(" --log-level-console=log-level-console\n")); + printf(_(" level for console logging (default: info)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-level-file=log-level-file\n")); + printf(_(" level for file logging (default: off)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-filename=log-filename\n")); + printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); + printf(_(" --error-log-filename=error-log-filename\n")); + printf(_(" filename for error logging (default: none)\n")); + printf(_(" --log-directory=log-directory\n")); + printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); + printf(_(" --log-rotation-size=log-rotation-size\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); + printf(_(" --log-rotation-age=log-rotation-age\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n\n")); +} + +static void +help_merge(void) +{ + printf(_("\n%s merge -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" -i backup-id [-j num-threads] [--progress]\n")); + printf(_(" [--log-level-console=log-level-console]\n")); + printf(_(" [--log-level-file=log-level-file]\n")); + printf(_(" [--log-filename=log-filename]\n")); + printf(_(" [--error-log-filename=error-log-filename]\n")); + printf(_(" [--log-directory=log-directory]\n")); + printf(_(" [--log-rotation-size=log-rotation-size]\n")); + printf(_(" [--log-rotation-age=log-rotation-age]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -i, --backup-id=backup-id backup to merge\n")); + + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --progress show progress\n")); + + printf(_("\n Logging options:\n")); + printf(_(" --log-level-console=log-level-console\n")); + printf(_(" level for console logging (default: info)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-level-file=log-level-file\n")); + printf(_(" level for file logging (default: off)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-filename=log-filename\n")); + printf(_(" 
filename for file logging (default: 'pg_probackup.log')\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); + printf(_(" --error-log-filename=error-log-filename\n")); + printf(_(" filename for error logging (default: none)\n")); + printf(_(" --log-directory=log-directory\n")); + printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); + printf(_(" --log-rotation-size=log-rotation-size\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); + printf(_(" --log-rotation-age=log-rotation-age\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n\n")); +} + +static void +help_set_backup(void) +{ + printf(_("\n%s set-backup -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" -i backup-id\n")); + printf(_(" [--ttl=interval] [--expire-time=time] [--note=text]\n\n")); + + printf(_(" --ttl=interval pin backup for specified amount of time; 0 unpin\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: s)\n")); + printf(_(" (example: --ttl=20d)\n")); + printf(_(" --expire-time=time pin backup until specified time stamp\n")); + printf(_(" (example: --expire-time='2024-01-01 00:00:00+03')\n")); + printf(_(" --note=text add note to backup; 'none' to remove note\n")); + printf(_(" (example: --note='backup before app update to v13.1')\n")); +} + +static void +help_set_config(void) +{ + printf(_("\n%s set-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path]\n")); + printf(_(" [-E external-directories-paths]\n")); + printf(_(" [--restore-command=cmdline]\n")); + printf(_(" [--log-level-console=log-level-console]\n")); + printf(_(" [--log-level-file=log-level-file]\n")); + printf(_(" [--log-filename=log-filename]\n")); + printf(_(" [--error-log-filename=error-log-filename]\n")); + printf(_(" [--log-directory=log-directory]\n")); + printf(_(" [--log-rotation-size=log-rotation-size]\n")); + printf(_(" [--log-rotation-age=log-rotation-age]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); + printf(_(" [--compress-algorithm=compress-algorithm]\n")); + printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--archive-timeout=timeout]\n")); + printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); + printf(_(" -E --external-dirs=external-directories-paths\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); + printf(_(" --restore-command=cmdline command to use as 'restore_command' in recovery.conf; 'none' disables\n")); + + printf(_("\n Logging options:\n")); + printf(_(" --log-level-console=log-level-console\n")); + printf(_(" level for console logging (default: info)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 
'verbose'\n")); + printf(_(" --log-level-file=log-level-file\n")); + printf(_(" level for file logging (default: off)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-filename=log-filename\n")); + printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); + printf(_(" --error-log-filename=error-log-filename\n")); + printf(_(" filename for error logging (default: none)\n")); + printf(_(" --log-directory=log-directory\n")); + printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); + printf(_(" --log-rotation-size=log-rotation-size\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); + printf(_(" --log-rotation-age=log-rotation-age\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); + + printf(_("\n Retention options:\n")); + printf(_(" --retention-redundancy=retention-redundancy\n")); + printf(_(" number of full backups to keep; 0 disables; (default: 0)\n")); + printf(_(" --retention-window=retention-window\n")); + printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); + printf(_(" --wal-depth=wal-depth number of latest valid backups with ability to perform\n")); + printf(_(" the point in time recovery; disables; (default: 0)\n")); + + printf(_("\n Compression options:\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); + printf(_(" --compress-algorithm=compress-algorithm\n")); + printf(_(" available options: 'zlib','pglz','none' (default: 'none')\n")); + printf(_(" --compress-level=compress-level\n")); + printf(_(" level of compression [0-9] (default: 1)\n")); + + printf(_("\n Archive options:\n")); + printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); + + printf(_("\n Connection options:\n")); + printf(_(" -U, --pguser=USERNAME user name to connect as (default: current local user)\n")); + printf(_(" -d, --pgdatabase=DBNAME database to connect (default: username)\n")); + printf(_(" -h, --pghost=HOSTNAME database server host or socket directory(default: 'local socket')\n")); + printf(_(" -p, --pgport=PORT database server port (default: 5432)\n")); + + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); +} + +static void +help_show_config(void) +{ + printf(_("\n%s show-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [--format=format]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the 
instance\n")); + printf(_(" --format=format show format=PLAIN|JSON\n\n")); +} + +static void +help_add_instance(void) +{ + printf(_("\n%s add-instance -B backup-path -D pgdata-path\n"), PROGRAM_NAME); + printf(_(" --instance=instance_name\n")); + printf(_(" [-E external-directory-path]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); + printf(_(" --instance=instance_name name of the new instance\n")); + + printf(_(" -E --external-dirs=external-directories-paths\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n\n")); +} + +static void +help_del_instance(void) +{ + printf(_("\n%s del-instance -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the instance to delete\n\n")); +} diff --git a/src/bin/pg_probackup/init.cpp b/src/bin/pg_probackup/init.cpp new file mode 100644 index 000000000..431ea3b70 --- /dev/null +++ b/src/bin/pg_probackup/init.cpp @@ -0,0 +1,131 @@ +/*------------------------------------------------------------------------- + * + * init.c: - initialize backup catalog. + * + * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include +#include + +/* + * Initialize backup catalog. 
+ */ +int +do_init(void) +{ + char path[MAXPGPATH]; + char arclog_path_dir[MAXPGPATH]; + int results; + + results = pg_check_dir(backup_path); + if (results == 4) /* exists and is not empty */ + elog(ERROR, "backup catalog already exists and is not empty"); + else if (results == -1) /* trouble accessing directory */ + { + int errno_tmp = errno; + elog(ERROR, "cannot open backup catalog directory \"%s\": %s", + backup_path, strerror(errno_tmp)); + } + + /* create backup catalog root directory */ + dir_create_dir(backup_path, DIR_PERMISSION); + + /* create backup catalog data directory */ + join_path_components(path, backup_path, BACKUPS_DIR); + dir_create_dir(path, DIR_PERMISSION); + + /* create backup catalog wal directory */ + join_path_components(arclog_path_dir, backup_path, "wal"); + dir_create_dir(arclog_path_dir, DIR_PERMISSION); + + elog(INFO, "Backup catalog '%s' successfully inited", backup_path); + return 0; +} + +int +do_add_instance(InstanceConfig *instance) +{ + char path[MAXPGPATH]; + char arclog_path_dir[MAXPGPATH]; + struct stat st; + + /* PGDATA is always required */ + if (instance->pgdata == NULL) + elog(ERROR, "Required parameter not specified: PGDATA " + "(-D, --pgdata)"); + + /* Read system_identifier from PGDATA */ + instance->system_identifier = get_system_identifier(instance->pgdata); + /* Starting from PostgreSQL 11 read WAL segment size from PGDATA */ + instance->xlog_seg_size = get_xlog_seg_size(instance->pgdata); + + /* Ensure that all root directories already exist */ + if (access(backup_path, F_OK) != 0) + elog(ERROR, "Directory does not exist: '%s'", backup_path); + + join_path_components(path, backup_path, BACKUPS_DIR); + if (access(path, F_OK) != 0) + elog(ERROR, "Directory does not exist: '%s'", path); + + join_path_components(arclog_path_dir, backup_path, "wal"); + if (access(arclog_path_dir, F_OK) != 0) + elog(ERROR, "Directory does not exist: '%s'", arclog_path_dir); + + if (stat(instance->backup_instance_path, &st) == 0 && S_ISDIR(st.st_mode)) + elog(ERROR, "Instance '%s' backup directory already exists: '%s'", + instance->name, instance->backup_instance_path); + + /* + * Create directory for wal files of this specific instance. + * Existence check is extra paranoid because if we don't have such a + * directory in data dir, we shouldn't have it in wal as well. + */ + if (stat(instance->arclog_path, &st) == 0 && S_ISDIR(st.st_mode)) + elog(ERROR, "Instance '%s' WAL archive directory already exists: '%s'", + instance->name, instance->arclog_path); + + /* Create directory for data files of this specific instance */ + dir_create_dir(instance->backup_instance_path, DIR_PERMISSION); + dir_create_dir(instance->arclog_path, DIR_PERMISSION); + + /* + * Write initial configuration file. + * system-identifier, xlog-seg-size and pgdata are set in init subcommand + * and will never be updated. + * + * We need to manually set options source to save them to the configuration + * file. 
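+ * Remote options are reset to SOURCE_DEFAULT below so that they are not written to the file.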
+ */ + config_set_opt(instance_options, &instance->system_identifier, + SOURCE_FILE); + config_set_opt(instance_options, &instance->xlog_seg_size, + SOURCE_FILE); + + /* Kludge: do not save remote options into config */ + config_set_opt(instance_options, &instance_config.remote.host, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.proto, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.port, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.path, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.user, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.ssh_options, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.ssh_config, + SOURCE_DEFAULT); + + /* pgdata was set through command line */ + do_set_config(true); + + elog(INFO, "Instance '%s' successfully inited", instance_name); + return 0; +} diff --git a/src/bin/pg_probackup/json.cpp b/src/bin/pg_probackup/json.cpp new file mode 100644 index 000000000..81530f959 --- /dev/null +++ b/src/bin/pg_probackup/json.cpp @@ -0,0 +1,147 @@ +/*------------------------------------------------------------------------- + * + * json.c: - make json document. + * + * Copyright (c) 2018-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "json.h" + +static void json_add_indent(PQExpBuffer buf, int32 level); +static void json_add_escaped(PQExpBuffer buf, const char *str); + +static bool add_comma = false; + +/* + * Start or end json token. Currently it is a json object or array. + * + * Function modifies level value and adds indent if it appropriate. + */ +void +json_add(PQExpBuffer buf, JsonToken type, int32 *level) +{ + switch (type) + { + case JT_BEGIN_ARRAY: + appendPQExpBufferChar(buf, '['); + *level += 1; + add_comma = false; + break; + case JT_END_ARRAY: + *level -= 1; + if (*level == 0) + appendPQExpBufferChar(buf, '\n'); + else + json_add_indent(buf, *level); + appendPQExpBufferChar(buf, ']'); + add_comma = true; + break; + case JT_BEGIN_OBJECT: + json_add_indent(buf, *level); + appendPQExpBufferChar(buf, '{'); + *level += 1; + add_comma = false; + break; + case JT_END_OBJECT: + *level -= 1; + if (*level == 0) + appendPQExpBufferChar(buf, '\n'); + else + json_add_indent(buf, *level); + appendPQExpBufferChar(buf, '}'); + add_comma = true; + break; + default: + break; + } +} + +/* + * Add json object's key. If it isn't first key we need to add a comma. + */ +void +json_add_key(PQExpBuffer buf, const char *name, int32 level) +{ + if (add_comma) + appendPQExpBufferChar(buf, ','); + json_add_indent(buf, level); + + json_add_escaped(buf, name); + appendPQExpBufferStr(buf, ": "); + + add_comma = true; +} + +/* + * Add json object's key and value. If it isn't first key we need to add a + * comma. 
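+ * The value is escaped only when the caller sets 'escaped'; otherwise it is appended verbatim.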
+ */ +void +json_add_value(PQExpBuffer buf, const char *name, const char *value, + int32 level, bool escaped) +{ + json_add_key(buf, name, level); + + if (escaped) + json_add_escaped(buf, value); + else + appendPQExpBufferStr(buf, value); +} + +static void +json_add_indent(PQExpBuffer buf, int32 level) +{ + uint16 i; + + if (level == 0) + return; + + appendPQExpBufferChar(buf, '\n'); + for (i = 0; i < level; i++) + appendPQExpBufferStr(buf, " "); +} + +static void +json_add_escaped(PQExpBuffer buf, const char *str) +{ + const char *p; + + appendPQExpBufferChar(buf, '"'); + for (p = str; *p; p++) + { + switch (*p) + { + case '\b': + appendPQExpBufferStr(buf, "\\b"); + break; + case '\f': + appendPQExpBufferStr(buf, "\\f"); + break; + case '\n': + appendPQExpBufferStr(buf, "\\n"); + break; + case '\r': + appendPQExpBufferStr(buf, "\\r"); + break; + case '\t': + appendPQExpBufferStr(buf, "\\t"); + break; + case '"': + appendPQExpBufferStr(buf, "\\\""); + break; + case '\\': + appendPQExpBufferStr(buf, "\\\\"); + break; + default: + if ((unsigned char) *p < ' ') + appendPQExpBuffer(buf, "\\u%04x", (int) *p); + else + appendPQExpBufferChar(buf, *p); + break; + } + } + appendPQExpBufferChar(buf, '"'); +} + diff --git a/src/bin/pg_probackup/json.h b/src/bin/pg_probackup/json.h new file mode 100644 index 000000000..e22602af3 --- /dev/null +++ b/src/bin/pg_probackup/json.h @@ -0,0 +1,33 @@ +/*------------------------------------------------------------------------- + * + * json.h: - prototypes of json output functions. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2018-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef PROBACKUP_JSON_H +#define PROBACKUP_JSON_H + +#include "postgres_fe.h" +#include "libpq/pqexpbuffer.h" + +/* + * Json document tokens. + */ +typedef enum +{ + JT_BEGIN_ARRAY, + JT_END_ARRAY, + JT_BEGIN_OBJECT, + JT_END_OBJECT +} JsonToken; + +extern void json_add(PQExpBuffer buf, JsonToken type, int32 *level); +extern void json_add_key(PQExpBuffer buf, const char *name, int32 level); +extern void json_add_value(PQExpBuffer buf, const char *name, const char *value, + int32 level, bool escaped); + +#endif /* PROBACKUP_JSON_H */ diff --git a/src/bin/pg_probackup/logger.cpp b/src/bin/pg_probackup/logger.cpp new file mode 100644 index 000000000..85de2ad56 --- /dev/null +++ b/src/bin/pg_probackup/logger.cpp @@ -0,0 +1,712 @@ +/*------------------------------------------------------------------------- + * + * logger.c: - log events into log file or stderr. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include + +#include "pg_probackup.h" +#include "logger.h" +#include "pgut.h" +#include "thread.h" +#include + +#include "configuration.h" + +/* Logger parameters */ +LoggerConfig logger_config = { + LOG_LEVEL_CONSOLE_DEFAULT, + LOG_LEVEL_FILE_DEFAULT, + (char *)LOG_FILENAME_DEFAULT, + NULL, + LOG_ROTATION_SIZE_DEFAULT, + LOG_ROTATION_AGE_DEFAULT +}; + +/* Implementation for logging.h */ + +typedef enum +{ + PG_DEBUG, + PG_PROGRESS, + PG_WARNING, + PG_FATAL +} eLogType; + +void pg_log(eLogType type, const char *fmt,...) pg_attribute_printf(2, 3); + +static void elog_internal(int elevel, bool file_only, const char *message); +static void elog_stderr(int elevel, const char *fmt, ...) 
+ pg_attribute_printf(2, 3); +static char *get_log_message(const char *fmt, va_list args) pg_attribute_printf(1, 0); + +/* Functions to work with log files */ +static void open_logfile(FILE **file, const char *filename_format); +static void release_logfile(void); +static char *logfile_getname(const char *format, time_t timestamp); +static FILE *logfile_open(const char *filename, const char *mode); + +/* Static variables */ + +static FILE *log_file = NULL; +static FILE *error_log_file = NULL; + +static bool exit_hook_registered = false; +/* Logging of the current thread is in progress */ +static bool loggin_in_progress = false; + +static pthread_mutex_t log_file_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* + * Initialize logger. + * + * If log_directory wasn't set by a user we use full path: + * backup_directory/log + */ +void +init_logger(const char *root_path, LoggerConfig *config) +{ + /* + * If logging to file is enabled and log_directory wasn't set + * by user, init the path with default value: backup_directory/log/ + * */ + if (config->log_level_file != LOG_OFF + && config->log_directory == NULL) + { + config->log_directory = (char *)pgut_malloc(MAXPGPATH); + join_path_components(config->log_directory, + root_path, LOG_DIRECTORY_DEFAULT); + } + + if (config->log_directory != NULL) + canonicalize_path(config->log_directory); + + logger_config = *config; + +#if PG_VERSION_NUM >= 120000 + /* Setup logging for functions from other modules called by pg_probackup */ + pg_logging_init(PROGRAM_NAME); + + switch (logger_config.log_level_console) + { + case VERBOSE: + pg_logging_set_level(PG_LOG_DEBUG); + break; + case INFO: + case NOTICE: + case LOG: + pg_logging_set_level(PG_LOG_INFO); + break; + case WARNING: + pg_logging_set_level(PG_LOG_WARNING); + break; + case ERROR: + pg_logging_set_level(PG_LOG_ERROR); + break; + default: + break; + }; +#endif +} + +static void +write_elevel(FILE *stream, int elevel) +{ + switch (elevel) + { + case VERBOSE: + fputs("VERBOSE: ", stream); + break; + case LOG: + fputs("LOG: ", stream); + break; + case INFO: + fputs("INFO: ", stream); + break; + case NOTICE: + fputs("NOTICE: ", stream); + break; + case WARNING: + fputs("WARNING: ", stream); + break; + case ERROR: + fputs("ERROR: ", stream); + break; + default: + elog_stderr(ERROR, "invalid logging level: %d", elevel); + break; + } +} + +/* + * Exit with code if it is an error. + * Check for in_cleanup flag to avoid deadlock in case of ERROR in cleanup + * routines. + */ +static void +exit_if_necessary(int elevel) +{ + if (elevel > WARNING && !in_cleanup) + { + if (loggin_in_progress) + { + loggin_in_progress = false; + pthread_mutex_unlock(&log_file_mutex); + } + + if (remote_agent) + sleep(1); /* Let parent receive sent messages */ + + /* If this is not the main thread then don't call exit() */ + if (main_tid != pthread_self()) + { + /* Interrupt other possible routines */ + thread_interrupted = true; +#ifdef WIN32 + ExitThread(elevel); +#else + pthread_exit(NULL); +#endif + } + else + exit(elevel); + } +} + +/* + * Logs to stderr or to log file and exit if ERROR. + * + * Actual implementation for elog() and pg_log(). 
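+ * An ERROR-level message terminates the process (or current thread) via exit_if_necessary(), unless cleanup is in progress.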
+ */ +static void +elog_internal(int elevel, bool file_only, const char *message) +{ + bool write_to_file, + write_to_error_log, + write_to_stderr; + time_t log_time = (time_t) time(NULL); + char strfbuf[128]; + char str_pid[128]; + + write_to_file = elevel >= logger_config.log_level_file + && logger_config.log_directory + && logger_config.log_directory[0] != '\0'; + write_to_error_log = elevel >= ERROR && logger_config.error_log_filename && + logger_config.log_directory && logger_config.log_directory[0] != '\0'; + write_to_stderr = elevel >= logger_config.log_level_console && !file_only; + + if (remote_agent) + { + write_to_stderr |= write_to_error_log | write_to_file; + write_to_error_log = write_to_file = false; + } + pthread_lock(&log_file_mutex); + loggin_in_progress = true; + + if (write_to_file || write_to_error_log) + strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", + localtime(&log_time)); + + snprintf(str_pid, sizeof(str_pid), "[%d]:", my_pid); + + /* + * Write message to log file. + * Do not write to file if this error was raised during write previous + * message. + */ + if (write_to_file) + { + if (log_file == NULL) + { + if (logger_config.log_filename == NULL) + open_logfile(&log_file, LOG_FILENAME_DEFAULT); + else + open_logfile(&log_file, logger_config.log_filename); + } + + fprintf(log_file, "%s ", strfbuf); + fprintf(log_file, "%s ", str_pid); + write_elevel(log_file, elevel); + + fprintf(log_file, "%s\n", message); + fflush(log_file); + } + + /* + * Write error message to error log file. + * Do not write to file if this error was raised during write previous + * message. + */ + if (write_to_error_log) + { + if (error_log_file == NULL) + open_logfile(&error_log_file, logger_config.error_log_filename); + + fprintf(error_log_file, "%s ", strfbuf); + fprintf(error_log_file, "%s ", str_pid); + write_elevel(error_log_file, elevel); + + fprintf(error_log_file, "%s\n", message); + fflush(error_log_file); + } + + /* + * Write to stderr if the message was not written to log file. + * Write to stderr if the message level is greater than WARNING anyway. + */ + if (write_to_stderr) + { + write_elevel(stderr, elevel); + + fprintf(stderr, "%s\n", message); + fflush(stderr); + } + + exit_if_necessary(elevel); + + loggin_in_progress = false; + pthread_mutex_unlock(&log_file_mutex); +} + +/* + * Log only to stderr. It is called only within elog_internal() when another + * logging already was started. + */ +static void +elog_stderr(int elevel, const char *fmt, ...) +{ + va_list args; + + /* + * Do not log message if severity level is less than log_level. + * It is the little optimisation to put it here not in elog_internal(). + */ + if (elevel < logger_config.log_level_console && elevel < ERROR) + return; + + va_start(args, fmt); + + write_elevel(stderr, elevel); + vfprintf(stderr, fmt, args); + fputc('\n', stderr); + fflush(stderr); + + va_end(args); + + exit_if_necessary(elevel); +} + +/* + * Formats text data under the control of fmt and returns it in an allocated + * buffer. + */ +static char * +get_log_message(const char *fmt, va_list args) +{ + size_t len = 256; /* initial assumption about buffer size */ + + for (;;) + { + char *result; + size_t newlen; + va_list copy_args; + + result = (char *) pgut_malloc(len); + + /* Try to format the data */ + va_copy(copy_args, args); + newlen = pvsnprintf(result, len, fmt, copy_args); + va_end(copy_args); + + if (newlen < len) + return result; /* success */ + + /* Release buffer and loop around to try again with larger len. 
*/ + pfree(result); + len = newlen; + } +} + +/* + * Logs to stderr or to log file and exit if ERROR. + */ +void +elog(int elevel, const char *fmt, ...) +{ + char *message; + va_list args; + + /* + * Do not log message if severity level is less than log_level. + * It is the little optimisation to put it here not in elog_internal(). + */ + if (elevel < logger_config.log_level_console && + elevel < logger_config.log_level_file && elevel < ERROR) + return; + + va_start(args, fmt); + message = get_log_message(fmt, args); + va_end(args); + + elog_internal(elevel, false, message); + pfree(message); +} + +/* + * Logs only to log file and exit if ERROR. + */ +void +elog_file(int elevel, const char *fmt, ...) +{ + char *message; + va_list args; + + /* + * Do not log message if severity level is less than log_level. + * It is the little optimisation to put it here not in elog_internal(). + */ + if (elevel < logger_config.log_level_file && elevel < ERROR) + return; + + va_start(args, fmt); + message = get_log_message(fmt, args); + va_end(args); + + elog_internal(elevel, true, message); + pfree(message); +} + +/* + * Implementation of pg_log() from logging.h. + */ +void +pg_log(eLogType type, const char *fmt, ...) +{ + char *message; + va_list args; + int elevel = INFO; + + /* Transform logging level from eLogType to utils/logger.h levels */ + switch (type) + { + case PG_DEBUG: + elevel = LOG; + break; + case PG_PROGRESS: + elevel = INFO; + break; + case PG_WARNING: + elevel = WARNING; + break; + case PG_FATAL: + elevel = ERROR; + break; + default: + elog(ERROR, "invalid logging level: %d", type); + break; + } + + /* + * Do not log message if severity level is less than log_level. + * It is the little optimisation to put it here not in elog_internal(). + */ + if (elevel < logger_config.log_level_console && + elevel < logger_config.log_level_file && elevel < ERROR) + return; + + va_start(args, fmt); + message = get_log_message(fmt, args); + va_end(args); + + elog_internal(elevel, false, message); + pfree(message); +} + +/* + * Parses string representation of log level. + */ +int +parse_log_level(const char *level) +{ + const char *v = level; + size_t len; + + /* Skip all spaces detected */ + while (isspace((unsigned char)*v)) + v++; + len = strlen(v); + + if (len == 0) + elog(ERROR, "log-level is empty"); + + if (pg_strncasecmp("off", v, len) == 0) + return LOG_OFF; + else if (pg_strncasecmp("verbose", v, len) == 0) + return VERBOSE; + else if (pg_strncasecmp("log", v, len) == 0) + return LOG; + else if (pg_strncasecmp("info", v, len) == 0) + return INFO; + else if (pg_strncasecmp("notice", v, len) == 0) + return NOTICE; + else if (pg_strncasecmp("warning", v, len) == 0) + return WARNING; + else if (pg_strncasecmp("error", v, len) == 0) + return ERROR; + + /* Log level is invalid */ + elog(ERROR, "invalid log-level \"%s\"", level); + return 0; +} + +/* + * Converts integer representation of log level to string. + */ +const char * +deparse_log_level(int level) +{ + switch (level) + { + case LOG_OFF: + return "OFF"; + case VERBOSE: + return "VERBOSE"; + case LOG: + return "LOG"; + case INFO: + return "INFO"; + case NOTICE: + return "NOTICE"; + case WARNING: + return "WARNING"; + case ERROR: + return "ERROR"; + default: + elog(ERROR, "invalid log-level %d", level); + } + + return NULL; +} + +/* + * Construct logfile name using timestamp information. + * + * Result is palloc'd. 
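+ * The name is log_directory followed by the strftime-expanded filename format.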
+ */ +static char * +logfile_getname(const char *format, time_t timestamp) +{ + char *filename; + size_t len; + struct tm *tm = localtime(×tamp); + + if (logger_config.log_directory == NULL || + logger_config.log_directory[0] == '\0') + elog_stderr(ERROR, "logging path is not set"); + + filename = (char *) pgut_malloc(MAXPGPATH); + + snprintf(filename, MAXPGPATH, "%s/", logger_config.log_directory); + + len = strlen(filename); + + /* Treat log_filename as a strftime pattern */ +#ifdef WIN32 + if (pg_strftime(filename + len, MAXPGPATH - len, format, tm) <= 0) +#else + if (strftime(filename + len, MAXPGPATH - len, format, tm) <= 0) +#endif + elog_stderr(ERROR, "strftime(%s) failed: %s", format, strerror(errno)); + + return filename; +} + +/* + * Open a new log file. + */ +static FILE * +logfile_open(const char *filename, const char *mode) +{ + FILE *fh; + + /* + * Create log directory if not present; ignore errors + */ + mkdir(logger_config.log_directory, S_IRWXU); + + fh = fopen(filename, mode); + + if (fh) + setvbuf(fh, NULL, PG_IOLBF, 0); + else + { + int save_errno = errno; + + elog_stderr(ERROR, "could not open log file \"%s\": %s", + filename, strerror(errno)); + errno = save_errno; + } + + return fh; +} + +/* + * Open the log file. + */ +static void +open_logfile(FILE **file, const char *filename_format) +{ + char *filename; + char control[MAXPGPATH]; + struct stat st; + FILE *control_file; + time_t cur_time = time(NULL); + bool rotation_requested = false, + logfile_exists = false, + rotation_file_exists = false; + + filename = logfile_getname(filename_format, cur_time); + + /* "log_directory" was checked in logfile_getname() */ + snprintf(control, MAXPGPATH, "%s.rotation", filename); + + if (stat(filename, &st) == -1) + { + if (errno == ENOENT) + { + /* There is no file "filename" and rotation does not need */ + goto logfile_open; + } + else + elog_stderr(ERROR, "cannot stat log file \"%s\": %s", + filename, strerror(errno)); + } + /* Found log file "filename" */ + logfile_exists = true; + + /* First check for rotation */ + if (logger_config.log_rotation_size > 0 || + logger_config.log_rotation_age > 0) + { + /* Check for rotation by age */ + if (logger_config.log_rotation_age > 0) + { + struct stat control_st; + + if (stat(control, &control_st) < 0) + { + if (errno == ENOENT) + /* '.rotation' file is not found, force its recreation */ + elog_stderr(WARNING, "missing rotation file: \"%s\"", + control); + else + elog_stderr(ERROR, "cannot stat rotation file \"%s\": %s", + control, strerror(errno)); + } + else + { + /* rotation file exists */ + char buf[1024]; + + control_file = fopen(control, "r"); + if (control_file == NULL) + elog_stderr(ERROR, "cannot open rotation file \"%s\": %s", + control, strerror(errno)); + + rotation_file_exists = true; + + if (fgets(buf, lengthof(buf), control_file)) + { + time_t creation_time; + + if (!parse_int64(buf, (int64 *) &creation_time, 0)) + { + /* Inability to parse value from .rotation file is + * concerning but not a critical error + */ + elog_stderr(WARNING, "rotation file \"%s\" has wrong " + "creation timestamp \"%s\"", + control, buf); + rotation_file_exists = false; + } + else + /* Parsed creation time */ + rotation_requested = (cur_time - creation_time) > + /* convert to seconds from milliseconds */ + logger_config.log_rotation_age / 1000; + } + else + { + /* truncated .rotation file is not a critical error */ + elog_stderr(WARNING, "cannot read creation timestamp from " + "rotation file \"%s\"", control); + rotation_file_exists = false; 
+ } + + fclose(control_file); + } + } + + /* Check for rotation by size */ + if (!rotation_requested && logger_config.log_rotation_size > 0) + rotation_requested = st.st_size >= + /* convert to bytes */ + logger_config.log_rotation_size * 1024L; + } + +logfile_open: + if (rotation_requested) + *file = logfile_open(filename, "w"); + else + *file = logfile_open(filename, "a"); + pfree(filename); + + /* Rewrite rotation control file */ + if (rotation_requested || !logfile_exists || !rotation_file_exists) + { + time_t timestamp = time(NULL); + + control_file = fopen(control, "w"); + if (control_file == NULL) + elog_stderr(ERROR, "cannot open rotation file \"%s\": %s", + control, strerror(errno)); + + fprintf(control_file, "%ld", timestamp); + + fclose(control_file); + } + + /* + * Arrange to close opened file at proc_exit. + */ + if (!exit_hook_registered) + { + atexit(release_logfile); + exit_hook_registered = true; + } +} + +/* + * Closes opened file. + */ +static void +release_logfile(void) +{ + if (log_file) + { + fclose(log_file); + log_file = NULL; + } + if (error_log_file) + { + fclose(error_log_file); + error_log_file = NULL; + } +} diff --git a/src/bin/pg_probackup/logger.h b/src/bin/pg_probackup/logger.h new file mode 100644 index 000000000..74f9593c3 --- /dev/null +++ b/src/bin/pg_probackup/logger.h @@ -0,0 +1,67 @@ +/*------------------------------------------------------------------------- + * + * logger.h: - prototypes of logger functions. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef LOGGER_H +#define LOGGER_H + +#define LOG_NONE (-10) + +/* Log level */ +#define VERBOSE (-5) +#undef LOG +#undef INFO +#undef NOTICE +#undef WARNING +#undef ERROR + +#define LOG (-4) +#define INFO (-3) +#define NOTICE (-2) +#define WARNING (-1) +#define ERROR 1 +#define LOG_OFF 10 + +typedef struct LoggerConfig +{ + int log_level_console; + int log_level_file; + char *log_filename; + char *error_log_filename; + char *log_directory; + /* Maximum size of an individual log file in kilobytes */ + uint64 log_rotation_size; + /* Maximum lifetime of an individual log file in minutes */ + uint64 log_rotation_age; +} LoggerConfig; + +/* Logger parameters */ +extern LoggerConfig logger_config; + +#define LOG_ROTATION_SIZE_DEFAULT 0 +#define LOG_ROTATION_AGE_DEFAULT 0 + +#define LOG_LEVEL_CONSOLE_DEFAULT LOG +#define LOG_LEVEL_FILE_DEFAULT LOG_OFF + +#define LOG_FILENAME_DEFAULT "pg_probackup.log" +#define LOG_DIRECTORY_DEFAULT "log" +#define pg_attribute_printf(f,a) +#define PG_IOLBF _IONBF + +#undef elog +extern void elog(int elevel, const char *fmt, ...); //pg_attribute_printf(2, 3); +extern void elog_file(int elevel, const char *fmt, ...); // pg_attribute_printf(2, 3); + +extern void init_logger(const char *root_path, LoggerConfig *config); + +extern int parse_log_level(const char *level); +extern const char *deparse_log_level(int level); +size_t pvsnprintf(char *buf, size_t len, const char *fmt, va_list args); +#endif /* LOGGER_H */ diff --git a/src/bin/pg_probackup/merge.cpp b/src/bin/pg_probackup/merge.cpp new file mode 100644 index 000000000..2126b9cfd --- /dev/null +++ b/src/bin/pg_probackup/merge.cpp @@ -0,0 +1,1399 @@ +/*------------------------------------------------------------------------- + * + * merge.c: merge FULL and incremental backups + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * Copyright (c) 2018-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include +#include + +#include "thread.h" +#include "common/fe_memutils.h" + +typedef struct +{ + parray *merge_filelist; + parray *parent_chain; + + pgBackup *dest_backup; + pgBackup *full_backup; + + const char *full_database_dir; + const char *full_external_prefix; + +// size_t in_place_merge_bytes; + bool compression_match; + bool program_version_match; + bool use_bitmap; + bool is_retry; + + /* + * Return value from the thread. + * 0 means there is no error, 1 - there is an error. + */ + int ret; +} merge_files_arg; + + +static void *merge_files(void *arg); +static void +reorder_external_dirs(pgBackup *to_backup, parray *to_external, + parray *from_external); +static int +get_external_index(const char *key, const parray *list); + +static void +merge_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, + pgFile *tmp_file, const char *to_root, bool use_bitmap, + bool is_retry); + +static void +merge_non_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, + pgFile *tmp_file, const char *full_database_dir, + const char *full_external_prefix); + +/* + * Implementation of MERGE command. + * + * - Find target and its parent full backup + * - Merge data files of target, parent and intermediate backups + * - Remove unnecessary files, which don't exist in the target backup anymore + */ +void +do_merge(time_t backup_id) +{ + parray *backups; + parray *merge_list = parray_new(); + pgBackup *dest_backup = NULL; + pgBackup *dest_backup_tmp = NULL; + pgBackup *full_backup = NULL; + int i; + + if (backup_id == INVALID_BACKUP_ID) + elog(ERROR, "required parameter is not specified: --backup-id"); + + if (instance_name == NULL) + elog(ERROR, "required parameter is not specified: --instance"); + + elog(INFO, "Merge started"); + + /* Get list of all backups sorted in order of descending start time */ + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + /* Find destination backup first */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + /* found target */ + if (backup->start_time == backup_id) + { + /* sanity */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE && + /* It is possible that previous merging was interrupted */ + backup->status != BACKUP_STATUS_MERGING && + backup->status != BACKUP_STATUS_MERGED && + backup->status != BACKUP_STATUS_DELETING) + elog(ERROR, "Backup %s has status: %s", + base36enc(backup->start_time), status2str(backup->status)); + + dest_backup = backup; + break; + } + } + + /* + * Handle the case of crash right after deletion of the target + * incremental backup. We can still recover from this. + * Iterate over backups and look for the FULL backup with + * MERGED status, that has merge-target-id equal to backup_id. + */ + if (dest_backup == NULL) + { + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + if (backup->status == BACKUP_STATUS_MERGED && + backup->merge_dest_backup == backup_id) + { + dest_backup = backup; + break; + } + } + } + + if (dest_backup == NULL) + elog(ERROR, "Target backup %s was not found", base36enc(backup_id)); + + /* It is possible to use FULL backup as target backup for merge. + * There are two possible cases: + * 1. 
The user wants to merge FULL backup with closest incremental backup. + * In this case we must find suitable destination backup and merge them. + * + * 2. Previous merge has failed after destination backup was deleted, + * but before FULL backup was renamed: + * Example A: + * PAGE2_1 OK + * FULL2 OK + * PAGE1_1 MISSING/DELETING <- + * FULL1 MERGED/MERGING + */ + if (dest_backup->backup_mode == BACKUP_MODE_FULL) + { + full_backup = dest_backup; + dest_backup = NULL; + elog(INFO, "Merge target backup %s is full backup", + base36enc(full_backup->start_time)); + + /* sanity */ + if (full_backup->status == BACKUP_STATUS_DELETING) + elog(ERROR, "Backup %s has status: %s", + base36enc(full_backup->start_time), + status2str(full_backup->status)); + + /* Case #1 */ + if (full_backup->status == BACKUP_STATUS_OK || + full_backup->status == BACKUP_STATUS_DONE) + { + /* Check the case of FULL backup having more than one direct child */ + if (is_prolific(backups, full_backup)) + elog(ERROR, "Merge target is full backup and has multiple direct children, " + "you must specify child backup id you want to merge with"); + + elog(LOG, "Looking for closest incremental backup to merge with"); + + /* Look for closest child backup */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + /* skip unsuitable candidates */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE) + continue; + + if (backup->parent_backup == full_backup->start_time) + { + dest_backup = backup; + break; + } + } + + /* sanity */ + if (dest_backup == NULL) + elog(ERROR, "Failed to find merge candidate, " + "backup %s has no valid children", + base36enc(full_backup->start_time)); + + } + /* Case #2 */ + else if (full_backup->status == BACKUP_STATUS_MERGING) + { + /* + * MERGING - merge was ongoing at the moment of crash. + * We must find destination backup and rerun merge. + * If destination backup is missing, then merge must be aborted, + * there is no recovery from this situation. + */ + + if (full_backup->merge_dest_backup == INVALID_BACKUP_ID) + elog(ERROR, "Failed to determine merge destination backup"); + + /* look up destination backup */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + if (backup->start_time == full_backup->merge_dest_backup) + { + dest_backup = backup; + break; + } + } + if (!dest_backup) + { + char *tmp_backup_id = base36enc_dup(full_backup->start_time); + elog(ERROR, "Full backup %s has unfinished merge with missing backup %s", + tmp_backup_id, + base36enc(full_backup->merge_dest_backup)); + pg_free(tmp_backup_id); + } + } + else if (full_backup->status == BACKUP_STATUS_MERGED) + { + /* + * MERGED - merge crashed after files were transferred, but + * before rename could take place. + * If destination backup is missing, this is ok. + * If destination backup is present, then it should be deleted. + * After that FULL backup must acquire destination backup ID. 
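+ * For this reason the lookup below tolerates a missing destination backup.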
+ */ + + /* destination backup may or may not exists */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + if (backup->start_time == full_backup->merge_dest_backup) + { + dest_backup = backup; + break; + } + } + if (!dest_backup) + { + char *tmp_backup_id = base36enc_dup(full_backup->start_time); + elog(WARNING, "Full backup %s has unfinished merge with missing backup %s", + tmp_backup_id, + base36enc(full_backup->merge_dest_backup)); + pg_free(tmp_backup_id); + } + } + else + elog(ERROR, "Backup %s has status: %s", + base36enc(full_backup->start_time), + status2str(full_backup->status)); + } + else + { + /* + * Legal Case #1: + * PAGE2 OK <- target + * PAGE1 OK + * FULL OK + * Legal Case #2: + * PAGE2 MERGING <- target + * PAGE1 MERGING + * FULL MERGING + * Legal Case #3: + * PAGE2 MERGING <- target + * PAGE1 DELETING + * FULL MERGED + * Legal Case #4: + * PAGE2 MERGING <- target + * PAGE1 missing + * FULL MERGED + * Legal Case #5: + * PAGE2 DELETING <- target + * FULL MERGED + * Legal Case #6: + * PAGE2 MERGING <- target + * PAGE1 missing + * FULL MERGED + * Illegal Case #7: + * PAGE2 MERGING <- target + * PAGE1 missing + * FULL MERGING + */ + + if (dest_backup->status == BACKUP_STATUS_MERGING || + dest_backup->status == BACKUP_STATUS_DELETING) + elog(WARNING, "Rerun unfinished merge for backup %s", + base36enc(dest_backup->start_time)); + + /* First we should try to find parent FULL backup */ + full_backup = find_parent_full_backup(dest_backup); + + /* Chain is broken, one or more member of parent chain is missing */ + if (full_backup == NULL) + { + /* It is the legal state of affairs in Case #4, but + * only for MERGING incremental target backup and only + * if FULL backup has MERGED status. 
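+ * Any other status of the destination backup makes a broken chain fatal.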
+ */ + if (dest_backup->status != BACKUP_STATUS_MERGING) + elog(ERROR, "Failed to find parent full backup for %s", + base36enc(dest_backup->start_time)); + + /* Find FULL backup that has unfinished merge with dest backup */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + if (backup->merge_dest_backup == dest_backup->start_time) + { + full_backup = backup; + break; + } + } + + if (!full_backup) + elog(ERROR, "Failed to find full backup that has unfinished merge " + "with backup %s, cannot rerun merge", + base36enc(dest_backup->start_time)); + + if (full_backup->status == BACKUP_STATUS_MERGED) + elog(WARNING, "Incremental chain is broken, try to recover unfinished merge"); + else + elog(ERROR, "Incremental chain is broken, merge is impossible to finish"); + } + else + { + if ((full_backup->status == BACKUP_STATUS_MERGED || + full_backup->status == BACKUP_STATUS_MERGING) && + dest_backup->start_time != full_backup->merge_dest_backup) + { + char *tmp_backup_id = base36enc_dup(full_backup->start_time); + elog(ERROR, "Full backup %s has unfinished merge with backup %s", + tmp_backup_id, base36enc(full_backup->merge_dest_backup)); + pg_free(tmp_backup_id); + } + + } + } + + /* sanity */ + if (full_backup == NULL) + elog(ERROR, "Parent full backup for the given backup %s was not found", + base36enc(backup_id)); + + /* At this point NULL as dest_backup is allowed only in case of full backup + * having status MERGED */ + if (dest_backup == NULL && full_backup->status != BACKUP_STATUS_MERGED) + elog(ERROR, "Cannot run merge for full backup %s", + base36enc(full_backup->start_time)); + + /* sanity */ + if (full_backup->status != BACKUP_STATUS_OK && + full_backup->status != BACKUP_STATUS_DONE && + /* It is possible that previous merging was interrupted */ + full_backup->status != BACKUP_STATUS_MERGED && + full_backup->status != BACKUP_STATUS_MERGING) + elog(ERROR, "Backup %s has status: %s", + base36enc(full_backup->start_time), status2str(full_backup->status)); + + /* Form merge list */ + dest_backup_tmp = dest_backup; + + /* While loop below may look strange, it is done so on purpose + * to handle both whole and broken incremental chains. + */ + while (dest_backup_tmp) + { + /* sanity */ + if (dest_backup_tmp->status != BACKUP_STATUS_OK && + dest_backup_tmp->status != BACKUP_STATUS_DONE && + /* It is possible that previous merging was interrupted */ + dest_backup_tmp->status != BACKUP_STATUS_MERGING && + dest_backup_tmp->status != BACKUP_STATUS_MERGED && + dest_backup_tmp->status != BACKUP_STATUS_DELETING) + elog(ERROR, "Backup %s has status: %s", + base36enc(dest_backup_tmp->start_time), + status2str(dest_backup_tmp->status)); + + if (dest_backup_tmp->backup_mode == BACKUP_MODE_FULL) + break; + + parray_append(merge_list, dest_backup_tmp); + dest_backup_tmp = dest_backup_tmp->parent_backup_link; + } + + /* Add FULL backup */ + parray_append(merge_list, full_backup); + + /* Lock merge chain */ + catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true); + + /* do actual merge */ + merge_chain(merge_list, full_backup, dest_backup); + + pgBackupValidate(full_backup, NULL); + if (full_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Merging of backup %s failed", base36enc(backup_id)); + + /* cleanup */ + parray_walk(backups, pgBackupFree); + parray_free(backups); + parray_free(merge_list); + + elog(INFO, "Merge of backup %s completed", base36enc(backup_id)); +} + +/* + * Merge backup chain. 
+ * dest_backup - incremental backup. + * parent_chain - array of backups starting with dest_backup and + * ending with full_backup. + * + * Copy backup files from incremental backups from parent_chain into + * full backup directory. + * Remove unnecessary directories and files from full backup directory. + * Update metadata of full backup to represent destination backup. + * + * TODO: stop relying on caller to provide valid parent_chain, make sure + * that chain is ok. + */ +void +merge_chain(parray *parent_chain, pgBackup *full_backup, pgBackup *dest_backup) +{ + int i; + char *dest_backup_id; + char full_external_prefix[MAXPGPATH]; + char full_database_dir[MAXPGPATH]; + parray *full_externals = NULL, + *dest_externals = NULL; + + parray *result_filelist = NULL; + bool use_bitmap = true; + bool is_retry = false; +// size_t total_in_place_merge_bytes = 0; + + pthread_t *threads = NULL; + merge_files_arg *threads_args = NULL; + time_t merge_time; + bool merge_isok = true; + /* for fancy reporting */ + time_t end_time; + char pretty_time[20]; + /* in-place merge flags */ + bool compression_match = false; + bool program_version_match = false; + /* It's redundant to check block checksumms during merge */ + skip_block_validation = true; + + /* Handle corner cases of missing destination backup */ + if (dest_backup == NULL && + full_backup->status == BACKUP_STATUS_MERGED) + goto merge_rename; + + if (!dest_backup) + elog(ERROR, "Destination backup is missing, cannot continue merge"); + + if (dest_backup->status == BACKUP_STATUS_MERGING || + full_backup->status == BACKUP_STATUS_MERGING || + full_backup->status == BACKUP_STATUS_MERGED) + { + is_retry = true; + elog(INFO, "Retry failed merge of backup %s with parent chain", base36enc(dest_backup->start_time)); + } + else + elog(INFO, "Merging backup %s with parent chain", base36enc(dest_backup->start_time)); + + /* sanity */ + if (full_backup->merge_dest_backup != INVALID_BACKUP_ID && + full_backup->merge_dest_backup != dest_backup->start_time) + { + char *merge_dest_backup_current = base36enc_dup(dest_backup->start_time); + char *merge_dest_backup = base36enc_dup(full_backup->merge_dest_backup); + + elog(ERROR, "Cannot run merge for %s, because full backup %s has " + "unfinished merge with backup %s", + merge_dest_backup_current, + base36enc(full_backup->start_time), + merge_dest_backup); + + pg_free(merge_dest_backup_current); + pg_free(merge_dest_backup); + } + + /* + * Previous merging was interrupted during deleting source backup. It is + * safe just to delete it again. + */ + if (full_backup->status == BACKUP_STATUS_MERGED) + goto merge_delete; + + /* Forward compatibility is not supported */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + if (parse_program_version(backup->program_version) > + parse_program_version(PROGRAM_VERSION)) + { + elog(ERROR, "Backup %s has been produced by pg_probackup version %s, " + "but current program version is %s. Forward compatibility " + "is not supported.", + base36enc(backup->start_time), + backup->program_version, + PROGRAM_VERSION); + } + } + + /* If destination backup compression algorithm differs from + * full backup compression algorithm, then in-place merge is + * not possible. 
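+ * A mismatch only disables the in-place optimization; the merge itself still proceeds.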
+ */ + if (full_backup->compress_alg == dest_backup->compress_alg) + compression_match = true; + else + elog(WARNING, "In-place merge is disabled because of compression " + "algorithms mismatch"); + + /* + * If current program version differs from destination backup version, + * then in-place merge is not possible. + */ + if ((parse_program_version(full_backup->program_version) == + parse_program_version(dest_backup->program_version)) && + (parse_program_version(dest_backup->program_version) == + parse_program_version(PROGRAM_VERSION))) + program_version_match = true; + else + elog(WARNING, "In-place merge is disabled because of program " + "versions mismatch. Full backup version: %s, " + "destination backup version: %s, " + "current program version: %s", + full_backup->program_version, + dest_backup->program_version, + PROGRAM_VERSION); + + /* Forbid merge retry for failed merges between 2.4.0 and any + * older version. Several format changes make it impossible + * to determine the exact format of any specific file. + */ + if (is_retry && + parse_program_version(dest_backup->program_version) >= 20400 && + parse_program_version(full_backup->program_version) < 20400) + { + elog(ERROR, "Retry of failed merge for backups with different minor " + "versions is forbidden to avoid data corruption because of storage format " + "changes introduced in 2.4.0 version, please take a new full backup"); + } + + /* + * Validate or revalidate all members of parent chain + * with sole exception of FULL backup. If it has MERGING status + * then it isn't a valid backup until merging is finished. + */ + elog(INFO, "Validate parent chain for backup %s", + base36enc(dest_backup->start_time)); + + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + /* FULL backup is not to be validated if its status is MERGING */ + if (backup->backup_mode == BACKUP_MODE_FULL && + backup->status == BACKUP_STATUS_MERGING) + { + continue; + } + + pgBackupValidate(backup, NULL); + + if (backup->status != BACKUP_STATUS_OK) + elog(ERROR, "Backup %s has status %s, merge is aborted", + base36enc(backup->start_time), status2str(backup->status)); + } + + /* + * Get backup files. + */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + backup->files = get_backup_filelist(backup, true); + parray_qsort(backup->files, pgFileCompareRelPathWithExternal); + + /* Set MERGING status for every member of the chain */ + if (backup->backup_mode == BACKUP_MODE_FULL) + { + /* In case of FULL backup also remember backup_id of + * destination backup we are merging with, so + * we can safely allow rerun merge in case of failure. 
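+ * Both the MERGING status and merge_dest_backup are persisted by write_backup() below.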
+ */ + backup->merge_dest_backup = dest_backup->start_time; + backup->status = BACKUP_STATUS_MERGING; + write_backup(backup, true); + } + else + write_backup_status(backup, BACKUP_STATUS_MERGING, instance_name, true); + } + + /* Construct path to database dir: /backup_dir/instance_name/FULL/database */ + join_path_components(full_database_dir, full_backup->root_dir, DATABASE_DIR); + /* Construct path to external dir: /backup_dir/instance_name/FULL/external */ + join_path_components(full_external_prefix, full_backup->root_dir, EXTERNAL_DIR); + + /* Create directories */ + create_data_directories(dest_backup->files, full_database_dir, + dest_backup->root_dir, false, false, FIO_BACKUP_HOST); + + /* External directories stuff */ + if (dest_backup->external_dir_str) + dest_externals = make_external_directory_list(dest_backup->external_dir_str, false); + if (full_backup->external_dir_str) + full_externals = make_external_directory_list(full_backup->external_dir_str, false); + /* + * Rename external directories in FULL backup (if exists) + * according to numeration of external dirs in destionation backup. + */ + if (full_externals && dest_externals) + reorder_external_dirs(full_backup, full_externals, dest_externals); + + /* bitmap optimization rely on n_blocks, which is generally available since 2.3.0 */ + if (parse_program_version(dest_backup->program_version) < 20300) + use_bitmap = false; + + /* Setup threads */ + for (i = 0; i < parray_num(dest_backup->files); i++) + { + pgFile *file = (pgFile *) parray_get(dest_backup->files, i); + + /* if the entry was an external directory, create it in the backup */ + if (file->external_dir_num && S_ISDIR(file->mode)) + { + char dirpath[MAXPGPATH]; + char new_container[MAXPGPATH]; + + makeExternalDirPathByNum(new_container, full_external_prefix, + file->external_dir_num); + join_path_components(dirpath, new_container, file->rel_path); + dir_create_dir(dirpath, DIR_PERMISSION); + } + + pg_atomic_init_flag(&file->lock); + } + + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (merge_files_arg *) palloc(sizeof(merge_files_arg) * num_threads); + + thread_interrupted = false; + merge_time = time(NULL); + elog(INFO, "Start merging backup files"); + for (i = 0; i < num_threads; i++) + { + merge_files_arg *arg = &(threads_args[i]); + arg->merge_filelist = parray_new(); + arg->parent_chain = parent_chain; + arg->dest_backup = dest_backup; + arg->full_backup = full_backup; + arg->full_database_dir = full_database_dir; + arg->full_external_prefix = full_external_prefix; + + arg->compression_match = compression_match; + arg->program_version_match = program_version_match; + arg->use_bitmap = use_bitmap; + arg->is_retry = is_retry; + /* By default there are some error */ + arg->ret = 1; + + elog(VERBOSE, "Start thread: %d", i); + + pthread_create(&threads[i], NULL, merge_files, arg); + } + + /* Wait threads */ + result_filelist = parray_new(); + for (i = 0; i < num_threads; i++) + { + pthread_join(threads[i], NULL); + if (threads_args[i].ret == 1) + merge_isok = false; + + /* Compile final filelist */ + parray_concat(result_filelist, threads_args[i].merge_filelist); + + /* cleanup */ + parray_free(threads_args[i].merge_filelist); + //total_in_place_merge_bytes += threads_args[i].in_place_merge_bytes; + } + + time(&end_time); + pretty_time_interval(difftime(end_time, merge_time), + pretty_time, lengthof(pretty_time)); + + if (merge_isok) + elog(INFO, "Backup files are successfully merged, time elapsed: %s", + pretty_time); + else + 
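The thread setup that follows uses a common fan-out shape: each worker gets its own argument struct whose ret field starts at 1 ("error") and is cleared only if the worker finishes cleanly, and the parent joins every thread and ANDs the results. A reduced, standalone model of that pattern (names and types here are illustrative, not the project's merge_files_arg):

#include <cstdio>
#include <pthread.h>

struct worker_arg
{
    int id;
    int ret;        /* 1 = error by default, worker sets 0 on success */
};

static void *worker(void *arg)
{
    worker_arg *a = (worker_arg *) arg;
    /* ... per-thread work would happen here ... */
    a->ret = 0;     /* only reached if the work completed */
    return NULL;
}

int main(void)
{
    const int num_threads = 4;
    pthread_t threads[num_threads];
    worker_arg args[num_threads];
    bool all_ok = true;

    for (int i = 0; i < num_threads; i++)
    {
        args[i].id = i;
        args[i].ret = 1;                     /* assume failure until proven otherwise */
        pthread_create(&threads[i], NULL, worker, &args[i]);
    }

    for (int i = 0; i < num_threads; i++)
    {
        pthread_join(threads[i], NULL);
        if (args[i].ret == 1)
            all_ok = false;
    }

    printf(all_ok ? "merge ok\n" : "merge failed\n");
    return 0;
}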
elog(ERROR, "Backup files merging failed, time elapsed: %s", + pretty_time); + + /* If temp header map is open, then close it and make rename */ + if (full_backup->hdr_map.fp) + { + cleanup_header_map(&(full_backup->hdr_map)); + + /* sync new header map to disk */ + if (fio_sync(full_backup->hdr_map.path_tmp, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync temp header map \"%s\": %s", + full_backup->hdr_map.path_tmp, strerror(errno)); + + /* Replace old header map with new one */ + if (rename(full_backup->hdr_map.path_tmp, full_backup->hdr_map.path)) + elog(ERROR, "Could not rename file \"%s\" to \"%s\": %s", + full_backup->hdr_map.path_tmp, full_backup->hdr_map.path, strerror(errno)); + } + + /* Close page header maps */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + cleanup_header_map(&(backup->hdr_map)); + } + + /* + * Update FULL backup metadata. + * We cannot set backup status to OK just yet, + * because it still has old start_time. + */ + StrNCpy(full_backup->program_version, PROGRAM_VERSION, + sizeof(full_backup->program_version)); + full_backup->parent_backup = INVALID_BACKUP_ID; + full_backup->start_lsn = dest_backup->start_lsn; + full_backup->stop_lsn = dest_backup->stop_lsn; + full_backup->recovery_time = dest_backup->recovery_time; + full_backup->recovery_xid = dest_backup->recovery_xid; + full_backup->tli = dest_backup->tli; + + pfree(full_backup->external_dir_str); + full_backup->external_dir_str = pgut_strdup(dest_backup->external_dir_str); + + full_backup->merge_time = merge_time; + full_backup->end_time = time(NULL); + + full_backup->compress_alg = dest_backup->compress_alg; + full_backup->compress_level = dest_backup->compress_level; + + /* If incremental backup is pinned, + * then result FULL backup must also be pinned. + * And reverse, if FULL backup was pinned and dest was not, + * then pinning is no more. + */ + full_backup->expire_time = dest_backup->expire_time; + + pg_free(full_backup->note); + full_backup->note = NULL; + + if (dest_backup->note) + full_backup->note = pgut_strdup(dest_backup->note); + + /* FULL backup must inherit wal mode. */ + full_backup->stream = dest_backup->stream; + + /* ARCHIVE backup must inherit wal_bytes too. + * STREAM backup will have its wal_bytes calculated by + * write_backup_filelist(). 
+ */ + if (!dest_backup->stream) + full_backup->wal_bytes = dest_backup->wal_bytes; + + parray_qsort(result_filelist, pgFileCompareRelPathWithExternal); + + write_backup_filelist(full_backup, result_filelist, full_database_dir, NULL, true); + write_backup(full_backup, true); + + /* Delete FULL backup files, that do not exists in destination backup + * Both arrays must be sorted in in reversed order to delete from leaf + */ + parray_qsort(dest_backup->files, pgFileCompareRelPathWithExternalDesc); + parray_qsort(full_backup->files, pgFileCompareRelPathWithExternalDesc); + for (i = 0; i < parray_num(full_backup->files); i++) + { + pgFile *full_file = (pgFile *) parray_get(full_backup->files, i); + + if (full_file->external_dir_num && full_externals) + { + char *dir_name = (char *)parray_get(full_externals, full_file->external_dir_num - 1); + if (backup_contains_external(dir_name, full_externals)) + /* Dir already removed*/ + continue; + } + + if (parray_bsearch(dest_backup->files, full_file, pgFileCompareRelPathWithExternalDesc) == NULL) + { + char full_file_path[MAXPGPATH]; + + /* We need full path, file object has relative path */ + join_path_components(full_file_path, full_database_dir, full_file->rel_path); + + pgFileDelete(full_file->mode, full_file_path); + elog(VERBOSE, "Deleted \"%s\"", full_file_path); + } + } + + /* Critical section starts. + * Change status of FULL backup. + * Files are merged into FULL backup. It is time to remove incremental chain. + */ + full_backup->status = BACKUP_STATUS_MERGED; + write_backup(full_backup, true); + +merge_delete: + for (i = parray_num(parent_chain) - 2; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + delete_backup_files(backup); + } + + /* + * PAGE2 DELETED + * PAGE1 DELETED + * FULL MERGED + * If we crash now, automatic rerun of failed merge is still possible: + * The user should start merge with full backup ID as an argument to option '-i'. + */ + +merge_rename: + /* + * Rename FULL backup directory to destination backup directory. + */ + if (dest_backup) + { + elog(LOG, "Rename %s to %s", full_backup->root_dir, dest_backup->root_dir); + if (rename(full_backup->root_dir, dest_backup->root_dir) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + full_backup->root_dir, dest_backup->root_dir, strerror(errno)); + + /* update root_dir after rename */ + pg_free(full_backup->root_dir); + full_backup->root_dir = pgut_strdup(dest_backup->root_dir); + } + else + { + /* Ugly */ + char backups_dir[MAXPGPATH]; + char instance_dir[MAXPGPATH]; + char destination_path[MAXPGPATH]; + + join_path_components(backups_dir, backup_path, BACKUPS_DIR); + join_path_components(instance_dir, backups_dir, instance_name); + join_path_components(destination_path, instance_dir, + base36enc(full_backup->merge_dest_backup)); + + elog(LOG, "Rename %s to %s", full_backup->root_dir, destination_path); + if (rename(full_backup->root_dir, destination_path) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + full_backup->root_dir, destination_path, strerror(errno)); + + /* update root_dir after rename */ + pg_free(full_backup->root_dir); + full_backup->root_dir = pgut_strdup(destination_path); + } + + /* Reinit path to database_dir */ + join_path_components(full_backup->database_dir, full_backup->root_dir, DATABASE_DIR); + + /* If we crash here, it will produce full backup in MERGED + * status, located in directory with wrong backup id. + * It should not be a problem. 
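The deletion loop above computes a set difference: both file lists are sorted in descending path order, and every FULL-backup entry that parray_bsearch cannot find in the destination list is removed; descending order guarantees files are deleted before the directories that contain them. A standalone model with the standard library:

#include <algorithm>
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

int main(void)
{
    std::vector<std::string> full = {"base/1/100", "base/1/200", "base/1", "base"};
    std::vector<std::string> dest = {"base/1/100", "base/1", "base"};

    /* sort both lists in reverse order so files come before their directories */
    std::sort(full.begin(), full.end(), std::greater<std::string>());
    std::sort(dest.begin(), dest.end(), std::greater<std::string>());

    for (const std::string &path : full)
    {
        /* binary search with the same comparator used for sorting */
        bool present = std::binary_search(dest.begin(), dest.end(), path,
                                          std::greater<std::string>());
        if (!present)
            printf("would delete \"%s\"\n", path.c_str());   /* base/1/200 */
    }
    return 0;
}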
+ */ + + /* + * Merging finished, now we can safely update ID of the FULL backup + */ + dest_backup_id = base36enc_dup(full_backup->merge_dest_backup); + elog(INFO, "Rename merged full backup %s to %s", + base36enc(full_backup->start_time), dest_backup_id); + + full_backup->status = BACKUP_STATUS_OK; + full_backup->start_time = full_backup->merge_dest_backup; + full_backup->merge_dest_backup = INVALID_BACKUP_ID; + write_backup(full_backup, true); + /* Critical section end */ + + /* Cleanup */ + pg_free(dest_backup_id); + if (threads) + { + pfree(threads_args); + pfree(threads); + } + + if (result_filelist && parray_num(result_filelist) > 0) + { + parray_walk(result_filelist, pgFileFree); + parray_free(result_filelist); + } + + if (dest_externals != NULL) + free_dir_list(dest_externals); + + if (full_externals != NULL) + free_dir_list(full_externals); + + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + if (backup->files) + { + parray_walk(backup->files, pgFileFree); + parray_free(backup->files); + } + } +} + +/* + * Thread worker of merge_chain(). + */ +static void * +merge_files(void *arg) +{ + int i; + merge_files_arg *arguments = (merge_files_arg *) arg; + size_t n_files = parray_num(arguments->dest_backup->files); + + for (i = 0; i < n_files; i++) + { + pgFile *dest_file = (pgFile *) parray_get(arguments->dest_backup->files, i); + pgFile *tmp_file; + bool in_place = false; /* keep file as it is */ + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during merge"); + + if (!pg_atomic_test_set_flag(&dest_file->lock)) + continue; + + tmp_file = pgFileInit(dest_file->rel_path); + tmp_file->mode = dest_file->mode; + tmp_file->is_datafile = dest_file->is_datafile; + tmp_file->is_cfs = dest_file->is_cfs; + tmp_file->external_dir_num = dest_file->external_dir_num; + tmp_file->dbOid = dest_file->dbOid; + + /* Directories were created before */ + if (S_ISDIR(dest_file->mode)) + goto done; + + if (progress) + elog(INFO, "Progress: (%d/%lu). Merging file \"%s\"", + i + 1, n_files, dest_file->rel_path); + + if (dest_file->is_datafile && !dest_file->is_cfs) + tmp_file->segno = dest_file->segno; + + // If destination file is 0 sized, then go for the next + if (dest_file->write_size == 0) + { + if (!dest_file->is_datafile || dest_file->is_cfs) + tmp_file->crc = dest_file->crc; + + tmp_file->write_size = 0; + goto done; + } + + /* + * If file didn`t changed over the course of all incremental chain, + * then do in-place merge, unless destination backup has + * different compression algorithm. 
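Inside merge_files(), every worker scans the whole destination file list and claims a file by test-and-setting its lock flag; whichever thread wins the race processes the file and the others skip it, so no explicit work queue is needed. A reduced model of that claiming pattern using std::atomic instead of pg_atomic_test_set_flag:

#include <atomic>
#include <cstdio>
#include <thread>

static const int N_FILES = 8;
static std::atomic<bool> claimed[N_FILES];

static void worker(int worker_id)
{
    for (int i = 0; i < N_FILES; i++)
    {
        /* exchange returns the previous value: true means another worker owns it */
        if (claimed[i].exchange(true))
            continue;
        printf("worker %d merges file %d\n", worker_id, i);
    }
}

int main(void)
{
    for (int i = 0; i < N_FILES; i++)
        claimed[i].store(false);

    std::thread t1(worker, 1);
    std::thread t2(worker, 2);
    t1.join();
    t2.join();
    return 0;
}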
+ * In-place merge is also impossible, if program version of destination + * backup differs from PROGRAM_VERSION + */ + if (arguments->program_version_match && arguments->compression_match && + !arguments->is_retry) + { + /* + * Case 1: + * in this case in place merge is possible: + * 0 PAGE; file, size BYTES_INVALID + * 1 PAGE; file, size BYTES_INVALID + * 2 FULL; file, size 100500 + * + * Case 2: + * in this case in place merge is possible: + * 0 PAGE; file, size 0 + * 1 PAGE; file, size 0 + * 2 FULL; file, size 100500 + * + * Case 3: + * in this case in place merge is impossible: + * 0 PAGE; file, size BYTES_INVALID + * 1 PAGE; file, size 100501 + * 2 FULL; file, size 100500 + * + * Case 4 (good candidate for future optimization): + * in this case in place merge is impossible: + * 0 PAGE; file, size BYTES_INVALID + * 1 PAGE; file, size 100501 + * 2 FULL; file, not exists yet + */ + + in_place = true; + + for (int j = parray_num(arguments->parent_chain) - 1; j >= 0; j--) + { + pgFile **res_file = NULL; + pgFile *file = NULL; + + pgBackup *backup = (pgBackup *) parray_get(arguments->parent_chain, j); + + /* lookup file in intermediate backup */ + res_file = (pgFile **)parray_bsearch(backup->files, dest_file, pgFileCompareRelPathWithExternal); + file = (res_file) ? *res_file : NULL; + + /* Destination file is not exists yet, + * in-place merge is impossible + */ + if (file == NULL) + { + in_place = false; + break; + } + + /* Skip file from FULL backup */ + if (backup->backup_mode == BACKUP_MODE_FULL) + continue; + + if (file->write_size != BYTES_INVALID) + { + in_place = false; + break; + } + } + } + + /* + * In-place merge means that file in FULL backup stays as it is, + * no additional actions are required. + * page header map cannot be trusted when retrying, so no + * in place merge for retry. + */ + if (in_place) + { + pgFile **res_file = NULL; + pgFile *file = NULL; + res_file = (pgFile **)parray_bsearch(arguments->full_backup->files, dest_file, + pgFileCompareRelPathWithExternal); + file = (res_file) ? *res_file : NULL; + + /* If file didn`t changed in any way, then in-place merge is possible */ + if (file && + file->n_blocks == dest_file->n_blocks) + { + BackupPageHeader2 *headers = NULL; + + elog(VERBOSE, "The file didn`t changed since FULL backup, skip merge: \"%s\"", + file->rel_path); + + tmp_file->crc = file->crc; + tmp_file->write_size = file->write_size; + + if (dest_file->is_datafile && !dest_file->is_cfs) + { + tmp_file->n_blocks = file->n_blocks; + tmp_file->compress_alg = file->compress_alg; + tmp_file->uncompressed_size = file->n_blocks * BLCKSZ; + + tmp_file->n_headers = file->n_headers; + tmp_file->hdr_crc = file->hdr_crc; + } + else + tmp_file->uncompressed_size = tmp_file->write_size; + + /* Copy header metadata from old map into a new one */ + tmp_file->n_headers = file->n_headers; + headers = get_data_file_headers(&(arguments->full_backup->hdr_map), file, + parse_program_version(arguments->full_backup->program_version), + true); + + /* sanity */ + if (!headers && file->n_headers > 0) + elog(ERROR, "Failed to get headers for file \"%s\"", file->rel_path); + + write_page_headers(headers, tmp_file, &(arguments->full_backup->hdr_map), true); + pg_free(headers); + + //TODO: report in_place merge bytes. 
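The in-place decision below can be boiled down to a predicate over the chain: the file must be present in every member of the chain, and every incremental entry must still carry BYTES_INVALID ("unchanged") as its recorded size. The sketch below mirrors that loop for the cases listed in the comment above; it is a simplified model, with -1 standing in for BYTES_INVALID:

#include <cstdio>
#include <vector>

static const long BYTES_INVALID_SKETCH = -1;   /* "file unchanged in this backup" */

struct chain_entry
{
    bool is_full;       /* true for the FULL backup at the bottom of the chain */
    bool file_exists;   /* is the file listed in this backup at all?           */
    long write_size;    /* size recorded for the file in this backup           */
};

/* Decide whether the file can stay in the FULL backup as it is. */
static bool in_place_possible(const std::vector<chain_entry> &chain)
{
    for (const chain_entry &e : chain)
    {
        if (!e.file_exists)
            return false;                        /* no copy yet, must rebuild     */
        if (e.is_full)
            continue;                            /* FULL copy is the baseline     */
        if (e.write_size != BYTES_INVALID_SKETCH)
            return false;                        /* file changed in an increment  */
    }
    return true;
}

int main(void)
{
    /* Case 1 above: unchanged through the whole chain */
    std::vector<chain_entry> case1 = {
        {true,  true, 100500},                   /* FULL  */
        {false, true, BYTES_INVALID_SKETCH},     /* PAGE1 */
        {false, true, BYTES_INVALID_SKETCH},     /* PAGE2 */
    };
    /* Case 3 above: the file was rewritten in PAGE1 */
    std::vector<chain_entry> case3 = {
        {true,  true, 100500},
        {false, true, 100501},
        {false, true, BYTES_INVALID_SKETCH},
    };

    printf("case 1 in-place: %s\n", in_place_possible(case1) ? "yes" : "no");
    printf("case 3 in-place: %s\n", in_place_possible(case3) ? "yes" : "no");
    return 0;
}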
+ goto done; + } + } + + if (dest_file->is_datafile && !dest_file->is_cfs) + merge_data_file(arguments->parent_chain, + arguments->full_backup, + arguments->dest_backup, + dest_file, tmp_file, + arguments->full_database_dir, + arguments->use_bitmap, + arguments->is_retry); + else + merge_non_data_file(arguments->parent_chain, + arguments->full_backup, + arguments->dest_backup, + dest_file, tmp_file, + arguments->full_database_dir, + arguments->full_external_prefix); + +done: + parray_append(arguments->merge_filelist, tmp_file); + } + + /* Data files merging is successful */ + arguments->ret = 0; + + return NULL; +} + +/* Recursively delete a directory and its contents */ +static void +remove_dir_with_files(const char *path) +{ + parray *files = parray_new(); + int i; + char full_path[MAXPGPATH]; + + dir_list_file(files, path, false, false, true, false, false, 0, FIO_LOCAL_HOST); + parray_qsort(files, pgFileCompareRelPathWithExternalDesc); + for (i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + + join_path_components(full_path, path, file->rel_path); + + pgFileDelete(file->mode, full_path); + elog(VERBOSE, "Deleted \"%s\"", full_path); + } + + /* cleanup */ + parray_walk(files, pgFileFree); + parray_free(files); +} + +/* Get index of external directory */ +static int +get_external_index(const char *key, const parray *list) +{ + int i; + + if (!list) /* Nowhere to search */ + return -1; + for (i = 0; i < parray_num(list); i++) + { + if (strcmp(key, (char *)parray_get(list, i)) == 0) + return i + 1; + } + return -1; +} + +/* Rename directories in to_backup according to order in from_external */ +static void +reorder_external_dirs(pgBackup *to_backup, parray *to_external, + parray *from_external) +{ + char externaldir_template[MAXPGPATH]; + int i; + + join_path_components(externaldir_template, to_backup->root_dir, EXTERNAL_DIR); + for (i = 0; i < parray_num(to_external); i++) + { + int from_num = get_external_index((const char *)parray_get(to_external, i), + from_external); + if (from_num == -1) + { + char old_path[MAXPGPATH]; + makeExternalDirPathByNum(old_path, externaldir_template, i + 1); + remove_dir_with_files(old_path); + } + else if (from_num != i + 1) + { + char old_path[MAXPGPATH]; + char new_path[MAXPGPATH]; + makeExternalDirPathByNum(old_path, externaldir_template, i + 1); + makeExternalDirPathByNum(new_path, externaldir_template, from_num); + elog(VERBOSE, "Rename %s to %s", old_path, new_path); + if (rename (old_path, new_path) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + old_path, new_path, strerror(errno)); + } + } +} + +/* Merge is usually happens as usual backup/restore via temp files, unless + * file didn`t changed since FULL backup AND full a dest backup have the + * same compression algorithm. In this case file can be left as it is. + */ +void +merge_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, pgFile *tmp_file, + const char *full_database_dir, bool use_bitmap, bool is_retry) +{ + FILE *out = NULL; + char *buffer = (char *)pgut_malloc(STDIO_BUFSIZE); + char to_fullpath[MAXPGPATH]; + char to_fullpath_tmp1[MAXPGPATH]; /* used for restore */ + char to_fullpath_tmp2[MAXPGPATH]; /* used for backup */ + + /* The next possible optimization is copying "as is" the file + * from intermediate incremental backup, that didn`t changed in + * subsequent incremental backups. TODO. 
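merge_data_file() allocates a STDIO_BUFSIZE-sized buffer here and, a little further down, hands it to setvbuf() so the restored file is written through one large fully buffered stream instead of many small writes. A minimal illustration of that stdio idiom (the buffer size and file name below are arbitrary):

#include <cstdio>
#include <cstdlib>

int main(void)
{
    const size_t BUFSIZE = 64 * 1024;          /* stands in for STDIO_BUFSIZE */
    char *buffer = (char *) malloc(BUFSIZE);

    FILE *out = fopen("restored.tmp", "wb");
    if (out == NULL)
        return 1;

    /* _IOFBF = full buffering: writes accumulate in 'buffer' until it fills */
    setvbuf(out, buffer, _IOFBF, BUFSIZE);

    for (int i = 0; i < 1000; i++)
        fputc('x', out);                       /* many small writes, few syscalls */

    fclose(out);                               /* flush first, only then free the buffer */
    free(buffer);
    return 0;
}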
+ */ + + /* set fullpath of destination file and temp files */ + join_path_components(to_fullpath, full_database_dir, tmp_file->rel_path); + snprintf(to_fullpath_tmp1, MAXPGPATH, "%s_tmp1", to_fullpath); + snprintf(to_fullpath_tmp2, MAXPGPATH, "%s_tmp2", to_fullpath); + + /* open temp file */ + out = fopen(to_fullpath_tmp1, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open merge target file \"%s\": %s", + to_fullpath_tmp1, strerror(errno)); + setvbuf(out, buffer, _IOFBF, STDIO_BUFSIZE); + + /* restore file into temp file */ + tmp_file->size = restore_data_file(parent_chain, dest_file, out, to_fullpath_tmp1, + use_bitmap, NULL, InvalidXLogRecPtr, NULL, + /* when retrying merge header map cannot be trusted */ + is_retry ? false : true); + if (fclose(out) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", + to_fullpath_tmp1, strerror(errno)); + + pg_free(buffer); + + /* tmp_file->size is greedy, even if there is single 8KB block in file, + * that was overwritten twice during restore_data_file, we would assume that its size is + * 16KB. + * TODO: maybe we should just trust dest_file->n_blocks? + * No, we can`t, because current binary can be used to merge + * 2 backups of old versions, where n_blocks is missing. + */ + + backup_data_file(NULL, tmp_file, to_fullpath_tmp1, to_fullpath_tmp2, + InvalidXLogRecPtr, BACKUP_MODE_FULL, + dest_backup->compress_alg, dest_backup->compress_level, + dest_backup->checksum_version, + &(full_backup->hdr_map), true); + + /* drop restored temp file */ + if (unlink(to_fullpath_tmp1) == -1) + elog(ERROR, "Cannot remove file \"%s\": %s", to_fullpath_tmp1, + strerror(errno)); + + /* + * In old (=<2.2.7) versions of pg_probackup n_blocks attribute of files + * in PAGE and PTRACK wasn`t filled. + */ + //Assert(tmp_file->n_blocks == dest_file->n_blocks); + + /* Backward compatibility kludge: + * When merging old backups, it is possible that + * to_fullpath_tmp2 size will be 0, and so it will be + * truncated in backup_data_file(). + * TODO: remove in 3.0.0 + */ + if (tmp_file->write_size == 0) + return; + + /* sync second temp file to disk */ + if (fio_sync(to_fullpath_tmp2, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync merge temp file \"%s\": %s", + to_fullpath_tmp2, strerror(errno)); + + /* Do atomic rename from second temp file to destination file */ + if (rename(to_fullpath_tmp2, to_fullpath) == -1) + elog(ERROR, "Could not rename file \"%s\" to \"%s\": %s", + to_fullpath_tmp2, to_fullpath, strerror(errno)); + + /* drop temp file */ + unlink(to_fullpath_tmp1); +} + +/* + * For every destionation file lookup the newest file in chain and + * copy it. + * Additional pain is external directories. 
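For non-data files, the function described in the comment above walks the parent chain from the destination backup's direct parent towards the FULL backup and copies the first entry whose recorded size is greater than zero, i.e. the newest full copy of the file. A standalone model of that lookup (backup IDs and sizes below are made up):

#include <cstdio>
#include <vector>

/* One file's entry in each backup of the chain, newest (destination) first. */
struct file_version
{
    const char *backup_id;
    bool        present;      /* file listed in this backup at all?          */
    long        write_size;   /* > 0 means this backup holds a full copy     */
};

static const file_version *find_newest_full_copy(const std::vector<file_version> &chain)
{
    for (const file_version &v : chain)       /* newest -> oldest */
    {
        if (!v.present)
            return NULL;                      /* should not happen before a full copy */
        if (v.write_size > 0)
            return &v;                        /* first full copy wins */
    }
    return NULL;
}

int main(void)
{
    std::vector<file_version> chain = {
        {"DEST",  true, 0},      /* destination: file unchanged here */
        {"PAGE1", true, 4096},   /* newest full copy of the file     */
        {"FULL",  true, 4096},   /* FULL backup                      */
    };

    const file_version *src = find_newest_full_copy(chain);
    if (src)
        printf("copy file from backup %s\n", src->backup_id);   /* PAGE1 */
    return 0;
}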
+ */ +void +merge_non_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, pgFile *tmp_file, + const char *full_database_dir, const char *to_external_prefix) +{ + int i; + char to_fullpath[MAXPGPATH]; + char to_fullpath_tmp[MAXPGPATH]; /* used for backup */ + char from_fullpath[MAXPGPATH]; + pgBackup *from_backup = NULL; + pgFile *from_file = NULL; + + /* We need to make full path to destination file */ + if (dest_file->external_dir_num) + { + char temp[MAXPGPATH]; + makeExternalDirPathByNum(temp, to_external_prefix, + dest_file->external_dir_num); + join_path_components(to_fullpath, temp, dest_file->rel_path); + } + else + join_path_components(to_fullpath, full_database_dir, dest_file->rel_path); + + snprintf(to_fullpath_tmp, MAXPGPATH, "%s_tmp", to_fullpath); + + /* + * Iterate over parent chain starting from direct parent of destination + * backup to oldest backup in chain, and look for the first + * full copy of destination file. + * Full copy is latest possible destination file with size equal(!) + * or greater than zero. + */ + for (i = 0; i < parray_num(parent_chain); i++) + { + pgFile **res_file = NULL; + from_backup = (pgBackup *) parray_get(parent_chain, i); + + /* lookup file in intermediate backup */ + res_file = (pgFile **)parray_bsearch(from_backup->files, dest_file, pgFileCompareRelPathWithExternal); + from_file = (res_file) ? *res_file : NULL; + + /* + * It should not be possible not to find source file in intermediate + * backup, without encountering full copy first. + */ + if (!from_file) + { + elog(ERROR, "Failed to locate nonedata file \"%s\" in backup %s", + dest_file->rel_path, base36enc(from_backup->start_time)); + continue; + } + + if (from_file->write_size > 0) + break; + } + + /* sanity */ + if (!from_backup) + elog(ERROR, "Failed to found a backup containing full copy of nonedata file \"%s\"", + dest_file->rel_path); + + if (!from_file) + elog(ERROR, "Failed to locate a full copy of nonedata file \"%s\"", dest_file->rel_path); + + /* set path to source file */ + if (from_file->external_dir_num) + { + char temp[MAXPGPATH]; + char external_prefix[MAXPGPATH]; + + join_path_components(external_prefix, from_backup->root_dir, EXTERNAL_DIR); + makeExternalDirPathByNum(temp, external_prefix, dest_file->external_dir_num); + + join_path_components(from_fullpath, temp, from_file->rel_path); + } + else + { + char backup_database_dir[MAXPGPATH]; + join_path_components(backup_database_dir, from_backup->root_dir, DATABASE_DIR); + join_path_components(from_fullpath, backup_database_dir, from_file->rel_path); + } + + /* Copy file to FULL backup directory into temp file */ + backup_non_data_file(tmp_file, NULL, from_fullpath, + to_fullpath_tmp, BACKUP_MODE_FULL, 0, false); + + /* sync temp file to disk */ + if (fio_sync(to_fullpath_tmp, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync merge temp file \"%s\": %s", + to_fullpath_tmp, strerror(errno)); + + /* Do atomic rename from second temp file to destination file */ + if (rename(to_fullpath_tmp, to_fullpath) == -1) + elog(ERROR, "Could not rename file \"%s\" to \"%s\": %s", + to_fullpath_tmp, to_fullpath, strerror(errno)); + +} diff --git a/src/bin/pg_probackup/parray.cpp b/src/bin/pg_probackup/parray.cpp new file mode 100644 index 000000000..d1c01d8d8 --- /dev/null +++ b/src/bin/pg_probackup/parray.cpp @@ -0,0 +1,213 @@ +/*------------------------------------------------------------------------- + * + * parray.c: pointer array collection. 
+ * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "parray.h" +#include "pgut.h" + +/* members of struct parray are hidden from client. */ +struct parray +{ + void **data; /* pointer array, expanded if necessary */ + size_t alloced; /* number of elements allocated */ + size_t used; /* number of elements in use */ +}; + +/* + * Create new parray object. + * Never returns NULL. + */ +parray * +parray_new(void) +{ + parray *a = pgut_new(parray); + + a->data = NULL; + a->used = 0; + a->alloced = 0; + + parray_expand(a, 1024); + + return a; +} + +/* + * Expand array pointed by data to newsize. + * Elements in expanded area are initialized to NULL. + * Note: never returns NULL. + */ +void +parray_expand(parray *array, size_t newsize) +{ + void **p; + + /* already allocated */ + if (newsize <= array->alloced) + return; + + p = (void **)pgut_realloc(array->data, sizeof(void *) * newsize); + + /* initialize expanded area to NULL */ + memset(p + array->alloced, 0, (newsize - array->alloced) * sizeof(void *)); + + array->alloced = newsize; + array->data = p; +} + +void +parray_free(parray *array) +{ + if (array == NULL) + return; + free(array->data); + free(array); +} + +void +parray_append(parray *array, void *elem) +{ + if (array->used + 1 > array->alloced) + parray_expand(array, array->alloced * 2); + + array->data[array->used++] = elem; +} + +void +parray_insert(parray *array, size_t index, void *elem) +{ + if (array->used + 1 > array->alloced) + parray_expand(array, array->alloced * 2); + + memmove(array->data + index + 1, array->data + index, + (array->alloced - index - 1) * sizeof(void *)); + array->data[index] = elem; + + /* adjust used count */ + if (array->used < index + 1) + array->used = index + 1; + else + array->used++; +} + +/* + * Concatenate two parray. + * parray_concat() appends the copy of the content of src to the end of dest. + */ +parray * +parray_concat(parray *dest, const parray *src) +{ + /* expand head array */ + parray_expand(dest, dest->used + src->used); + + /* copy content of src after content of dest */ + memcpy(dest->data + dest->used, src->data, src->used * sizeof(void *)); + dest->used += parray_num(src); + + return dest; +} + +void +parray_set(parray *array, size_t index, void *elem) +{ + if (index > array->alloced - 1) + parray_expand(array, index + 1); + + array->data[index] = elem; + + /* adjust used count */ + if (array->used < index + 1) + array->used = index + 1; +} + +void * +parray_get(const parray *array, size_t index) +{ + if (index > array->alloced - 1) + return NULL; + return array->data[index]; +} + +void * +parray_remove(parray *array, size_t index) +{ + void *val; + + /* removing unused element */ + if (index > array->used) + return NULL; + + val = array->data[index]; + + /* Do not move if the last element was removed. 
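Since parray is used by nearly every module in this patch, a short usage example may help: the comparator receives pointers to the stored pointers (exactly as with qsort/bsearch over a void* array), and parray_free releases the array but not the elements. The snippet assumes it is compiled together with parray.cpp and the pgut allocators it depends on:

#include <cstdio>
#include <cstring>

#include "parray.h"

static int compare_cstrings(const void *a, const void *b)
{
    return strcmp(*(const char * const *) a, *(const char * const *) b);
}

static void print_entry(void *elem)
{
    printf("  %s\n", (const char *) elem);
}

int main(void)
{
    parray *files = parray_new();

    parray_append(files, (void *) "base/1/200");
    parray_append(files, (void *) "base/1/100");
    parray_append(files, (void *) "global/pg_control");

    parray_qsort(files, compare_cstrings);

    printf("%zu entries, sorted:\n", parray_num(files));
    parray_walk(files, print_entry);

    if (parray_bsearch(files, (void *) "base/1/100", compare_cstrings))
        printf("found base/1/100\n");

    parray_free(files);   /* frees the array itself, not the elements */
    return 0;
}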
*/ + if (index < array->alloced - 1) + memmove(array->data + index, array->data + index + 1, + (array->alloced - index - 1) * sizeof(void *)); + + /* adjust used count */ + array->used--; + + return val; +} + +bool +parray_rm(parray *array, const void *key, int(*compare)(const void *, const void *)) +{ + int i; + + for (i = 0; i < array->used; i++) + { + if (compare(&key, &array->data[i]) == 0) + { + parray_remove(array, i); + return true; + } + } + return false; +} + +size_t +parray_num(const parray *array) +{ + return array->used; +} + +void +parray_qsort(parray *array, int(*compare)(const void *, const void *)) +{ + qsort(array->data, array->used, sizeof(void *), compare); +} + +void +parray_walk(parray *array, void (*action)(void *)) +{ + int i; + for (i = 0; i < array->used; i++) + action(array->data[i]); +} + +void * +parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const void *)) +{ + return bsearch(&key, array->data, array->used, sizeof(void *), compare); +} + +/* checks that parray contains element */ +bool parray_contains(parray *array, void *elem) +{ + int i; + + for (i = 0; i < parray_num(array); i++) + { + if (parray_get(array, i) == elem) + return true; + } + return false; +} diff --git a/src/bin/pg_probackup/parray.h b/src/bin/pg_probackup/parray.h new file mode 100644 index 000000000..85d7383f3 --- /dev/null +++ b/src/bin/pg_probackup/parray.h @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------- + * + * parray.h: pointer array collection. + * + * Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * + *------------------------------------------------------------------------- + */ + +#ifndef PARRAY_H +#define PARRAY_H + +/* + * "parray" hold pointers to objects in a linear memory area. + * Client use "parray *" to access parray object. + */ +typedef struct parray parray; + +extern parray *parray_new(void); +extern void parray_expand(parray *array, size_t newnum); +extern void parray_free(parray *array); +extern void parray_append(parray *array, void *val); +extern void parray_insert(parray *array, size_t index, void *val); +extern parray *parray_concat(parray *head, const parray *tail); +extern void parray_set(parray *array, size_t index, void *val); +extern void *parray_get(const parray *array, size_t index); +extern void *parray_remove(parray *array, size_t index); +extern bool parray_rm(parray *array, const void *key, int(*compare)(const void *, const void *)); +extern size_t parray_num(const parray *array); +extern void parray_qsort(parray *array, int(*compare)(const void *, const void *)); +extern void *parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const void *)); +extern void parray_walk(parray *array, void (*action)(void *)); +extern bool parray_contains(parray *array, void *elem); + +#endif /* PARRAY_H */ + diff --git a/src/bin/pg_probackup/parsexlog.cpp b/src/bin/pg_probackup/parsexlog.cpp new file mode 100644 index 000000000..c7cd1921a --- /dev/null +++ b/src/bin/pg_probackup/parsexlog.cpp @@ -0,0 +1,1890 @@ +/*------------------------------------------------------------------------- + * + * parsexlog.c + * Functions for reading Write-Ahead-Log + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include "access/transam.h" +#include "catalog/pg_control.h" +#include "commands/dbcommands.h" +#include "catalog/storage_xlog.h" + +#ifdef HAVE_LIBZ +#include +#endif + +#include "thread.h" +#include +#include +#include "common/fe_memutils.h" + +/* + * RmgrNames is an array of resource manager names, to make error messages + * a bit nicer. + */ +#if PG_VERSION_NUM >= 100000 +#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask) \ + name, +#else +#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup) \ + name, +#endif + +static const char *RmgrNames[RM_MAX_ID + 1] = { +#include "access/rmgrlist.h" +}; + +/* some from access/xact.h */ +/* + * XLOG allows to store some information in high 4 bits of log record xl_info + * field. We use 3 for the opcode, and one about an optional flag variable. + */ +#define XLOG_XACT_COMMIT 0x00 +#define XLOG_XACT_PREPARE 0x10 +#define XLOG_XACT_ABORT 0x20 +#define XLOG_XACT_COMMIT_PREPARED 0x30 +#define XLOG_XACT_ABORT_PREPARED 0x40 +#define XLOG_XACT_ASSIGNMENT 0x50 +/* free opcode 0x60 */ +/* free opcode 0x70 */ + +/* mask for filtering opcodes out of xl_info */ +#define XLOG_XACT_OPMASK 0x70 + +typedef struct xl_xact_commit_local +{ + TimestampTz xact_time; /* time of commit */ + + /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ + /* xl_xact_dbinfo follows if XINFO_HAS_DBINFO */ + /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ + /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ + /* xl_xact_invals follows if XINFO_HAS_INVALS */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ +} xl_xact_commit_local; + +typedef struct xl_xact_abort_local +{ + TimestampTz xact_time; /* time of abort */ + + /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ + /* No db_info required */ + /* xl_xact_subxacts follows if HAS_SUBXACT */ + /* xl_xact_relfilenodes follows if HAS_RELFILENODES */ + /* No invalidation messages needed. */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ +} xl_xact_abort_local; + +/* + * XLogRecTarget allows to track the last recovery targets. Currently used only + * within validate_wal(). + */ +typedef struct XLogRecTarget +{ + TimestampTz rec_time; + TransactionId rec_xid; + XLogRecPtr rec_lsn; +} XLogRecTarget; + +typedef struct XLogReaderData +{ + int thread_num; + TimeLineID tli; + + XLogRecTarget cur_rec; + XLogSegNo xlogsegno; + bool xlogexists; + + char page_buf[XLOG_BLCKSZ]; + uint32 prev_page_off; + + bool need_switch; + + int xlogfile; + char xlogpath[MAXPGPATH]; + +#ifdef HAVE_LIBZ + gzFile gz_xlogfile; + char gz_xlogpath[MAXPGPATH]; +#endif +} XLogReaderData; + +/* Function to process a WAL record */ +typedef void (*xlog_record_function) (XLogReaderState *record, + XLogReaderData *reader_data, + bool *stop_reading); + +/* An argument for a thread function */ +typedef struct +{ + XLogReaderData reader_data; + + xlog_record_function process_record; + + XLogRecPtr startpoint; + XLogRecPtr endpoint; + XLogSegNo endSegNo; + + /* + * The thread got the recovery target. 
+ */ + bool got_target; + + /* Should we read record, located at endpoint position */ + bool inclusive_endpoint; + + /* + * Return value from the thread. + * 0 means there is no error, 1 - there is an error. + */ + int ret; +} xlog_thread_arg; + +static int SimpleXLogPageRead_local(XLogReaderState *xlogreader, + XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, char *readBuf, + TimeLineID *pageTLI); +static XLogReaderState *InitXLogPageRead(XLogReaderData *reader_data, + const char *archivedir, + TimeLineID tli, uint32 segment_size, + bool manual_switch, + bool consistent_read, + bool allocate_reader); +static bool RunXLogThreads(const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, + TimeLineID tli, uint32 segment_size, + XLogRecPtr startpoint, XLogRecPtr endpoint, + bool consistent_read, + xlog_record_function process_record, + XLogRecTarget *last_rec, + bool inclusive_endpoint); +//static XLogReaderState *InitXLogThreadRead(xlog_thread_arg *arg); +static bool SwitchThreadToNextWal(XLogReaderState *xlogreader, + xlog_thread_arg *arg); +static bool XLogWaitForConsistency(XLogReaderState *xlogreader); +static void *XLogThreadWorker(void *arg); +static void CleanupXLogPageRead(XLogReaderState *xlogreader); +static void PrintXLogCorruptionMsg(XLogReaderData *reader_data, int elevel); + +static void extractPageInfo(XLogReaderState *record, + XLogReaderData *reader_data, bool *stop_reading); +static void validateXLogRecord(XLogReaderState *record, + XLogReaderData *reader_data, bool *stop_reading); +static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime); + +static XLogSegNo segno_start = 0; +/* Segment number where target record is located */ +static XLogSegNo segno_target = 0; +/* Next segment number to read by a thread */ +static XLogSegNo segno_next = 0; +/* Number of segments already read by threads */ +static uint32 segnum_read = 0; +/* Number of detected corrupted or absent segments */ +static uint32 segnum_corrupted = 0; +static pthread_mutex_t wal_segment_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* copied from timestamp.c */ +pg_time_t +timestamptz_to_time_t(TimestampTz t) +{ + pg_time_t result; + +#ifdef HAVE_INT64_TIMESTAMP + result = (pg_time_t) (t / USECS_PER_SEC + + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)); +#else + result = (pg_time_t) (t + + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)); +#endif + return result; +} + +static const char *wal_archivedir = NULL; +static uint32 wal_seg_size = 0; +/* + * If true a wal reader thread switches to the next segment using + * segno_next. + */ +static bool wal_manual_switch = false; +/* + * If true a wal reader thread waits for other threads if the thread met absent + * wal segment. + */ +static bool wal_consistent_read = false; + +/* + * Variables used within validate_wal() and validateXLogRecord() to stop workers + */ +static time_t wal_target_time = 0; +static TransactionId wal_target_xid = InvalidTransactionId; +static XLogRecPtr wal_target_lsn = InvalidXLogRecPtr; + +/* + * Read WAL from the archive directory, from 'startpoint' to 'endpoint' on the + * given timeline. Collect data blocks touched by the WAL records into a page map. + * + * Pagemap extracting is processed using threads. Each thread reads single WAL + * file. 
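timestamptz_to_time_t() above converts a PostgreSQL timestamp (counted from 2000-01-01, in microseconds when integer timestamps are used) into a Unix time_t by dividing out the microseconds and adding the epoch offset of 10957 days, i.e. 946684800 seconds. A standalone sketch of the integer-timestamp branch:

#include <cstdio>
#include <cstdint>
#include <ctime>

static const int64_t PG_TO_UNIX_EPOCH_SECS = INT64_C(10957) * 86400;   /* 946684800 */
static const int64_t USECS_PER_SEC_SKETCH  = 1000000;

/* Model of the conversion above for integer (microsecond) timestamps. */
static time_t pg_timestamptz_to_unix(int64_t pg_usecs)
{
    return (time_t) (pg_usecs / USECS_PER_SEC_SKETCH + PG_TO_UNIX_EPOCH_SECS);
}

int main(void)
{
    /* 0 in PostgreSQL time is 2000-01-01 00:00:00 UTC */
    time_t t = pg_timestamptz_to_unix(0);
    char buf[64];

    strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&t));
    printf("%s UTC\n", buf);   /* prints 2000-01-01 00:00:00 */
    return 0;
}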
+ */ +bool +extractPageMap(const char *archivedir, uint32 wal_seg_size, + XLogRecPtr startpoint, TimeLineID start_tli, + XLogRecPtr endpoint, TimeLineID end_tli, + parray *tli_list) +{ + bool extract_isok = false; + + if (start_tli == end_tli) + /* easy case */ + extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, end_tli, wal_seg_size, + startpoint, endpoint, false, extractPageInfo, + NULL, true); + else + { + /* We have to process WAL located on several different xlog intervals, + * located on different timelines. + * + * Consider this example: + * t3 C-----X + * / + * t1 -A----*-------> + * + * A - prev backup START_LSN + * B - switchpoint for t2, available as t2->switchpoint + * C - switch for t3, available as t3->switchpoint + * X - current backup START_LSN + * + * Intervals to be parsed: + * - [A,B) on t1 + * - [B,C) on t2 + * - [C,X] on t3 + */ + int i; + parray *interval_list = parray_new(); + timelineInfo *end_tlinfo = NULL; + timelineInfo *tmp_tlinfo = NULL; + XLogRecPtr prev_switchpoint = InvalidXLogRecPtr; + + /* We must find TLI information about final timeline (t3 in example) */ + for (i = 0; i < (int)parray_num(tli_list); i++) + { + tmp_tlinfo = (timelineInfo *)parray_get(tli_list, i); + + if (tmp_tlinfo->tli == end_tli) + { + end_tlinfo = tmp_tlinfo; + break; + } + } + + /* Iterate over timelines backward, + * starting with end_tli and ending with start_tli. + * For every timeline calculate LSN-interval that must be parsed. + */ + + tmp_tlinfo = end_tlinfo; + while (tmp_tlinfo) + { + lsnInterval *wal_interval = (lsnInterval *)pgut_malloc(sizeof(lsnInterval)); + wal_interval->tli = tmp_tlinfo->tli; + + if (tmp_tlinfo->tli == end_tli) + { + wal_interval->begin_lsn = tmp_tlinfo->switchpoint; + wal_interval->end_lsn = endpoint; + } + else if (tmp_tlinfo->tli == start_tli) + { + wal_interval->begin_lsn = startpoint; + wal_interval->end_lsn = prev_switchpoint; + } + else + { + wal_interval->begin_lsn = tmp_tlinfo->switchpoint; + wal_interval->end_lsn = prev_switchpoint; + } + + parray_append(interval_list, wal_interval); + + if (tmp_tlinfo->tli == start_tli) + break; + + prev_switchpoint = tmp_tlinfo->switchpoint; + tmp_tlinfo = tmp_tlinfo->parent_link; + } + + for (i = parray_num(interval_list) - 1; i >= 0; i--) + { + bool inclusive_endpoint; + lsnInterval *tmp_interval = (lsnInterval *) parray_get(interval_list, i); + + /* In case of replica promotion, endpoints of intermediate + * timelines can be unreachable. + */ + inclusive_endpoint = false; + + /* ... but not the end timeline */ + if (tmp_interval->tli == end_tli) + inclusive_endpoint = true; + + extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tmp_interval->tli, wal_seg_size, + tmp_interval->begin_lsn, tmp_interval->end_lsn, + false, extractPageInfo, NULL, inclusive_endpoint); + if (!extract_isok) + break; + + pg_free(tmp_interval); + } + pg_free(interval_list); + } + + return extract_isok; +} + +/* + * Ensure that the backup has all wal files needed for recovery to consistent + * state. + * + * WAL records reading is processed using threads. Each thread reads single WAL + * file. 
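The multi-timeline branch above walks the timeline history backwards via parent links, turning the A/B/C/X picture into per-timeline LSN intervals and then parsing them oldest first. The sketch below reproduces that interval construction with simplified stand-ins for timelineInfo and lsnInterval:

#include <cstdio>
#include <vector>

struct tl_info
{
    unsigned tli;
    unsigned long long switchpoint;   /* LSN where this timeline branched off */
    const tl_info *parent;
};

struct lsn_interval
{
    unsigned tli;
    unsigned long long begin_lsn;
    unsigned long long end_lsn;
};

int main(void)
{
    /* t1 -> t2 -> t3, as in the A/B/C/X example above */
    tl_info t1 = {1, 0,    NULL};
    tl_info t2 = {2, 2000, &t1};   /* B: switchpoint of t2 */
    tl_info t3 = {3, 3000, &t2};   /* C: switchpoint of t3 */

    unsigned long long A = 1000;   /* previous backup START_LSN, on t1 */
    unsigned long long X = 3500;   /* current backup START_LSN, on t3  */
    unsigned start_tli = 1, end_tli = 3;

    std::vector<lsn_interval> intervals;
    unsigned long long prev_switchpoint = 0;

    for (const tl_info *tl = &t3; tl != NULL; tl = tl->parent)
    {
        lsn_interval iv;
        iv.tli       = tl->tli;
        iv.begin_lsn = (tl->tli == start_tli) ? A : tl->switchpoint;
        iv.end_lsn   = (tl->tli == end_tli)   ? X : prev_switchpoint;
        intervals.push_back(iv);

        if (tl->tli == start_tli)
            break;
        prev_switchpoint = tl->switchpoint;
    }

    /* parse the oldest timeline first, as extractPageMap() does */
    for (int i = (int) intervals.size() - 1; i >= 0; i--)
        printf("tli %u: %llu .. %llu\n",
               intervals[i].tli, intervals[i].begin_lsn, intervals[i].end_lsn);
    return 0;
}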
+ */ +static void +validate_backup_wal_from_start_to_stop(pgBackup *backup, + const char *archivedir, TimeLineID tli, + uint32 xlog_seg_size) +{ + bool got_endpoint; + + got_endpoint = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tli, xlog_seg_size, + backup->start_lsn, backup->stop_lsn, + false, NULL, NULL, true); + + if (!got_endpoint) + { + /* + * If we don't have WAL between start_lsn and stop_lsn, + * the backup is definitely corrupted. Update its status. + */ + write_backup_status(backup, BACKUP_STATUS_CORRUPT, instance_name, true); + + elog(WARNING, "There are not enough WAL records to consistenly restore " + "backup %s from START LSN: %X/%X to STOP LSN: %X/%X", + base36enc(backup->start_time), + (uint32) (backup->start_lsn >> 32), + (uint32) (backup->start_lsn), + (uint32) (backup->stop_lsn >> 32), + (uint32) (backup->stop_lsn)); + } +} + +/* + * Ensure that the backup has all wal files needed for recovery to consistent + * state. And check if we have in archive all files needed to restore the backup + * up to the given recovery target. + */ +void +validate_wal(pgBackup *backup, const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, TimeLineID tli, uint32 wal_seg_size) +{ + const char *backup_id; + XLogRecTarget last_rec; + char last_timestamp[100], + target_timestamp[100]; + bool all_wal = false; + + /* We need free() this later */ + backup_id = base36enc(backup->start_time); + + if (!XRecOffIsValid(backup->start_lsn)) + elog(ERROR, "Invalid start_lsn value %X/%X of backup %s", + (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn), + backup_id); + + if (!XRecOffIsValid(backup->stop_lsn)) + elog(ERROR, "Invalid stop_lsn value %X/%X of backup %s", + (uint32) (backup->stop_lsn >> 32), (uint32) (backup->stop_lsn), + backup_id); + + /* + * Check that the backup has all wal files needed + * for recovery to consistent state. + */ + if (backup->stream) + { + char backup_database_dir[MAXPGPATH]; + char backup_xlog_path[MAXPGPATH]; + + join_path_components(backup_database_dir, backup->root_dir, DATABASE_DIR); + join_path_components(backup_xlog_path, backup_database_dir, PG_XLOG_DIR); + + validate_backup_wal_from_start_to_stop(backup, backup_xlog_path, tli, + wal_seg_size); + } + else + validate_backup_wal_from_start_to_stop(backup, (char *) archivedir, tli, + wal_seg_size); + + if (backup->status == BACKUP_STATUS_CORRUPT) + { + elog(WARNING, "Backup %s WAL segments are corrupted", backup_id); + return; + } + /* + * If recovery target is provided check that we can restore backup to a + * recovery target time or xid. + */ + if (!TransactionIdIsValid(target_xid) && target_time == 0 && + !XRecOffIsValid(target_lsn)) + { + /* Recovery target is not given so exit */ + elog(INFO, "Backup %s WAL segments are valid", backup_id); + return; + } + + /* + * If recovery target is provided, ensure that archive files exist in + * archive directory. + */ + if (dir_is_empty(archivedir, FIO_LOCAL_HOST)) + elog(ERROR, "WAL archive is empty. You cannot restore backup to a recovery target without WAL archive."); + + /* + * Check if we have in archive all files needed to restore backup + * up to the given recovery target. + * In any case we cannot restore to the point before stop_lsn. 
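All of the LSN diagnostics in this file use the same "%X/%X" convention: a 64-bit WAL position is printed as its high and low 32-bit halves. A tiny self-contained example of that idiom:

#include <cstdio>
#include <cstdint>

/* An LSN is a 64-bit position in the WAL; logs print it as two 32-bit halves. */
static void print_lsn(uint64_t lsn)
{
    printf("%X/%X\n", (uint32_t) (lsn >> 32), (uint32_t) lsn);
}

int main(void)
{
    print_lsn(UINT64_C(0x0000000501234ABC));   /* prints 5/1234ABC */
    print_lsn(0);                              /* prints 0/0       */
    return 0;
}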
+ */ + + /* We can restore at least up to the backup end */ + last_rec.rec_time = 0; + last_rec.rec_xid = backup->recovery_xid; + last_rec.rec_lsn = backup->stop_lsn; + + time2iso(last_timestamp, lengthof(last_timestamp), backup->recovery_time); + + if ((TransactionIdIsValid(target_xid) && target_xid == last_rec.rec_xid) + || (target_time != 0 && backup->recovery_time >= target_time) + || (XRecOffIsValid(target_lsn) && last_rec.rec_lsn >= target_lsn)) + all_wal = true; + + all_wal = all_wal || + RunXLogThreads(archivedir, target_time, target_xid, target_lsn, + tli, wal_seg_size, backup->stop_lsn, + InvalidXLogRecPtr, true, validateXLogRecord, &last_rec, true); + if (last_rec.rec_time > 0) + time2iso(last_timestamp, lengthof(last_timestamp), + timestamptz_to_time_t(last_rec.rec_time)); + + /* There are all needed WAL records */ + if (all_wal) + elog(INFO, "Backup validation completed successfully on time %s, xid " XID_FMT " and LSN %X/%X", + last_timestamp, last_rec.rec_xid, + (uint32) (last_rec.rec_lsn >> 32), (uint32) last_rec.rec_lsn); + /* Some needed WAL records are absent */ + else + { + elog(WARNING, "Recovery can be done up to time %s, xid " XID_FMT " and LSN %X/%X", + last_timestamp, last_rec.rec_xid, + (uint32) (last_rec.rec_lsn >> 32), (uint32) last_rec.rec_lsn); + + if (target_time > 0) + time2iso(target_timestamp, lengthof(target_timestamp), target_time); + if (TransactionIdIsValid(target_xid) && target_time != 0) + elog(ERROR, "Not enough WAL records to time %s and xid " XID_FMT, + target_timestamp, target_xid); + else if (TransactionIdIsValid(target_xid)) + elog(ERROR, "Not enough WAL records to xid " XID_FMT, + target_xid); + else if (target_time != 0) + elog(ERROR, "Not enough WAL records to time %s", + target_timestamp); + else if (XRecOffIsValid(target_lsn)) + elog(ERROR, "Not enough WAL records to lsn %X/%X", + (uint32) (target_lsn >> 32), (uint32) (target_lsn)); + } +} + +/* + * Read from archived WAL segments latest recovery time and xid. All necessary + * segments present at archive folder. We waited **stop_lsn** in + * pg_stop_backup(). + */ +bool +read_recovery_info(const char *archivedir, TimeLineID tli, uint32 wal_seg_size, + XLogRecPtr start_lsn, XLogRecPtr stop_lsn, + time_t *recovery_time) +{ + XLogRecPtr startpoint = stop_lsn; + XLogReaderState *xlogreader; + XLogReaderData reader_data; + bool res; + + if (!XRecOffIsValid(start_lsn)) + elog(ERROR, "Invalid start_lsn value %X/%X", + (uint32) (start_lsn >> 32), (uint32) (start_lsn)); + + if (!XRecOffIsValid(stop_lsn)) + elog(ERROR, "Invalid stop_lsn value %X/%X", + (uint32) (stop_lsn >> 32), (uint32) (stop_lsn)); + + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, true, true); + + /* Read records from stop_lsn down to start_lsn */ + do + { + XLogRecord *record; + TimestampTz last_time = 0; + char *errormsg; + + record = XLogReadRecord(xlogreader, startpoint, &errormsg); + if (record == NULL) + { + XLogRecPtr errptr; + + errptr = startpoint ? 
startpoint : xlogreader->EndRecPtr; + + if (errormsg) + elog(ERROR, "Could not read WAL record at %X/%X: %s", + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(ERROR, "Could not read WAL record at %X/%X", + (uint32) (errptr >> 32), (uint32) (errptr)); + } + + /* Read previous record */ + startpoint = record->xl_prev; + + if (getRecordTimestamp(xlogreader, &last_time)) + { + *recovery_time = timestamptz_to_time_t(last_time); + + /* Found timestamp in WAL record 'record' */ + res = true; + goto cleanup; + } + } while (startpoint >= start_lsn); + + /* Didn't find timestamp from WAL records between start_lsn and stop_lsn */ + res = false; + +cleanup: + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + return res; +} + +/* + * Check if there is a WAL segment file in 'archivedir' which contains + * 'target_lsn'. + */ +bool +wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, + TimeLineID target_tli, uint32 wal_seg_size) +{ + XLogReaderState *xlogreader; + XLogReaderData reader_data; + char *errormsg; + bool res; + + if (!XRecOffIsValid(target_lsn)) + elog(ERROR, "Invalid target_lsn value %X/%X", + (uint32) (target_lsn >> 32), (uint32) (target_lsn)); + + xlogreader = InitXLogPageRead(&reader_data, archivedir, target_tli, + wal_seg_size, false, false, true); + + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + + xlogreader->system_identifier = instance_config.system_identifier; + + res = XLogReadRecord(xlogreader, target_lsn, &errormsg) != NULL; + /* Didn't find 'target_lsn' and there is no error, return false */ + + if (errormsg) + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (target_lsn >> 32), (uint32) (target_lsn), errormsg); + + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + return res; +} + +/* + * Get LSN of a first record within the WAL segment with number 'segno'. + */ +XLogRecPtr +get_first_record_lsn(const char *archivedir, XLogSegNo segno, + TimeLineID tli, uint32 wal_seg_size, int timeout) +{ + XLogReaderState *xlogreader; + XLogReaderData reader_data; + XLogRecPtr record = InvalidXLogRecPtr; + XLogRecPtr startpoint; + char wal_segment[MAXFNAMELEN]; + int attempts = 0; + + if (segno <= 1) + elog(ERROR, "Invalid WAL segment number " UINT64_FORMAT, segno); + + GetXLogFileName(wal_segment, tli, segno, instance_config.xlog_seg_size); + + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, false, true); + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + xlogreader->system_identifier = instance_config.system_identifier; + + /* Set startpoint to 0 in segno */ + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + + while (attempts <= timeout) + { + record = XLogFindNextRecord(xlogreader, startpoint); + + if (XLogRecPtrIsInvalid(record)) + record = InvalidXLogRecPtr; + else + { + elog(LOG, "First record in WAL segment \"%s\": %X/%X", wal_segment, + (uint32) (record >> 32), (uint32) (record)); + break; + } + + attempts++; + sleep(1); + } + + /* cleanup */ + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + return record; +} + + +/* + * Get LSN of the record next after target lsn. 
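read_recovery_info() above walks the WAL backwards: starting from stop_lsn it reads a record, takes its timestamp if it has one, and otherwise follows the record's xl_prev pointer until it drops below start_lsn. The sketch below models that backward walk over a toy in-memory record list (the struct and lookup are purely illustrative):

#include <cstdio>
#include <cstdint>

/* Toy stand-in for a WAL record: position, link to the previous record,
 * and an optional commit/abort timestamp (0 = none). */
struct toy_record
{
    uint64_t lsn;
    uint64_t xl_prev;
    int64_t  xact_time;
};

/* Walk records from stop_lsn down to start_lsn and return the first
 * (that is, the latest) timestamp found. */
static bool find_latest_timestamp(const toy_record *wal, int n,
                                  uint64_t start_lsn, uint64_t stop_lsn,
                                  int64_t *out_time)
{
    uint64_t pos = stop_lsn;

    while (pos >= start_lsn)
    {
        const toy_record *rec = NULL;
        for (int i = 0; i < n; i++)           /* lookup by LSN in the toy array */
            if (wal[i].lsn == pos)
                rec = &wal[i];
        if (rec == NULL)
            return false;                     /* record missing: give up */

        if (rec->xact_time != 0)
        {
            *out_time = rec->xact_time;       /* latest commit/abort time */
            return true;
        }
        pos = rec->xl_prev;                   /* step to the previous record */
    }
    return false;
}

int main(void)
{
    toy_record wal[] = {
        {100, 0,   0},
        {200, 100, 1601280000},               /* a commit record */
        {300, 200, 0},
        {400, 300, 0},
    };
    int64_t t = 0;

    if (find_latest_timestamp(wal, 4, 100, 400, &t))
        printf("recovery time: %lld\n", (long long) t);   /* 1601280000 */
    return 0;
}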
+ */ +XLogRecPtr +get_next_record_lsn(const char *archivedir, XLogSegNo segno, + TimeLineID tli, uint32 wal_seg_size, int timeout, + XLogRecPtr target) +{ + XLogReaderState *xlogreader; + XLogReaderData reader_data; + XLogRecPtr startpoint, found; + XLogRecPtr res = InvalidXLogRecPtr; + char wal_segment[MAXFNAMELEN]; + int attempts = 0; + + if (segno <= 1) + elog(ERROR, "Invalid WAL segment number " UINT64_FORMAT, segno); + + GetXLogFileName(wal_segment, tli, segno, instance_config.xlog_seg_size); + + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, false, true); + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + xlogreader->system_identifier = instance_config.system_identifier; + + /* Set startpoint to 0 in segno */ + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + + found = XLogFindNextRecord(xlogreader, startpoint); + + if (XLogRecPtrIsInvalid(found)) + { + if (xlogreader->errormsg_buf[0] != '\0') + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (startpoint >> 32), (uint32) (startpoint), + xlogreader->errormsg_buf); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (startpoint >> 32), (uint32) (startpoint)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + startpoint = found; + + while (attempts <= timeout) + { + XLogRecord *record; + char *errormsg; + + if (interrupted) + elog(ERROR, "Interrupted during WAL reading"); + + record = XLogReadRecord(xlogreader, startpoint, &errormsg); + + if (record == NULL) + { + XLogRecPtr errptr; + + errptr = XLogRecPtrIsInvalid(startpoint) ? xlogreader->EndRecPtr : + startpoint; + + if (errormsg) + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (errptr >> 32), (uint32) (errptr)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + + if (xlogreader->ReadRecPtr >= target) + { + elog(LOG, "Record %X/%X is next after target LSN %X/%X", + (uint32) (xlogreader->ReadRecPtr >> 32), (uint32) (xlogreader->ReadRecPtr), + (uint32) (target >> 32), (uint32) (target)); + res = xlogreader->ReadRecPtr; + break; + } + else + startpoint = InvalidXLogRecPtr; + } + + /* cleanup */ + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + return res; +} + + +/* + * Get LSN of a record prior to target_lsn. + * If 'start_lsn' is in the segment with number 'segno' then start from 'start_lsn', + * otherwise start from offset 0 within the segment. + * + * Returns LSN of a record which EndRecPtr is greater or equal to target_lsn. + * If 'seek_prev_segment' is true, then look for prior record in prior WAL segment. + * + * it's unclear that "last" in "last_wal_lsn" refers to the + * "closest to stop_lsn backward or forward, depending on seek_prev_segment setting". 
+ */ +XLogRecPtr +get_prior_record_lsn(const char *archivedir, XLogRecPtr start_lsn, + XLogRecPtr stop_lsn, TimeLineID tli, bool seek_prev_segment, + uint32 wal_seg_size) +{ + XLogReaderState *xlogreader; + XLogReaderData reader_data; + XLogRecPtr startpoint; + XLogSegNo start_segno; + XLogSegNo segno; + XLogRecPtr res = InvalidXLogRecPtr; + + GetXLogSegNo(stop_lsn, segno, wal_seg_size); + + if (segno <= 1) + elog(ERROR, "Invalid WAL segment number " UINT64_FORMAT, segno); + + if (seek_prev_segment) + segno = segno - 1; + + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, false, true); + + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + + xlogreader->system_identifier = instance_config.system_identifier; + + /* + * Calculate startpoint. Decide: we should use 'start_lsn' or offset 0. + */ + GetXLogSegNo(start_lsn, start_segno, wal_seg_size); + if (start_segno == segno) + startpoint = start_lsn; + else + { + XLogRecPtr found; + + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + found = XLogFindNextRecord(xlogreader, startpoint); + + if (XLogRecPtrIsInvalid(found)) + { + if (xlogreader->errormsg_buf[0] != '\0') + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (startpoint >> 32), (uint32) (startpoint), + xlogreader->errormsg_buf); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (startpoint >> 32), (uint32) (startpoint)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + startpoint = found; + } + + while (true) + { + XLogRecord *record; + char *errormsg; + + if (interrupted) + elog(ERROR, "Interrupted during WAL reading"); + + record = XLogReadRecord(xlogreader, startpoint, &errormsg); + if (record == NULL) + { + XLogRecPtr errptr; + + errptr = XLogRecPtrIsInvalid(startpoint) ? 
xlogreader->EndRecPtr : + startpoint; + + if (errormsg) + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (errptr >> 32), (uint32) (errptr)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + + if (xlogreader->EndRecPtr >= stop_lsn) + { + elog(LOG, "Record %X/%X has endpoint %X/%X which is equal or greater than requested LSN %X/%X", + (uint32) (xlogreader->ReadRecPtr >> 32), (uint32) (xlogreader->ReadRecPtr), + (uint32) (xlogreader->EndRecPtr >> 32), (uint32) (xlogreader->EndRecPtr), + (uint32) (stop_lsn >> 32), (uint32) (stop_lsn)); + res = xlogreader->ReadRecPtr; + break; + } + + /* continue reading at next record */ + startpoint = InvalidXLogRecPtr; + } + + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + return res; +} + +#ifdef HAVE_LIBZ +/* + * Show error during work with compressed file + */ +static const char * +get_gz_error(gzFile gzf) +{ + int errnum; + const char *errmsg; + + errmsg = fio_gzerror(gzf, &errnum); + if (errnum == Z_ERRNO) + return strerror(errno); + else + return errmsg; +} +#endif + +/* XLogreader callback function, to read a WAL page */ +static int +SimpleXLogPageRead_local(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, char *readBuf, + TimeLineID *pageTLI) +{ + XLogReaderData *reader_data; + uint32 targetPageOff; + + reader_data = (XLogReaderData *) xlogreader->private_data; + targetPageOff = targetPagePtr % wal_seg_size; + + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: Interrupted during WAL reading", + reader_data->thread_num); + + /* + * See if we need to switch to a new segment because the requested record + * is not in the currently open one. + */ + if (!IsInXLogSeg(targetPagePtr, reader_data->xlogsegno, wal_seg_size)) + { + elog(VERBOSE, "Thread [%d]: Need to switch to the next WAL segment, page LSN %X/%X, record being read LSN %X/%X", + reader_data->thread_num, + (uint32) (targetPagePtr >> 32), (uint32) (targetPagePtr), + (uint32) (xlogreader->currRecPtr >> 32), + (uint32) (xlogreader->currRecPtr )); + + /* + * If the last record on the page is not complete, + * we must continue reading pages in the same thread + */ + if (!XLogRecPtrIsInvalid(xlogreader->currRecPtr) && + xlogreader->currRecPtr < targetPagePtr) + { + CleanupXLogPageRead(xlogreader); + + /* + * Switch to the next WAL segment after reading contrecord. + */ + if (wal_manual_switch) + reader_data->need_switch = true; + } + else + { + CleanupXLogPageRead(xlogreader); + /* + * Do not switch to next WAL segment in this function. It is + * manually switched by a thread routine. + */ + if (wal_manual_switch) + { + reader_data->need_switch = true; + return -1; + } + } + } + + GetXLogSegNo(targetPagePtr, reader_data->xlogsegno, wal_seg_size); + + /* Try to switch to the next WAL segment */ + if (!reader_data->xlogexists) + { + char xlogfname[MAXFNAMELEN]; + char partial_file[MAXPGPATH]; + int rc; + + GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, wal_seg_size); + + snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir, xlogfname); +#ifdef HAVE_LIBZ + snprintf(reader_data->gz_xlogpath, MAXPGPATH, "%s.gz", reader_data->xlogpath); +#endif + + /* We fall back to using .partial segment in case if we are running + * multi-timeline incremental backup right after standby promotion. + * TODO: it should be explicitly enabled. 
+ */ + rc = sprintf_s(partial_file, MAXPGPATH, "%s.partial", reader_data->xlogpath); + securec_check_ss_c(rc, "\0", "\0"); + + /* If segment do not exists, but the same + * segment with '.partial' suffix does, use it instead */ + if (!fileExists(reader_data->xlogpath, FIO_LOCAL_HOST) && + fileExists(partial_file, FIO_LOCAL_HOST)) + { + snprintf(reader_data->xlogpath, MAXPGPATH, "%s", partial_file); + } + + if (fileExists(reader_data->xlogpath, FIO_LOCAL_HOST)) + { + elog(LOG, "Thread [%d]: Opening WAL segment \"%s\"", + reader_data->thread_num, reader_data->xlogpath); + + reader_data->xlogexists = true; + reader_data->xlogfile = fio_open(reader_data->xlogpath, + O_RDONLY | PG_BINARY, FIO_LOCAL_HOST); + + if (reader_data->xlogfile < 0) + { + elog(WARNING, "Thread [%d]: Could not open WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->xlogpath, + strerror(errno)); + return -1; + } + } +#ifdef HAVE_LIBZ + /* Try to open compressed WAL segment */ + else if (fileExists(reader_data->gz_xlogpath, FIO_LOCAL_HOST)) + { + elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"", + reader_data->thread_num, reader_data->gz_xlogpath); + + reader_data->xlogexists = true; + reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath, + "rb", -1, FIO_LOCAL_HOST); + if (reader_data->gz_xlogfile == NULL) + { + elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->gz_xlogpath, + strerror(errno)); + return -1; + } + } +#endif + /* Exit without error if WAL segment doesn't exist */ + if (!reader_data->xlogexists) + return -1; + } + + /* + * At this point, we have the right segment open. + */ + Assert(reader_data->xlogexists); + + /* + * Do not read same page read earlier from the file, read it from the buffer + */ + if (reader_data->prev_page_off != 0 && + reader_data->prev_page_off == targetPageOff) + { + memcpy(readBuf, reader_data->page_buf, XLOG_BLCKSZ); + *pageTLI = reader_data->tli; + return XLOG_BLCKSZ; + } + + /* Read the requested page */ + if (reader_data->xlogfile != -1) + { + if (fio_seek(reader_data->xlogfile, (off_t) targetPageOff) < 0) + { + elog(WARNING, "Thread [%d]: Could not seek in WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->xlogpath, strerror(errno)); + return -1; + } + + if (fio_read(reader_data->xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + { + elog(WARNING, "Thread [%d]: Could not read from WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->xlogpath, strerror(errno)); + return -1; + } + } +#ifdef HAVE_LIBZ + else + { + if (fio_gzseek(reader_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1) + { + elog(WARNING, "Thread [%d]: Could not seek in compressed WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->gz_xlogpath, + get_gz_error(reader_data->gz_xlogfile)); + return -1; + } + + if (fio_gzread(reader_data->gz_xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + { + elog(WARNING, "Thread [%d]: Could not read from compressed WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->gz_xlogpath, + get_gz_error(reader_data->gz_xlogfile)); + return -1; + } + } +#endif + + memcpy(reader_data->page_buf, readBuf, XLOG_BLCKSZ); + reader_data->prev_page_off = targetPageOff; + *pageTLI = reader_data->tli; + return XLOG_BLCKSZ; +} + +/* + * Initialize WAL segments reading. 
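+ * Stores archivedir, segment size and the manual-switch/consistent-read
+ * flags in static module state and resets reader_data. Only when
+ * allocate_reader is true is an XLogReaderState allocated here (with
+ * SimpleXLogPageRead_local as the page-read callback); otherwise NULL is
+ * returned and the caller, e.g. XLogThreadWorker, allocates its own reader.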
+ */ +static XLogReaderState * +InitXLogPageRead(XLogReaderData *reader_data, const char *archivedir, + TimeLineID tli, uint32 segment_size, bool manual_switch, + bool consistent_read, bool allocate_reader) +{ + XLogReaderState *xlogreader = NULL; + + wal_archivedir = archivedir; + wal_seg_size = segment_size; + wal_manual_switch = manual_switch; + wal_consistent_read = consistent_read; + + MemSet(reader_data, 0, sizeof(XLogReaderData)); + reader_data->tli = tli; + reader_data->xlogfile = -1; + + if (allocate_reader) + { +#if PG_VERSION_NUM >= 110000 + xlogreader = XLogReaderAllocate(wal_seg_size, &SimpleXLogPageRead_local, + reader_data); +#else + xlogreader = XLogReaderAllocate(&SimpleXLogPageRead_local, reader_data); +#endif + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + xlogreader->system_identifier = instance_config.system_identifier; + } + + return xlogreader; +} + +/* + * Comparison function to sort xlog_thread_arg array. + */ +static int +xlog_thread_arg_comp(const void *a1, const void *a2) +{ + const xlog_thread_arg *arg1 = (const xlog_thread_arg *)a1; + const xlog_thread_arg *arg2 = (const xlog_thread_arg *)a2; + + return arg1->reader_data.xlogsegno - arg2->reader_data.xlogsegno; +} + +/* + * Run WAL processing routines using threads. Start from startpoint up to + * endpoint. It is possible to send zero endpoint, threads will read WAL + * infinitely in this case. + */ +static bool +RunXLogThreads(const char *archivedir, time_t target_time, + TransactionId target_xid, XLogRecPtr target_lsn, TimeLineID tli, + uint32 segment_size, XLogRecPtr startpoint, XLogRecPtr endpoint, + bool consistent_read, xlog_record_function process_record, + XLogRecTarget *last_rec, bool inclusive_endpoint) +{ + pthread_t *threads; + xlog_thread_arg *thread_args; + int i; + int threads_need = 0; + XLogSegNo endSegNo = 0; + bool result = true; + + if (!XRecOffIsValid(startpoint) && !XRecOffIsNull(startpoint)) + elog(ERROR, "Invalid startpoint value %X/%X", + (uint32) (startpoint >> 32), (uint32) (startpoint)); + + if (process_record) + elog(LOG, "Extracting pagemap from tli %i on range from %X/%X to %X/%X", + tli, + (uint32) (startpoint >> 32), (uint32) (startpoint), + (uint32) (endpoint >> 32), (uint32) (endpoint)); + + if (!XLogRecPtrIsInvalid(endpoint)) + { +// if (XRecOffIsNull(endpoint) && !inclusive_endpoint) + if (XRecOffIsNull(endpoint)) + { + GetXLogSegNo(endpoint, endSegNo, segment_size); + endSegNo--; + } + else if (!XRecOffIsValid(endpoint)) + { + elog(ERROR, "Invalid endpoint value %X/%X", + (uint32) (endpoint >> 32), (uint32) (endpoint)); + } + else + GetXLogSegNo(endpoint, endSegNo, segment_size); + } + + /* Initialize static variables for workers */ + wal_target_time = target_time; + wal_target_xid = target_xid; + wal_target_lsn = target_lsn; + + GetXLogSegNo(startpoint, segno_start, segment_size); + segno_target = 0; + GetXLogSegNo(startpoint, segno_next, segment_size); + segnum_read = 0; + segnum_corrupted = 0; + + threads = (pthread_t *) pgut_malloc(sizeof(pthread_t) * num_threads); + thread_args = (xlog_thread_arg *) pgut_malloc(sizeof(xlog_thread_arg) * num_threads); + + /* + * Initialize thread args. + * + * Each thread works with its own WAL segment and we need to adjust + * startpoint value for each thread. 
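+ * Threads get consecutive segment numbers starting at segno_next; the
+ * first thread keeps the original startpoint, every following thread
+ * starts at the beginning of its assigned segment. If fewer segments
+ * than num_threads remain before endSegNo, only threads_need threads
+ * are created.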
+ */ + for (i = 0; i < num_threads; i++) + { + xlog_thread_arg *arg = &thread_args[i]; + + InitXLogPageRead(&arg->reader_data, archivedir, tli, segment_size, true, + consistent_read, false); + arg->reader_data.xlogsegno = segno_next; + arg->reader_data.thread_num = i + 1; + arg->process_record = process_record; + arg->startpoint = startpoint; + arg->endpoint = endpoint; + arg->endSegNo = endSegNo; + arg->inclusive_endpoint = inclusive_endpoint; + arg->got_target = false; + /* By default there is some error */ + arg->ret = 1; + + threads_need++; + segno_next++; + /* + * If we need to read less WAL segments than num_threads, create less + * threads. + */ + if (endSegNo != 0 && segno_next > endSegNo) + break; + GetXLogRecPtr(segno_next, 0, segment_size, startpoint); + } + + /* Run threads */ + thread_interrupted = false; + for (i = 0; i < threads_need; i++) + { + elog(VERBOSE, "Start WAL reader thread: %d", i + 1); + pthread_create(&threads[i], NULL, XLogThreadWorker, &thread_args[i]); + } + + /* Wait for threads */ + for (i = 0; i < threads_need; i++) + { + pthread_join(threads[i], NULL); + if (thread_args[i].ret == 1) + result = false; + } + + /* Release threads here, use thread_args only below */ + pfree(threads); + threads = NULL; + + if (last_rec) + { + /* + * We need to sort xlog_thread_arg array by xlogsegno to return latest + * possible record up to which restore is possible. We need to sort to + * detect failed thread between start segment and target segment. + * + * Loop stops on first failed thread. + */ + if (threads_need > 1) + qsort((void *) thread_args, threads_need, sizeof(xlog_thread_arg), + xlog_thread_arg_comp); + + for (i = 0; i < threads_need; i++) + { + XLogRecTarget *cur_rec; + + cur_rec = &thread_args[i].reader_data.cur_rec; + /* + * If we got the target return minimum possible record. + */ + if (segno_target > 0) + { + if (thread_args[i].got_target && + thread_args[i].reader_data.xlogsegno == segno_target) + { + *last_rec = *cur_rec; + break; + } + } + /* + * Else return maximum possible record up to which restore is + * possible. + */ + else if (last_rec->rec_lsn < cur_rec->rec_lsn) + *last_rec = *cur_rec; + + /* + * We reached failed thread, so stop here. We cannot use following + * WAL records after failed segment. + */ + if (thread_args[i].ret != 0) + break; + } + } + + pfree(thread_args); + + return result; +} + +/* + * WAL reader worker. + */ +void * +XLogThreadWorker(void *arg) +{ + xlog_thread_arg *thread_arg = (xlog_thread_arg *) arg; + XLogReaderData *reader_data = &thread_arg->reader_data; + XLogReaderState *xlogreader; + XLogSegNo nextSegNo = 0; + XLogRecPtr found; + uint32 prev_page_off = 0; + bool need_read = true; + +#if PG_VERSION_NUM >= 110000 + xlogreader = XLogReaderAllocate(wal_seg_size, &SimpleXLogPageRead_local, + reader_data); +#else + xlogreader = XLogReaderAllocate(&SimpleXLogPageRead_local, reader_data); +#endif + if (xlogreader == NULL) + elog(ERROR, "Thread [%d]: out of memory", reader_data->thread_num); + xlogreader->system_identifier = instance_config.system_identifier; + + found = XLogFindNextRecord(xlogreader, thread_arg->startpoint); + + /* + * We get invalid WAL record pointer usually when WAL segment is absent or + * is corrupted. 
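+ * Under wal_consistent_read we first let XLogWaitForConsistency() and the
+ * other threads decide whether this is a real failure; if it is (or if
+ * consistent reading is disabled), a warning is logged and
+ * PrintXLogCorruptionMsg() aborts with ERROR.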
+ */ + if (XLogRecPtrIsInvalid(found)) + { + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + need_read = false; + else + { + if (xlogreader->errormsg_buf[0] != '\0') + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X: %s", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint), + xlogreader->errormsg_buf); + else + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint)); + PrintXLogCorruptionMsg(reader_data, ERROR); + } + } + + thread_arg->startpoint = found; + + elog(VERBOSE, "Thread [%d]: Starting LSN: %X/%X", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint)); + + while (need_read) + { + XLogRecord *record; + char *errormsg; + bool stop_reading = false; + + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: Interrupted during WAL reading", + reader_data->thread_num); + + /* + * We need to switch to the next WAL segment after reading previous + * record. It may happen if we read contrecord. + */ + if (reader_data->need_switch && + !SwitchThreadToNextWal(xlogreader, thread_arg)) + break; + + record = XLogReadRecord(xlogreader, thread_arg->startpoint, &errormsg); + + if (record == NULL) + { + XLogRecPtr errptr; + + /* + * There is no record, try to switch to the next WAL segment. + * Usually SimpleXLogPageRead_local() does it by itself. But here we need + * to do it manually to support threads. + */ + if (reader_data->need_switch && errormsg == NULL) + { + if (SwitchThreadToNextWal(xlogreader, thread_arg)) + continue; + else + break; + } + + /* + * XLogWaitForConsistency() is normally used only with threads. + * Call it here for just in case. + */ + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + break; + else if (wal_consistent_read) + { + XLogSegNo segno_report; + + pthread_lock(&wal_segment_mutex); + segno_report = segno_start + segnum_read; + pthread_mutex_unlock(&wal_segment_mutex); + + /* + * Report error message if this is the first corrupted WAL. + */ + if (reader_data->xlogsegno > segno_report) + return NULL; /* otherwise just stop the thread */ + } + + errptr = thread_arg->startpoint ? + thread_arg->startpoint : xlogreader->EndRecPtr; + + if (errormsg) + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X: %s", + reader_data->thread_num, + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (errptr >> 32), (uint32) (errptr)); + + /* In we failed to read record located at endpoint position, + * and endpoint is not inclusive, do not consider this as an error. + */ + if (!thread_arg->inclusive_endpoint && + errptr == thread_arg->endpoint) + { + elog(LOG, "Thread [%d]: Endpoint %X/%X is not inclusive, switch to the next timeline", + reader_data->thread_num, + (uint32) (thread_arg->endpoint >> 32), (uint32) (thread_arg->endpoint)); + break; + } + + /* + * If we don't have all WAL files from prev backup start_lsn to current + * start_lsn, we won't be able to build page map and PAGE backup will + * be incorrect. Stop it and throw an error. 
+ */ + PrintXLogCorruptionMsg(reader_data, ERROR); + } + + getRecordTimestamp(xlogreader, &reader_data->cur_rec.rec_time); + if (TransactionIdIsValid(XLogRecGetXid(xlogreader))) + reader_data->cur_rec.rec_xid = XLogRecGetXid(xlogreader); + reader_data->cur_rec.rec_lsn = xlogreader->ReadRecPtr; + + if (thread_arg->process_record) + thread_arg->process_record(xlogreader, reader_data, &stop_reading); + if (stop_reading) + { + thread_arg->got_target = true; + + pthread_lock(&wal_segment_mutex); + /* We should store least target segment number */ + if (segno_target == 0 || segno_target > reader_data->xlogsegno) + segno_target = reader_data->xlogsegno; + pthread_mutex_unlock(&wal_segment_mutex); + + break; + } + + /* + * Check if other thread got the target segment. Check it not very + * often, only every WAL page. + */ + if (wal_consistent_read && prev_page_off != 0 && + prev_page_off != reader_data->prev_page_off) + { + XLogSegNo segno; + + pthread_lock(&wal_segment_mutex); + segno = segno_target; + pthread_mutex_unlock(&wal_segment_mutex); + + if (segno != 0 && segno < reader_data->xlogsegno) + break; + } + prev_page_off = reader_data->prev_page_off; + + /* continue reading at next record */ + thread_arg->startpoint = InvalidXLogRecPtr; + + GetXLogSegNo(xlogreader->EndRecPtr, nextSegNo, wal_seg_size); + + if (thread_arg->endSegNo != 0 && + !XLogRecPtrIsInvalid(thread_arg->endpoint) && + /* + * Consider thread_arg->endSegNo and thread_arg->endpoint only if + * they are valid. + */ + xlogreader->ReadRecPtr >= thread_arg->endpoint && + nextSegNo >= thread_arg->endSegNo) + break; + } + + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + /* Extracting is successful */ + thread_arg->ret = 0; + return NULL; +} + +/* + * Do manual switch to the next WAL segment. + * + * Returns false if the reader reaches the end of a WAL segment list. + */ +static bool +SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg) +{ + XLogReaderData *reader_data; + XLogRecPtr found; + + reader_data = (XLogReaderData *) xlogreader->private_data; + reader_data->need_switch = false; + + /* Critical section */ + pthread_lock(&wal_segment_mutex); + Assert(segno_next); + reader_data->xlogsegno = segno_next; + segnum_read++; + segno_next++; + pthread_mutex_unlock(&wal_segment_mutex); + + /* We've reached the end */ + if (arg->endSegNo != 0 && reader_data->xlogsegno > arg->endSegNo) + return false; + + /* Adjust next record position */ + GetXLogRecPtr(reader_data->xlogsegno, 0, wal_seg_size, arg->startpoint); + /* We need to close previously opened file if it wasn't closed earlier */ + CleanupXLogPageRead(xlogreader); + /* Skip over the page header and contrecord if any */ + found = XLogFindNextRecord(xlogreader, arg->startpoint); + + /* + * We get invalid WAL record pointer usually when WAL segment is + * absent or is corrupted. + */ + if (XLogRecPtrIsInvalid(found)) + { + /* + * Check if we need to stop reading. We stop if other thread found a + * target segment. + */ + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + return false; + else if (wal_consistent_read) + { + XLogSegNo segno_report; + + pthread_lock(&wal_segment_mutex); + segno_report = segno_start + segnum_read; + pthread_mutex_unlock(&wal_segment_mutex); + + /* + * Report error message if this is the first corrupted WAL. 
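+ * If our segment lies beyond the first still-unread segment, another
+ * thread owns the earliest failure, so this thread just stops quietly;
+ * otherwise fall through, log the warning and report the corruption.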
+ */ + if (reader_data->xlogsegno > segno_report) + return false; /* otherwise just stop the thread */ + } + + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint)); + PrintXLogCorruptionMsg(reader_data, ERROR); + } + arg->startpoint = found; + + elog(VERBOSE, "Thread [%d]: Switched to LSN %X/%X", + reader_data->thread_num, + (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint)); + + return true; +} + +/* + * Wait for other threads since the current thread couldn't read its segment. + * We need to decide is it fail or not. + * + * Returns true if there is no failure and previous target segment was found. + * Otherwise return false. + */ +static bool +XLogWaitForConsistency(XLogReaderState *xlogreader) +{ + uint32 segnum_need; + XLogReaderData *reader_data =(XLogReaderData *) xlogreader->private_data; + bool log_message = true; + + segnum_need = reader_data->xlogsegno - segno_start; + while (true) + { + uint32 segnum_current_read; + XLogSegNo segno; + + if (log_message) + { + char xlogfname[MAXFNAMELEN]; + + GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, + wal_seg_size); + + elog(VERBOSE, "Thread [%d]: Possible WAL corruption in %s. Wait for other threads to decide is this a failure", + reader_data->thread_num, xlogfname); + log_message = false; + } + + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: Interrupted during WAL reading", + reader_data->thread_num); + + pthread_lock(&wal_segment_mutex); + segnum_current_read = segnum_read + segnum_corrupted; + segno = segno_target; + pthread_mutex_unlock(&wal_segment_mutex); + + /* Other threads read all previous segments and didn't find target */ + if (segnum_need <= segnum_current_read) + { + /* Mark current segment as corrupted */ + pthread_lock(&wal_segment_mutex); + segnum_corrupted++; + pthread_mutex_unlock(&wal_segment_mutex); + return false; + } + + if (segno != 0 && segno < reader_data->xlogsegno) + return true; + + pg_usleep(500000L); /* 500 ms */ + } + + /* We shouldn't reach it */ + return false; +} + +/* + * Cleanup after WAL segment reading. + */ +static void +CleanupXLogPageRead(XLogReaderState *xlogreader) +{ + XLogReaderData *reader_data; + + reader_data = (XLogReaderData *) xlogreader->private_data; + if (reader_data->xlogfile >= 0) + { + fio_close(reader_data->xlogfile); + reader_data->xlogfile = -1; + } +#ifdef HAVE_LIBZ + else if (reader_data->gz_xlogfile != NULL) + { + fio_gzclose(reader_data->gz_xlogfile); + reader_data->gz_xlogfile = NULL; + } +#endif + reader_data->prev_page_off = 0; + reader_data->xlogexists = false; +} + +static void +PrintXLogCorruptionMsg(XLogReaderData *reader_data, int elevel) +{ + if (reader_data->xlogpath[0] != 0) + { + /* + * XLOG reader couldn't read WAL segment. + * We throw a WARNING here to be able to update backup status. + */ + if (!reader_data->xlogexists) + elog(elevel, "Thread [%d]: WAL segment \"%s\" is absent", + reader_data->thread_num, reader_data->xlogpath); + else if (reader_data->xlogfile != -1) + elog(elevel, "Thread [%d]: Possible WAL corruption. " + "Error has occured during reading WAL segment \"%s\"", + reader_data->thread_num, reader_data->xlogpath); +#ifdef HAVE_LIBZ + else if (reader_data->gz_xlogfile != NULL) + elog(elevel, "Thread [%d]: Possible WAL corruption. 
" + "Error has occured during reading WAL segment \"%s\"", + reader_data->thread_num, reader_data->gz_xlogpath); +#endif + } + else + { + /* Cannot tell what happened specifically */ + elog(elevel, "Thread [%d]: An error occured during WAL reading", + reader_data->thread_num); + } +} + +/* + * Extract information about blocks modified in this record. + */ +static void +extractPageInfo(XLogReaderState *record, XLogReaderData *reader_data, + bool *stop_reading) +{ + uint8 block_id; + RmgrId rmid = XLogRecGetRmid(record); + uint8 info = XLogRecGetInfo(record); + uint8 rminfo = info & ~XLR_INFO_MASK; + + /* Is this a special record type that I recognize? */ + + if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE) + { + /* + * New databases can be safely ignored. They would be completely + * copied if found. + */ + } + else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP) + { + /* + * An existing database was dropped. It is fine to ignore that + * they will be removed appropriately. + */ + } + else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE) + { + /* + * We can safely ignore these. The file will be removed when + * combining the backups in the case of differential on. + */ + } + else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE) + { + /* + * We can safely ignore these. When we compare the sizes later on, + * we'll notice that they differ, and copy the missing tail from + * source system. + */ + } + else if (info & XLR_SPECIAL_REL_UPDATE) + { + /* + * This record type modifies a relation file in some special way, but + * we don't recognize the type. That's bad - we don't know how to + * track that change. + */ + elog(ERROR, "WAL record modifies a relation, but record type is not recognized\n" + "lsn: %X/%X, rmgr: %s, info: %02X", + (uint32) (record->ReadRecPtr >> 32), (uint32) (record->ReadRecPtr), + RmgrNames[rmid], info); + } + + for (block_id = 0; block_id <= record->max_block_id; block_id++) + { + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; + + if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno)) + continue; + + /* We only care about the main fork; others are copied as is */ + if (forknum != MAIN_FORKNUM) + continue; + + process_block_change(forknum, rnode, blkno); + } +} + +/* + * Check the current read WAL record during validation. + */ +static void +validateXLogRecord(XLogReaderState *record, XLogReaderData *reader_data, + bool *stop_reading) +{ + /* Check target xid */ + if (TransactionIdIsValid(wal_target_xid) && + wal_target_xid == reader_data->cur_rec.rec_xid) + *stop_reading = true; + /* Check target time */ + else if (wal_target_time != 0 && + timestamptz_to_time_t(reader_data->cur_rec.rec_time) >= wal_target_time) + *stop_reading = true; + /* Check target lsn */ + else if (XRecOffIsValid(wal_target_lsn) && + reader_data->cur_rec.rec_lsn >= wal_target_lsn) + *stop_reading = true; +} + +/* + * Extract timestamp from WAL record. + * + * If the record contains a timestamp, returns true, and saves the timestamp + * in *recordXtime. If the record type has no timestamp, returns false. + * Currently, only transaction commit/abort records and restore points contain + * timestamps. 
+ */ +static bool +getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + uint8 xact_info = info & XLOG_XACT_OPMASK; + uint8 rmid = XLogRecGetRmid(record); + + if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT) + { + *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time; + return true; + } + else if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_COMMIT || + xact_info == XLOG_XACT_COMMIT_PREPARED)) + { + *recordXtime = ((xl_xact_commit_local *) XLogRecGetData(record))->xact_time; + return true; + } + else if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_ABORT || + xact_info == XLOG_XACT_ABORT_PREPARED)) + { + *recordXtime = ((xl_xact_abort_local *) XLogRecGetData(record))->xact_time; + return true; + } + + return false; +} + +bool validate_wal_segment(TimeLineID tli, XLogSegNo segno, const char *prefetch_dir, uint32 wal_seg_size) +{ + XLogRecPtr startpoint; + XLogRecPtr endpoint; + + bool rc; + int tmp_num_threads = num_threads; + num_threads = 1; + + /* calculate startpoint and endpoint */ + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + GetXLogRecPtr(segno+1, 0, wal_seg_size, endpoint); + + /* disable multi-threading */ + num_threads = 1; + + rc = RunXLogThreads(prefetch_dir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tli, wal_seg_size, + startpoint, endpoint, false, NULL, NULL, true); + + num_threads = tmp_num_threads; + + return rc; +} + +/* + * * Returns information about the block that a block reference refers to. + * * + * * If the WAL record contains a block reference with the given ID, *rnode, + * * *forknum, and *blknum are filled in (if not NULL), and returns TRUE. + * * Otherwise returns FALSE. + * */ +bool XLogRecGetBlockTag( + XLogReaderState* record, uint8 block_id, RelFileNode* rnode, ForkNumber* forknum, BlockNumber* blknum) +{ + DecodedBkpBlock* bkpb = NULL; + + if (!record->blocks[block_id].in_use) + return false; + + bkpb = &record->blocks[block_id]; + if (rnode != NULL) + *rnode = bkpb->rnode; + if (forknum != NULL) + *forknum = bkpb->forknum; + if (blknum != NULL) + *blknum = bkpb->blkno; + return true; +} + diff --git a/src/bin/pg_probackup/pg_lzcompress.cpp b/src/bin/pg_probackup/pg_lzcompress.cpp new file mode 100644 index 000000000..fde164c2b --- /dev/null +++ b/src/bin/pg_probackup/pg_lzcompress.cpp @@ -0,0 +1,773 @@ +/* ---------- + * pg_lzcompress.c - + * + * This is an implementation of LZ compression for PostgreSQL. + * It uses a simple history table and generates 2-3 byte tags + * capable of backward copy information for 3-273 bytes with + * a max offset of 4095. + * + * Entry routines: + * + * int32 + * pglz_compress(const char *source, int32 slen, char *dest, + * const PGLZ_Strategy *strategy); + * + * source is the input data to be compressed. + * + * slen is the length of the input data. + * + * dest is the output area for the compressed result. + * It must be at least as big as PGLZ_MAX_OUTPUT(slen). + * + * strategy is a pointer to some information controlling + * the compression algorithm. If NULL, the compiled + * in default strategy is used. + * + * The return value is the number of bytes written in the + * buffer dest, or -1 if compression fails; in the latter + * case the contents of dest are undefined. + * + * int32 + * pglz_decompress(const char *source, int32 slen, char *dest, + * int32 rawsize, bool check_complete) + * + * source is the compressed input. + * + * slen is the length of the compressed input. 
+ * + * dest is the area where the uncompressed data will be + * written to. It is the callers responsibility to + * provide enough space. + * + * The data is written to buff exactly as it was handed + * to pglz_compress(). No terminating zero byte is added. + * + * rawsize is the length of the uncompressed data. + * + * check_complete is a flag to let us know if -1 should be + * returned in cases where we don't reach the end of the + * source or dest buffers, or not. This should be false + * if the caller is asking for only a partial result and + * true otherwise. + * + * The return value is the number of bytes written in the + * buffer dest, or -1 if decompression fails. + * + * The decompression algorithm and internal data format: + * + * It is made with the compressed data itself. + * + * The data representation is easiest explained by describing + * the process of decompression. + * + * If compressed_size == rawsize, then the data + * is stored uncompressed as plain bytes. Thus, the decompressor + * simply copies rawsize bytes to the destination. + * + * Otherwise the first byte tells what to do the next 8 times. + * We call this the control byte. + * + * An unset bit in the control byte means, that one uncompressed + * byte follows, which is copied from input to output. + * + * A set bit in the control byte means, that a tag of 2-3 bytes + * follows. A tag contains information to copy some bytes, that + * are already in the output buffer, to the current location in + * the output. Let's call the three tag bytes T1, T2 and T3. The + * position of the data to copy is coded as an offset from the + * actual output position. + * + * The offset is in the upper nibble of T1 and in T2. + * The length is in the lower nibble of T1. + * + * So the 16 bits of a 2 byte tag are coded as + * + * 7---T1--0 7---T2--0 + * OOOO LLLL OOOO OOOO + * + * This limits the offset to 1-4095 (12 bits) and the length + * to 3-18 (4 bits) because 3 is always added to it. To emit + * a tag of 2 bytes with a length of 2 only saves one control + * bit. But we lose one byte in the possible length of a tag. + * + * In the actual implementation, the 2 byte tag's length is + * limited to 3-17, because the value 0xF in the length nibble + * has special meaning. It means, that the next following + * byte (T3) has to be added to the length value of 18. That + * makes total limits of 1-4095 for offset and 3-273 for length. + * + * Now that we have successfully decoded a tag. We simply copy + * the output that occurred bytes back to the current + * output location in the specified . Thus, a + * sequence of 200 spaces (think about bpchar fields) could be + * coded in 4 bytes. One literal space and a three byte tag to + * copy 199 bytes with a -1 offset. Whow - that's a compression + * rate of 98%! Well, the implementation needs to save the + * original data size too, so we need another 4 bytes for it + * and end up with a total compression rate of 96%, what's still + * worth a Whow. + * + * The compression algorithm + * + * The following uses numbers used in the default strategy. + * + * The compressor works best for attributes of a size between + * 1K and 1M. For smaller items there's not that much chance of + * redundancy in the character sequence (except for large areas + * of identical bytes like trailing spaces) and for bigger ones + * our 4K maximum look-back distance is too small. + * + * The compressor creates a table for lists of positions. 
+ * For each input position (except the last 3), a hash key is + * built from the 4 next input bytes and the position remembered + * in the appropriate list. Thus, the table points to linked + * lists of likely to be at least in the first 4 characters + * matching strings. This is done on the fly while the input + * is compressed into the output area. Table entries are only + * kept for the last 4096 input positions, since we cannot use + * back-pointers larger than that anyway. The size of the hash + * table is chosen based on the size of the input - a larger table + * has a larger startup cost, as it needs to be initialized to + * zero, but reduces the number of hash collisions on long inputs. + * + * For each byte in the input, its hash key (built from this + * byte and the next 3) is used to find the appropriate list + * in the table. The lists remember the positions of all bytes + * that had the same hash key in the past in increasing backward + * offset order. Now for all entries in the used lists, the + * match length is computed by comparing the characters from the + * entries position with the characters from the actual input + * position. + * + * The compressor starts with a so called "good_match" of 128. + * It is a "prefer speed against compression ratio" optimizer. + * So if the first entry looked at already has 128 or more + * matching characters, the lookup stops and that position is + * used for the next tag in the output. + * + * For each subsequent entry in the history list, the "good_match" + * is lowered by 10%. So the compressor will be more happy with + * short matches the farer it has to go back in the history. + * Another "speed against ratio" preference characteristic of + * the algorithm. + * + * Thus there are 3 stop conditions for the lookup of matches: + * + * - a match >= good_match is found + * - there are no more history entries to look at + * - the next history entry is already too far back + * to be coded into a tag. + * + * Finally the match algorithm checks that at least a match + * of 3 or more bytes has been found, because that is the smallest + * amount of copy information to code into a tag. If so, a tag + * is omitted and all the input bytes covered by that are just + * scanned for the history add's, otherwise a literal character + * is omitted and only his history entry added. + * + * Acknowledgments: + * + * Many thanks to Adisak Pochanayon, who's article about SLZ + * inspired me to write the PostgreSQL compression this way. + * + * Jan Wieck + * + * Copyright (c) 1999-2019, PostgreSQL Global Development Group + * + * src/common/pg_lzcompress.c + * ---------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include + +#include "pg_lzcompress.h" + + +/* ---------- + * Local definitions + * ---------- + */ +#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */ +#define PGLZ_HISTORY_SIZE 4096 +#define PGLZ_MAX_MATCH 273 + + +/* ---------- + * PGLZ_HistEntry - + * + * Linked list for the backward history lookup + * + * All the entries sharing a hash key are linked in a doubly linked list. + * This makes it easy to remove an entry when it's time to recycle it + * (because it's more than 4K positions old). 
+ * ---------- + */ +typedef struct PGLZ_HistEntry +{ + struct PGLZ_HistEntry *next; /* links for my hash key's list */ + struct PGLZ_HistEntry *prev; + int hindex; /* my current hash key */ + const char *pos; /* my input position */ +} PGLZ_HistEntry; + + +/* ---------- + * The provided standard strategies + * ---------- + */ +static const PGLZ_Strategy strategy_default_data = { + 32, /* Data chunks less than 32 bytes are not + * compressed */ + INT_MAX, /* No upper limit on what we'll try to + * compress */ + 25, /* Require 25% compression rate, or not worth + * it */ + 1024, /* Give up if no compression in the first 1KB */ + 128, /* Stop history lookup if a match of 128 bytes + * is found */ + 10 /* Lower good match size by 10% at every loop + * iteration */ +}; +const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data; + + +static const PGLZ_Strategy strategy_always_data = { + 0, /* Chunks of any size are compressed */ + INT_MAX, + 0, /* It's enough to save one single byte */ + INT_MAX, /* Never give up early */ + 128, /* Stop history lookup if a match of 128 bytes + * is found */ + 6 /* Look harder for a good match */ +}; +const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data; + + +/* ---------- + * Statically allocated work arrays for history + * ---------- + */ +static int16 hist_start[PGLZ_MAX_HISTORY_LISTS]; +static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; + +/* + * Element 0 in hist_entries is unused, and means 'invalid'. Likewise, + * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'. + */ +#define INVALID_ENTRY 0 +#define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY]) + +/* ---------- + * pglz_hist_idx - + * + * Computes the history table slot for the lookup by the next 4 + * characters in the input. + * + * NB: because we use the next 4 characters, we are not guaranteed to + * find 3-character matches; they very possibly will be in the wrong + * hash list. This seems an acceptable tradeoff for spreading out the + * hash keys more. + * ---------- + */ +#define pglz_hist_idx(_s,_e, _mask) ( \ + ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \ + (((_s)[0] << 6) ^ ((_s)[1] << 4) ^ \ + ((_s)[2] << 2) ^ (_s)[3])) & (_mask) \ + ) + + +/* ---------- + * pglz_hist_add - + * + * Adds a new entry to the history table. + * + * If _recycle is true, then we are recycling a previously used entry, + * and must first delink it from its old hashcode's linked list. + * + * NOTE: beware of multiple evaluations of macro's arguments, and note that + * _hn and _recycle are modified in the macro. + * ---------- + */ +#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask) \ +do { \ + int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \ + int16 *__myhsp = &(_hs)[__hindex]; \ + PGLZ_HistEntry *__myhe = &(_he)[_hn]; \ + if (_recycle) { \ + if (__myhe->prev == NULL) \ + (_hs)[__myhe->hindex] = __myhe->next - (_he); \ + else \ + __myhe->prev->next = __myhe->next; \ + if (__myhe->next != NULL) \ + __myhe->next->prev = __myhe->prev; \ + } \ + __myhe->next = &(_he)[*__myhsp]; \ + __myhe->prev = NULL; \ + __myhe->hindex = __hindex; \ + __myhe->pos = (_s); \ + /* If there was an existing entry in this hash slot, link */ \ + /* this new entry to it. However, the 0th entry in the */ \ + /* entries table is unused, so we can freely scribble on it. */ \ + /* So don't bother checking if the slot was used - we'll */ \ + /* scribble on the unused entry if it was not, but that's */ \ + /* harmless. 
Avoiding the branch in this critical path */ \ + /* speeds this up a little bit. */ \ + /* if (*__myhsp != INVALID_ENTRY) */ \ + (_he)[(*__myhsp)].prev = __myhe; \ + *__myhsp = _hn; \ + if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) { \ + (_hn) = 1; \ + (_recycle) = true; \ + } \ +} while (0) + + +/* ---------- + * pglz_out_ctrl - + * + * Outputs the last and allocates a new control byte if needed. + * ---------- + */ +#define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \ +do { \ + if ((__ctrl & 0xff) == 0) \ + { \ + *(__ctrlp) = __ctrlb; \ + __ctrlp = (__buf)++; \ + __ctrlb = 0; \ + __ctrl = 1; \ + } \ +} while (0) + + +/* ---------- + * pglz_out_literal - + * + * Outputs a literal byte to the destination buffer including the + * appropriate control bit. + * ---------- + */ +#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \ +do { \ + pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ + *(_buf)++ = (unsigned char)(_byte); \ + _ctrl <<= 1; \ +} while (0) + + +/* ---------- + * pglz_out_tag - + * + * Outputs a backward reference tag of 2-4 bytes (depending on + * offset and length) to the destination buffer including the + * appropriate control bit. + * ---------- + */ +#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \ +do { \ + pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ + _ctrlb |= _ctrl; \ + _ctrl <<= 1; \ + if (_len > 17) \ + { \ + (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f); \ + (_buf)[1] = (unsigned char)(((_off) & 0xff)); \ + (_buf)[2] = (unsigned char)((_len) - 18); \ + (_buf) += 3; \ + } else { \ + (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \ + (_buf)[1] = (unsigned char)((_off) & 0xff); \ + (_buf) += 2; \ + } \ +} while (0) + + +/* ---------- + * pglz_find_match - + * + * Lookup the history table if the actual input stream matches + * another sequence of characters, starting somewhere earlier + * in the input buffer. + * ---------- + */ +static inline int +pglz_find_match(int16 *hstart, const char *input, const char *end, + int *lenp, int *offp, int good_match, int good_drop, int mask) +{ + PGLZ_HistEntry *hent; + int16 hentno; + int32 len = 0; + int32 off = 0; + + /* + * Traverse the linked history list until a good enough match is found. + */ + hentno = hstart[pglz_hist_idx(input, end, mask)]; + hent = &hist_entries[hentno]; + while (hent != INVALID_ENTRY_PTR) + { + const char *ip = input; + const char *hp = hent->pos; + int32 thisoff; + int32 thislen; + + /* + * Stop if the offset does not fit into our tag anymore. + */ + thisoff = ip - hp; + if (thisoff >= 0x0fff) + break; + + /* + * Determine length of match. A better match must be larger than the + * best so far. And if we already have a match of 16 or more bytes, + * it's worth the call overhead to use memcmp() to check if this match + * is equal for the same size. After that we must fallback to + * character by character comparison to know the exact position where + * the diff occurred. + */ + thislen = 0; + if (len >= 16) + { + if (memcmp(ip, hp, len) == 0) + { + thislen = len; + ip += len; + hp += len; + while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) + { + thislen++; + ip++; + hp++; + } + } + } + else + { + while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) + { + thislen++; + ip++; + hp++; + } + } + + /* + * Remember this match as the best (if it is) + */ + if (thislen > len) + { + len = thislen; + off = thisoff; + } + + /* + * Advance to the next history entry + */ + hent = hent->next; + + /* + * Be happy with lesser good matches the more entries we visited. 
But + * no point in doing calculation if we're at end of list. + */ + if (hent != INVALID_ENTRY_PTR) + { + if (len >= good_match) + break; + good_match -= (good_match * good_drop) / 100; + } + } + + /* + * Return match information only if it results at least in one byte + * reduction. + */ + if (len > 2) + { + *lenp = len; + *offp = off; + return 1; + } + + return 0; +} + + +/* ---------- + * pglz_compress - + * + * Compresses source into dest using strategy. Returns the number of + * bytes written in buffer dest, or -1 if compression fails. + * ---------- + */ +int32 +pglz_compress(const char *source, int32 slen, char *dest, + const PGLZ_Strategy *strategy) +{ + unsigned char *bp = (unsigned char *) dest; + unsigned char *bstart = bp; + int hist_next = 1; + bool hist_recycle = false; + const char *dp = source; + const char *dend = source + slen; + unsigned char ctrl_dummy = 0; + unsigned char *ctrlp = &ctrl_dummy; + unsigned char ctrlb = 0; + unsigned char ctrl = 0; + bool found_match = false; + int32 match_len; + int32 match_off; + int32 good_match; + int32 good_drop; + int32 result_size; + int32 result_max; + int32 need_rate; + int hashsz; + int mask; + + /* + * Our fallback strategy is the default. + */ + if (strategy == NULL) + strategy = PGLZ_strategy_default; + + /* + * If the strategy forbids compression (at all or if source chunk size out + * of range), fail. + */ + if (strategy->match_size_good <= 0 || + slen < strategy->min_input_size || + slen > strategy->max_input_size) + return -1; + + /* + * Limit the match parameters to the supported range. + */ + good_match = strategy->match_size_good; + if (good_match > PGLZ_MAX_MATCH) + good_match = PGLZ_MAX_MATCH; + else if (good_match < 17) + good_match = 17; + + good_drop = strategy->match_size_drop; + if (good_drop < 0) + good_drop = 0; + else if (good_drop > 100) + good_drop = 100; + + need_rate = strategy->min_comp_rate; + if (need_rate < 0) + need_rate = 0; + else if (need_rate > 99) + need_rate = 99; + + /* + * Compute the maximum result size allowed by the strategy, namely the + * input size minus the minimum wanted compression rate. This had better + * be <= slen, else we might overrun the provided output buffer. + */ + if (slen > (INT_MAX / 100)) + { + /* Approximate to avoid overflow */ + result_max = (slen / 100) * (100 - need_rate); + } + else + result_max = (slen * (100 - need_rate)) / 100; + + /* + * Experiments suggest that these hash sizes work pretty well. A large + * hash table minimizes collision, but has a higher startup cost. For a + * small input, the startup cost dominates. The table size must be a power + * of two. + */ + if (slen < 128) + hashsz = 512; + else if (slen < 256) + hashsz = 1024; + else if (slen < 512) + hashsz = 2048; + else if (slen < 1024) + hashsz = 4096; + else + hashsz = 8192; + mask = hashsz - 1; + + /* + * Initialize the history lists to empty. We do not need to zero the + * hist_entries[] array; its entries are initialized as they are used. + */ + memset(hist_start, 0, hashsz * sizeof(int16)); + + /* + * Compress the source directly into the output buffer. + */ + while (dp < dend) + { + /* + * If we already exceeded the maximum result size, fail. + * + * We check once per loop; since the loop body could emit as many as 4 + * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better + * allow 4 slop bytes. + */ + if (bp - bstart >= result_max) + return -1; + + /* + * If we've emitted more than first_success_by bytes without finding + * anything compressible at all, fail. 
This lets us fall out + * reasonably quickly when looking at incompressible input (such as + * pre-compressed data). + */ + if (!found_match && bp - bstart >= strategy->first_success_by) + return -1; + + /* + * Try to find a match in the history + */ + if (pglz_find_match(hist_start, dp, dend, &match_len, + &match_off, good_match, good_drop, mask)) + { + /* + * Create the tag and add history entries for all matched + * characters. + */ + pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); + while (match_len--) + { + pglz_hist_add(hist_start, hist_entries, + hist_next, hist_recycle, + dp, dend, mask); + dp++; /* Do not do this ++ in the line above! */ + /* The macro would do it four times - Jan. */ + } + found_match = true; + } + else + { + /* + * No match found. Copy one literal byte. + */ + pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); + pglz_hist_add(hist_start, hist_entries, + hist_next, hist_recycle, + dp, dend, mask); + dp++; /* Do not do this ++ in the line above! */ + /* The macro would do it four times - Jan. */ + } + } + + /* + * Write out the last control byte and check that we haven't overrun the + * output size allowed by the strategy. + */ + *ctrlp = ctrlb; + result_size = bp - bstart; + if (result_size >= result_max) + return -1; + + /* success */ + return result_size; +} + + +/* ---------- + * pglz_decompress - + * + * Decompresses source into dest. Returns the number of bytes + * decompressed in the destination buffer, and *optionally* + * checks that both the source and dest buffers have been + * fully read and written to, respectively. + * ---------- + */ +int32 +pglz_decompress(const char *source, int32 slen, char *dest, + int32 rawsize, bool check_complete) +{ + const unsigned char *sp; + const unsigned char *srcend; + unsigned char *dp; + unsigned char *destend; + + sp = (const unsigned char *) source; + srcend = ((const unsigned char *) source) + slen; + dp = (unsigned char *) dest; + destend = dp + rawsize; + + while (sp < srcend && dp < destend) + { + /* + * Read one control byte and process the next 8 items (or as many as + * remain in the compressed input). + */ + unsigned char ctrl = *sp++; + int ctrlc; + + for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) + { + + if (ctrl & 1) + { + /* + * Otherwise it contains the match length minus 3 and the + * upper 4 bits of the offset. The next following byte + * contains the lower 8 bits of the offset. If the length is + * coded as 18, another extension tag byte tells how much + * longer the match really was (0-255). + */ + int32 len; + int32 off; + + len = (sp[0] & 0x0f) + 3; + off = ((sp[0] & 0xf0) << 4) | sp[1]; + sp += 2; + if (len == 18) + len += *sp++; + + /* + * Now we copy the bytes specified by the tag from OUTPUT to + * OUTPUT. It is dangerous and platform dependent to use + * memcpy() here, because the copied areas could overlap + * extremely! + */ + len = Min(len, destend - dp); + while (len--) + { + *dp = dp[-off]; + dp++; + } + } + else + { + /* + * An unset control bit means LITERAL BYTE. So we just copy + * one from INPUT to OUTPUT. + */ + *dp++ = *sp++; + } + + /* + * Advance the control bit + */ + ctrl >>= 1; + } + } + + /* + * Check we decompressed the right amount. If we are slicing, then we + * won't necessarily be at the end of the source or dest buffers when we + * hit a stop, so we don't test them. + */ + if (check_complete && (dp != destend || sp != srcend)) + return -1; + + /* + * That's it. 
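+ * Return the number of bytes actually written to the destination buffer.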
+ */ + return (char *) dp - dest; +} diff --git a/src/bin/pg_probackup/pg_lzcompress.h b/src/bin/pg_probackup/pg_lzcompress.h new file mode 100644 index 000000000..555576436 --- /dev/null +++ b/src/bin/pg_probackup/pg_lzcompress.h @@ -0,0 +1,91 @@ +/* ---------- + * pg_lzcompress.h - + * + * Definitions for the builtin LZ compressor + * + * src/include/common/pg_lzcompress.h + * ---------- + */ + +#ifndef _PG_LZCOMPRESS_H_ +#define _PG_LZCOMPRESS_H_ + + +/* ---------- + * PGLZ_MAX_OUTPUT - + * + * Macro to compute the buffer size required by pglz_compress(). + * We allow 4 bytes for overrun before detecting compression failure. + * ---------- + */ +#define PGLZ_MAX_OUTPUT(_dlen) ((_dlen) + 4) + + +/* ---------- + * PGLZ_Strategy - + * + * Some values that control the compression algorithm. + * + * min_input_size Minimum input data size to consider compression. + * + * max_input_size Maximum input data size to consider compression. + * + * min_comp_rate Minimum compression rate (0-99%) to require. + * Regardless of min_comp_rate, the output must be + * smaller than the input, else we don't store + * compressed. + * + * first_success_by Abandon compression if we find no compressible + * data within the first this-many bytes. + * + * match_size_good The initial GOOD match size when starting history + * lookup. When looking up the history to find a + * match that could be expressed as a tag, the + * algorithm does not always walk back entirely. + * A good match fast is usually better than the + * best possible one very late. For each iteration + * in the lookup, this value is lowered so the + * longer the lookup takes, the smaller matches + * are considered good. + * + * match_size_drop The percentage by which match_size_good is lowered + * after each history check. Allowed values are + * 0 (no change until end) to 100 (only check + * latest history entry at all). + * ---------- + */ +typedef struct PGLZ_Strategy +{ + int32 min_input_size; + int32 max_input_size; + int32 min_comp_rate; + int32 first_success_by; + int32 match_size_good; + int32 match_size_drop; +} PGLZ_Strategy; + + +/* ---------- + * The standard strategies + * + * PGLZ_strategy_default Recommended default strategy for TOAST. + * + * PGLZ_strategy_always Try to compress inputs of any length. + * Fallback to uncompressed storage only if + * output would be larger than input. + * ---------- + */ +extern const PGLZ_Strategy *const PGLZ_strategy_default; +extern const PGLZ_Strategy *const PGLZ_strategy_always; + + +/* ---------- + * Global function declarations + * ---------- + */ +extern int32 pglz_compress(const char *source, int32 slen, char *dest, + const PGLZ_Strategy *strategy); +extern int32 pglz_decompress(const char *source, int32 slen, char *dest, + int32 rawsize, bool check_complete); + +#endif /* _PG_LZCOMPRESS_H_ */ diff --git a/src/bin/pg_probackup/pg_probackup.cpp b/src/bin/pg_probackup/pg_probackup.cpp new file mode 100644 index 000000000..187eef52e --- /dev/null +++ b/src/bin/pg_probackup/pg_probackup.cpp @@ -0,0 +1,818 @@ +/*------------------------------------------------------------------------- + * + * pg_probackup.c: Backup/Recovery manager for PostgreSQL. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include "pg_getopt.h" +#include "streamutil.h" +#include "file.h" + +#include + +#include "configuration.h" +#include "thread.h" +#include +#include "common/fe_memutils.h" + +const char *PROGRAM_NAME = NULL; /* PROGRAM_NAME_FULL without .exe suffix + * if any */ +const char *PROGRAM_NAME_FULL = NULL; +const char *PROGRAM_FULL_PATH = NULL; +const char *PROGRAM_URL = "https://github.com/postgrespro/pg_probackup"; +const char *PROGRAM_EMAIL = "https://github.com/postgrespro/pg_probackup/issues"; + +typedef enum ProbackupSubcmd +{ + NO_CMD = 0, + INIT_CMD, + ADD_INSTANCE_CMD, + DELETE_INSTANCE_CMD, + BACKUP_CMD, + RESTORE_CMD, + VALIDATE_CMD, + DELETE_CMD, + MERGE_CMD, + SHOW_CMD, + SET_CONFIG_CMD, + SET_BACKUP_CMD, + SHOW_CONFIG_CMD, +} ProbackupSubcmd; + + +/* directory options */ +char *backup_path = NULL; +/* + * path or to the data files in the backup catalog + * $BACKUP_PATH/backups/instance_name + */ +char backup_instance_path[MAXPGPATH]; +/* + * path or to the wal files in the backup catalog + * $BACKUP_PATH/wal/instance_name + */ +char arclog_path[MAXPGPATH] = ""; + +/* colon separated external directories list ("/path1:/path2") */ +char *externaldir = NULL; +/* common options */ +static char *backup_id_string = NULL; +int num_threads = 1; +bool stream_wal = true; +pid_t my_pid = 0; +__thread int my_thread_num = 1; +bool progress = false; +bool no_sync = false; +#if PG_VERSION_NUM >= 100000 +char *replication_slot = NULL; +#endif +bool temp_slot = false; + +/* backup options */ +bool backup_logs = false; +bool smooth_checkpoint; +char *remote_agent; +static char *backup_note = NULL; +/* restore options */ +static char *target_time = NULL; +static char *target_xid = NULL; +static char *target_lsn = NULL; +static char *target_inclusive = NULL; +static TimeLineID target_tli; +static char *target_stop; +static bool target_immediate; +static char *target_name = NULL; +static char *target_action = NULL; + +static pgRecoveryTarget *recovery_target_options = NULL; +static pgRestoreParams *restore_params = NULL; + +time_t current_time = 0; +bool no_validate = false; +IncrRestoreMode incremental_mode = INCR_NONE; + +bool skip_block_validation = false; +bool skip_external_dirs = false; + +/* delete options */ +bool delete_wal = false; +bool delete_expired = false; +bool merge_expired = false; +bool force = false; +bool dry_run = false; +static char *delete_status = NULL; +/* compression options */ +bool compress_shortcut = false; + +/* other options */ +char *instance_name; + +/* archive push options */ +int batch_size = 1; +static char *wal_file_path; +static char *wal_file_name; +static bool file_overwrite = false; +static bool no_ready_rename = false; + +/* archive get options */ +static char *prefetch_dir; +bool no_validate_wal = false; + +/* show options */ +ShowFormat show_format = SHOW_PLAIN; +bool show_archive = false; + +/* set-backup options */ +int64 ttl = -1; +static char *expire_time_string = NULL; +static pgSetBackupParams *set_backup_params = NULL; + +/* current settings */ +pgBackup current; +static ProbackupSubcmd backup_subcmd = NO_CMD; + +static bool help_opt = false; + +static void opt_incr_restore_mode(ConfigOption *opt, const char *arg); +static void opt_backup_mode(ConfigOption *opt, const char *arg); +static void 
opt_show_format(ConfigOption *opt, const char *arg); + +static void compress_init(void); + +/* + * Short name should be non-printable ASCII character. + * Use values between 128 and 255. + */ +static ConfigOption cmd_options[] = +{ + { 'b', 130, "help", &help_opt, SOURCE_CMD_STRICT }, + { 's', 'B', "backup-path", &backup_path, SOURCE_CMD_STRICT }, + { 'u', 'j', "threads", &num_threads, SOURCE_CMD_STRICT }, + { 'b', 131, "stream", &stream_wal, SOURCE_CMD_STRICT }, + { 'b', 132, "progress", &progress, SOURCE_CMD_STRICT }, + { 's', 'i', "backup-id", &backup_id_string, SOURCE_CMD_STRICT }, + { 'b', 133, "no-sync", &no_sync, SOURCE_CMD_STRICT }, + { 'b', 180, "backup-pg-log", &backup_logs, SOURCE_CMD_STRICT }, + { 'f', 'b', "backup-mode", (void *)opt_backup_mode, SOURCE_CMD_STRICT }, + { 'b', 'C', "smooth-checkpoint", &smooth_checkpoint, SOURCE_CMD_STRICT }, + { 's', 'S', "slot", &replication_slot, SOURCE_CMD_STRICT }, + { 'b', 181, "temp-slot", &temp_slot, SOURCE_CMD_STRICT }, + { 'b', 182, "delete-wal", &delete_wal, SOURCE_CMD_STRICT }, + { 'b', 183, "delete-expired", &delete_expired, SOURCE_CMD_STRICT }, + { 'b', 184, "merge-expired", &merge_expired, SOURCE_CMD_STRICT }, + { 'b', 185, "dry-run", &dry_run, SOURCE_CMD_STRICT }, + { 's', 238, "note", &backup_note, SOURCE_CMD_STRICT }, + { 's', 136, "recovery-target-time", &target_time, SOURCE_CMD_STRICT }, + { 's', 137, "recovery-target-xid", &target_xid, SOURCE_CMD_STRICT }, + { 's', 144, "recovery-target-lsn", &target_lsn, SOURCE_CMD_STRICT }, + { 's', 138, "recovery-target-inclusive", &target_inclusive, SOURCE_CMD_STRICT }, + { 'u', 139, "recovery-target-timeline", &target_tli, SOURCE_CMD_STRICT }, + { 's', 157, "recovery-target", &target_stop, SOURCE_CMD_STRICT }, + { 'f', 'T', "tablespace-mapping", (void *)opt_tablespace_map, SOURCE_CMD_STRICT }, + { 'f', 155, "external-mapping", (void *)opt_externaldir_map, SOURCE_CMD_STRICT }, + { 's', 141, "recovery-target-name", &target_name, SOURCE_CMD_STRICT }, + { 's', 142, "recovery-target-action", &target_action, SOURCE_CMD_STRICT }, + { 'b', 143, "no-validate", &no_validate, SOURCE_CMD_STRICT }, + { 'b', 154, "skip-block-validation", &skip_block_validation, SOURCE_CMD_STRICT }, + { 'b', 156, "skip-external-dirs", &skip_external_dirs, SOURCE_CMD_STRICT }, + { 'f', 'I', "incremental-mode", (void *)opt_incr_restore_mode, SOURCE_CMD_STRICT }, + { 'b', 145, "wal", &delete_wal, SOURCE_CMD_STRICT }, + { 'b', 146, "expired", &delete_expired, SOURCE_CMD_STRICT }, + { 's', 172, "status", &delete_status, SOURCE_CMD_STRICT }, + + { 'b', 147, "force", &force, SOURCE_CMD_STRICT }, + { 'b', 148, "compress", &compress_shortcut, SOURCE_CMD_STRICT }, + { 'B', 'w', "no-password", &prompt_password, SOURCE_CMD_STRICT }, + { 'b', 'W', "password", &force_password, SOURCE_CMD_STRICT }, + { 's', 149, "instance", &instance_name, SOURCE_CMD_STRICT }, + { 's', 150, "wal-file-path", &wal_file_path, SOURCE_CMD_STRICT }, + { 's', 151, "wal-file-name", &wal_file_name, SOURCE_CMD_STRICT }, + { 'b', 152, "overwrite", &file_overwrite, SOURCE_CMD_STRICT }, + { 'b', 153, "no-ready-rename", &no_ready_rename, SOURCE_CMD_STRICT }, + { 'i', 162, "batch-size", &batch_size, SOURCE_CMD_STRICT }, + { 's', 163, "prefetch-dir", &prefetch_dir, SOURCE_CMD_STRICT }, + { 'b', 164, "no-validate-wal", &no_validate_wal, SOURCE_CMD_STRICT }, + { 'f', 165, "format", (void *)opt_show_format, SOURCE_CMD_STRICT }, + { 'b', 166, "archive", &show_archive, SOURCE_CMD_STRICT }, + { 'I', 170, "ttl", &ttl, SOURCE_CMD_STRICT, SOURCE_DEFAULT, 0, OPTION_UNIT_S, 
option_get_value}, + { 's', 171, "expire-time", &expire_time_string, SOURCE_CMD_STRICT }, + + { 's', 136, "time", &target_time, SOURCE_CMD_STRICT }, + { 's', 137, "xid", &target_xid, SOURCE_CMD_STRICT }, + { 's', 138, "inclusive", &target_inclusive, SOURCE_CMD_STRICT }, + { 'u', 139, "timeline", &target_tli, SOURCE_CMD_STRICT }, + { 's', 144, "lsn", &target_lsn, SOURCE_CMD_STRICT }, + { 'b', 140, "immediate", &target_immediate, SOURCE_CMD_STRICT }, + + { 0 } +}; + + +static void +setMyLocation(void) +{ + +#ifdef WIN32 + if (IsSshProtocol()) + elog(ERROR, "Currently remote operations on Windows are not supported"); +#endif + + MyLocation = IsSshProtocol() + ? (backup_subcmd == BACKUP_CMD || backup_subcmd == RESTORE_CMD || backup_subcmd == ADD_INSTANCE_CMD) + ? FIO_BACKUP_HOST + : FIO_LOCAL_HOST + : FIO_LOCAL_HOST; +} + +/* + * Entry point of pg_probackup command. + */ +int +main(int argc, char *argv[]) +{ + char *command = NULL, + *command_name; + + PROGRAM_NAME_FULL = argv[0]; + + /* Initialize current backup */ + pgBackupInit(¤t); + + /* Initialize current instance configuration */ + init_config(&instance_config, instance_name); + + PROGRAM_NAME = get_progname(argv[0]); + PROGRAM_FULL_PATH = (char *)gs_palloc0(MAXPGPATH); + + /* Get current time */ + current_time = time(NULL); + + my_pid = getpid(); + //set_pglocale_pgservice(argv[0], "pgscripts"); + +#if PG_VERSION_NUM >= 110000 + /* + * Reset WAL segment size, we will retreive it using RetrieveWalSegSize() + * later. + */ + WalSegSz = 0; +#endif + + /* + * Save main thread's tid. It is used call exit() in case of errors. + */ + main_tid = pthread_self(); + + /* Parse subcommands and non-subcommand options */ + if (argc > 1) + { + if (strcmp(argv[1], "add-instance") == 0) + backup_subcmd = ADD_INSTANCE_CMD; + else if (strcmp(argv[1], "del-instance") == 0) + backup_subcmd = DELETE_INSTANCE_CMD; + else if (strcmp(argv[1], "init") == 0) + backup_subcmd = INIT_CMD; + else if (strcmp(argv[1], "backup") == 0) + backup_subcmd = BACKUP_CMD; + else if (strcmp(argv[1], "restore") == 0) + backup_subcmd = RESTORE_CMD; + else if (strcmp(argv[1], "validate") == 0) + backup_subcmd = VALIDATE_CMD; + else if (strcmp(argv[1], "delete") == 0) + backup_subcmd = DELETE_CMD; + else if (strcmp(argv[1], "merge") == 0) + backup_subcmd = MERGE_CMD; + else if (strcmp(argv[1], "show") == 0) + backup_subcmd = SHOW_CMD; + else if (strcmp(argv[1], "set-config") == 0) + backup_subcmd = SET_CONFIG_CMD; + else if (strcmp(argv[1], "set-backup") == 0) + backup_subcmd = SET_BACKUP_CMD; + else if (strcmp(argv[1], "show-config") == 0) + backup_subcmd = SHOW_CONFIG_CMD; +#ifdef WIN32 + else if (strcmp(argv[1], "ssh") == 0) + launch_ssh(argv); +#endif + else if (strcmp(argv[1], "agent") == 0) + { + /* 'No forward compatibility' sanity: + * /old/binary -> ssh execute -> /newer/binary agent version_num + * If we are executed as an agent for older binary, then exit with error + */ + if (argc > 2) + { + elog(ERROR, "Version mismatch, pg_probackup binary with version '%s' " + "is launched as an agent for pg_probackup binary with version '%s'", + PROGRAM_VERSION, argv[2]); + } + fio_communicate(STDIN_FILENO, STDOUT_FILENO); + return 0; + } + else if (strcmp(argv[1], "--help") == 0 || + strcmp(argv[1], "-?") == 0 || + strcmp(argv[1], "help") == 0) + { + if (argc > 2) + help_command(argv[2]); + else + help_pg_probackup(); + } + else if (strcmp(argv[1], "--version") == 0 + || strcmp(argv[1], "version") == 0 + || strcmp(argv[1], "-V") == 0) + { + puts("gs_probackup " DEF_GS_VERSION); + 
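/*
 * The subcommand keyword is always argv[1]; everything else is parsed
 * later by config_get_opt(). A typical session therefore looks roughly
 * like this (an illustrative sketch only -- the catalog path, data
 * directory and instance name are placeholders):
 *
 *   gs_probackup init -B /backup_catalog
 *   gs_probackup add-instance -B /backup_catalog -D /data_dir --instance inst1
 *   gs_probackup backup -B /backup_catalog --instance inst1 -b FULL
 *   gs_probackup show -B /backup_catalog
 */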
exit(0); + } + else + elog(ERROR, "Unknown subcommand \"%s\"", argv[1]); + } + + if (backup_subcmd == NO_CMD) + elog(ERROR, "No subcommand specified"); + + /* + * Make command string before getopt_long() will call. It permutes the + * content of argv. + */ + /* TODO why do we do that only for some commands? */ + command_name = gs_pstrdup(argv[1]); + if (backup_subcmd == BACKUP_CMD || + backup_subcmd == RESTORE_CMD || + backup_subcmd == VALIDATE_CMD || + backup_subcmd == DELETE_CMD || + backup_subcmd == MERGE_CMD || + backup_subcmd == SET_CONFIG_CMD || + backup_subcmd == SET_BACKUP_CMD) + { + int i, + len = 0, + allocated = 0; + + allocated = sizeof(char) * MAXPGPATH; + command = (char *) gs_palloc0(allocated); + + for (i = 0; i < argc; i++) + { + int arglen = strlen(argv[i]); + + if (arglen + len > allocated) + { + allocated *= 2; + command = (char *)gs_repalloc(command, allocated); + } + + strncpy(command + len, argv[i], arglen); + len += arglen; + command[len++] = ' '; + } + + command[len] = '\0'; + } + + optind += 1; + /* Parse command line only arguments */ + config_get_opt(argc, argv, cmd_options, instance_options); + + pgut_init(); + + if (help_opt) + help_command(command_name); + + /* backup_path is required for all pg_probackup commands except help */ + if (backup_path == NULL) + { + /* + * If command line argument is not set, try to read BACKUP_PATH + * from environment variable + */ + backup_path = getenv("BACKUP_PATH"); + if (backup_path == NULL) + elog(ERROR, "required parameter not specified: BACKUP_PATH (-B, --backup-path)"); + } + + setMyLocation(); + + if (backup_path != NULL) + { + canonicalize_path(backup_path); + + /* Ensure that backup_path is an absolute path */ + if (!is_absolute_path(backup_path)) + elog(ERROR, "-B, --backup-path must be an absolute path"); + } + + /* Ensure that backup_path is an absolute path */ + if (backup_path && !is_absolute_path(backup_path)) + elog(ERROR, "-B, --backup-path must be an absolute path"); + + + /* + * Option --instance is required for all commands except + * init, show and validate + */ + if (instance_name == NULL) + { + if (backup_subcmd != INIT_CMD && backup_subcmd != SHOW_CMD && + backup_subcmd != VALIDATE_CMD) + elog(ERROR, "required parameter not specified: --instance"); + } + else + /* Set instance name */ + instance_config.name = pgut_strdup(instance_name); + + /* + * If --instance option was passed, construct paths for backup data and + * xlog files of this backup instance. + */ + if ((backup_path != NULL) && instance_name) + { + /* + * Fill global variables used to generate pathes inside the instance's + * backup catalog. + * TODO replace global variables with InstanceConfig structure fields + */ + sprintf(backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance_name); + + /* + * Fill InstanceConfig structure fields used to generate pathes inside + * the instance's backup catalog. + * TODO continue refactoring to use these fields instead of global vars + */ + sprintf(instance_config.backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + canonicalize_path(instance_config.backup_instance_path); + + sprintf(instance_config.arclog_path, "%s/%s/%s", + backup_path, "wal", instance_name); + canonicalize_path(instance_config.arclog_path); + + /* + * Ensure that requested backup instance exists. 
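	 * The instance directory is created by add-instance; given the sprintf()
	 * calls above, the catalog layout is, schematically:
	 *
	 *   $BACKUP_PATH/
	 *       backups/<instance_name>/     <-- backup_instance_path
	 *       wal/<instance_name>/         <-- arclog_path
	 *
	 * The existence check below is performed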
+ * for all commands except init, which doesn't take this parameter, + * add-instance, which creates new instance + */ + if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD) + { + struct stat st; + + if (fio_stat(backup_instance_path, &st, true, FIO_BACKUP_HOST) != 0) + { + elog(WARNING, "Failed to access directory \"%s\": %s", + backup_instance_path, strerror(errno)); + + // TODO: redundant message, should we get rid of it? + elog(ERROR, "Instance '%s' does not exist in this backup catalog", + instance_name); + } + else + { + /* Ensure that backup_path is a path to a directory */ + if (!S_ISDIR(st.st_mode)) + elog(ERROR, "-B, --backup-path must be a path to directory"); + } + } + } + + /* + * We read options from command line, now we need to read them from + * configuration file since we got backup path and instance name. + * For some commands an instance option isn't required, see above. + */ + if (instance_name) + { + char path[MAXPGPATH]; + /* Read environment variables */ + config_get_opt_env(instance_options); + + /* Read options from configuration file */ + if (backup_subcmd != ADD_INSTANCE_CMD) + { + join_path_components(path, backup_instance_path, + BACKUP_CATALOG_CONF_FILE); + config_read_opt(path, instance_options, ERROR, true, false); + } + setMyLocation(); + } + + /* Initialize logger */ + init_logger(backup_path, &instance_config.logger); + + /* command was initialized for a few commands */ + if (command) + { + elog_file(INFO, "command: %s", command); + + pfree(command); + command = NULL; + } + + if (find_my_exec(argv[0],(char *) PROGRAM_FULL_PATH) < 0) + { + PROGRAM_FULL_PATH = NULL; + elog(WARNING, "%s: could not find a full path to executable", PROGRAM_NAME); + } + + /* + * We have read pgdata path from command line or from configuration file. + * Ensure that pgdata is an absolute path. + */ + if (instance_config.pgdata != NULL) + canonicalize_path(instance_config.pgdata); + if (instance_config.pgdata != NULL && + !is_absolute_path(instance_config.pgdata)) + elog(ERROR, "-D, --pgdata must be an absolute path"); + +#if PG_VERSION_NUM >= 110000 + /* Check xlog-seg-size option */ + if (instance_name && + backup_subcmd != INIT_CMD && + backup_subcmd != ADD_INSTANCE_CMD && backup_subcmd != SET_CONFIG_CMD && + !IsValidWalSegSize(instance_config.xlog_seg_size)) + { + /* If we are working with instance of PG<11 using PG11 binary, + * then xlog_seg_size is equal to zero. Manually set it to 16MB. + */ + if (instance_config.xlog_seg_size == 0) + instance_config.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE; + else + elog(ERROR, "Invalid WAL segment size %u", instance_config.xlog_seg_size); + } +#endif + + /* Sanity check of --backup-id option */ + if (backup_id_string != NULL) + { + if (backup_subcmd != RESTORE_CMD && + backup_subcmd != VALIDATE_CMD && + backup_subcmd != DELETE_CMD && + backup_subcmd != MERGE_CMD && + backup_subcmd != SET_BACKUP_CMD && + backup_subcmd != SHOW_CMD) + elog(ERROR, "Cannot use -i (--backup-id) option together with the \"%s\" command", + command_name); + + current.backup_id = base36dec(backup_id_string); + if (current.backup_id == 0) + elog(ERROR, "Invalid backup-id \"%s\"", backup_id_string); + } + + if (!instance_config.conn_opt.pghost && instance_config.remote.host) + instance_config.conn_opt.pghost = instance_config.remote.host; + + /* Setup stream options. They are used in streamutil.c. 
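	 *
	 * A side note on the -i/--backup-id value validated above: backup IDs are
	 * base36-encoded start timestamps (digits 0-9 and A-Z), and base36dec()
	 * is presumably just a strtoul(text, NULL, 36) wrapper as in upstream
	 * pg_probackup. For example the six-character ID "QHD8Q6" decodes to the
	 * Unix timestamp 1601291886, while unparsable input yields 0, which is
	 * why 0 doubles as INVALID_BACKUP_ID.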
*/ + if (instance_config.conn_opt.pghost != NULL) + dbhost = gs_pstrdup(instance_config.conn_opt.pghost); + if (instance_config.conn_opt.pgport != NULL) + dbport = gs_pstrdup(instance_config.conn_opt.pgport); + if (instance_config.conn_opt.pguser != NULL) + dbuser = gs_pstrdup(instance_config.conn_opt.pguser); + + if (backup_subcmd == VALIDATE_CMD || backup_subcmd == RESTORE_CMD) + { + /* + * Parse all recovery target options into recovery_target_options + * structure. + */ + recovery_target_options = + parseRecoveryTargetOptions(target_time, target_xid, + target_inclusive, target_tli, target_lsn, + (target_stop != NULL) ? target_stop : + (target_immediate) ? "immediate" : NULL, + target_name, target_action); + + if (force && backup_subcmd != RESTORE_CMD) + elog(ERROR, "You cannot specify \"--force\" flag with the \"%s\" command", + command_name); + + if (force) + no_validate = true; + + /* keep all params in one structure */ + restore_params = pgut_new(pgRestoreParams); + restore_params->is_restore = (backup_subcmd == RESTORE_CMD); + restore_params->force = force; + restore_params->no_validate = no_validate; + restore_params->skip_block_validation = skip_block_validation; + restore_params->skip_external_dirs = skip_external_dirs; + restore_params->incremental_mode = incremental_mode; + } + + /* + * Parse set-backup options into set_backup_params structure. + */ + if (backup_subcmd == SET_BACKUP_CMD || backup_subcmd == BACKUP_CMD) + { + time_t expire_time = 0; + + if (expire_time_string && ttl >= 0) + elog(ERROR, "You cannot specify '--expire-time' and '--ttl' options together"); + + /* Parse string to seconds */ + if (expire_time_string) + { + if (!parse_time(expire_time_string, &expire_time, false)) + elog(ERROR, "Invalid value for '--expire-time' option: '%s'", + expire_time_string); + } + + if (expire_time > 0 || ttl >= 0 || backup_note) + { + set_backup_params = pgut_new(pgSetBackupParams); + set_backup_params->ttl = ttl; + set_backup_params->expire_time = expire_time; + set_backup_params->note = backup_note; + + if (backup_note && strlen(backup_note) > MAX_NOTE_SIZE) + elog(ERROR, "Backup note cannot exceed %u bytes", MAX_NOTE_SIZE); + } + } + + /* sanity */ + if (backup_subcmd == VALIDATE_CMD && restore_params->no_validate) + elog(ERROR, "You cannot specify \"--no-validate\" option with the \"%s\" command", + command_name); + + if (num_threads < 1) + num_threads = 1; + + if (batch_size < 1) + batch_size = 1; + + compress_init(); + + /* do actual operation */ + switch (backup_subcmd) + { + case ADD_INSTANCE_CMD: + return do_add_instance(&instance_config); + case DELETE_INSTANCE_CMD: + return do_delete_instance(); + case INIT_CMD: + return do_init(); + case BACKUP_CMD: + { + time_t start_time = time(NULL); + + current.stream = stream_wal; + + /* sanity */ + if (current.backup_mode == BACKUP_MODE_INVALID) + elog(ERROR, "required parameter not specified: BACKUP_MODE " + "(-b, --backup-mode)"); + + return do_backup(start_time, set_backup_params, no_validate, no_sync, backup_logs); + } + case RESTORE_CMD: + return do_restore_or_validate(current.backup_id, + recovery_target_options, + restore_params, no_sync); + case VALIDATE_CMD: + if (current.backup_id == 0 && target_time == 0 && target_xid == 0 && !target_lsn) + return do_validate_all(); + else + /* PITR validation and, optionally, partial validation */ + return do_restore_or_validate(current.backup_id, + recovery_target_options, + restore_params, + no_sync); + case SHOW_CMD: + return do_show(instance_name, current.backup_id, 
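			/*
			 * Backup pinning, as parsed above (an illustrative sketch; the
			 * exact values accepted for --ttl and --expire-time are whatever
			 * the option parser and parse_time() understand):
			 *
			 *   gs_probackup set-backup -B /backup_catalog --instance inst1 -i QHD8Q6 --ttl=30d
			 *   gs_probackup set-backup -B /backup_catalog --instance inst1 -i QHD8Q6 \
			 *                           --expire-time='2020-12-31 23:59:59'
			 *
			 * The two options are mutually exclusive, --ttl=0 removes an
			 * existing pin (see pgSetBackupParams), and a --note longer than
			 * MAX_NOTE_SIZE bytes is rejected.
			 */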
show_archive); + case DELETE_CMD: + if (delete_expired && backup_id_string) + elog(ERROR, "You cannot specify --delete-expired and (-i, --backup-id) options together"); + if (merge_expired && backup_id_string) + elog(ERROR, "You cannot specify --merge-expired and (-i, --backup-id) options together"); + if (delete_status && backup_id_string) + elog(ERROR, "You cannot specify --status and (-i, --backup-id) options together"); + if (!delete_expired && !merge_expired && !delete_wal && delete_status == NULL && !backup_id_string) + elog(ERROR, "You must specify at least one of the delete options: " + "--delete-expired |--delete-wal |--merge-expired |--status |(-i, --backup-id)"); + if (!backup_id_string) + { + if (delete_status) + do_delete_status(&instance_config, delete_status); + else + do_retention(); + } + else + do_delete(current.backup_id); + break; + case MERGE_CMD: + do_merge(current.backup_id); + break; + case SHOW_CONFIG_CMD: + do_show_config(); + break; + case SET_CONFIG_CMD: + do_set_config(false); + break; + case SET_BACKUP_CMD: + if (!backup_id_string) + elog(ERROR, "You must specify parameter (-i, --backup-id) for 'set-backup' command"); + do_set_backup(instance_name, current.backup_id, set_backup_params); + break; + case NO_CMD: + /* Should not happen */ + elog(ERROR, "Unknown subcommand"); + } + + return 0; +} + +static void +opt_incr_restore_mode(ConfigOption *opt, const char *arg) +{ + if (pg_strcasecmp(arg, "none") == 0) + { + incremental_mode = INCR_NONE; + return; + } + else if (pg_strcasecmp(arg, "checksum") == 0) + { + incremental_mode = INCR_CHECKSUM; + return; + } + else if (pg_strcasecmp(arg, "lsn") == 0) + { + incremental_mode = INCR_LSN; + return; + } + + elog(ERROR, "Invalid value for '--incremental-mode' option: '%s'", arg); +} + +static void +opt_backup_mode(ConfigOption *opt, const char *arg) +{ + current.backup_mode = parse_backup_mode(arg); +} + +static void +opt_show_format(ConfigOption *opt, const char *arg) +{ + const char *v = arg; + size_t len; + + while (IsSpace(*v)) + v++; + len = strlen(v); + + if (len > 0) + { + if (pg_strncasecmp("plain", v, len) == 0) + show_format = SHOW_PLAIN; + else if (pg_strncasecmp("json", v, len) == 0) + show_format = SHOW_JSON; + else + elog(ERROR, "Invalid show format \"%s\"", arg); + } + else + elog(ERROR, "Invalid show format \"%s\"", arg); +} + +/* + * Initialize compress and sanity checks for compress. 
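 *
 * In short, based on the checks below: --compress is a shortcut for
 * compress-algorithm=zlib; compress-level must lie between 0 and 9 and,
 * except for set-config, may only be given together with a
 * compress-algorithm; zlib is rejected in builds without HAVE_LIBZ; and
 * pglz cannot be combined with a multithreaded backup (-j > 1).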
+ */ +static void +compress_init(void) +{ + /* Default algorithm is zlib */ + if (compress_shortcut) + instance_config.compress_alg = ZLIB_COMPRESS; + + if (backup_subcmd != SET_CONFIG_CMD) + { + if (instance_config.compress_level != COMPRESS_LEVEL_DEFAULT + && instance_config.compress_alg == NOT_DEFINED_COMPRESS) + elog(ERROR, "Cannot specify compress-level option alone without " + "compress-algorithm option"); + } + + if (instance_config.compress_level < 0 || instance_config.compress_level > 9) + elog(ERROR, "--compress-level value must be in the range from 0 to 9"); + + if (instance_config.compress_alg == ZLIB_COMPRESS && instance_config.compress_level == 0) + elog(WARNING, "Compression level 0 will lead to data bloat!"); + + if (backup_subcmd == BACKUP_CMD) + { +#ifndef HAVE_LIBZ + if (instance_config.compress_alg == ZLIB_COMPRESS) + elog(ERROR, "This build does not support zlib compression"); + else +#endif + if (instance_config.compress_alg == PGLZ_COMPRESS && num_threads > 1) + elog(ERROR, "Multithread backup does not support pglz compression"); + } +} diff --git a/src/bin/pg_probackup/pg_probackup.h b/src/bin/pg_probackup/pg_probackup.h new file mode 100644 index 000000000..8f450bccc --- /dev/null +++ b/src/bin/pg_probackup/pg_probackup.h @@ -0,0 +1,1123 @@ +/*------------------------------------------------------------------------- + * + * pg_probackup.h: Backup/Recovery manager for PostgreSQL. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2018, Postgres Professional + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PROBACKUP_H +#define PG_PROBACKUP_H + +#include "postgres_fe.h" +#include "libpq/libpq-fe.h" +#include "libpq/libpq-int.h" + +#include "access/xlog_internal.h" +#include "utils/pg_crc.h" + +#if PG_VERSION_NUM >= 120000 +#include "common/logging.h" +#endif + +#ifdef FRONTEND +#undef FRONTEND +#include "atomics.h" +#define FRONTEND +#else +#include "atomics.h" +#endif + +#include "configuration.h" +#include "logger.h" +#include "remote.h" +#include "parray.h" +#include "pgut.h" +#include "file.h" + +#include "datapagemap.h" +#include "thread.h" + +#ifdef WIN32 +#define __thread __declspec(thread) +#else +#include +#endif + +/* pgut client variables and full path */ +extern const char *PROGRAM_NAME; +extern const char *PROGRAM_NAME_FULL; +extern const char *PROGRAM_FULL_PATH; +extern const char *PROGRAM_URL; +extern const char *PROGRAM_EMAIL; + +//#define SUPPORT_MULTI_TIMELINE 0 + +/* Directory/File names */ +#define DATABASE_DIR "database" +#define BACKUPS_DIR "backups" +#define PG_XLOG_DIR "pg_xlog" +#define PG_LOG_DIR "pg_log" +#define PG_TBLSPC_DIR "pg_tblspc" +#define PG_GLOBAL_DIR "global" +#define BACKUP_CONTROL_FILE "backup.control" +#define BACKUP_CATALOG_CONF_FILE "pg_probackup.conf" +#define BACKUP_CATALOG_PID "backup.pid" +#define DATABASE_FILE_LIST "backup_content.control" +#define PG_BACKUP_LABEL_FILE "backup_label" +#define PG_TABLESPACE_MAP_FILE "tablespace_map" +#define EXTERNAL_DIR "external_directories/externaldir" +#define DATABASE_MAP "database_map" +#define HEADER_MAP "page_header_map" +#define HEADER_MAP_TMP "page_header_map_tmp" + +/* Timeout defaults */ +#define ARCHIVE_TIMEOUT_DEFAULT 300 + +/* Directory/File permission */ +#define DIR_PERMISSION (0700) +#define FILE_PERMISSION (0600) + +/* 64-bit xid support for PGPRO_EE */ +/*#ifndef PGPRO_EE +#define XID_FMT "%u" 
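   For orientation, the file-name constants above map onto the on-disk
   layout of a single backup roughly as follows (an illustrative sketch):

     $BACKUP_PATH/backups/<instance_name>/<BACKUP_ID>/
         backup.control            BACKUP_CONTROL_FILE, backup metadata
         backup_content.control    DATABASE_FILE_LIST, list of backed-up files
         database/                 DATABASE_DIR, the data files themselves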
+#endif*/ + +#ifndef STDIN_FILENO +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#endif + +/* stdio buffer size */ +#define STDIO_BUFSIZE 65536 + +#define ERRMSG_MAX_LEN 2048 +#define CHUNK_SIZE (128 * 1024) +#define LARGE_CHUNK_SIZE (4 * 1024 * 1024) +#define OUT_BUF_SIZE (512 * 1024) + +/* retry attempts */ +#define PAGE_READ_ATTEMPTS 300 + +/* max size of note, that can be added to backup */ +#define MAX_NOTE_SIZE 1024 + +/* Check if an XLogRecPtr value is pointed to 0 offset */ +#define XRecOffIsNull(xlrp) \ + ((xlrp) % XLOG_BLCKSZ == 0) + +typedef struct RedoParams +{ + TimeLineID tli; + XLogRecPtr lsn; + uint32 checksum_version; +} RedoParams; + +typedef struct PageState +{ + uint16 checksum; + XLogRecPtr lsn; +} PageState; + +typedef struct db_map_entry +{ + Oid dbOid; + char *datname; +} db_map_entry; + +typedef enum IncrRestoreMode +{ + INCR_NONE, + INCR_CHECKSUM, + INCR_LSN +} IncrRestoreMode; + +typedef enum PartialRestoreType +{ + NONE, + INCLUDE, + EXCLUDE, +} PartialRestoreType; + +typedef enum CompressAlg +{ + NOT_DEFINED_COMPRESS = 0, + NONE_COMPRESS, + PGLZ_COMPRESS, + ZLIB_COMPRESS, +} CompressAlg; + +typedef enum ForkName +{ + vm, + fsm, + cfm, + init, + ptrack +} ForkName; + +#define INIT_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + INIT_CRC32C(crc); \ + else \ + INIT_TRADITIONAL_CRC32(crc); \ +} while (0) +#define COMP_FILE_CRC32(use_crc32c, crc, data, len) \ +do { \ + if (use_crc32c) \ + COMP_CRC32C((crc), (data), (len)); \ + else \ + COMP_TRADITIONAL_CRC32(crc, data, len); \ +} while (0) +#define FIN_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + FIN_CRC32C(crc); \ + else \ + FIN_TRADITIONAL_CRC32(crc); \ +} while (0) + + +/* Information about single file (or dir) in backup */ +typedef struct pgFile_t +{ + char *name; /* file or directory name */ + mode_t mode; /* protection (file type and permission) */ + size_t size; /* size of the file */ + time_t mtime; /* file st_mtime attribute, can be used only + during backup */ + size_t read_size; /* size of the portion read (if only some pages are + backed up, it's different from size) */ + int64 write_size; /* size of the backed-up file. BYTES_INVALID means + that the file existed but was not backed up + because not modified since last backup. */ + size_t uncompressed_size; /* size of the backed-up file before compression + * and adding block headers. + */ + /* we need int64 here to store '-1' value */ + pg_crc32 crc; /* CRC value of the file, regular file only */ + char *rel_path; /* relative path of the file */ + char *linked; /* path of the linked file */ + bool is_datafile; /* true if the file is PostgreSQL data file */ + Oid tblspcOid; /* tblspcOid extracted from path, if applicable */ + Oid dbOid; /* dbOid extracted from path, if applicable */ + Oid relOid; /* relOid extracted from path, if applicable */ + ForkName forkName; /* forkName extracted from path, if applicable */ + int segno; /* Segment number for ptrack */ + int n_blocks; /* number of blocks in the data file in data directory */ + bool is_cfs; /* Flag to distinguish files compressed by CFS*/ + bool is_database; /* Flag used strictly by ptrack 1.x backup */ + int external_dir_num; /* Number of external directory. 
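                                    1-based position in the list of external directories;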
0 if not external */ + bool exists_in_prev; /* Mark files, both data and regular, that exists in previous backup */ + CompressAlg compress_alg; /* compression algorithm applied to the file */ + volatile pg_atomic_flag lock;/* lock for synchronization of parallel threads */ + datapagemap_t pagemap; /* bitmap of pages updated since previous backup + may take up to 16kB per file */ + bool pagemap_isabsent; /* Used to mark files with unknown state of pagemap, + * i.e. datafiles without _ptrack */ + /* Coordinates in header map */ + int n_headers; /* number of blocks in the data file in backup */ + pg_crc32 hdr_crc; /* CRC value of header file: name_hdr */ + off_t hdr_off; /* offset in header map */ + int hdr_size; /* offset in header map */ +} pgFile; + +typedef struct page_map_entry +{ + const char *path; /* file or directory name */ + char *pagemap; + size_t pagemapsize; +} page_map_entry; + +/* Special values of datapagemap_t bitmapsize */ +#define PageBitmapIsEmpty 0 /* Used to mark unchanged datafiles */ + +/* Current state of backup */ +typedef enum BackupStatus +{ + BACKUP_STATUS_INVALID, /* the pgBackup is invalid */ + BACKUP_STATUS_OK, /* completed backup */ + BACKUP_STATUS_ERROR, /* aborted because of unexpected error */ + BACKUP_STATUS_RUNNING, /* running backup */ + BACKUP_STATUS_MERGING, /* merging backups */ + BACKUP_STATUS_MERGED, /* backup has been successfully merged and now awaits + * the assignment of new start_time */ + BACKUP_STATUS_DELETING, /* data files are being deleted */ + BACKUP_STATUS_DELETED, /* data files have been deleted */ + BACKUP_STATUS_DONE, /* completed but not validated yet */ + BACKUP_STATUS_ORPHAN, /* backup validity is unknown but at least one parent backup is corrupted */ + BACKUP_STATUS_CORRUPT /* files are corrupted, not available */ +} BackupStatus; + +typedef enum BackupMode +{ + BACKUP_MODE_INVALID = 0, + BACKUP_MODE_DIFF_PTRACK, /* incremental page backup with ptrack system */ + BACKUP_MODE_FULL /* full backup */ +} BackupMode; + +typedef enum ShowFormat +{ + SHOW_PLAIN, + SHOW_JSON +} ShowFormat; + + +/* special values of pgBackup fields */ +#define INVALID_BACKUP_ID 0 /* backup ID is not provided by user */ +#define BYTES_INVALID (-1) /* file didn`t changed since previous backup, DELTA backup do not rely on it */ +#define FILE_NOT_FOUND (-2) /* file disappeared during backup */ +#define BLOCKNUM_INVALID (-1) +#define PROGRAM_VERSION "2.4.2" +#define AGENT_PROTOCOL_VERSION 20402 + + +typedef struct ConnectionOptions +{ + const char *pgdatabase; + const char *pghost; + const char *pgport; + const char *pguser; +} ConnectionOptions; + +typedef struct ConnectionArgs +{ + PGconn *conn; + PGcancel *cancel_conn; +} ConnectionArgs; + +/* Store values for --remote-* option for 'restore_command' constructor */ +typedef struct ArchiveOptions +{ + const char *host; + const char *port; + const char *user; +} ArchiveOptions; + +/* + * An instance configuration. It can be stored in a configuration file or passed + * from command line. 
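 *
 * On disk this is the per-instance pg_probackup.conf kept in
 * backup_instance_path, typically one "option = value" pair per line; the
 * option names come from instance_options[]. A hypothetical example (names
 * and values are placeholders, not taken from this file):
 *
 *   pgdata = /data_dir
 *   compress-algorithm = zlib
 *   compress-level = 1
 *   retention-redundancy = 2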
+ */ +typedef struct InstanceConfig +{ + char *name; + char arclog_path[MAXPGPATH]; + char backup_instance_path[MAXPGPATH]; + + uint64 system_identifier; + uint32 xlog_seg_size; + + char *pgdata; + char *external_dir_str; + + ConnectionOptions conn_opt; + + /* Wait timeout for WAL segment archiving */ + uint32 archive_timeout; + + /* cmdline to be used as restore_command */ + char *restore_command; + + /* Logger parameters */ + LoggerConfig logger; + + /* Remote access parameters */ + RemoteConfig remote; + + /* Retention options. 0 disables the option. */ + uint32 retention_redundancy; + uint32 retention_window; + uint32 wal_depth; + + CompressAlg compress_alg; + int compress_level; + + /* Archive description */ + ArchiveOptions archive; +} InstanceConfig; + +extern ConfigOption instance_options[]; +extern InstanceConfig instance_config; +extern time_t current_time; + +typedef struct PGNodeInfo +{ + uint32 block_size; + uint32 wal_block_size; + uint32 checksum_version; + bool is_superuser; + bool pgpro_support; + + int server_version; + char server_version_str[100]; +} PGNodeInfo; + +/* structure used for access to block header map */ +typedef struct HeaderMap +{ + char path[MAXPGPATH]; + char path_tmp[MAXPGPATH]; /* used only in merge */ + FILE *fp; /* used only for writing */ + char *buf; /* buffer */ + off_t offset; /* current position in fp */ + pthread_mutex_t mutex; + +} HeaderMap; + +typedef struct pgBackup pgBackup; + +/* Information about single backup stored in backup.conf */ +struct pgBackup +{ + BackupMode backup_mode; /* Mode - one of BACKUP_MODE_xxx above*/ + time_t backup_id; /* Identifier of the backup. + * Currently it's the same as start_time */ + BackupStatus status; /* Status - one of BACKUP_STATUS_xxx above*/ + TimeLineID tli; /* timeline of start and stop backup lsns */ + XLogRecPtr start_lsn; /* backup's starting transaction log location */ + XLogRecPtr stop_lsn; /* backup's finishing transaction log location */ + time_t start_time; /* since this moment backup has status + * BACKUP_STATUS_RUNNING */ + time_t merge_dest_backup; /* start_time of incremental backup, + * this backup is merging with. + * Only available for FULL backups + * with MERGING or MERGED statuses */ + time_t merge_time; /* the moment when merge was started or 0 */ + time_t end_time; /* the moment when backup was finished, or the moment + * when we realized that backup is broken */ + time_t recovery_time; /* Earliest moment for which you can restore + * the state of the database cluster using + * this backup */ + time_t expire_time; /* Backup expiration date */ + TransactionId recovery_xid; /* Earliest xid for which you can restore + * the state of the database cluster using + * this backup */ + /* + * Amount of raw data. For a full backup, this is the total amount of + * data while for a differential backup this is just the difference + * of data taken. + * BYTES_INVALID means nothing was backed up. + */ + int64 data_bytes; + /* Size of WAL files needed to replay on top of this + * backup to reach the consistency. + */ + int64 wal_bytes; + /* Size of data files before applying compression and block header, + * WAL files are not included. + */ + int64 uncompressed_bytes; + + /* Size of data files in PGDATA at the moment of backup. 
*/ + int64 pgdata_bytes; + + CompressAlg compress_alg; + int compress_level; + + /* Fields needed for compatibility check */ + uint32 block_size; + uint32 wal_block_size; + uint32 checksum_version; + char program_version[100]; + char server_version[100]; + + bool stream; /* Was this backup taken in stream mode? + * i.e. does it include all needed WAL files? */ + time_t parent_backup; /* Identifier of the previous backup. + * Which is basic backup for this + * incremental backup. */ + pgBackup *parent_backup_link; + char *external_dir_str; /* List of external directories, + * separated by ':' */ + char *root_dir; /* Full path for root backup directory: + backup_path/instance_name/backup_id */ + char *database_dir; /* Full path to directory with data files: + backup_path/instance_name/backup_id/database */ + parray *files; /* list of files belonging to this backup + * must be populated explicitly */ + char *note; + + pg_crc32 content_crc; + + /* map used for access to page headers */ + HeaderMap hdr_map; +}; + +/* Recovery target for restore and validate subcommands */ +typedef struct pgRecoveryTarget +{ + time_t target_time; + /* add one more field in order to avoid deparsing target_time back */ + const char *time_string; + TransactionId target_xid; + /* add one more field in order to avoid deparsing target_xid back */ + const char *xid_string; + XLogRecPtr target_lsn; + /* add one more field in order to avoid deparsing target_lsn back */ + const char *lsn_string; + TimeLineID target_tli; + bool target_inclusive; + bool inclusive_specified; + const char *target_stop; + const char *target_name; + const char *target_action; +} pgRecoveryTarget; + +/* Options needed for restore and validate commands */ +typedef struct pgRestoreParams +{ + bool force; + bool is_restore; + bool no_validate; + bool skip_external_dirs; + bool skip_block_validation; //Start using it + const char *restore_command; + + /* options for incremental restore */ + IncrRestoreMode incremental_mode; + XLogRecPtr shift_lsn; +} pgRestoreParams; + +/* Options needed for set-backup command */ +typedef struct pgSetBackupParams +{ + int64 ttl; /* amount of time backup must be pinned + * -1 - do nothing + * 0 - disable pinning + */ + time_t expire_time; /* Point in time until backup + * must be pinned. + */ + char *note; +} pgSetBackupParams; + +typedef struct +{ + PGNodeInfo *nodeInfo; + + const char *from_root; + const char *to_root; + const char *external_prefix; + + parray *files_list; + parray *prev_filelist; + parray *external_dirs; + XLogRecPtr prev_start_lsn; + + ConnectionArgs conn_arg; + int thread_num; + HeaderMap *hdr_map; + + /* + * Return value from the thread. + * 0 means there is no error, 1 - there is an error. + */ + int ret; +} backup_files_arg; + + +typedef struct timelineInfo timelineInfo; + +/* struct to collect info about timelines in WAL archive */ +struct timelineInfo { + + TimeLineID tli; /* this timeline */ + TimeLineID parent_tli; /* parent timeline. 
0 if none */ + timelineInfo *parent_link; /* link to parent timeline */ + XLogRecPtr switchpoint; /* if this timeline has a parent, then + * switchpoint contains switchpoint LSN, + * otherwise 0 */ + XLogSegNo begin_segno; /* first present segment in this timeline */ + XLogSegNo end_segno; /* last present segment in this timeline */ + size_t n_xlog_files; /* number of segments (only really existing) + * does not include lost segments */ + size_t size; /* space on disk taken by regular WAL files */ + parray *backups; /* array of pgBackup sturctures with info + * about backups belonging to this timeline */ + parray *xlog_filelist; /* array of ordinary WAL segments, '.partial' + * and '.backup' files belonging to this timeline */ + parray *lost_segments; /* array of intervals of lost segments */ + parray *keep_segments; /* array of intervals of segments used by WAL retention */ + pgBackup *closest_backup; /* link to valid backup, closest to timeline */ + pgBackup *oldest_backup; /* link to oldest backup on timeline */ + XLogRecPtr anchor_lsn; /* LSN belonging to the oldest segno to keep for 'wal-depth' */ + TimeLineID anchor_tli; /* timeline of anchor_lsn */ +}; + +typedef struct xlogInterval +{ + XLogSegNo begin_segno; + XLogSegNo end_segno; +} xlogInterval; + +typedef struct lsnInterval +{ + TimeLineID tli; + XLogRecPtr begin_lsn; + XLogRecPtr end_lsn; +} lsnInterval; + +typedef enum xlogFileType +{ + SEGMENT, + TEMP_SEGMENT, + PARTIAL_SEGMENT, + BACKUP_HISTORY_FILE +} xlogFileType; + +typedef struct xlogFile +{ + pgFile file; + XLogSegNo segno; + xlogFileType type; + bool keep; /* Used to prevent removal of WAL segments + * required by ARCHIVE backups. */ +} xlogFile; + + +/* + * When copying datafiles to backup we validate and compress them block + * by block. Thus special header is required for each data block. + */ +typedef struct BackupPageHeader +{ + BlockNumber block; /* block number */ + int32 compressed_size; +} BackupPageHeader; + +/* 4MB for 1GB file */ +typedef struct BackupPageHeader2 +{ + XLogRecPtr lsn; + int32 block; /* block number */ + int32 pos; /* position in backup file */ + uint16 checksum; +} BackupPageHeader2; + +/* Special value for compressed_size field */ +#define PageIsOk 0 +#define SkipCurrentPage -1 +#define PageIsTruncated -2 +#define PageIsCorrupted -3 /* used by checkdb */ + + +/* + * return pointer that exceeds the length of prefix from character string. + * ex. str="/xxx/yyy/zzz", prefix="/xxx/yyy", return="zzz". + * + * Deprecated. Do not use this in new code. + */ +#define GetRelativePath(str, prefix) \ + ((strlen(str) <= strlen(prefix)) ? "" : str + strlen(prefix) + 1) + +/* + * Return timeline, xlog ID and record offset from an LSN of the type + * 0/B000188, usual result from pg_stop_backup() and friends. 
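 *
 * Worked example: for "0/B000188" the sscanf() below yields xlogid = 0x0
 * and xrecoff = 0xB000188; the 64-bit LSN is ((uint64) xlogid << 32) | xrecoff.
 * With the default 16MB WAL segments that position lies in segment
 * 0xB000188 / 0x1000000 = 11, i.e. (on timeline 1) in the file
 * 00000001000000000000000B.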
+ */ +#define XLogDataFromLSN(data, xlogid, xrecoff) \ + sscanf(data, "%X/%X", xlogid, xrecoff) + +#define IsCompressedXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".gz") == 0) + +#if PG_VERSION_NUM >= 110000 +#define GetXLogSegNo(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes) +#define GetXLogRecPtr(segno, offset, wal_segsz_bytes, dest) \ + XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest) +#define GetXLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFileName(fname, tli, logSegNo, wal_segsz_bytes) +#define IsInXLogSeg(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes) +#define GetXLogSegName(fname, logSegNo, wal_segsz_bytes) \ + snprintf(fname, 20, "%08X%08X", \ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) + +#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \ + logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg + +#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) +#else +#define GetXLogSegNo(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteToSeg(xlrp, logSegNo) +#define GetXLogRecPtr(segno, offset, wal_segsz_bytes, dest) \ + XLogSegNoOffsetToRecPtr(segno, offset, dest) +#define GetXLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFileName(fname, tli, logSegNo) +#define IsInXLogSeg(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteInSeg(xlrp, logSegNo) +#define GetXLogSegName(fname, logSegNo, wal_segsz_bytes) \ + snprintf(fname, 20, "%08X%08X",\ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId)) + +#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \ + logSegNo = (uint64) log * XLogSegmentsPerXLogId + seg + +#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFromFileName(fname, tli, logSegNo) +#endif + +#define IsPartialCompressXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz.partial") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".gz.partial") == 0) + +#define IsTempXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".part") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".part") == 0) + +#define IsTempCompressXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz.part") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".gz.part") == 0) + +#define IsSshProtocol() (instance_config.remote.host && strcmp(instance_config.remote.proto, "ssh") == 0) + +/* directory options */ +extern char *backup_path; +extern char backup_instance_path[MAXPGPATH]; +extern char arclog_path[MAXPGPATH]; + +/* common options */ +extern pid_t my_pid; +extern __thread int my_thread_num; +extern int num_threads; +extern bool stream_wal; +extern bool progress; +#if PG_VERSION_NUM >= 100000 +/* In pre-10 'replication_slot' is defined in receivelog.h */ +extern char *replication_slot; +#endif +extern bool temp_slot; + +/* backup options */ +extern bool smooth_checkpoint; + +/* remote probackup options */ +extern char* remote_agent; + +extern bool exclusive_backup; + +/* delete options */ +extern bool delete_wal; +extern 
bool delete_expired; +extern bool merge_expired; +extern bool dry_run; + +/* compression options */ +extern bool compress_shortcut; + +/* other options */ +extern char *instance_name; + +/* show options */ +extern ShowFormat show_format; + +extern bool skip_block_validation; +/* current settings */ +extern pgBackup current; + +/* argv of the process */ +extern char** commands_args; + +/* in dir.c */ +/* exclude directory list for $PGDATA file listing */ +extern const char *pgdata_exclude_dir[]; + +/* in backup.c */ +extern int do_backup(time_t start_time, pgSetBackupParams *set_backup_params, + bool no_validate, bool no_sync, bool backup_logs); +extern BackupMode parse_backup_mode(const char *value); +extern const char *deparse_backup_mode(BackupMode mode); +extern void process_block_change(ForkNumber forknum, RelFileNode rnode, + BlockNumber blkno); + +/* in restore.c */ +extern int do_restore_or_validate(time_t target_backup_id, + pgRecoveryTarget *rt, + pgRestoreParams *params, + bool no_sync); +extern bool satisfy_timeline(const parray *timelines, const pgBackup *backup); +extern bool satisfy_recovery_target(const pgBackup *backup, + const pgRecoveryTarget *rt); +extern pgRecoveryTarget *parseRecoveryTargetOptions( + const char *target_time, const char *target_xid, + const char *target_inclusive, TimeLineID target_tli, const char* target_lsn, + const char *target_stop, const char *target_name, + const char *target_action); + +extern parray *get_backup_filelist(pgBackup *backup, bool strict); +extern parray *read_timeline_history(const char *arclog_path, TimeLineID targetTLI, bool strict); +extern bool tliIsPartOfHistory(const parray *timelines, TimeLineID tli); + +/* in merge.c */ +extern void do_merge(time_t backup_id); +extern void merge_backups(pgBackup *backup, pgBackup *next_backup); +extern void merge_chain(parray *parent_chain, + pgBackup *full_backup, pgBackup *dest_backup); + +extern parray *read_database_map(pgBackup *backup); + +/* in init.c */ +extern int do_init(void); +extern int do_add_instance(InstanceConfig *instance); + +/* in configure.c */ +extern void do_show_config(void); +extern void do_set_config(bool missing_ok); +extern void init_config(InstanceConfig *config, const char *instance_name); +extern InstanceConfig *readInstanceConfigFile(const char *instance_name); + +/* in show.c */ +extern int do_show(const char *instance_name, time_t requested_backup_id, bool show_archive); + +/* in delete.c */ +extern void do_delete(time_t backup_id); +extern void delete_backup_files(pgBackup *backup); +extern void do_retention(void); +extern int do_delete_instance(void); +extern void do_delete_status(InstanceConfig *instance_config, const char *status); + +/* in fetch.c */ +extern char *slurpFile(const char *datadir, + const char *path, + size_t *filesize, + bool safe, + fio_location location); +extern char *fetchFile(PGconn *conn, const char *filename, size_t *filesize); + +/* in help.c */ +extern void help_pg_probackup(void); +extern void help_command(char *command); + +/* in validate.c */ +extern void pgBackupValidate(pgBackup* backup, pgRestoreParams *params); +extern int do_validate_all(void); +extern int validate_one_page(Page page, BlockNumber absolute_blkno, + XLogRecPtr stop_lsn, PageState *page_st, + uint32 checksum_version); + +/* return codes for validate_one_page */ +/* TODO: use enum */ +#define PAGE_IS_VALID (-1) +#define PAGE_IS_NOT_FOUND (-2) +#define PAGE_IS_ZEROED (-3) +#define PAGE_HEADER_IS_INVALID (-4) +#define PAGE_CHECKSUM_MISMATCH (-5) +#define 
PAGE_LSN_FROM_FUTURE (-6) + +/* in catalog.c */ +extern pgBackup *read_backup(const char *root_dir); +extern void write_backup(pgBackup *backup, bool strict); +extern void write_backup_status(pgBackup *backup, BackupStatus status, + const char *instance_name, bool strict); +extern void write_backup_data_bytes(pgBackup *backup); +extern bool lock_backup(pgBackup *backup, bool strict); + +extern const char *pgBackupGetBackupMode(pgBackup *backup); + +extern parray *catalog_get_instance_list(void); +extern parray *catalog_get_backup_list(const char *instance_name, time_t requested_backup_id); +extern void catalog_lock_backup_list(parray *backup_list, int from_idx, + int to_idx, bool strict); +extern pgBackup *catalog_get_last_data_backup(parray *backup_list, + TimeLineID tli, + time_t current_start_time); +extern pgBackup *get_multi_timeline_parent(parray *backup_list, parray *tli_list, + TimeLineID current_tli, time_t current_start_time, + InstanceConfig *instance); +extern void timelineInfoFree(void *tliInfo); +extern parray *catalog_get_timelines(InstanceConfig *instance); +extern void do_set_backup(const char *instance_name, time_t backup_id, + pgSetBackupParams *set_backup_params); +extern void pin_backup(pgBackup *target_backup, + pgSetBackupParams *set_backup_params); +extern void add_note(pgBackup *target_backup, char *note); +extern void pgBackupWriteControl(FILE *out, pgBackup *backup); +extern void write_backup_filelist(pgBackup *backup, parray *files, + const char *root, parray *external_list, bool sync); + +extern void pgBackupGetPath(const pgBackup *backup, char *path, size_t len, + const char *subdir); +extern void pgBackupGetPath2(const pgBackup *backup, char *path, size_t len, + const char *subdir1, const char *subdir2); +extern void pgBackupGetPathInInstance(const char *instance_name, + const pgBackup *backup, char *path, size_t len, + const char *subdir1, const char *subdir2); +extern int pgBackupCreateDir(pgBackup *backup); +extern void pgNodeInit(PGNodeInfo *node); +extern void pgBackupInit(pgBackup *backup); +extern void pgBackupFree(void *backup); +extern int pgBackupCompareId(const void *f1, const void *f2); +extern int pgBackupCompareIdDesc(const void *f1, const void *f2); +extern int pgBackupCompareIdEqual(const void *l, const void *r); + +extern pgBackup* find_parent_full_backup(pgBackup *current_backup); +extern int scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup); +/* return codes for scan_parent_chain */ +#define ChainIsBroken 0 +#define ChainIsInvalid 1 +#define ChainIsOk 2 + +extern bool is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive); +extern bool is_prolific(parray *backup_list, pgBackup *target_backup); +extern int get_backup_index_number(parray *backup_list, pgBackup *backup); +extern void append_children(parray *backup_list, pgBackup *target_backup, parray *append_list); +extern bool launch_agent(void); +extern void launch_ssh(char* argv[]); +extern void wait_ssh(void); + +#define COMPRESS_ALG_DEFAULT NOT_DEFINED_COMPRESS +#define COMPRESS_LEVEL_DEFAULT 1 + +extern CompressAlg parse_compress_alg(const char *arg); +extern const char* deparse_compress_alg(int alg); + +/* in dir.c */ +extern void dir_list_file(parray *files, const char *root, bool exclude, + bool follow_symlink, bool add_root, bool backup_logs, + bool skip_hidden, int external_dir_num, fio_location location); + +extern void create_data_directories(parray *dest_files, + const char *data_dir, + const char *backup_dir, + bool extract_tablespaces, 
+ bool incremental, + fio_location location); + +extern void read_tablespace_map(parray *files, const char *backup_dir); +extern void opt_tablespace_map(ConfigOption *opt, const char *arg); +extern void opt_externaldir_map(ConfigOption *opt, const char *arg); +extern void check_tablespace_mapping(pgBackup *backup, bool incremental, bool *tblspaces_are_empty); +extern void check_external_dir_mapping(pgBackup *backup, bool incremental); +extern char *get_external_remap(char *current_dir); + +extern void print_database_map(FILE *out, parray *database_list); +extern void write_database_map(pgBackup *backup, parray *database_list, + parray *backup_file_list); +extern void db_map_entry_free(void *map); + +extern void print_file_list(FILE *out, const parray *files, const char *root, + const char *external_prefix, parray *external_list); +extern parray *dir_read_file_list(const char *root, const char *external_prefix, + const char *file_txt, fio_location location, pg_crc32 expected_crc); +extern parray *make_external_directory_list(const char *colon_separated_dirs, + bool remap); +extern void free_dir_list(parray *list); +extern void makeExternalDirPathByNum(char *ret_path, const char *pattern_path, + const int dir_num); +extern bool backup_contains_external(const char *dir, parray *dirs_list); + +extern int dir_create_dir(const char *path, mode_t mode); +extern bool dir_is_empty(const char *path, fio_location location); + +extern bool fileExists(const char *path, fio_location location); +extern size_t pgFileSize(const char *path); + +extern pgFile *pgFileNew(const char *path, const char *rel_path, + bool follow_symlink, int external_dir_num, + fio_location location); +extern pgFile *pgFileInit(const char *rel_path); +extern void pgFileDelete(mode_t mode, const char *full_path); +extern void fio_pgFileDelete(pgFile *file, const char *full_path); + +extern void pgFileFree(void *file); + +extern pg_crc32 pgFileGetCRC(const char *file_path, bool missing_ok, bool use_crc32c); +extern pg_crc32 pgFileGetCRCgz(const char *file_path, bool missing_ok, bool use_crc32c); + +extern int pgFileMapComparePath(const void *f1, const void *f2); +extern int pgFileCompareName(const void *f1, const void *f2); +extern int pgFileCompareRelPathWithExternal(const void *f1, const void *f2); +extern int pgFileCompareRelPathWithExternalDesc(const void *f1, const void *f2); +extern int pgFileCompareLinked(const void *f1, const void *f2); +extern int pgFileCompareSize(const void *f1, const void *f2); +extern int pgCompareOid(const void *f1, const void *f2); + +/* in data.c */ +extern bool check_data_file(ConnectionArgs *arguments, pgFile *file, + const char *from_fullpath, uint32 checksum_version); + +extern void backup_data_file(ConnectionArgs* conn_arg, pgFile *file, + const char *from_fullpath, const char *to_fullpath, + XLogRecPtr prev_backup_start_lsn, BackupMode backup_mode, + CompressAlg calg, int clevel, uint32 checksum_version, + HeaderMap *hdr_map, bool missing_ok); +extern void backup_non_data_file(pgFile *file, pgFile *prev_file, + const char *from_fullpath, const char *to_fullpath, + BackupMode backup_mode, time_t parent_backup_time, + bool missing_ok); +extern void backup_non_data_file_internal(const char *from_fullpath, + fio_location from_location, + const char *to_fullpath, pgFile *file, + bool missing_ok); + +extern size_t restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, + const char *to_fullpath, bool use_bitmap, PageState *checksum_map, + XLogRecPtr shift_lsn, datapagemap_t *lsn_map, 
bool use_headers); +extern size_t restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint32 backup_version, + const char *from_fullpath, const char *to_fullpath, int nblocks, + datapagemap_t *map, PageState *checksum_map, int checksum_version, + datapagemap_t *lsn_map, BackupPageHeader2 *headers); +extern size_t restore_non_data_file(parray *parent_chain, pgBackup *dest_backup, + pgFile *dest_file, FILE *out, const char *to_fullpath, + bool already_exists); +extern void restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file, + const char *from_fullpath, const char *to_fullpath); +extern bool create_empty_file(fio_location from_location, const char *to_root, + fio_location to_location, pgFile *file); + +extern PageState *get_checksum_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr dest_stop_lsn, BlockNumber segmentno); +extern datapagemap_t *get_lsn_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr shift_lsn, BlockNumber segmentno); +extern pid_t check_postmaster(const char *pgdata); + +extern bool validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version, HeaderMap *hdr_map); + +extern BackupPageHeader2* get_data_file_headers(HeaderMap *hdr_map, pgFile *file, uint32 backup_version, bool strict); +extern void write_page_headers(BackupPageHeader2 *headers, pgFile *file, HeaderMap *hdr_map, bool is_merge); +extern void init_header_map(pgBackup *backup); +extern void cleanup_header_map(HeaderMap *hdr_map); +/* parsexlog.c */ +extern bool extractPageMap(const char *archivedir, uint32 wal_seg_size, + XLogRecPtr startpoint, TimeLineID start_tli, + XLogRecPtr endpoint, TimeLineID end_tli, + parray *tli_list); +extern void validate_wal(pgBackup *backup, const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, TimeLineID tli, + uint32 seg_size); +extern bool validate_wal_segment(TimeLineID tli, XLogSegNo segno, + const char *prefetch_dir, uint32 wal_seg_size); +extern bool read_recovery_info(const char *archivedir, TimeLineID tli, + uint32 seg_size, + XLogRecPtr start_lsn, XLogRecPtr stop_lsn, + time_t *recovery_time); +extern bool wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, + TimeLineID target_tli, uint32 seg_size); +extern XLogRecPtr get_prior_record_lsn(const char *archivedir, XLogRecPtr start_lsn, + XLogRecPtr stop_lsn, TimeLineID tli, + bool seek_prev_segment, uint32 seg_size); + +extern XLogRecPtr get_first_record_lsn(const char *archivedir, XLogRecPtr start_lsn, + TimeLineID tli, uint32 wal_seg_size, int timeout); +extern XLogRecPtr get_next_record_lsn(const char *archivedir, XLogSegNo segno, TimeLineID tli, + uint32 wal_seg_size, int timeout, XLogRecPtr target); + +/* in util.c */ +extern TimeLineID get_current_timeline(PGconn *conn); +extern TimeLineID get_current_timeline_from_control(bool safe); +extern XLogRecPtr get_checkpoint_location(PGconn *conn); +extern uint64 get_system_identifier(const char *pgdata_path); +extern uint64 get_remote_system_identifier(PGconn *conn); +extern uint32 get_data_checksum_version(bool safe); +extern pg_crc32c get_pgcontrol_checksum(const char *pgdata_path); +extern uint32 get_xlog_seg_size(char *pgdata_path); +extern void get_redo(const char *pgdata_path, RedoParams *redo); +extern void set_min_recovery_point(pgFile *file, const char *backup_path, + XLogRecPtr stop_backup_lsn); +extern void copy_pgcontrol_file(const char *from_fullpath, fio_location 
from_location, + const char *to_fullpath, fio_location to_location, pgFile *file); + +extern void time2iso(char *buf, size_t len, time_t time); +extern const char *status2str(BackupStatus status); +extern BackupStatus str2status(const char *status); +extern const char *base36enc(long unsigned int value); +extern char *base36enc_dup(long unsigned int value); +extern long unsigned int base36dec(const char *text); +extern uint32 parse_server_version(const char *server_version_str); +extern uint32 parse_program_version(const char *program_version); +extern bool parse_page(Page page, XLogRecPtr *lsn); +extern int32 do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, + CompressAlg alg, int level, const char **errormsg); +extern int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, + CompressAlg alg, const char **errormsg); + +extern void pretty_size(int64 size, char *buf, size_t len); +extern void pretty_time_interval(double time, char *buf, size_t len); + +extern PGconn *pgdata_basic_setup(ConnectionOptions conn_opt, PGNodeInfo *nodeInfo); +extern void check_system_identifiers(PGconn *conn, char *pgdata); +extern void parse_filelist_filenames(parray *files, const char *root); + +/* in ptrack.c */ +extern void make_pagemap_from_ptrack(parray* files, + PGconn* backup_conn, + XLogRecPtr lsn); +extern XLogRecPtr get_last_ptrack_lsn(PGconn *backup_conn, PGNodeInfo *nodeInfo); +extern parray * pg_ptrack_get_pagemapset(PGconn *backup_conn, XLogRecPtr lsn); + +/* open local file to writing */ +extern FILE* open_local_file_rw(const char *to_fullpath, char **out_buf, uint32 buf_size); + +extern int send_pages(ConnectionArgs* conn_arg, const char *to_fullpath, const char *from_fullpath, + pgFile *file, XLogRecPtr prev_backup_start_lsn, CompressAlg calg, int clevel, + uint32 checksum_version, bool use_pagemap, BackupPageHeader2 **headers, + BackupMode backup_mode); + +/* FIO */ +extern void fio_delete(mode_t mode, const char *fullpath, fio_location location); +extern int fio_send_pages(const char *to_fullpath, const char *from_fullpath, pgFile *file, + XLogRecPtr horizonLsn, int calg, int clevel, uint32 checksum_version, + bool use_pagemap, BlockNumber *err_blknum, char **errormsg, + BackupPageHeader2 **headers); +/* return codes for fio_send_pages */ +extern int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, char **errormsg); +extern int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, + pgFile *file, char **errormsg); + +extern void fio_list_dir(parray *files, const char *root, bool exclude, bool follow_symlink, + bool add_root, bool backup_logs, bool skip_hidden, int external_dir_num); + +extern bool pgut_rmtree(const char *path, bool rmtopdir, bool strict); + +extern PageState *fio_get_checksum_map(const char *fullpath, uint32 checksum_version, int n_blocks, + XLogRecPtr dest_stop_lsn, BlockNumber segmentno, fio_location location); + +extern datapagemap_t *fio_get_lsn_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr horizonLsn, BlockNumber segmentno, + fio_location location); +extern pid_t fio_check_postmaster(const char *pgdata, fio_location location); + +extern int32 fio_decompress(void* dst, void const* src, size_t size, int compress_alg); + +/* return codes for fio_send_pages() and fio_send_file() */ +#define SEND_OK (0) +#define FILE_MISSING (-1) +#define OPEN_FAILED (-2) +#define READ_FAILED (-3) +#define WRITE_FAILED (-4) +#define ZLIB_ERROR (-5) +#define 
REMOTE_ERROR (-6) +#define PAGE_CORRUPTION (-8) + +/* Check if specified location is local for current node */ +extern bool fio_is_remote(fio_location location); +extern bool fio_is_remote_simple(fio_location location); + +extern void get_header_errormsg(Page page, char **errormsg); +extern void get_checksum_errormsg(Page page, char **errormsg, + BlockNumber absolute_blkno); + +extern bool +datapagemap_is_set(datapagemap_t *map, BlockNumber blkno); + +extern void +datapagemap_print_debug(datapagemap_t *map); + +void *gs_palloc0(Size size); +char *gs_pstrdup(const char *in); +void *gs_repalloc(void *pointer, Size size); + +#endif /* PG_PROBACKUP_H */ diff --git a/src/bin/pg_probackup/pgut.cpp b/src/bin/pg_probackup/pgut.cpp new file mode 100644 index 000000000..3cfbdc842 --- /dev/null +++ b/src/bin/pg_probackup/pgut.cpp @@ -0,0 +1,1546 @@ +/*------------------------------------------------------------------------- + * + * pgut.c + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" +#include "postgres_fe.h" + +#include "getopt_long.h" +#include "libpq/libpq-fe.h" +#include "libpq/pqsignal.h" +#include "libpq/pqexpbuffer.h" +#include "common/fe_memutils.h" + +#include + +#include "pgut.h" +#include "logger.h" +#include "file.h" + + +static char *password = NULL; +bool prompt_password = true; +bool force_password = false; + +/* Database connections */ +static PGcancel *volatile cancel_conn = NULL; + +/* Interrupted by SIGINT (Ctrl+C) ? */ +bool interrupted = false; +bool in_cleanup = false; +bool in_password = false; + +/* Connection routines */ +static void init_cancel_handler(void); +static void on_before_exec(PGconn *conn, PGcancel *thread_cancel_conn); +static void on_after_exec(PGcancel *thread_cancel_conn); +static void on_interrupt(void); +static void on_cleanup(void); +static pqsigfunc oldhandler = NULL; + +static char ** pgut_pgfnames(const char *path, bool strict); +static void pgut_pgfnames_cleanup(char **filenames); + +void discard_response(PGconn *conn); + +#define DefaultHost "localhost" +#define DefaultTty "" +#define DefaultOption "" +#define DefaultAuthtype "" +#define DefaultTargetSessionAttrs "any" +#ifdef USE_SSL +#define DefaultSSLMode "prefer" +#else +#define DefaultSSLMode "disable" +#endif +#ifdef ENABLE_GSS +#define DefaultGSSMode "prefer" +#else +#define DefaultGSSMode "disable" +#endif + +typedef struct _internalPQconninfoOption +{ + char *keyword; /* The keyword of the option */ + char *envvar; /* Fallback environment variable name */ + char *compiled; /* Fallback compiled in default value */ + char *val; /* Option's current value, or NULL */ + char *label; /* Label for field in connect dialog */ + char *dispchar; /* Indicates how to display this field in a */ + int dispsize; /* Field size in characters for dialog */ + off_t connofs; /* Offset into PGconn struct, -1 if not there */ +} internalPQconninfoOption; + +static const internalPQconninfoOption PQconninfoOptions[] = { + /* + * * "authtype" is no longer used, so mark it "don't show". We keep it in + * * the array so as not to reject conninfo strings from old apps that might + * * still try to set it. 
+ * */ + {(char *)"authtype", (char *)"PGAUTHTYPE", (char *)DefaultAuthtype, NULL, + (char *)"Database-Authtype", (char *)"D", 20, -1}, + + {(char *)"service", (char *)"PGSERVICE", NULL, NULL, + (char *)"Database-Service", (char *)"", 20, -1}, + + {(char *)"user", (char *)"PGUSER", NULL, NULL, + (char *)"Database-User", (char *)"", 20, + offsetof(struct pg_conn, pguser)}, + + {(char *)"password", (char *)"PGPASSWORD", NULL, NULL, + (char *)"Database-Password", (char *)"*", 20, + offsetof(struct pg_conn, pgpass)}, + + + {(char *)"connect_timeout", (char *)"PGCONNECT_TIMEOUT", NULL, NULL, + (char *)"Connect-timeout", (char *)"", 10, /* strlen(INT32_MAX) == 10 */ + offsetof(struct pg_conn, connect_timeout)}, + + {(char *)"dbname", (char *)"PGDATABASE", NULL, NULL, + (char *)"Database-Name", (char *)"", 20, + offsetof(struct pg_conn, dbName)}, + + {(char *)"host", (char *)"PGHOST", NULL, NULL, + (char *)"Database-Host", (char *)"", 40, + offsetof(struct pg_conn, pghost)}, + + {(char *)"hostaddr", (char *)"PGHOSTADDR", NULL, NULL, + (char *)"Database-Host-IP-Address", (char *)"", 45, + offsetof(struct pg_conn, pghostaddr)}, + + {(char *)"port", (char *)"PGPORT", (char *)DEF_PGPORT_STR, NULL, + (char *)"Database-Port", (char *)"", 6, + offsetof(struct pg_conn, pgport)}, + + {(char *)"client_encoding", (char *)"PGCLIENTENCODING", NULL, NULL, + (char *)"Client-Encoding", (char *)"", 10, + offsetof(struct pg_conn, client_encoding_initial)}, + + /* + * * "tty" is no longer used either, but keep it present for backwards + * * compatibility. + * */ + {(char *)"tty", (char *)"PGTTY", (char *)DefaultTty, NULL, + (char *)"Backend-Debug-TTY", (char *)"D", 40, + offsetof(struct pg_conn, pgtty)}, + + {(char *)"options", (char *)"PGOPTIONS", (char *)DefaultOption, NULL, + (char *)"Backend-Options", (char *)"", 40, + offsetof(struct pg_conn, pgoptions)}, + + {(char *)"application_name", (char *)"PGAPPNAME", NULL, NULL, + (char *)"Application-Name", (char *)"", 64, + offsetof(struct pg_conn, appname)}, + + {(char *)"fallback_application_name", NULL, NULL, NULL, + (char *)"Fallback-Application-Name", (char *)"", 64, + offsetof(struct pg_conn, fbappname)}, + + {(char *)"keepalives", NULL, NULL, NULL, + (char *)"TCP-Keepalives", (char *)"", 1, /* should be just '0' or '1' */ + offsetof(struct pg_conn, keepalives)}, + + {(char *)"keepalives_idle", NULL, NULL, NULL, + (char *)"TCP-Keepalives-Idle", (char *)"", 10, /* strlen(INT32_MAX) == 10 */ + offsetof(struct pg_conn, keepalives_idle)}, + + {(char *)"keepalives_interval", NULL, NULL, NULL, + (char *)"TCP-Keepalives-Interval", (char *)"", 10, /* strlen(INT32_MAX) == 10 */ + offsetof(struct pg_conn, keepalives_interval)}, + + {(char *)"keepalives_count", NULL, NULL, NULL, + (char *)"TCP-Keepalives-Count", (char *)"", 10, /* strlen(INT32_MAX) == 10 */ + offsetof(struct pg_conn, keepalives_count)}, + + /* + * * ssl options are allowed even without client SSL support because the + * * client can still handle SSL modes "disable" and "allow". Other + * * parameters have no effect on non-SSL connections, so there is no reason + * * to exclude them since none of them are mandatory. 
+ * */ + {(char *)"sslmode", (char *)"PGSSLMODE", (char *)DefaultSSLMode, NULL, + (char *)"SSL-Mode", (char *)"", 12, /* sizeof("verify-full") == 12 */ + offsetof(struct pg_conn, sslmode)}, + + {(char *)"sslcompression", (char *)"PGSSLCOMPRESSION", (char *)"0", NULL, + (char *)"SSL-Compression", (char *)"", 1, + offsetof(struct pg_conn, sslcompression)}, + + {(char *)"sslcert", (char *)"PGSSLCERT", NULL, NULL, + (char *)"SSL-Client-Cert", (char *)"", 64, + offsetof(struct pg_conn, sslcert)}, + + {(char *)"sslkey", (char *)"PGSSLKEY", NULL, NULL, + (char *)"SSL-Client-Key", (char *)"", 64, + offsetof(struct pg_conn, sslkey)}, + + {(char *)"sslrootcert", (char *)"PGSSLROOTCERT", NULL, NULL, + (char *)"SSL-Root-Certificate", (char *)"", 64, + offsetof(struct pg_conn, sslrootcert)}, + + {(char *)"sslcrl", (char *)"PGSSLCRL", NULL, NULL, + (char *)"SSL-Revocation-List", (char *)"", 64, + offsetof(struct pg_conn, sslcrl)}, + + {(char *)"requirepeer", (char *)"PGREQUIREPEER", NULL, NULL, + (char *)"Require-Peer", (char *)"", 10, + offsetof(struct pg_conn, requirepeer)}, + + /* + * * As with SSL, all GSS options are exposed even in builds that don't have + * * support. + * */ + + /* Kerberos and GSSAPI authentication support specifying the service name */ + {(char *)"krbsrvname", (char *)"PGKRBSRVNAME", (char *)PG_KRB_SRVNAM, NULL, + (char *)"Kerberos-service-name", (char *)"", 20, + offsetof(struct pg_conn, krbsrvname)}, + + + {(char *)"replication", NULL, NULL, NULL, + (char *)"Replication", (char *)"D", 5, + offsetof(struct pg_conn, replication)}, + + + /* Terminating entry --- MUST BE LAST */ + {NULL, NULL, NULL, NULL, + NULL, NULL, 0} +}; + + +/* + * * Build a working copy of the constant PQconninfoOptions array. + * */ +static PQconninfoOption * +conninfo_init(PQExpBuffer errorMessage) +{ + PQconninfoOption *options; + PQconninfoOption *opt_dest; + const internalPQconninfoOption *cur_opt; + + /* + * * Get enough memory for all options in PQconninfoOptions, even if some + * * end up being filtered out. + * */ + options = (PQconninfoOption *) malloc(sizeof(PQconninfoOption) * sizeof(PQconninfoOptions) / sizeof(PQconninfoOptions[0])); + if (options == NULL) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("out of memory\n")); + return NULL; + } + opt_dest = options; + + for (cur_opt = PQconninfoOptions; cur_opt->keyword; cur_opt++) + { + /* Only copy the public part of the struct, not the full internal */ + memcpy(opt_dest, cur_opt, sizeof(PQconninfoOption)); + opt_dest++; + } + MemSet(opt_dest, 0, sizeof(PQconninfoOption)); + + return options; +} + +static bool +get_hexdigit(char digit, int *value) +{ + if ('0' <= digit && digit <= '9') + *value = digit - '0'; + else if ('A' <= digit && digit <= 'F') + *value = digit - 'A' + 10; + else if ('a' <= digit && digit <= 'f') + *value = digit - 'a' + 10; + else + return false; + + return true; +} + + +static char * +conninfo_uri_decode(const char *str, PQExpBuffer errorMessage) +{ + char *buf; + char *p; + const char *q = str; + + buf = (char *)malloc(strlen(str) + 1); + if (buf == NULL) + { + printfPQExpBuffer(errorMessage, libpq_gettext("out of memory\n")); + return NULL; + } + p = buf; + + for (;;) + { + if (*q != '%') + { + /* copy and check for NUL terminator */ + if (!(*(p++) = *(q++))) + break; + } + else + { + int hi; + int lo; + int c; + + ++q; /* skip the percent sign itself */ + + /* + * * Possible EOL will be caught by the first call to + * * get_hexdigit(), so we never dereference an invalid q pointer. 
+ * */ + if (!(get_hexdigit(*q++, &hi) && get_hexdigit(*q++, &lo))) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("invalid percent-encoded token: \"%s\"\n"), + str); + free(buf); + return NULL; + } + + c = (hi << 4) | lo; + if (c == 0) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("forbidden value %%00 in percent-encoded value: \"%s\"\n"), + str); + free(buf); + return NULL; + } + *(p++) = c; + } + } + + return buf; +} + + +static PQconninfoOption * +conninfo_find(PQconninfoOption *connOptions, const char *keyword) +{ + PQconninfoOption *option; + + for (option = connOptions; option->keyword != NULL; option++) + { + if (strcmp(option->keyword, keyword) == 0) + return option; + } + + return NULL; +} + + +static PQconninfoOption * +conninfo_storeval(PQconninfoOption *connOptions, + const char *keyword, const char *value, + PQExpBuffer errorMessage, bool ignoreMissing, + bool uri_decode) +{ + PQconninfoOption *option; + char *value_copy; + + /* + * * For backwards compatibility, requiressl=1 gets translated to + * * sslmode=require, and requiressl=0 gets translated to sslmode=prefer + * * (which is the default for sslmode). + * */ + if (strcmp(keyword, "requiressl") == 0) + { + keyword = "sslmode"; + if (value[0] == '1') + value = "require"; + else + value = "prefer"; + } + + option = conninfo_find(connOptions, keyword); + if (option == NULL) + { + if (!ignoreMissing) + printfPQExpBuffer(errorMessage, + libpq_gettext("invalid connection option \"%s\"\n"), + keyword); + return NULL; + } + + if (uri_decode) + { + value_copy = conninfo_uri_decode(value, errorMessage); + if (value_copy == NULL) + /* conninfo_uri_decode already set an error message */ + return NULL; + } + else + { + value_copy = strdup(value); + if (value_copy == NULL) + { + printfPQExpBuffer(errorMessage, libpq_gettext("out of memory\n")); + return NULL; + } + } + + if (option->val) + free(option->val); + option->val = value_copy; + + return option; +} + + +/* + * * Return the connection options used for the connection + * */ +PQconninfoOption * +PQconninfo(PGconn *conn) +{ + PQExpBufferData errorBuf; + PQconninfoOption *connOptions; + + if (conn == NULL) + return NULL; + + /* We don't actually report any errors here, but callees want a buffer */ + initPQExpBuffer(&errorBuf); + if (PQExpBufferDataBroken(errorBuf)) + return NULL; /* out of memory already :-( */ + + connOptions = conninfo_init(&errorBuf); + + if (connOptions != NULL) + { + const internalPQconninfoOption *option; + + for (option = PQconninfoOptions; option->keyword; option++) + { + char **connmember; + + if (option->connofs < 0) + continue; + + connmember = (char **) ((char *) conn + option->connofs); + + if (*connmember) + conninfo_storeval(connOptions, option->keyword, *connmember, + &errorBuf, true, false); + } + } + + termPQExpBuffer(&errorBuf); + + return connOptions; +} + +void +pgut_init(void) +{ + init_cancel_handler(); + atexit(on_cleanup); +} + +/* + * Ask the user for a password; 'username' is the username the + * password is for, if one has been explicitly specified. + * Set malloc'd string to the global variable 'password'. 
+ */ +static void +prompt_for_password(const char *username) +{ + in_password = true; + + if (password) + { + free(password); + password = NULL; + } + +#if PG_VERSION_NUM >= 100000 + password = (char *) pgut_malloc(sizeof(char) * 100 + 1); + if (username == NULL) + simple_prompt("Password: ", password, 100, false); + else + { + char message[256]; + snprintf(message, lengthof(message), "Password for user %s: ", username); + simple_prompt(message, password, 100, false); + } +#else + if (username == NULL) + password = simple_prompt("Password: ", 100, false); + else + { + char message[256]; + snprintf(message, lengthof(message), "Password for user %s: ", username); + password = simple_prompt(message, 100, false); + } +#endif + + in_password = false; +} + +/* + * Copied from pg_basebackup.c + * Escape a parameter value so that it can be used as part of a libpq + * connection string, e.g. in: + * + * application_name= + * + * The returned string is malloc'd. Return NULL on out-of-memory. + */ +static char * +escapeConnectionParameter(const char *src) +{ + bool need_quotes = false; + bool need_escaping = false; + const char *p; + char *dstbuf; + char *dst; + + /* + * First check if quoting is needed. Any quote (') or backslash (\) + * characters need to be escaped. Parameters are separated by whitespace, + * so any string containing whitespace characters need to be quoted. An + * empty string is represented by ''. + */ + if (strchr(src, '\'') != NULL || strchr(src, '\\') != NULL) + need_escaping = true; + + for (p = src; *p; p++) + { + if (isspace((unsigned char) *p)) + { + need_quotes = true; + break; + } + } + + if (*src == '\0') + return pg_strdup("''"); + + if (!need_quotes && !need_escaping) + return pg_strdup(src); /* no quoting or escaping needed */ + + /* + * Allocate a buffer large enough for the worst case that all the source + * characters need to be escaped, plus quotes. + */ + dstbuf = (char *)pg_malloc(strlen(src) * 2 + 2 + 1); + + dst = dstbuf; + if (need_quotes) + *(dst++) = '\''; + for (; *src; src++) + { + if (*src == '\'' || *src == '\\') + *(dst++) = '\\'; + *(dst++) = *src; + } + if (need_quotes) + *(dst++) = '\''; + *dst = '\0'; + + return dstbuf; +} + +/* TODO: it is better to use PQconnectdbParams like in psql + * It will allow to set application_name for pg_probackup + */ +PGconn * +pgut_connect(const char *host, const char *port, + const char *dbname, const char *username) +{ + PGconn *conn; + + if (interrupted && !in_cleanup) + elog(ERROR, "interrupted"); + + if (force_password && !prompt_password) + elog(ERROR, "You cannot specify --password and --no-password options together"); + + if (!password && force_password) + prompt_for_password(username); + + /* Start the connection. Loop until we have a password if requested by backend. 
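Note on the TODO above (hypothetical sketch, not part of the patch): a PQconnectdbParams()-based variant would let the tool set application_name for its sessions. Assuming the same host/port/dbname/username variables as in pgut_connect():

    const char *keywords[] = {"host", "port", "dbname", "user",
                              "application_name", NULL};
    const char *values[]   = {host, port, dbname, username,
                              "pg_probackup", NULL};

    PGconn *conn = PQconnectdbParams(keywords, values, 0 /* expand_dbname */);
    if (PQstatus(conn) != CONNECTION_OK)
        elog(ERROR, "could not connect to database %s: %s",
             dbname, PQerrorMessage(conn));

The password-retry loop below would stay the same; only the connection call changes.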
*/ + for (;;) + { + conn = PQsetdbLogin(host, port, NULL, NULL, + dbname, username, password); + + if (PQstatus(conn) == CONNECTION_OK) + { + pgut_atexit_push(pgut_disconnect_callback, conn); + return conn; + } + + if (conn && PQconnectionNeedsPassword(conn) && prompt_password) + { + PQfinish(conn); + prompt_for_password(username); + + if (interrupted) + elog(ERROR, "interrupted"); + + if (password == NULL || password[0] == '\0') + elog(ERROR, "no password supplied"); + + continue; + } + elog(ERROR, "could not connect to database %s: %s", + dbname, PQerrorMessage(conn)); + + PQfinish(conn); + return NULL; + } +} + +PGconn * +pgut_connect_replication(const char *host, const char *port, + const char *dbname, const char *username) +{ + PGconn *tmpconn; + int argcount = 7; /* dbname, replication, fallback_app_name, + * host, user, port, password */ + int i; + const char **keywords; + const char **values; + + if (interrupted && !in_cleanup) + elog(ERROR, "interrupted"); + + if (force_password && !prompt_password) + elog(ERROR, "You cannot specify --password and --no-password options together"); + + if (!password && force_password) + prompt_for_password(username); + + i = 0; + + keywords = (const char**)pg_malloc0((argcount + 1) * sizeof(*keywords)); + values = (const char**)pg_malloc0((argcount + 1) * sizeof(*values)); + + + keywords[i] = "dbname"; + values[i] = "replication"; + i++; + keywords[i] = "replication"; + values[i] = "true"; + i++; + keywords[i] = "fallback_application_name"; + values[i] = PROGRAM_NAME; + i++; + + if (host) + { + keywords[i] = "host"; + values[i] = host; + i++; + } + if (username) + { + keywords[i] = "user"; + values[i] = username; + i++; + } + if (port) + { + keywords[i] = "port"; + values[i] = port; + i++; + } + + /* Use (or reuse, on a subsequent connection) password if we have it */ + if (password) + { + keywords[i] = "password"; + values[i] = password; + } + else + { + keywords[i] = NULL; + values[i] = NULL; + } + + for (;;) + { + tmpconn = PQconnectdbParams(keywords, values, true); + + + if (PQstatus(tmpconn) == CONNECTION_OK) + { + free(values); + free(keywords); + return tmpconn; + } + + if (tmpconn && PQconnectionNeedsPassword(tmpconn) && prompt_password) + { + PQfinish(tmpconn); + prompt_for_password(username); + keywords[i] = "password"; + values[i] = password; + continue; + } + + elog(ERROR, "could not connect to database %s: %s", + dbname, PQerrorMessage(tmpconn)); + PQfinish(tmpconn); + free(values); + free(keywords); + return NULL; + } +} + + +void +pgut_disconnect(PGconn *conn) +{ + if (conn) + PQfinish(conn); + pgut_atexit_pop(pgut_disconnect_callback, conn); +} + + +PGresult * +pgut_execute_parallel(PGconn* conn, + PGcancel* thread_cancel_conn, const char *query, + int nParams, const char **params, + bool text_result, bool ok_error, bool async) +{ + PGresult *res; + + if (interrupted && !in_cleanup) + elog(ERROR, "interrupted"); + + /* write query to elog if verbose */ + if (logger_config.log_level_console <= VERBOSE || + logger_config.log_level_file <= VERBOSE) + { + int i; + + if (strchr(query, '\n')) + elog(VERBOSE, "(query)\n%s", query); + else + elog(VERBOSE, "(query) %s", query); + for (i = 0; i < nParams; i++) + elog(VERBOSE, "\t(param:%d) = %s", i, params[i] ? 
params[i] : "(null)"); + } + + if (conn == NULL) + { + elog(ERROR, "not connected"); + return NULL; + } + + //on_before_exec(conn, thread_cancel_conn); + if (async) + { + /* clean any old data */ + discard_response(conn); + + if (nParams == 0) + PQsendQuery(conn, query); + else + PQsendQueryParams(conn, query, nParams, NULL, params, NULL, NULL, + /* + * Specify zero to obtain results in text format, + * or one to obtain results in binary format. + */ + (text_result) ? 0 : 1); + + /* wait for processing, TODO: timeout */ + for (;;) + { + if (interrupted) + { + pgut_cancel(conn); + pgut_disconnect(conn); + elog(ERROR, "interrupted"); + } + + if (!PQconsumeInput(conn)) + elog(ERROR, "query failed: %s query was: %s", + PQerrorMessage(conn), query); + + /* query is no done */ + if (!PQisBusy(conn)) + break; + + usleep(10000); + } + + res = PQgetResult(conn); + } + else + { + if (nParams == 0) + res = PQexec(conn, query); + else + res = PQexecParams(conn, query, nParams, NULL, params, NULL, NULL, + /* + * Specify zero to obtain results in text format, + * or one to obtain results in binary format. + */ + (text_result) ? 0 : 1); + } + //on_after_exec(thread_cancel_conn); + + switch (PQresultStatus(res)) + { + case PGRES_TUPLES_OK: + case PGRES_COMMAND_OK: + case PGRES_COPY_IN: + break; + default: + if (ok_error && PQresultStatus(res) == PGRES_FATAL_ERROR) + break; + + elog(ERROR, "query failed: %squery was: %s", + PQerrorMessage(conn), query); + break; + } + + return res; +} + +PGresult * +pgut_execute(PGconn* conn, const char *query, int nParams, const char **params) +{ + return pgut_execute_extended(conn, query, nParams, params, true, false); +} + +PGresult * +pgut_execute_extended(PGconn* conn, const char *query, int nParams, + const char **params, bool text_result, bool ok_error) +{ + PGresult *res; + ExecStatusType res_status; + + if (interrupted && !in_cleanup) + elog(ERROR, "interrupted"); + + /* write query to elog if verbose */ + if (logger_config.log_level_console <= VERBOSE || + logger_config.log_level_file <= VERBOSE) + { + int i; + + if (strchr(query, '\n')) + elog(VERBOSE, "(query)\n%s", query); + else + elog(VERBOSE, "(query) %s", query); + for (i = 0; i < nParams; i++) + elog(VERBOSE, "\t(param:%d) = %s", i, params[i] ? params[i] : "(null)"); + } + + if (conn == NULL) + { + elog(ERROR, "not connected"); + return NULL; + } + + on_before_exec(conn, NULL); + if (nParams == 0) + res = PQexec(conn, query); + else + res = PQexecParams(conn, query, nParams, NULL, params, NULL, NULL, + /* + * Specify zero to obtain results in text format, + * or one to obtain results in binary format. + */ + (text_result) ? 0 : 1); + on_after_exec(NULL); + + res_status = PQresultStatus(res); + switch (res_status) + { + case PGRES_TUPLES_OK: + case PGRES_COMMAND_OK: + case PGRES_COPY_IN: + break; + default: + if (ok_error && res_status == PGRES_FATAL_ERROR) + break; + + elog(ERROR, "query failed: %squery was: %s", + PQerrorMessage(conn), query); + break; + } + + return res; +} + +bool +pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int elevel) +{ + int res; + + if (interrupted && !in_cleanup) + elog(ERROR, "interrupted"); + + /* write query to elog if verbose */ + if (logger_config.log_level_console <= VERBOSE || + logger_config.log_level_file <= VERBOSE) + { + int i; + + if (strchr(query, '\n')) + elog(VERBOSE, "(query)\n%s", query); + else + elog(VERBOSE, "(query) %s", query); + for (i = 0; i < nParams; i++) + elog(VERBOSE, "\t(param:%d) = %s", i, params[i] ? 
params[i] : "(null)"); + } + + if (conn == NULL) + { + elog(elevel, "not connected"); + return false; + } + + if (nParams == 0) + res = PQsendQuery(conn, query); + else + res = PQsendQueryParams(conn, query, nParams, NULL, params, NULL, NULL, 0); + + if (res != 1) + { + elog(elevel, "query failed: %squery was: %s", + PQerrorMessage(conn), query); + return false; + } + + return true; +} + +void +pgut_cancel(PGconn* conn) +{ + PGcancel *cancel_conn = PQgetCancel(conn); + char errbuf[256]; + + if (cancel_conn != NULL) + { + if (PQcancel(cancel_conn, errbuf, sizeof(errbuf))) + elog(WARNING, "Cancel request sent"); + else + elog(WARNING, "Cancel request failed"); + } + + if (cancel_conn) + PQfreeCancel(cancel_conn); +} + +int +pgut_wait(int num, PGconn *connections[], struct timeval *timeout) +{ + /* all connections are busy. wait for finish */ + while (!interrupted) + { + int i; + fd_set mask; + int maxsock; + + FD_ZERO(&mask); + + maxsock = -1; + for (i = 0; i < num; i++) + { + int sock; + + if (connections[i] == NULL) + continue; + sock = PQsocket(connections[i]); + if (sock >= 0) + { + FD_SET(sock, &mask); + if (maxsock < sock) + maxsock = sock; + } + } + + if (maxsock == -1) + { + errno = ENOENT; + return -1; + } + + i = wait_for_sockets(maxsock + 1, &mask, timeout); + if (i == 0) + break; /* timeout */ + + for (i = 0; i < num; i++) + { + if (connections[i] && FD_ISSET(PQsocket(connections[i]), &mask)) + { + PQconsumeInput(connections[i]); + if (PQisBusy(connections[i])) + continue; + return i; + } + } + } + + errno = EINTR; + return -1; +} + +#ifdef WIN32 +static CRITICAL_SECTION cancelConnLock; +#endif + +/* + * on_before_exec + * + * Set cancel_conn to point to the current database connection. + */ +static void +on_before_exec(PGconn *conn, PGcancel *thread_cancel_conn) +{ + PGcancel *old; + + if (in_cleanup) + return; /* forbid cancel during cleanup */ + +#ifdef WIN32 + EnterCriticalSection(&cancelConnLock); +#endif + + if (thread_cancel_conn) + { + //elog(WARNING, "Handle tread_cancel_conn. on_before_exec"); + old = thread_cancel_conn; + + /* be sure handle_interrupt doesn't use pointer while freeing */ + thread_cancel_conn = NULL; + + if (old != NULL) + PQfreeCancel(old); + + thread_cancel_conn = PQgetCancel(conn); + } + else + { + /* Free the old one if we have one */ + old = cancel_conn; + + /* be sure handle_interrupt doesn't use pointer while freeing */ + cancel_conn = NULL; + + if (old != NULL) + PQfreeCancel(old); + + cancel_conn = PQgetCancel(conn); + } + +#ifdef WIN32 + LeaveCriticalSection(&cancelConnLock); +#endif +} + +/* + * on_after_exec + * + * Free the current cancel connection, if any, and set to NULL. + */ +static void +on_after_exec(PGcancel *thread_cancel_conn) +{ + PGcancel *old; + + if (in_cleanup) + return; /* forbid cancel during cleanup */ + +#ifdef WIN32 + EnterCriticalSection(&cancelConnLock); +#endif + + if (thread_cancel_conn) + { + //elog(WARNING, "Handle tread_cancel_conn. on_after_exec"); + old = thread_cancel_conn; + + /* be sure handle_interrupt doesn't use pointer while freeing */ + thread_cancel_conn = NULL; + + if (old != NULL) + PQfreeCancel(old); + } + else + { + old = cancel_conn; + + /* be sure handle_interrupt doesn't use pointer while freeing */ + cancel_conn = NULL; + + if (old != NULL) + PQfreeCancel(old); + } +#ifdef WIN32 + LeaveCriticalSection(&cancelConnLock); +#endif +} + +/* + * Handle interrupt signals by cancelling the current command. 
+ */ +static void +on_interrupt(void) +{ + int save_errno = errno; + char errbuf[256]; + + /* Set interrupted flag */ + interrupted = true; + + /* + * User prompts password, call on_cleanup() byhand. Unless we do that we will + * get stuck forever until a user enters a password. + */ + if (in_password) + { + on_cleanup(); + + pqsignal(SIGINT, oldhandler); + kill(0, SIGINT); + } + + /* Send QueryCancel if we are processing a database query */ + if (!in_cleanup && cancel_conn != NULL && + PQcancel(cancel_conn, errbuf, sizeof(errbuf))) + { + elog(WARNING, "Cancel request sent"); + } + + errno = save_errno; /* just in case the write changed it */ +} + +typedef struct pgut_atexit_item pgut_atexit_item; +struct pgut_atexit_item +{ + pgut_atexit_callback callback; + void *userdata; + pgut_atexit_item *next; +}; + +static pgut_atexit_item *pgut_atexit_stack = NULL; + +void +pgut_disconnect_callback(bool fatal, void *userdata) +{ + PGconn *conn = (PGconn *) userdata; + if (conn) + pgut_disconnect(conn); +} + +void +pgut_atexit_push(pgut_atexit_callback callback, void *userdata) +{ + pgut_atexit_item *item; + + AssertArg(callback != NULL); + + item = pgut_new(pgut_atexit_item); + item->callback = callback; + item->userdata = userdata; + item->next = pgut_atexit_stack; + + pgut_atexit_stack = item; +} + +void +pgut_atexit_pop(pgut_atexit_callback callback, void *userdata) +{ + pgut_atexit_item *item; + pgut_atexit_item **prev; + + for (item = pgut_atexit_stack, prev = &pgut_atexit_stack; + item; + prev = &item->next, item = item->next) + { + if (item->callback == callback && item->userdata == userdata) + { + *prev = item->next; + free(item); + break; + } + } +} + +static void +call_atexit_callbacks(bool fatal) +{ + pgut_atexit_item *item; + pgut_atexit_item *next; + for (item = pgut_atexit_stack; item; item = next){ + next = item->next; + item->callback(fatal, item->userdata); + } +} + +static void +on_cleanup(void) +{ + in_cleanup = true; + interrupted = false; + call_atexit_callbacks(false); +} + +void * +pgut_malloc(size_t size) +{ + char *ret; + + if ((ret = (char *)malloc(size)) == NULL) + elog(ERROR, "could not allocate memory (%lu bytes): %s", + (unsigned long) size, strerror(errno)); + return ret; +} + +void * +pgut_realloc(void *p, size_t size) +{ + char *ret; + + if ((ret = (char *)realloc(p, size)) == NULL) + elog(ERROR, "could not re-allocate memory (%lu bytes): %s", + (unsigned long) size, strerror(errno)); + return ret; +} + +char * +pgut_strdup(const char *str) +{ + char *ret; + + if (str == NULL) + return NULL; + + if ((ret = strdup(str)) == NULL) + elog(ERROR, "could not duplicate string \"%s\": %s", + str, strerror(errno)); + return ret; +} + +FILE * +pgut_fopen(const char *path, const char *mode, bool missing_ok) +{ + FILE *fp; + + if ((fp = fio_open_stream(path, FIO_BACKUP_HOST)) == NULL) + { + if (missing_ok && errno == ENOENT) + return NULL; + + elog(ERROR, "could not open file \"%s\": %s", + path, strerror(errno)); + } + + return fp; +} + +#ifdef WIN32 +static int select_win32(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout); +#define select select_win32 +#endif + +int +wait_for_socket(int sock, struct timeval *timeout) +{ + fd_set fds; + + FD_ZERO(&fds); + FD_SET(sock, &fds); + return wait_for_sockets(sock + 1, &fds, timeout); +} + +int +wait_for_sockets(int nfds, fd_set *fds, struct timeval *timeout) +{ + int i; + + for (;;) + { + i = select(nfds, fds, NULL, NULL, timeout); + if (i < 0) + { + if (interrupted) + elog(ERROR, 
"interrupted"); + else if (errno != EINTR) + elog(ERROR, "select failed: %s", strerror(errno)); + } + else + return i; + } +} + +#ifndef WIN32 +static void +handle_interrupt(SIGNAL_ARGS) +{ + on_interrupt(); +} + +/* Handle various inrerruptions in the same way */ +static void +init_cancel_handler(void) +{ + oldhandler = pqsignal(SIGINT, handle_interrupt); + pqsignal(SIGQUIT, handle_interrupt); + pqsignal(SIGTERM, handle_interrupt); +} +#else /* WIN32 */ + +/* + * Console control handler for Win32. Note that the control handler will + * execute on a *different thread* than the main one, so we need to do + * proper locking around those structures. + */ +static BOOL WINAPI +consoleHandler(DWORD dwCtrlType) +{ + if (dwCtrlType == CTRL_C_EVENT || + dwCtrlType == CTRL_BREAK_EVENT) + { + EnterCriticalSection(&cancelConnLock); + on_interrupt(); + LeaveCriticalSection(&cancelConnLock); + return TRUE; + } + else + /* Return FALSE for any signals not being handled */ + return FALSE; +} + +static void +init_cancel_handler(void) +{ + InitializeCriticalSection(&cancelConnLock); + + SetConsoleCtrlHandler(consoleHandler, TRUE); +} + +int +sleep(unsigned int seconds) +{ + Sleep(seconds * 1000); + return 0; +} + +int +usleep(unsigned int usec) +{ + Sleep((usec + 999) / 1000); /* rounded up */ + return 0; +} + +#undef select +static int +select_win32(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout) +{ + struct timeval remain; + + if (timeout != NULL) + remain = *timeout; + else + { + remain.tv_usec = 0; + remain.tv_sec = LONG_MAX; /* infinite */ + } + + /* sleep only one second because Ctrl+C doesn't interrupt select. */ + while (remain.tv_sec > 0 || remain.tv_usec > 0) + { + int ret; + struct timeval onesec; + + if (remain.tv_sec > 0) + { + onesec.tv_sec = 1; + onesec.tv_usec = 0; + remain.tv_sec -= 1; + } + else + { + onesec.tv_sec = 0; + onesec.tv_usec = remain.tv_usec; + remain.tv_usec = 0; + } + + ret = select(nfds, readfds, writefds, exceptfds, &onesec); + if (ret != 0) + { + /* succeeded or error */ + return ret; + } + else if (interrupted) + { + errno = EINTR; + return 0; + } + } + + return 0; /* timeout */ +} + +#endif /* WIN32 */ + +void +discard_response(PGconn *conn) +{ + PGresult *res; + + do + { + res = PQgetResult(conn); + if (res) + PQclear(res); + } while (res); +} + +/* + * pgfnames + * + * return a list of the names of objects in the argument directory. Caller + * must call pgfnames_cleanup later to free the memory allocated by this + * function. + */ +char ** +pgut_pgfnames(const char *path, bool strict) +{ + DIR *dir; + struct dirent *file; + char **filenames; + int numnames = 0; + int fnsize = 200; /* enough for many small dbs */ + + dir = opendir(path); + if (dir == NULL) + { + elog(strict ? ERROR : WARNING, "could not open directory \"%s\": %m", path); + return NULL; + } + + filenames = (char **) palloc(fnsize * sizeof(char *)); + + while (errno = 0, (file = readdir(dir)) != NULL) + { + if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0) + { + if (numnames + 1 >= fnsize) + { + fnsize *= 2; + filenames = (char **) gs_repalloc(filenames, + fnsize * sizeof(char *)); + } + filenames[numnames++] = gs_pstrdup(file->d_name); + } + } + + if (errno) + { + elog(strict ? ERROR : WARNING, "could not read directory \"%s\": %m", path); + return NULL; + } + + filenames[numnames] = NULL; + + if (closedir(dir)) + { + elog(strict ? 
ERROR : WARNING, "could not close directory \"%s\": %m", path); + return NULL; + } + + return filenames; +} + +/* + * pgfnames_cleanup + * + * deallocate memory used for filenames + */ +void +pgut_pgfnames_cleanup(char **filenames) +{ + char **fn; + + for (fn = filenames; *fn; fn++) + pfree(*fn); + + pfree(filenames); +} + +/* Shamelessly stolen from commom/rmtree.c */ +bool +pgut_rmtree(const char *path, bool rmtopdir, bool strict) +{ + bool result = true; + char pathbuf[MAXPGPATH]; + char **filenames; + char **filename; + struct stat statbuf; + + /* + * we copy all the names out of the directory before we start modifying + * it. + */ + filenames = pgut_pgfnames(path, strict); + + if (filenames == NULL) + return false; + + /* now we have the names we can start removing things */ + for (filename = filenames; *filename; filename++) + { + snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename); + + if (lstat(pathbuf, &statbuf) != 0) + { + elog(strict ? ERROR : WARNING, "could not stat file or directory \"%s\": %m", pathbuf); + result = false; + break; + } + + if (S_ISDIR(statbuf.st_mode)) + { + /* call ourselves recursively for a directory */ + if (!pgut_rmtree(pathbuf, true, strict)) + { + result = false; + break; + } + } + else + { + if (unlink(pathbuf) != 0) + { + elog(strict ? ERROR : WARNING, "could not remove file or directory \"%s\": %m", pathbuf); + result = false; + break; + } + } + } + + if (rmtopdir) + { + if (rmdir(path) != 0) + { + elog(strict ? ERROR : WARNING, "could not remove file or directory \"%s\": %m", path); + result = false; + } + } + + pgut_pgfnames_cleanup(filenames); + + return result; +} diff --git a/src/bin/pg_probackup/pgut.h b/src/bin/pg_probackup/pgut.h new file mode 100644 index 000000000..eddc20d82 --- /dev/null +++ b/src/bin/pg_probackup/pgut.h @@ -0,0 +1,107 @@ +/*------------------------------------------------------------------------- + * + * pgut.h + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef PGUT_H +#define PGUT_H + +#include "postgres_fe.h" +#include "libpq/libpq-fe.h" + +typedef void (*pgut_atexit_callback)(bool fatal, void *userdata); + +extern void pgut_help(bool details); + +/* + * pgut framework variables and functions + */ +extern bool prompt_password; +extern bool force_password; + +extern bool interrupted; +extern bool in_cleanup; +extern bool in_password; /* User prompts password */ + +extern void pgut_atexit_push(pgut_atexit_callback callback, void *userdata); +extern void pgut_atexit_pop(pgut_atexit_callback callback, void *userdata); + +extern void pgut_init(void); + +/* + * Database connections + */ +extern PGconn *pgut_connect(const char *host, const char *port, + const char *dbname, const char *username); +extern PGconn *pgut_connect_replication(const char *host, const char *port, + const char *dbname, + const char *username); +extern void pgut_disconnect(PGconn *conn); +extern void pgut_disconnect_callback(bool fatal, void *userdata); +extern PGresult *pgut_execute(PGconn* conn, const char *query, int nParams, + const char **params); +extern PGresult *pgut_execute_extended(PGconn* conn, const char *query, int nParams, + const char **params, bool text_result, bool ok_error); +extern PGresult *pgut_execute_parallel(PGconn* conn, PGcancel* thread_cancel_conn, + const char *query, int nParams, + const char **params, bool text_result, bool ok_error, bool async); +extern bool pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int elevel); +extern void pgut_cancel(PGconn* conn); +extern int pgut_wait(int num, PGconn *connections[], struct timeval *timeout); + +/* + * memory allocators + */ +extern void *pgut_malloc(size_t size); +extern void *pgut_realloc(void *p, size_t size); +extern char *pgut_strdup(const char *str); + +#define pgut_new(type) ((type *) pgut_malloc(sizeof(type))) +#define pgut_newarray(type, n) ((type *) pgut_malloc(sizeof(type) * (n))) + +/* + * file operations + */ +extern FILE *pgut_fopen(const char *path, const char *mode, bool missing_ok); + +/* + * Assert + */ +#undef Assert +#undef AssertArg +#undef AssertMacro + +#ifdef USE_ASSERT_CHECKING +#define Assert(x) assert(x) +#define AssertArg(x) assert(x) +#define AssertMacro(x) assert(x) +#else +#define Assert(x) ((void) 0) +#define AssertArg(x) ((void) 0) +#define AssertMacro(x) ((void) 0) +#endif + +#define IsSpace(c) (isspace((unsigned char)(c))) +#define IsAlpha(c) (isalpha((unsigned char)(c))) +#define IsAlnum(c) (isalnum((unsigned char)(c))) +#define ToLower(c) (tolower((unsigned char)(c))) +#define ToUpper(c) (toupper((unsigned char)(c))) + +/* + * socket operations + */ +extern int wait_for_socket(int sock, struct timeval *timeout); +extern int wait_for_sockets(int nfds, fd_set *fds, struct timeval *timeout); + +#ifdef WIN32 +extern int sleep(unsigned int seconds); +extern int usleep(unsigned int usec); +#endif + +#endif /* PGUT_H */ diff --git a/src/bin/pg_probackup/psprintf.cpp b/src/bin/pg_probackup/psprintf.cpp new file mode 100644 index 000000000..7e08f27fe --- /dev/null +++ b/src/bin/pg_probackup/psprintf.cpp @@ -0,0 +1,198 @@ +/*------------------------------------------------------------------------- + * + * psprintf.c + * sprintf into an allocated-on-demand buffer + * + * + * Portions Copyright (c) 2020 Huawei 
Technologies Co.,Ltd. + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/psprintf.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND + +#include "postgres.h" + +#include "utils/memutils.h" + +#else + +#include "postgres_fe.h" +#include "common/fe_memutils.h" + +/* It's possible we could use a different value for this in frontend code */ +#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */ + +#endif + +size_t pvsnprintf(char *buf, size_t len, const char *fmt, va_list args); + +/* + * psprintf + * + * Format text data under the control of fmt (an sprintf-style format string) + * and return it in an allocated-on-demand buffer. The buffer is allocated + * with palloc in the backend, or malloc in frontend builds. Caller is + * responsible to free the buffer when no longer needed, if appropriate. + * + * Errors are not returned to the caller, but are reported via elog(ERROR) + * in the backend, or printf-to-stderr-and-exit() in frontend builds. + * One should therefore think twice about using this in libpq. + */ +char * +psprintf(const char *fmt,...) +{ + int save_errno = errno; + size_t len = 128; /* initial assumption about buffer size */ + + for (;;) + { + char *result; + va_list args; + size_t newlen; + + /* + * Allocate result buffer. Note that in frontend this maps to malloc + * with exit-on-error. + */ + result = (char *) palloc(len); + + /* Try to format the data. */ + errno = save_errno; + va_start(args, fmt); + newlen = pvsnprintf(result, len, fmt, args); + va_end(args); + + if (newlen < len) + return result; /* success */ + + /* Release buffer and loop around to try again with larger len. */ + pfree(result); + len = newlen; + } +} + +/* + * pvsnprintf + * + * Attempt to format text data under the control of fmt (an sprintf-style + * format string) and insert it into buf (which has length len). + * + * If successful, return the number of bytes emitted, not counting the + * trailing zero byte. This will always be strictly less than len. + * + * If there's not enough space in buf, return an estimate of the buffer size + * needed to succeed (this *must* be more than the given len, else callers + * might loop infinitely). + * + * Other error cases do not return, but exit via elog(ERROR) or exit(). + * Hence, this shouldn't be used inside libpq. + * + * Caution: callers must be sure to preserve their entry-time errno + * when looping, in case the fmt contains "%m". + * + * Note that the semantics of the return value are not exactly C99's. + * First, we don't promise that the estimated buffer size is exactly right; + * callers must be prepared to loop multiple times to get the right size. + * (Given a C99-compliant vsnprintf, that won't happen, but it is rumored + * that some implementations don't always return the same value ...) + * Second, we return the recommended buffer size, not one less than that; + * this lets overflow concerns be handled here rather than in the callers. 
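Note (hypothetical usage, made-up variable names): the grow-and-retry contract described above means a caller never has to size the buffer itself:

    /* psprintf() hides the vsnprintf() sizing loop: the returned buffer is
     * allocated on demand and is always large enough for the formatted text. */
    char *msg = psprintf("backup %s: %d files, %lld bytes",
                         backup_id_str, n_files, (long long) total_bytes);
    elog(INFO, "%s", msg);
    pfree(msg);   /* palloc'd in backend builds, malloc-based in frontend builds */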
+ */ +size_t +pvsnprintf(char *buf, size_t len, const char *fmt, va_list args) +{ + int nprinted; + + nprinted = vsnprintf(buf, len, fmt, args); + + /* We assume failure means the fmt is bogus, hence hard failure is OK */ + if (unlikely(nprinted < 0)) + { +#ifndef FRONTEND + elog(ERROR, "vsnprintf failed: %m with format string \"%s\"", fmt); +#else + fprintf(stderr, "vsnprintf failed: %s with format string \"%s\"\n", + strerror(errno), fmt); + exit(EXIT_FAILURE); +#endif + } + + if ((size_t) nprinted < len) + { + /* Success. Note nprinted does not include trailing null. */ + return (size_t) nprinted; + } + + /* + * We assume a C99-compliant vsnprintf, so believe its estimate of the + * required space, and add one for the trailing null. (If it's wrong, the + * logic will still work, but we may loop multiple times.) + * + * Choke if the required space would exceed MaxAllocSize. Note we use + * this palloc-oriented overflow limit even when in frontend. + */ + if (unlikely((size_t) nprinted > MaxAllocSize - 1)) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("out of memory"))); +#else + fprintf(stderr, _("out of memory\n")); + exit(EXIT_FAILURE); +#endif + } + + return nprinted + 1; +} + +static inline void * +gs_malloc_internal(size_t size, int flags) +{ + void *tmp; + + /* Avoid unportable behavior of malloc(0) */ + if (size == 0) + size = 1; + tmp = malloc(size); + if (tmp == NULL) + { + if ((flags & MCXT_ALLOC_NO_OOM) == 0) + { + fprintf(stderr, _("out of memory\n")); + exit(EXIT_FAILURE); + } + return NULL; + } + + if ((flags & MCXT_ALLOC_ZERO) != 0) + MemSet(tmp, 0, size); + return tmp; +} + + +void * +gs_palloc0(Size size) +{ + return gs_malloc_internal(size, MCXT_ALLOC_ZERO); +} + +char * +gs_pstrdup(const char *in) +{ + return pg_strdup(in); +} + +void * +gs_repalloc(void *pointer, Size size) +{ + return pg_realloc(pointer, size); +} + diff --git a/src/bin/pg_probackup/ptrack.cpp b/src/bin/pg_probackup/ptrack.cpp new file mode 100644 index 000000000..8766885eb --- /dev/null +++ b/src/bin/pg_probackup/ptrack.cpp @@ -0,0 +1,198 @@ +/*------------------------------------------------------------------------- + * + * ptrack.c: support functions for ptrack backups + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Copyright (c) 2019 Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#if PG_VERSION_NUM < 110000 +#include "catalog/catalog.h" +#endif +#include "catalog/pg_tablespace.h" +#include "common/fe_memutils.h" +/* + * Macro needed to parse ptrack. + * NOTE Keep those values synchronized with definitions in ptrack.h + */ +#define PTRACK_BITS_PER_HEAPBLOCK 1 +#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / PTRACK_BITS_PER_HEAPBLOCK) + +/* + * Get lsn of the moment when ptrack was enabled the last time. + */ +XLogRecPtr +get_last_ptrack_lsn(PGconn *backup_conn, PGNodeInfo *nodeInfo) +{ + PGresult *res; + uint32 lsn_hi; + uint32 lsn_lo; + int ret; + XLogRecPtr lsn; + + res = pgut_execute(backup_conn, "SELECT pg_cbm_tracked_location()", 0, NULL); + + ret = sscanf_s(pg_strdup(PQgetvalue(res, 0, 0)), "%X/%X", &lsn_hi, &lsn_lo); + securec_check_for_sscanf_s(ret, 2, "\0", "\0"); + + /* Calculate LSN */ + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; + + PQclear(res); + return lsn; +} + +/* + * Fetch a list of changed files with their ptrack maps. 
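Note (standalone illustration with a made-up LSN): the %X/%X handling in get_last_ptrack_lsn() above is the usual split of a 64-bit LSN into its high and low 32-bit halves:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t lsn = 0x1A2B3C4D5E6FULL;                            /* made-up LSN */
        unsigned hi, lo;

        /* print the way openGauss/PostgreSQL render LSNs */
        printf("%X/%X\n", (unsigned) (lsn >> 32), (unsigned) lsn);   /* 1A2B/3C4D5E6F */

        /* parse it back, as get_last_ptrack_lsn() does */
        sscanf("1A2B/3C4D5E6F", "%X/%X", &hi, &lo);
        printf("%llX\n", (unsigned long long) (((uint64_t) hi) << 32 | lo));
        return 0;
    }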
+ */ +parray * +pg_ptrack_get_pagemapset(PGconn *backup_conn, XLogRecPtr lsn) +{ + PGresult *res; + char start_lsn[17 + 1]; + char *params[2]; + char *saved = NULL; + char *blocknum_str = NULL; + parray *pagemapset = NULL; + int i; + int ret = 0; + int blkcnt = 0; + BlockNumber blknum = 0; + datapagemap_t pagemap; + + snprintf(start_lsn, sizeof(start_lsn), "%X/%X", (uint32) (lsn >> 32), (uint32) lsn); + params[0] = gs_pstrdup(start_lsn); + + res = pgut_execute(backup_conn, "CHECKPOINT;", 0, NULL); + PQclear(res); + + res = pgut_execute(backup_conn, "SELECT pg_cbm_tracked_location()", 0, NULL); + if (PQnfields(res) != 1) { + elog(ERROR, "cannot get cbm tracked lsn location"); + } + params[1] = gs_pstrdup(PQgetvalue(res, 0, 0)); + PQclear(res); + + res = pgut_execute(backup_conn, + "SELECT path,changed_block_number,changed_block_list " + "FROM pg_cbm_get_changed_block($1, $2)", + 2, (const char **) params); + pfree(params[0]); + pfree(params[1]); + + if (PQnfields(res) == 0) + elog(ERROR, "cannot get ptrack pagemapset"); + + /* Initialize bitmap */ + pagemap.bitmap = NULL; + pagemap.bitmapsize = 0; + + /* Construct database map */ + for (i = 0; i < PQntuples(res); i++) + { + page_map_entry *pm_entry = (page_map_entry *) pgut_malloc(sizeof(page_map_entry)); + + /* get path */ + pm_entry->path = pgut_strdup(PQgetvalue(res, i, 0)); + + ret = sscanf_s(PQgetvalue(res, i, 1), "%u", &blkcnt); + securec_check_for_sscanf_s(ret, 1, "\0", "\0"); + + if (blkcnt == 1) { + ret = sscanf_s(PQgetvalue(res, i, 2), "%u", &blknum); + securec_check_for_sscanf_s(ret, 1, "\0", "\0"); + datapagemap_add(&pagemap, blknum % ((BlockNumber) RELSEG_SIZE)); + } else { + blocknum_str = strtok_r(PQgetvalue(res, i, 2), ", ", &saved); + while (blocknum_str != NULL) { + ret = sscanf_s(blocknum_str, "%u", &blknum); + securec_check_for_sscanf_s(ret, 1, "\0", "\0"); + datapagemap_add(&pagemap, blknum % ((BlockNumber) RELSEG_SIZE)); + blocknum_str = strtok_r(NULL, ", ", &saved); + } + } + + pm_entry->pagemap = (char *)pagemap.bitmap; + pm_entry->pagemapsize = pagemap.bitmapsize; + + pagemap.bitmap = NULL; + pagemap.bitmapsize = 0; + + if (pagemapset == NULL) + pagemapset = parray_new(); + + parray_append(pagemapset, pm_entry); + } + + PQclear(res); + + return pagemapset; +} + +/* + * Given a list of files in the instance to backup, build a pagemap for each + * data file that has ptrack. Result is saved in the pagemap field of pgFile. + * + * We fetch a list of changed files with their ptrack maps. After that files + * are merged with their bitmaps. File without bitmap is treated as unchanged. + */ +void +make_pagemap_from_ptrack(parray *files, + PGconn *backup_conn, + XLogRecPtr lsn) +{ + parray *filemaps; + int file_i = 0; + page_map_entry *dummy_map = NULL; + + /* Receive all available ptrack bitmaps at once */ + filemaps = pg_ptrack_get_pagemapset(backup_conn, lsn); + + if (filemaps != NULL) + parray_qsort(filemaps, pgFileMapComparePath); + else + return; + + dummy_map = (page_map_entry *) pgut_malloc(sizeof(page_map_entry)); + + /* Iterate over files and look for corresponding pagemap if any */ + for (file_i = 0; file_i < parray_num(files); file_i++) + { + pgFile *file = (pgFile *) parray_get(files, file_i); + page_map_entry **res_map = NULL; + page_map_entry *map = NULL; + + /* + * For now nondata files are not entitled to have pagemap + * TODO It's possible to use ptrack for incremental backup of + * relation forks. Not implemented yet. 
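Note (simplified sketch, not the actual datapagemap implementation): the pagemap built above is a plain bitmap over block numbers within one relation segment, which is why block numbers are taken modulo RELSEG_SIZE before datapagemap_add(). Assuming the default 8 kB block size (131072 blocks per 1 GB segment):

    #include <stdint.h>

    #define SEG_BLOCKS 131072                 /* stand-in for RELSEG_SIZE */

    static uint8_t bitmap[SEG_BLOCKS / 8];    /* one bit per block */

    static void mark_changed(uint32_t blkno)
    {
        blkno %= SEG_BLOCKS;
        bitmap[blkno / 8] |= (uint8_t) (1 << (blkno % 8));
    }

    static int is_changed(uint32_t blkno)
    {
        blkno %= SEG_BLOCKS;
        return (bitmap[blkno / 8] >> (blkno % 8)) & 1;
    }

    int main(void)
    {
        mark_changed(131073);                 /* block 1 of the second segment */
        return is_changed(1) ? 0 : 1;         /* 131073 % 131072 == 1 */
    }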
+ */ + if (!file->is_datafile || file->is_cfs) + continue; + + /* Consider only files from PGDATA (this check is probably redundant) */ + if (file->external_dir_num != 0) + continue; + + if (filemaps) + { + dummy_map->path = file->rel_path; + res_map = (page_map_entry **)parray_bsearch(filemaps, dummy_map, pgFileMapComparePath); + map = (res_map) ? *res_map : NULL; + } + + /* Found map */ + if (map) + { + elog(VERBOSE, "Using ptrack pagemap for file \"%s\"", file->rel_path); + file->pagemap.bitmapsize = map->pagemapsize; + file->pagemap.bitmap = (unsigned char *)map->pagemap; + } + } + + free(dummy_map); +} diff --git a/src/bin/pg_probackup/remote.cpp b/src/bin/pg_probackup/remote.cpp new file mode 100644 index 000000000..8587386ae --- /dev/null +++ b/src/bin/pg_probackup/remote.cpp @@ -0,0 +1,261 @@ +/*------------------------------------------------------------------------- + * + * remote.c + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include +#include + +#ifdef WIN32 +#define __thread __declspec(thread) +#else +#include +#endif + +#include "pg_probackup.h" +#include "file.h" +#include "common/fe_memutils.h" + +#define MAX_CMDLINE_LENGTH 4096 +#define MAX_CMDLINE_OPTIONS 256 +#define ERR_BUF_SIZE 4096 +#define PIPE_SIZE (64*1024) + +static int split_options(int argc, char* argv[], int max_options, char* options) +{ + char* opt = options; + char in_quote = '\0'; + while (true) { + switch (*opt) { + case '\'': + case '\"': + if (!in_quote) { + in_quote = *opt++; + continue; + } + if (*opt == in_quote && *++opt != in_quote) { + in_quote = '\0'; + continue; + } + break; + case '\0': + if (opt != options) { + argv[argc++] = options; + if (argc >= max_options) + elog(ERROR, "Too much options"); + } + return argc; + case ' ': + argv[argc++] = options; + if (argc >= max_options) + elog(ERROR, "Too much options"); + *opt++ = '\0'; + options = opt; + continue; + default: + break; + } + opt += 1; + } + return argc; +} + +static __thread int child_pid; + +#if 0 +static void kill_child(void) +{ + kill(child_pid, SIGTERM); +} +#endif + + +void wait_ssh(void) +{ +/* + * We need to wait termination of SSH process to eliminate zombies. + * There is no waitpid() function at Windows but there are no zombie processes caused by lack of wait/waitpid. + * So just disable waitpid for Windows. 
+ */ +#ifndef WIN32 + int status; + waitpid(child_pid, &status, 0); + elog(LOG, "SSH process %d is terminated with status %d", child_pid, status); +#endif +} + +#ifdef WIN32 +void launch_ssh(char* argv[]) +{ + int infd = atoi(argv[2]); + int outfd = atoi(argv[3]); + + SYS_CHECK(close(STDIN_FILENO)); + SYS_CHECK(close(STDOUT_FILENO)); + + SYS_CHECK(dup2(infd, STDIN_FILENO)); + SYS_CHECK(dup2(outfd, STDOUT_FILENO)); + + SYS_CHECK(execvp(argv[4], argv+4)); +} +#endif + +static bool needs_quotes(char const* path) +{ + return strchr(path, ' ') != NULL; +} + +bool launch_agent(void) +{ + char cmd[MAX_CMDLINE_LENGTH]; + char* ssh_argv[MAX_CMDLINE_OPTIONS]; + int ssh_argc; + int outfd[2]; + int infd[2]; + int errfd[2]; + int agent_version; + + ssh_argc = 0; +#ifdef WIN32 + ssh_argv[ssh_argc++] = PROGRAM_NAME_FULL; + ssh_argv[ssh_argc++] = "ssh"; + ssh_argc += 2; /* reserve space for pipe descriptors */ +#endif + ssh_argv[ssh_argc++] = instance_config.remote.proto; + if (instance_config.remote.port != NULL) { + ssh_argv[ssh_argc++] = (char *)"-p"; + ssh_argv[ssh_argc++] = instance_config.remote.port; + } + if (instance_config.remote.user != NULL) { + ssh_argv[ssh_argc++] = (char *)"-l"; + ssh_argv[ssh_argc++] = instance_config.remote.user; + } + if (instance_config.remote.ssh_config != NULL) { + ssh_argv[ssh_argc++] = (char *)"-F"; + ssh_argv[ssh_argc++] = instance_config.remote.ssh_config; + } + if (instance_config.remote.ssh_options != NULL) { + ssh_argc = split_options(ssh_argc, ssh_argv, MAX_CMDLINE_OPTIONS, pg_strdup(instance_config.remote.ssh_options)); + } + + ssh_argv[ssh_argc++] = (char *)"-o"; + ssh_argv[ssh_argc++] = (char *)"PasswordAuthentication=no"; + + ssh_argv[ssh_argc++] = (char *)"-o"; + ssh_argv[ssh_argc++] = (char *)"Compression=no"; + + ssh_argv[ssh_argc++] = (char *)"-o"; + ssh_argv[ssh_argc++] = (char *)"LogLevel=error"; + + ssh_argv[ssh_argc++] = instance_config.remote.host; + ssh_argv[ssh_argc++] = cmd; + ssh_argv[ssh_argc] = NULL; + + if (instance_config.remote.path) + { + char const* probackup = PROGRAM_NAME_FULL; + char* sep = (char *)strrchr(probackup, '/'); + if (sep != NULL) { + probackup = sep + 1; + } +#ifdef WIN32 + else { + sep = strrchr(probackup, '\\'); + if (sep != NULL) { + probackup = sep + 1; + } + } + if (needs_quotes(instance_config.remote.path) || needs_quotes(PROGRAM_NAME_FULL)) + snprintf(cmd, sizeof(cmd), "\"%s\\%s\" agent", + instance_config.remote.path, probackup); + else + snprintf(cmd, sizeof(cmd), "%s\\%s agent", + instance_config.remote.path, probackup); +#else + if (needs_quotes(instance_config.remote.path) || needs_quotes(PROGRAM_NAME_FULL)) + snprintf(cmd, sizeof(cmd), "\"%s/%s\" agent", + instance_config.remote.path, probackup); + else + snprintf(cmd, sizeof(cmd), "%s/%s agent", + instance_config.remote.path, probackup); +#endif + } else { + if (needs_quotes(PROGRAM_NAME_FULL)) + snprintf(cmd, sizeof(cmd), "\"%s\" agent", PROGRAM_NAME_FULL); + else + snprintf(cmd, sizeof(cmd), "%s agent", PROGRAM_NAME_FULL); + } + +#ifdef WIN32 + SYS_CHECK(_pipe(infd, PIPE_SIZE, _O_BINARY)) ; + SYS_CHECK(_pipe(outfd, PIPE_SIZE, _O_BINARY)); + ssh_argv[2] = psprintf("%d", outfd[0]); + ssh_argv[3] = psprintf("%d", infd[1]); + { + intptr_t pid = _spawnvp(_P_NOWAIT, ssh_argv[0], ssh_argv); + if (pid < 0) + return false; + child_pid = GetProcessId((HANDLE)pid); +#else + SYS_CHECK(pipe(infd)); + SYS_CHECK(pipe(outfd)); + SYS_CHECK(pipe(errfd)); + + SYS_CHECK(child_pid = fork()); + + if (child_pid == 0) { /* child */ + SYS_CHECK(close(STDIN_FILENO)); + 
SYS_CHECK(close(STDOUT_FILENO)); + SYS_CHECK(close(STDERR_FILENO)); + + SYS_CHECK(dup2(outfd[0], STDIN_FILENO)); + SYS_CHECK(dup2(infd[1], STDOUT_FILENO)); + SYS_CHECK(dup2(errfd[1], STDERR_FILENO)); + + SYS_CHECK(close(infd[0])); + SYS_CHECK(close(infd[1])); + SYS_CHECK(close(outfd[0])); + SYS_CHECK(close(outfd[1])); + SYS_CHECK(close(errfd[0])); + SYS_CHECK(close(errfd[1])); + + if (execvp(ssh_argv[0], ssh_argv) < 0) + return false; + } else { +#endif + elog(LOG, "Start SSH client process, pid %d", child_pid); + SYS_CHECK(close(infd[1])); /* These are being used by the child */ + SYS_CHECK(close(outfd[0])); + SYS_CHECK(close(errfd[1])); + /*atexit(kill_child);*/ + + fio_redirect(infd[0], outfd[1], errfd[0]); /* write to stdout */ + } + + /* Make sure that remote agent has the same version + * TODO: we must also check PG version and fork edition + */ + agent_version = fio_get_agent_version(); + if (agent_version != AGENT_PROTOCOL_VERSION) + { + char agent_version_str[1024]; + sprintf(agent_version_str, "%d.%d.%d", + agent_version / 10000, + (agent_version / 100) % 100, + agent_version % 100); + + elog(ERROR, "Remote agent version %s does not match local program version %s", + agent_version_str, PROGRAM_VERSION); + } + + return true; +} diff --git a/src/bin/pg_probackup/remote.h b/src/bin/pg_probackup/remote.h new file mode 100644 index 000000000..dc98644ab --- /dev/null +++ b/src/bin/pg_probackup/remote.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * remote.h: - prototypes of remote functions. + * + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef REMOTE_H +#define REMOTE_H + +typedef struct RemoteConfig +{ + char* proto; + char* host; + char* port; + char* path; + char* user; + char *ssh_config; + char *ssh_options; +} RemoteConfig; + +#endif diff --git a/src/bin/pg_probackup/restore.cpp b/src/bin/pg_probackup/restore.cpp new file mode 100644 index 000000000..c4a5ba691 --- /dev/null +++ b/src/bin/pg_probackup/restore.cpp @@ -0,0 +1,1826 @@ +/*------------------------------------------------------------------------- + * + * restore.c: restore DB cluster and archived WAL. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +//#include "access/timeline.h" + +#include +#include + +#include "thread.h" +#include "common/fe_memutils.h" + +typedef struct +{ + parray *pgdata_files; + parray *dest_files; + pgBackup *dest_backup; + parray *dest_external_dirs; + parray *parent_chain; + bool skip_external_dirs; + const char *to_root; + size_t restored_bytes; + bool use_bitmap; + IncrRestoreMode incremental_mode; + XLogRecPtr shift_lsn; /* used only in LSN incremental_mode */ + + /* + * Return value from the thread. + * 0 means there is no error, 1 - there is an error. 
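Note (made-up value, for illustration): the agent-version handshake in launch_agent() above packs the version as major*10000 + minor*100 + patch, which is exactly how the error message decodes it:

    #include <stdio.h>

    int main(void)
    {
        int v = 20407;   /* hypothetical protocol version */
        printf("%d.%d.%d\n", v / 10000, (v / 100) % 100, v % 100);   /* 2.4.7 */
        return 0;
    }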
+ */ + int ret; +} restore_files_arg; + +static void create_recovery_conf(time_t backup_id, + pgRecoveryTarget *rt, + pgBackup *backup, + pgRestoreParams *params); +static void *restore_files(void *arg); +static void set_orphan_status(parray *backups, pgBackup *parent_backup); +static void pg12_recovery_config(pgBackup *backup, bool add_include); + +static void restore_chain(pgBackup *dest_backup, parray *parent_chain, + pgRestoreParams *params, + const char *pgdata_path, bool no_sync); +static void check_incremental_compatibility(const char *pgdata, uint64 system_identifier, + IncrRestoreMode incremental_mode); + +/* + * Iterate over backup list to find all ancestors of the broken parent_backup + * and update their status to BACKUP_STATUS_ORPHAN + */ +static void +set_orphan_status(parray *backups, pgBackup *parent_backup) +{ + /* chain is intact, but at least one parent is invalid */ + char *parent_backup_id; + int j; + + /* parent_backup_id is a human-readable backup ID */ + parent_backup_id = base36enc_dup(parent_backup->start_time); + + for (j = 0; j < parray_num(backups); j++) + { + + pgBackup *backup = (pgBackup *) parray_get(backups, j); + + if (is_parent(parent_backup->start_time, backup, false)) + { + if (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(backup, BACKUP_STATUS_ORPHAN, instance_name, true); + + elog(WARNING, + "Backup %s is orphaned because his parent %s has status: %s", + base36enc(backup->start_time), + parent_backup_id, + status2str(parent_backup->status)); + } + else + { + elog(WARNING, "Backup %s has parent %s with status: %s", + base36enc(backup->start_time), parent_backup_id, + status2str(parent_backup->status)); + } + } + } + pg_free(parent_backup_id); +} + +/* + * Entry point of pg_probackup RESTORE and VALIDATE subcommands. + */ +int +do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, + pgRestoreParams *params, bool no_sync) +{ + int i = 0; + int j = 0; + parray *backups = NULL; + pgBackup *tmp_backup = NULL; + pgBackup *current_backup = NULL; + pgBackup *dest_backup = NULL; + pgBackup *base_full_backup = NULL; + pgBackup *corrupted_backup = NULL; + char *action = (char *)(params->is_restore ? 
"Restore":"Validate"); + parray *parent_chain = NULL; + bool pgdata_is_empty = true; + bool tblspaces_are_empty = true; + XLogRecPtr shift_lsn = InvalidXLogRecPtr; + + if (params->is_restore) + { + if (instance_config.pgdata == NULL) + elog(ERROR, + "required parameter not specified: PGDATA (-D, --pgdata)"); + /* Check if restore destination empty */ + if (!dir_is_empty(instance_config.pgdata, FIO_DB_HOST)) + { + /* Check that remote system is NOT running and systemd id is the same as ours */ + if (params->incremental_mode != INCR_NONE) + { + elog(INFO, "Running incremental restore into nonempty directory: \"%s\"", + instance_config.pgdata); + + check_incremental_compatibility(instance_config.pgdata, + instance_config.system_identifier, + params->incremental_mode); + } + else + elog(ERROR, "Restore destination is not empty: \"%s\"", + instance_config.pgdata); + + /* if destination directory is empty, then incremental restore may be disabled */ + pgdata_is_empty = false; + } + } + + if (instance_name == NULL) + elog(ERROR, "required parameter not specified: --instance"); + + elog(LOG, "%s begin.", action); + + /* Get list of all backups sorted in order of descending start time */ + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + /* Find backup range we should restore or validate. */ + while ((i < parray_num(backups)) && !dest_backup) + { + current_backup = (pgBackup *) parray_get(backups, i); + i++; + + /* Skip all backups which started after target backup */ + if (target_backup_id && current_backup->start_time > target_backup_id) + continue; + + /* + * [PGPRO-1164] If BACKUP_ID is not provided for restore command, + * we must find the first valid(!) backup. + + * If target_backup_id is not provided, we can be sure that + * PITR for restore or validate is requested. + * So we can assume that user is more interested in recovery to specific point + * in time and NOT interested in revalidation of invalid backups. + * So based on that assumptions we should choose only OK and DONE backups + * as candidates for validate and restore. + */ + + if (target_backup_id == INVALID_BACKUP_ID && + (current_backup->status != BACKUP_STATUS_OK && + current_backup->status != BACKUP_STATUS_DONE)) + { + elog(WARNING, "Skipping backup %s, because it has non-valid status: %s", + base36enc(current_backup->start_time), status2str(current_backup->status)); + continue; + } + + /* + * We found target backup. Check its status and + * ensure that it satisfies recovery target. + */ + if ((target_backup_id == current_backup->start_time + || target_backup_id == INVALID_BACKUP_ID)) + { + + /* backup is not ok, + * but in case of CORRUPT or ORPHAN revalidation is possible + * unless --no-validate is used, + * in other cases throw an error. + */ + // 1. validate + // 2. validate -i INVALID_ID <- allowed revalidate + // 3. restore -i INVALID_ID <- allowed revalidate and restore + // 4. restore <- impossible + // 5. 
restore --no-validate <- forbidden + if (current_backup->status != BACKUP_STATUS_OK && + current_backup->status != BACKUP_STATUS_DONE) + { + if ((current_backup->status == BACKUP_STATUS_ORPHAN || + current_backup->status == BACKUP_STATUS_CORRUPT || + current_backup->status == BACKUP_STATUS_RUNNING) + && (!params->no_validate || params->force)) + elog(WARNING, "Backup %s has status: %s", + base36enc(current_backup->start_time), status2str(current_backup->status)); + else + elog(ERROR, "Backup %s has status: %s", + base36enc(current_backup->start_time), status2str(current_backup->status)); + } + + if (rt->target_tli) + { + parray *timelines; + + // elog(LOG, "target timeline ID = %u", rt->target_tli); + /* Read timeline history files from archives */ + timelines = read_timeline_history(arclog_path, rt->target_tli, true); + + if (!satisfy_timeline(timelines, current_backup)) + { + if (target_backup_id != INVALID_BACKUP_ID) + elog(ERROR, "target backup %s does not satisfy target timeline", + base36enc(target_backup_id)); + else + /* Try to find another backup that satisfies target timeline */ + continue; + } + + parray_walk(timelines, pfree); + parray_free(timelines); + } + + if (!satisfy_recovery_target(current_backup, rt)) + { + if (target_backup_id != INVALID_BACKUP_ID) + elog(ERROR, "Requested backup %s does not satisfy restore options", + base36enc(target_backup_id)); + else + /* Try to find another backup that satisfies target options */ + continue; + } + + /* + * Backup is fine and satisfies all recovery options. + * Save it as dest_backup + */ + dest_backup = current_backup; + } + } + + /* TODO: Show latest possible target */ + if (dest_backup == NULL) + { + /* Failed to find target backup */ + if (target_backup_id) + elog(ERROR, "Requested backup %s is not found.", base36enc(target_backup_id)); + else + elog(ERROR, "Backup satisfying target options is not found."); + /* TODO: check if user asked PITR or just restore of latest backup */ + } + + /* If we already found dest_backup, look for full backup. 
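[Illustrative note, not part of the patch] The branch that follows relies on the outcome of scan_parent_chain(). Only ChainIsBroken and ChainIsInvalid are referenced in this hunk; the sketch below adds a third "all good" value (named ChainIsOk here) and numeric ordering purely as assumptions for illustration:

    #include <stdio.h>

    /* How the chain-scan outcomes are read by the caller. ChainIsBroken and
     * ChainIsInvalid appear in the patch; ChainIsOk and the enum values are
     * assumptions made only for this sketch. */
    typedef enum { ChainIsBroken, ChainIsInvalid, ChainIsOk } ChainStatus;

    static const char *describe_chain(ChainStatus st)
    {
        switch (st)
        {
            case ChainIsBroken:  return "a parent backup is missing from the catalog";
            case ChainIsInvalid: return "chain is complete, but some parent is not OK/DONE";
            case ChainIsOk:      return "every parent up to the FULL backup is valid";
        }
        return "unexpected status";
    }

    int main(void)
    {
        printf("%s\n", describe_chain(ChainIsInvalid));
        return 0;
    }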
*/ + if (dest_backup->backup_mode == BACKUP_MODE_FULL) + base_full_backup = dest_backup; + else + { + int result; + + result = scan_parent_chain(dest_backup, &tmp_backup); + + if (result == ChainIsBroken) + { + /* chain is broken, determine missing backup ID + * and orphinize all his descendants + */ + char *missing_backup_id; + time_t missing_backup_start_time; + + missing_backup_start_time = tmp_backup->parent_backup; + missing_backup_id = base36enc_dup(tmp_backup->parent_backup); + + for (j = 0; j < parray_num(backups); j++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, j); + + /* use parent backup start_time because he is missing + * and we must orphinize his descendants + */ + if (is_parent(missing_backup_start_time, backup, false)) + { + if (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(backup, BACKUP_STATUS_ORPHAN, instance_name, true); + + elog(WARNING, "Backup %s is orphaned because his parent %s is missing", + base36enc(backup->start_time), missing_backup_id); + } + else + { + elog(WARNING, "Backup %s has missing parent %s", + base36enc(backup->start_time), missing_backup_id); + } + } + } + pg_free(missing_backup_id); + /* No point in doing futher */ + elog(ERROR, "%s of backup %s failed.", action, base36enc(dest_backup->start_time)); + } + else if (result == ChainIsInvalid) + { + /* chain is intact, but at least one parent is invalid */ + set_orphan_status(backups, tmp_backup); + tmp_backup = find_parent_full_backup(dest_backup); + + /* sanity */ + if (!tmp_backup) + elog(ERROR, "Parent full backup for the given backup %s was not found", + base36enc(dest_backup->start_time)); + } + + /* We have found full backup */ + base_full_backup = tmp_backup; + } + + if (base_full_backup == NULL) + elog(ERROR, "Full backup satisfying target options is not found."); + + /* + * Ensure that directories provided in tablespace mapping are valid + * i.e. empty or not exist. + */ + if (params->is_restore) + { + check_tablespace_mapping(dest_backup, params->incremental_mode != INCR_NONE, &tblspaces_are_empty); + + if (params->incremental_mode != INCR_NONE && pgdata_is_empty && tblspaces_are_empty) + { + elog(INFO, "Destination directory and tablespace directories are empty, " + "disable incremental restore"); + params->incremental_mode = INCR_NONE; + } + + /* no point in checking external directories if their restore is not requested */ + if (!params->skip_external_dirs) + check_external_dir_mapping(dest_backup, params->incremental_mode != INCR_NONE); + } + + /* At this point we are sure that parent chain is whole + * so we can build separate array, containing all needed backups, + * to simplify validation and restore + */ + parent_chain = parray_new(); + + /* Take every backup that is a child of base_backup AND parent of dest_backup + * including base_backup and dest_backup + */ + + tmp_backup = dest_backup; + while (tmp_backup) + { + parray_append(parent_chain, tmp_backup); + tmp_backup = tmp_backup->parent_backup_link; + } + + /* + * Determine the shift-LSN + * Consider the example A: + * + * + * /----D----------F-> + * -A--B---C---*-------X-----> + * + * [A,F] - incremental chain + * X - the state of pgdata + * F - destination backup + * * - switch point + * + * When running incremental restore in 'lsn' mode, we get a bitmap of pages, + * whose LSN is less than shift-LSN (backup C stop_lsn). + * So when restoring file, we can skip restore of pages coming from + * A, B and C. 
+ * Pages from D and F cannot be skipped due to incremental restore. + * + * Consider the example B: + * + * + * /----------X----> + * ----*---A---B---C--> + * + * [A,C] - incremental chain + * X - the state of pgdata + * C - destination backup + * * - switch point + * + * Incremental restore in shift mode IS NOT POSSIBLE in this case. + * We must be able to differentiate the scenario A and scenario B. + * + */ + if (params->is_restore && params->incremental_mode == INCR_LSN) + { + RedoParams redo; + parray *timelines = NULL; + get_redo(instance_config.pgdata, &redo); + + if (redo.checksum_version == 0) + elog(ERROR, "Incremental restore in 'lsn' mode require " + "data_checksums to be enabled in destination data directory"); + + timelines = read_timeline_history(arclog_path, redo.tli, false); + + if (!timelines) + elog(WARNING, "Failed to get history for redo timeline %i, " + "multi-timeline incremental restore in 'lsn' mode is impossible", redo.tli); + + tmp_backup = dest_backup; + + while (tmp_backup) + { + /* Candidate, whose stop_lsn if less than shift LSN, is found */ + if (tmp_backup->stop_lsn < redo.lsn) + { + /* if candidate timeline is the same as redo TLI, + * then we are good to go. + */ + if (redo.tli == tmp_backup->tli) + { + elog(INFO, "Backup %s is chosen as shiftpoint, its Stop LSN will be used as shift LSN", + base36enc(tmp_backup->start_time)); + + shift_lsn = tmp_backup->stop_lsn; + break; + } + + if (!timelines) + { + elog(WARNING, "Redo timeline %i differs from target timeline %i, " + "in this case, to safely run incremental restore in 'lsn' mode, " + "the history file for timeline %i is mandatory", + redo.tli, tmp_backup->tli, redo.tli); + break; + } + + /* check whether the candidate tli is a part of redo TLI history */ + if (tliIsPartOfHistory(timelines, tmp_backup->tli)) + { + shift_lsn = tmp_backup->stop_lsn; + break; + } + else + elog(INFO, "Backup %s cannot be a shiftpoint, " + "because its tli %i is not in history of redo timeline %i", + base36enc(tmp_backup->start_time), tmp_backup->tli, redo.tli); + } + + tmp_backup = tmp_backup->parent_backup_link; + } + + if (XLogRecPtrIsInvalid(shift_lsn)) + elog(ERROR, "Cannot perform incremental restore of backup chain %s in 'lsn' mode, " + "because destination directory redo point %X/%X on tli %i is out of reach", + base36enc(dest_backup->start_time), + (uint32) (redo.lsn >> 32), (uint32) redo.lsn, redo.tli); + else + elog(INFO, "Destination directory redo point %X/%X on tli %i is " + "within reach of backup %s with Stop LSN %X/%X on tli %i", + (uint32) (redo.lsn >> 32), (uint32) redo.lsn, redo.tli, + base36enc(tmp_backup->start_time), + (uint32) (tmp_backup->stop_lsn >> 32), (uint32) tmp_backup->stop_lsn, + tmp_backup->tli); + + elog(INFO, "shift LSN: %X/%X", + (uint32) (shift_lsn >> 32), (uint32) shift_lsn); + + params->shift_lsn = shift_lsn; + } + + /* for validation or restore with enabled validation */ + if (!params->is_restore || !params->no_validate) + { + if (dest_backup->backup_mode != BACKUP_MODE_FULL) + elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time)); + + /* + * Validate backups from base_full_backup to dest_backup. 
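[Illustrative note, not part of the patch] The loop that follows walks parent_chain from its last element down to index 0. Because the chain was collected by following parent_backup_link upward from the destination backup, index 0 holds the destination and the last index holds the FULL backup, so the backwards walk validates the FULL backup first and the destination last. A tiny sketch of that ordering, with a plain array standing in for the parray and made-up backup labels:

    #include <stdio.h>

    /* parent_chain is filled destination-first (index 0) up to the FULL backup
     * (last index); iterating backwards therefore visits FULL, then each
     * increment, and the destination backup last. */
    int main(void)
    {
        const char *chain[] = { "DELTA (destination)", "PAGE", "FULL" };
        int n = (int) (sizeof(chain) / sizeof(chain[0]));

        for (int i = n - 1; i >= 0; i--)
            printf("validate %s\n", chain[i]);
        return 0;
    }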
+ */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + tmp_backup = (pgBackup *) parray_get(parent_chain, i); + + /* Do not interrupt, validate the next backup */ + if (!lock_backup(tmp_backup, true)) + { + if (params->is_restore) + elog(ERROR, "Cannot lock backup %s directory", + base36enc(tmp_backup->start_time)); + else + { + elog(WARNING, "Cannot lock backup %s directory, skip validation", + base36enc(tmp_backup->start_time)); + continue; + } + } + + /* validate datafiles only */ + pgBackupValidate(tmp_backup, params); + + /* After pgBackupValidate() only following backup + * states are possible: ERROR, RUNNING, CORRUPT and OK. + * Validate WAL only for OK, because there is no point + * in WAL validation for corrupted, errored or running backups. + */ + if (tmp_backup->status != BACKUP_STATUS_OK) + { + corrupted_backup = tmp_backup; + break; + } + /* We do not validate WAL files of intermediate backups + * It`s done to speed up restore + */ + } + + /* There is no point in wal validation of corrupted backups */ + // TODO: there should be a way for a user to request only(!) WAL validation + if (!corrupted_backup) + { + /* + * Validate corresponding WAL files. + * We pass base_full_backup timeline as last argument to this function, + * because it's needed to form the name of xlog file. + */ + validate_wal(dest_backup, arclog_path, rt->target_time, + rt->target_xid, rt->target_lsn, + dest_backup->tli, instance_config.xlog_seg_size); + } + /* Orphanize every OK descendant of corrupted backup */ + else + set_orphan_status(backups, corrupted_backup); + } + + /* + * If dest backup is corrupted or was orphaned in previous check + * produce corresponding error message + */ + if (dest_backup->status == BACKUP_STATUS_OK || + dest_backup->status == BACKUP_STATUS_DONE) + { + if (params->no_validate) + elog(WARNING, "Backup %s is used without validation.", base36enc(dest_backup->start_time)); + else + elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time)); + } + else if (dest_backup->status == BACKUP_STATUS_CORRUPT) + { + if (params->force) + elog(WARNING, "Backup %s is corrupt.", base36enc(dest_backup->start_time)); + else + elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time)); + } + else if (dest_backup->status == BACKUP_STATUS_ORPHAN) + { + if (params->force) + elog(WARNING, "Backup %s is orphan.", base36enc(dest_backup->start_time)); + else + elog(ERROR, "Backup %s is orphan.", base36enc(dest_backup->start_time)); + } + else + elog(ERROR, "Backup %s has status: %s", + base36enc(dest_backup->start_time), status2str(dest_backup->status)); + + /* We ensured that all backups are valid, now restore if required + */ + if (params->is_restore) + { + if (rt->lsn_string && + parse_server_version(dest_backup->server_version) < 100000) + elog(ERROR, "Backup %s was created for version %s which doesn't support recovery_target_lsn", + base36enc(dest_backup->start_time), + dest_backup->server_version); + + restore_chain(dest_backup, parent_chain, + params, instance_config.pgdata, no_sync); + + /* Create recovery.conf with given recovery target parameters */ + create_recovery_conf(target_backup_id, rt, dest_backup, params); + } + + /* ssh connection to longer needed */ + fio_disconnect(); + + elog(INFO, "%s of backup %s completed.", + action, base36enc(dest_backup->start_time)); + + /* cleanup */ + parray_walk(backups, pgBackupFree); + parray_free(backups); + parray_free(parent_chain); + + return 0; +} + +/* + * Restore backup chain. 
+ */ +void +restore_chain(pgBackup *dest_backup, parray *parent_chain, + pgRestoreParams *params, + const char *pgdata_path, bool no_sync) +{ + int i; + char timestamp[100]; + parray *pgdata_files = NULL; + parray *dest_files = NULL; + parray *external_dirs = NULL; + /* arrays with meta info for multi threaded backup */ + pthread_t *threads; + restore_files_arg *threads_args; + bool restore_isok = true; + bool use_bitmap = true; + + /* fancy reporting */ + char pretty_dest_bytes[20]; + char pretty_total_bytes[20]; + size_t dest_bytes = 0; + size_t total_bytes = 0; + char pretty_time[20]; + time_t start_time, end_time; + + /* Preparations for actual restoring */ + time2iso(timestamp, lengthof(timestamp), dest_backup->start_time); + elog(INFO, "Restoring the database from backup at %s", timestamp); + + dest_files = get_backup_filelist(dest_backup, true); + + /* Lock backup chain and make sanity checks */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + if (!lock_backup(backup, true)) + elog(ERROR, "Cannot lock backup %s", base36enc(backup->start_time)); + + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE) + { + if (params->force) + elog(WARNING, "Backup %s is not valid, restore is forced", + base36enc(backup->start_time)); + else + elog(ERROR, "Backup %s cannot be restored because it is not valid", + base36enc(backup->start_time)); + } + + /* confirm block size compatibility */ + if (backup->block_size != BLCKSZ) + elog(ERROR, + "BLCKSZ(%d) is not compatible(%d expected)", + backup->block_size, BLCKSZ); + + if (backup->wal_block_size != XLOG_BLCKSZ) + elog(ERROR, + "XLOG_BLCKSZ(%d) is not compatible(%d expected)", + backup->wal_block_size, XLOG_BLCKSZ); + + /* populate backup filelist */ + if (backup->start_time != dest_backup->start_time) + backup->files = get_backup_filelist(backup, true); + else + backup->files = dest_files; + + /* + * this sorting is important, because we rely on it to find + * destination file in intermediate backups file lists + * using bsearch. + */ + parray_qsort(backup->files, pgFileCompareRelPathWithExternal); + } + + /* If dest backup version is older than 2.4.0, then bitmap optimization + * is impossible to use, because bitmap restore rely on pgFile.n_blocks, + * which is not always available in old backups. + */ + if (parse_program_version(dest_backup->program_version) < 20400) + { + use_bitmap = false; + + if (params->incremental_mode != INCR_NONE) + elog(ERROR, "incremental restore is not possible for backups older than 2.3.0 version"); + } + + /* There is no point in bitmap restore, when restoring a single FULL backup, + * unless we are running incremental-lsn restore, then bitmap is mandatory. + */ + if (use_bitmap && parray_num(parent_chain) == 1) + { + if (params->incremental_mode == INCR_NONE) + use_bitmap = false; + else + use_bitmap = true; + } + + /* + * Restore dest_backup internal directories. + */ + create_data_directories(dest_files, instance_config.pgdata, + dest_backup->root_dir, true, + params->incremental_mode != INCR_NONE, + FIO_DB_HOST); + + /* + * Restore dest_backup external directories. 
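[Illustrative note, not part of the patch] The bitmap decision earlier in restore_chain() compares parse_program_version(dest_backup->program_version) against 20400, which presumably encodes release 2.4.0 as major*10000 + minor*100 + patch. An illustrative stand-in for that conversion (not the patch's implementation):

    #include <stdio.h>

    /* Stand-in for parse_program_version(): "2.4.0" -> 20400, assuming the
     * major*10000 + minor*100 + patch numbering implied by the comparison. */
    static int version_to_int(const char *s)
    {
        int major = 0, minor = 0, patch = 0;

        sscanf(s, "%d.%d.%d", &major, &minor, &patch);
        return major * 10000 + minor * 100 + patch;
    }

    int main(void)
    {
        printf("%d\n", version_to_int("2.4.0"));  /* 20400: bitmap optimization usable */
        printf("%d\n", version_to_int("2.3.5"));  /* 20305: below 20400, bitmap disabled */
        return 0;
    }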
+ */ + if (dest_backup->external_dir_str && !params->skip_external_dirs) + { + external_dirs = make_external_directory_list(dest_backup->external_dir_str, true); + + if (!external_dirs) + elog(ERROR, "Failed to get a list of external directories"); + + if (parray_num(external_dirs) > 0) + elog(LOG, "Restore external directories"); + + for (i = 0; i < parray_num(external_dirs); i++) + fio_mkdir((const char *)parray_get(external_dirs, i), + DIR_PERMISSION, FIO_DB_HOST); + } + + /* + * Setup directory structure for external directories and file locks + */ + for (i = 0; i < parray_num(dest_files); i++) + { + pgFile *file = (pgFile *) parray_get(dest_files, i); + + if (S_ISDIR(file->mode)) + total_bytes += 4096; + + if (!params->skip_external_dirs && + file->external_dir_num && S_ISDIR(file->mode)) + { + char *external_path; + char dirpath[MAXPGPATH]; + + if (parray_num(external_dirs) < file->external_dir_num - 1) + elog(ERROR, "Inconsistent external directory backup metadata"); + + external_path = (char *)parray_get(external_dirs, file->external_dir_num - 1); + join_path_components(dirpath, external_path, file->rel_path); + + elog(VERBOSE, "Create external directory \"%s\"", dirpath); + fio_mkdir(dirpath, file->mode, FIO_DB_HOST); + } + + /* setup threads */ + pg_atomic_clear_flag(&file->lock); + } + + /* Get list of files in destination directory and remove redundant files */ + if (params->incremental_mode != INCR_NONE) + { + pgdata_files = parray_new(); + + elog(INFO, "Extracting the content of destination directory for incremental restore"); + + time(&start_time); + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(pgdata_files, pgdata_path, false, true, false, false, true, 0); + else + dir_list_file(pgdata_files, pgdata_path, + false, true, false, false, true, 0, FIO_LOCAL_HOST); + + /* get external dirs content */ + if (external_dirs) + { + for (i = 0; i < parray_num(external_dirs); i++) + { + char *external_path = (char *)parray_get(external_dirs, i); + parray *external_files = parray_new(); + + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(external_files, external_path, + false, true, false, false, true, i+1); + else + dir_list_file(external_files, external_path, + false, true, false, false, true, i+1, + FIO_LOCAL_HOST); + + parray_concat(pgdata_files, external_files); + parray_free(external_files); + } + } + + parray_qsort(pgdata_files, pgFileCompareRelPathWithExternalDesc); + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + + elog(INFO, "Destination directory content extracted, time elapsed: %s", + pretty_time); + + elog(INFO, "Removing redundant files in destination directory"); + time(&start_time); + for (i = 0; i < parray_num(pgdata_files); i++) + { + pgFile *file = (pgFile *)parray_get(pgdata_files, i); + + /* if file does not exists in destination list, then we can safely unlink it */ + if (parray_bsearch(dest_backup->files, file, pgFileCompareRelPathWithExternal) == NULL) + { + char fullpath[MAXPGPATH]; + + join_path_components(fullpath, pgdata_path, file->rel_path); + +// fio_pgFileDelete(file, full_file_path); + fio_delete(file->mode, fullpath, FIO_DB_HOST); + elog(VERBOSE, "Deleted file \"%s\"", fullpath); + + /* shrink pgdata list */ + parray_remove(pgdata_files, i); + i--; + } + } + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + + /* At this point PDATA do not contain files, that do not exists in dest backup file list */ + elog(INFO, "Redundant 
files are removed, time elapsed: %s", pretty_time); + } + + /* + * Close ssh connection belonging to the main thread + * to avoid the possibility of been killed for idleness + */ + fio_disconnect(); + + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (restore_files_arg *) palloc(sizeof(restore_files_arg) * + num_threads); + if (dest_backup->stream) + dest_bytes = dest_backup->pgdata_bytes + dest_backup->wal_bytes; + else + dest_bytes = dest_backup->pgdata_bytes; + + pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes)); + elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes); + time(&start_time); + thread_interrupted = false; + + /* Restore files into target directory */ + for (i = 0; i < num_threads; i++) + { + restore_files_arg *arg = &(threads_args[i]); + + arg->dest_files = dest_files; + arg->pgdata_files = pgdata_files; + arg->dest_backup = dest_backup; + arg->dest_external_dirs = external_dirs; + arg->parent_chain = parent_chain; + arg->skip_external_dirs = params->skip_external_dirs; + arg->to_root = pgdata_path; + arg->use_bitmap = use_bitmap; + arg->incremental_mode = params->incremental_mode; + arg->shift_lsn = params->shift_lsn; + threads_args[i].restored_bytes = 0; + /* By default there are some error */ + threads_args[i].ret = 1; + + /* Useless message TODO: rewrite */ + elog(LOG, "Start thread %i", i + 1); + + pthread_create(&threads[i], NULL, restore_files, arg); + } + + /* Wait theads */ + for (i = 0; i < num_threads; i++) + { + pthread_join(threads[i], NULL); + if (threads_args[i].ret == 1) + restore_isok = false; + + total_bytes += threads_args[i].restored_bytes; + } + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + pretty_size(total_bytes, pretty_total_bytes, lengthof(pretty_total_bytes)); + + if (restore_isok) + { + elog(INFO, "Backup files are restored. Transfered bytes: %s, time elapsed: %s", + pretty_total_bytes, pretty_time); + + elog(INFO, "Restore incremental ratio (less is better): %.f%% (%s/%s)", + ((float) total_bytes / dest_bytes) * 100, + pretty_total_bytes, pretty_dest_bytes); + } + else + elog(ERROR, "Backup files restoring failed. 
Transfered bytes: %s, time elapsed: %s", + pretty_total_bytes, pretty_time); + + /* Close page header maps */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *)parray_get(parent_chain, i); + cleanup_header_map(&(backup->hdr_map)); + } + + if (no_sync) + elog(WARNING, "Restored files are not synced to disk"); + else + { + elog(INFO, "Syncing restored files to disk"); + time(&start_time); + + for (i = 0; i < parray_num(dest_files); i++) + { + char to_fullpath[MAXPGPATH]; + pgFile *dest_file = (pgFile *)parray_get(dest_files, i); + + if (S_ISDIR(dest_file->mode)) + continue; + + /* skip external files if ordered to do so */ + if (dest_file->external_dir_num > 0 && + params->skip_external_dirs) + continue; + + /* construct fullpath */ + if (dest_file->external_dir_num == 0) + { + if (strcmp(PG_TABLESPACE_MAP_FILE, dest_file->rel_path) == 0) + continue; + if (strcmp(DATABASE_MAP, dest_file->rel_path) == 0) + continue; + join_path_components(to_fullpath, pgdata_path, dest_file->rel_path); + } + else + { + char *external_path = (char *)parray_get(external_dirs, dest_file->external_dir_num - 1); + join_path_components(to_fullpath, external_path, dest_file->rel_path); + } + + /* TODO: write test for case: file to be synced is missing */ + if (fio_sync(to_fullpath, FIO_DB_HOST) != 0) + elog(ERROR, "Failed to sync file \"%s\": %s", to_fullpath, strerror(errno)); + } + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + elog(INFO, "Restored backup files are synced, time elapsed: %s", pretty_time); + } + + /* cleanup */ + pfree(threads); + pfree(threads_args); + + if (external_dirs != NULL) + free_dir_list(external_dirs); + + if (pgdata_files) + { + parray_walk(pgdata_files, pgFileFree); + parray_free(pgdata_files); + } + + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *)parray_get(parent_chain, i); + + parray_walk(backup->files, pgFileFree); + parray_free(backup->files); + } +} + +/* + * Restore files into $PGDATA. + */ +static void * +restore_files(void *arg) +{ + int i; + uint64 n_files; + char to_fullpath[MAXPGPATH]; + FILE *out = NULL; + char *out_buf = (char *)pgut_malloc(STDIO_BUFSIZE); + + restore_files_arg *arguments = (restore_files_arg *) arg; + + n_files = (unsigned long) parray_num(arguments->dest_files); + + for (i = 0; i < parray_num(arguments->dest_files); i++) + { + bool already_exists = false; + PageState *checksum_map = NULL; /* it should take ~1.5MB at most */ + datapagemap_t *lsn_map = NULL; /* it should take 16kB at most */ + pgFile *dest_file = (pgFile *)parray_get(arguments->dest_files, i); + + /* Directories were created before */ + if (S_ISDIR(dest_file->mode)) + continue; + + if (!pg_atomic_test_set_flag(&dest_file->lock)) + continue; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during restore"); + + if (progress) + elog(INFO, "Progress: (%d/%lu). 
Restore file \"%s\"", + i + 1, n_files, dest_file->rel_path); + + /* Do not restore tablespace_map file */ + if ((dest_file->external_dir_num == 0) && + strcmp(PG_TABLESPACE_MAP_FILE, dest_file->rel_path) == 0) + { + elog(VERBOSE, "Skip tablespace_map"); + continue; + } + + /* Do not restore database_map file */ + if ((dest_file->external_dir_num == 0) && + strcmp(DATABASE_MAP, dest_file->rel_path) == 0) + { + elog(VERBOSE, "Skip database_map"); + continue; + } + + /* Do no restore external directory file if a user doesn't want */ + if (arguments->skip_external_dirs && dest_file->external_dir_num > 0) + continue; + + /* set fullpath of destination file */ + if (dest_file->external_dir_num == 0) + join_path_components(to_fullpath, arguments->to_root, dest_file->rel_path); + else + { + char *external_path = (char *)parray_get(arguments->dest_external_dirs, + dest_file->external_dir_num - 1); + join_path_components(to_fullpath, external_path, dest_file->rel_path); + } + + if (arguments->incremental_mode != INCR_NONE && + parray_bsearch(arguments->pgdata_files, dest_file, pgFileCompareRelPathWithExternalDesc)) + { + already_exists = true; + } + + /* + * Handle incremental restore case for data files. + * If file is already exists in pgdata, then + * we scan it block by block and get + * array of checksums for every page. + */ + if (already_exists && + dest_file->is_datafile && !dest_file->is_cfs && + dest_file->n_blocks > 0) + { + if (arguments->incremental_mode == INCR_LSN) + { + lsn_map = fio_get_lsn_map(to_fullpath, arguments->dest_backup->checksum_version, + dest_file->n_blocks, arguments->shift_lsn, + dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); + } + else if (arguments->incremental_mode == INCR_CHECKSUM) + { + checksum_map = fio_get_checksum_map(to_fullpath, arguments->dest_backup->checksum_version, + dest_file->n_blocks, arguments->dest_backup->stop_lsn, + dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); + } + } + + /* + * Open dest file and truncate it to zero, if destination + * file already exists and dest file size is zero, or + * if file do not exist + */ + if ((already_exists && dest_file->write_size == 0) || !already_exists) + out = fio_fopen(to_fullpath, PG_BINARY_W, FIO_DB_HOST); + /* + * If file already exists and dest size is not zero, + * then open it for reading and writing. 
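[Illustrative note, not part of the patch] Just above, a data file that already exists in the destination gets either an lsn_map (a datapagemap, one bit per block) or a checksum_map (per-page state) so individual pages can be reused during incremental restore. The toy snippet below shows only the bit bookkeeping of such a block bitmap; the block numbers are made up, the real maps come from fio_get_lsn_map()/fio_get_checksum_map(), and what a set bit means for restore is decided in restore_data_file():

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* Toy block bitmap, similar in spirit to the datapagemap used for lsn_map. */
    #define MAX_BLOCKS 64

    static uint8_t bitmap[MAX_BLOCKS / 8];

    static void set_block(uint32_t blkno)    { bitmap[blkno / 8] |= (uint8_t) (1 << (blkno % 8)); }
    static int  block_is_set(uint32_t blkno) { return (bitmap[blkno / 8] >> (blkno % 8)) & 1; }

    int main(void)
    {
        memset(bitmap, 0, sizeof(bitmap));
        set_block(3);
        set_block(17);

        for (uint32_t blk = 0; blk < 20; blk++)
            if (block_is_set(blk))
                printf("block %u is marked\n", blk);
        return 0;
    }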
+ */ + else + out = fio_fopen(to_fullpath, PG_BINARY_R "+", FIO_DB_HOST); + + if (out == NULL) + elog(ERROR, "Cannot open restore target file \"%s\": %s", + to_fullpath, strerror(errno)); + + /* update file permission */ + if (fio_chmod(to_fullpath, dest_file->mode, FIO_DB_HOST) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); + + /*if (!dest_file->is_datafile || dest_file->is_cfs) + elog(VERBOSE, "Restoring nonedata file: \"%s\"", to_fullpath); + else + elog(VERBOSE, "Restoring data file: \"%s\"", to_fullpath);*/ + + // If destination file is 0 sized, then just close it and go for the next + if (dest_file->write_size == 0) + goto done; + + /* Restore destination file */ + if (dest_file->is_datafile && !dest_file->is_cfs) + { + /* enable stdio buffering for local destination data file */ + if (!fio_is_remote_file(out)) + setvbuf(out, out_buf, _IOFBF, STDIO_BUFSIZE); + /* Destination file is data file */ + arguments->restored_bytes += restore_data_file(arguments->parent_chain, + dest_file, out, to_fullpath, + arguments->use_bitmap, checksum_map, + arguments->shift_lsn, lsn_map, true); + } + else + { + /* disable stdio buffering for local destination nonedata file */ + if (!fio_is_remote_file(out)) + setvbuf(out, NULL, _IONBF, BUFSIZ); + /* Destination file is nonedata file */ + arguments->restored_bytes += restore_non_data_file(arguments->parent_chain, + arguments->dest_backup, dest_file, out, to_fullpath, + already_exists); + } + +done: + /* close file */ + if (fio_fclose(out) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", to_fullpath, + strerror(errno)); + + /* free pagemap used for restore optimization */ + pg_free(dest_file->pagemap.bitmap); + + if (lsn_map) + pg_free(lsn_map->bitmap); + + pg_free(lsn_map); + pg_free(checksum_map); + } + + free(out_buf); + + /* ssh connection to longer needed */ + fio_disconnect(); + + /* Data files restoring is successful */ + arguments->ret = 0; + + return NULL; +} + +/* + * Create recovery.conf (probackup_recovery.conf in case of PG12) + * with given recovery target parameters + */ +static void +create_recovery_conf(time_t backup_id, + pgRecoveryTarget *rt, + pgBackup *backup, + pgRestoreParams *params) +{ + char path[MAXPGPATH]; + FILE *fp; + bool pitr_requested; + bool target_latest; + bool target_immediate; + bool restore_command_provided = false; + char restore_command_guc[16384]; + + if (instance_config.restore_command && + (pg_strcasecmp(instance_config.restore_command, "none") != 0)) + { + restore_command_provided = true; + } + + /* restore-target='latest' support */ + target_latest = rt->target_stop != NULL && + strcmp(rt->target_stop, "latest") == 0; + + target_immediate = rt->target_stop != NULL && + strcmp(rt->target_stop, "immediate") == 0; + + /* + * Note that setting restore_command alone interpreted + * as PITR with target - "until all available WAL is replayed". + * We do this because of the following case: + * The user is restoring STREAM backup as replica but + * also relies on WAL archive to catch-up with master. + * If restore_command is provided, then it should be + * added to recovery config. + * In this scenario, "would be" replica will replay + * all WAL segments available in WAL archive, after that + * it will try to connect to master via repprotocol. + * + * The risk is obvious, what if masters current state is + * in "the past" relatively to latest state in the archive? + * We will get a replica that is "in the future" to the master. 
+ * We accept this risk because its probability is low. + */ + pitr_requested = !backup->stream || rt->time_string || + rt->xid_string || rt->lsn_string || rt->target_name || + target_immediate || target_latest || restore_command_provided; + + /* No need to generate recovery.conf at all. */ + if (!pitr_requested) + { + /* + * Restoring STREAM backup without PITR and not as replica, + * recovery.signal and standby.signal for PG12 are not needed + * + * We do not add "include" option in this case because + * here we are creating empty "probackup_recovery.conf" + * to handle possible already existing "include" + * directive pointing to "probackup_recovery.conf". + * If don`t do that, recovery will fail. + */ + pg12_recovery_config(backup, false); + return; + } + + elog(LOG, "----------------------------------------"); +#if PG_VERSION_NUM >= 120000 + elog(LOG, "creating probackup_recovery.conf"); + pg12_recovery_config(backup, true); + snprintf(path, lengthof(path), "%s/probackup_recovery.conf", instance_config.pgdata); +#else + elog(LOG, "creating recovery.conf"); + snprintf(path, lengthof(path), "%s/recovery.conf", instance_config.pgdata); +#endif + + fp = fio_fopen(path, "w", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot open file \"%s\": %s", path, + strerror(errno)); + + if (fio_chmod(path, FILE_PERMISSION, FIO_DB_HOST) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", path, strerror(errno)); + +#if PG_VERSION_NUM >= 120000 + fio_fprintf(fp, "# probackup_recovery.conf generated by pg_probackup %s\n", + PROGRAM_VERSION); +#else + fio_fprintf(fp, "# recovery.conf generated by pg_probackup %s\n", + PROGRAM_VERSION); +#endif + + /* construct restore_command */ + if (pitr_requested) + { + fio_fprintf(fp, "\n## recovery settings\n"); + /* If restore_command is provided, use it. Otherwise construct it from scratch. */ + if (restore_command_provided) + sprintf(restore_command_guc, "%s", instance_config.restore_command); + else + { + /* default cmdline, ok for local restore */ + sprintf(restore_command_guc, "%s archive-get -B %s --instance %s " + "--wal-file-path=%%p --wal-file-name=%%f", + PROGRAM_FULL_PATH ? PROGRAM_FULL_PATH : PROGRAM_NAME, + backup_path, instance_name); + + /* append --remote-* parameters provided via --archive-* settings */ + if (instance_config.archive.host) + { + strcat(restore_command_guc, " --remote-host="); + strcat(restore_command_guc, instance_config.archive.host); + } + + if (instance_config.archive.port) + { + strcat(restore_command_guc, " --remote-port="); + strcat(restore_command_guc, instance_config.archive.port); + } + + if (instance_config.archive.user) + { + strcat(restore_command_guc, " --remote-user="); + strcat(restore_command_guc, instance_config.archive.user); + } + } + + /* + * We've already checked that only one of the four following mutually + * exclusive options is specified, so the order of calls is insignificant. + */ + if (rt->target_name) + fio_fprintf(fp, "recovery_target_name = '%s'\n", rt->target_name); + + if (rt->time_string) + fio_fprintf(fp, "recovery_target_time = '%s'\n", rt->time_string); + + if (rt->xid_string) + fio_fprintf(fp, "recovery_target_xid = '%s'\n", rt->xid_string); + + if (rt->lsn_string) + fio_fprintf(fp, "recovery_target_lsn = '%s'\n", rt->lsn_string); + + if (rt->target_stop && target_immediate) + fio_fprintf(fp, "recovery_target = '%s'\n", rt->target_stop); + + if (rt->inclusive_specified) + fio_fprintf(fp, "recovery_target_inclusive = '%s'\n", + rt->target_inclusive ? 
"true" : "false"); + + if (rt->target_tli) + fio_fprintf(fp, "recovery_target_timeline = '%u'\n", rt->target_tli); + else + { + /* + * In PG12 default recovery target timeline was changed to 'latest', which + * is extremely risky. Explicitly preserve old behavior of recovering to current + * timneline for PG12. + */ +#if PG_VERSION_NUM >= 120000 + fio_fprintf(fp, "recovery_target_timeline = 'current'\n"); +#endif + } + + if (rt->target_action) + fio_fprintf(fp, "recovery_target_action = '%s'\n", rt->target_action); + else + /* default recovery_target_action is 'pause' */ + fio_fprintf(fp, "recovery_target_action = '%s'\n", "pause"); + } + + if (pitr_requested) + { + elog(LOG, "Setting restore_command to '%s'", restore_command_guc); + fio_fprintf(fp, "restore_command = '%s'\n", restore_command_guc); + } + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write file \"%s\": %s", path, + strerror(errno)); + +#if PG_VERSION_NUM >= 120000 + /* + * Create "recovery.signal" to mark this recovery as PITR for PostgreSQL. + * In older versions presense of recovery.conf alone was enough. + * To keep behaviour consistent with older versions, + * we are forced to create "recovery.signal" + * even when only restore_command is provided. + * Presense of "recovery.signal" by itself determine only + * one thing: do PostgreSQL must switch to a new timeline + * after successfull recovery or not? + */ + if (pitr_requested) + { + elog(LOG, "creating recovery.signal file"); + snprintf(path, lengthof(path), "%s/recovery.signal", instance_config.pgdata); + + fp = fio_fopen(path, "w", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot open file \"%s\": %s", path, + strerror(errno)); + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write file \"%s\": %s", path, + strerror(errno)); + } +#endif +} + +/* + * Create empty probackup_recovery.conf in PGDATA and + * add "include" directive to postgresql.auto.conf + + * When restoring PG12 we always(!) must do this, even + * when restoring STREAM backup without PITR or replica options + * because restored instance may have been previously backed up + * and restored again and user didn`t cleaned up postgresql.auto.conf. + + * So for recovery to work regardless of all this factors + * we must always create empty probackup_recovery.conf file. 
+ */ +static void +pg12_recovery_config(pgBackup *backup, bool add_include) +{ +#if PG_VERSION_NUM >= 120000 + char probackup_recovery_path[MAXPGPATH]; + char postgres_auto_path[MAXPGPATH]; + FILE *fp; + + if (add_include) + { + char current_time_str[100]; + + time2iso(current_time_str, lengthof(current_time_str), current_time); + + snprintf(postgres_auto_path, lengthof(postgres_auto_path), + "%s/postgresql.auto.conf", instance_config.pgdata); + + fp = fio_fopen(postgres_auto_path, "a", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot write to file \"%s\": %s", postgres_auto_path, + strerror(errno)); + + // TODO: check if include 'probackup_recovery.conf' already exists + fio_fprintf(fp, "\n# created by pg_probackup restore of backup %s at '%s'\n", + base36enc(backup->start_time), current_time_str); + fio_fprintf(fp, "include '%s'\n", "probackup_recovery.conf"); + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write to file \"%s\": %s", postgres_auto_path, + strerror(errno)); + } + + /* Create empty probackup_recovery.conf */ + snprintf(probackup_recovery_path, lengthof(probackup_recovery_path), + "%s/probackup_recovery.conf", instance_config.pgdata); + fp = fio_fopen(probackup_recovery_path, "w", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot open file \"%s\": %s", probackup_recovery_path, + strerror(errno)); + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write to file \"%s\": %s", probackup_recovery_path, + strerror(errno)); +#endif + return; +} + +/* + * Try to read a timeline's history file. + * + * If successful, return the list of component TLIs (the ancestor + * timelines followed by target timeline). If we cannot find the history file, + * assume that the timeline has no parents, and return a list of just the + * specified timeline ID. + * based on readTimeLineHistory() in timeline.c + */ +parray * +read_timeline_history(const char *arclog_path, TimeLineID targetTLI, bool strict) +{ + parray *result; +#ifdef SUPPORT_MULTI_TIMELINE + char path[MAXPGPATH]; + char fline[MAXPGPATH]; + FILE *fd = NULL; + TimeLineHistoryEntry *entry; + TimeLineHistoryEntry *last_timeline = NULL; + + /* Look for timeline history file in archlog_path */ + snprintf(path, lengthof(path), "%s/%08X.history", arclog_path, + targetTLI); + + /* Timeline 1 does not have a history file */ + if (targetTLI != 1) + { + fd = fopen(path, "rt"); + if (fd == NULL) + { + if (errno != ENOENT) + elog(ERROR, "could not open file \"%s\": %s", path, + strerror(errno)); + + /* There is no history file for target timeline */ + if (strict) + elog(ERROR, "recovery target timeline %u does not exist", + targetTLI); + else + return NULL; + } + } + + result = parray_new(); + + /* + * Parse the file... + */ + while (fd && fgets(fline, sizeof(fline), fd) != NULL) + { + char *ptr; + TimeLineID tli; + uint32 switchpoint_hi; + uint32 switchpoint_lo; + int nfields; + + for (ptr = fline; *ptr; ptr++) + { + if (!isspace((unsigned char) *ptr)) + break; + } + if (*ptr == '\0' || *ptr == '#') + continue; + + nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo); + + if (nfields < 1) + { + /* expect a numeric timeline ID as first field of line */ + elog(ERROR, + "syntax error in history file: %s. Expected a numeric timeline ID.", + fline); + } + if (nfields != 3) + elog(ERROR, + "syntax error in history file: %s. 
Expected a transaction log switchpoint location.", + fline); + + if (last_timeline && tli <= last_timeline->tli) + elog(ERROR, + "Timeline IDs must be in increasing sequence."); + + entry = pgut_new(TimeLineHistoryEntry); + entry->tli = tli; + entry->end = ((uint64) switchpoint_hi << 32) | switchpoint_lo; + + last_timeline = entry; + /* Build list with newest item first */ + parray_insert(result, 0, entry); + + /* we ignore the remainder of each line */ + } + + if (fd && (ferror(fd))) + elog(ERROR, "Failed to read from file: \"%s\"", path); + + if (fd) + fclose(fd); + + if (last_timeline && targetTLI <= last_timeline->tli) + elog(ERROR, "Timeline IDs must be less than child timeline's ID."); + + /* append target timeline */ + entry = pgut_new(TimeLineHistoryEntry); + entry->tli = targetTLI; + /* LSN in target timeline is valid */ + entry->end = InvalidXLogRecPtr; + parray_insert(result, 0, entry); + +#endif + return result; +} + +/* TODO: do not ignore timelines. What if requested target located in different timeline? */ +bool +satisfy_recovery_target(const pgBackup *backup, const pgRecoveryTarget *rt) +{ + if (rt->xid_string) + return backup->recovery_xid <= rt->target_xid; + + if (rt->time_string) + return backup->recovery_time <= rt->target_time; + + if (rt->lsn_string) + return backup->stop_lsn <= rt->target_lsn; + + return true; +} + + +/* TODO description */ +bool +satisfy_timeline(const parray *timelines, const pgBackup *backup) +{ + int i; + +#ifdef SUPPORT_MULTI_TIMELINE + for (i = 0; i < parray_num(timelines); i++) + { + TimeLineHistoryEntry *timeline; + + timeline = (TimeLineHistoryEntry *) parray_get(timelines, i); + if (backup->tli == timeline->tli && + (XLogRecPtrIsInvalid(timeline->end) || + backup->stop_lsn <= timeline->end)) + return true; + } +#endif + return false; +} + +/* timelines represents a history of one particular timeline, + * we must determine whether a target tli is part of that history. + * + * /--------* + * ---------*--------------> + */ +bool +tliIsPartOfHistory(const parray *timelines, TimeLineID tli) +{ +#ifdef SUPPORT_MULTI_TIMELINE + int i; + + for (i = 0; i < parray_num(timelines); i++) + { + TimeLineHistoryEntry *timeline = (TimeLineHistoryEntry *) parray_get(timelines, i); + + if (tli == timeline->tli) + return true; + } + +#endif + return false; +} + +/* + * Get recovery options in the string format, parse them + * and fill up the pgRecoveryTarget structure. + */ +pgRecoveryTarget * +parseRecoveryTargetOptions(const char *target_time, + const char *target_xid, + const char *target_inclusive, + TimeLineID target_tli, + const char *target_lsn, + const char *target_stop, + const char *target_name, + const char *target_action) +{ + bool dummy_bool; + /* + * count the number of the mutually exclusive options which may specify + * recovery target. If final value > 1, throw an error. 
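[Illustrative note, not part of the patch] read_timeline_history() above consumes timeline history lines of the form "<tli> TAB <hi>/<lo>" and folds the two hex halves back into a 64-bit switchpoint LSN. A standalone parse of one made-up line, using the same sscanf format and shift as the code above:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const char *fline = "1\t0/3000000";   /* hypothetical history entry */
        unsigned int tli = 0, hi = 0, lo = 0;

        if (sscanf(fline, "%u\t%X/%X", &tli, &hi, &lo) == 3)
        {
            uint64_t switchpoint = ((uint64_t) hi << 32) | lo;   /* same as entry->end above */

            printf("timeline %u ends at %X/%X (raw %llu)\n",
                   tli, hi, lo, (unsigned long long) switchpoint);
        }
        return 0;
    }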
+ */ + int recovery_target_specified = 0; + pgRecoveryTarget *rt = pgut_new(pgRecoveryTarget); + + /* fill all options with default values */ + MemSet(rt, 0, sizeof(pgRecoveryTarget)); + + /* parse given options */ + if (target_time) + { + time_t dummy_time; + + recovery_target_specified++; + rt->time_string = target_time; + + if (parse_time(target_time, &dummy_time, false)) + rt->target_time = dummy_time; + else + elog(ERROR, "Invalid value for '--recovery-target-time' option '%s'", + target_time); + } + + if (target_xid) + { + TransactionId dummy_xid; + + recovery_target_specified++; + rt->xid_string = target_xid; + +#ifdef PGPRO_EE + if (parse_uint64(target_xid, &dummy_xid, 0)) +#else + if (parse_uint32(target_xid,(uint32 *)&dummy_xid, 0)) +#endif + rt->target_xid = dummy_xid; + else + elog(ERROR, "Invalid value for '--recovery-target-xid' option '%s'", + target_xid); + } + + if (target_lsn) + { + XLogRecPtr dummy_lsn; + + recovery_target_specified++; + rt->lsn_string = target_lsn; + if (parse_lsn(target_lsn, &dummy_lsn)) + rt->target_lsn = dummy_lsn; + else + elog(ERROR, "Invalid value of '--recovery-target-lsn' option '%s'", + target_lsn); + } + + if (target_inclusive) + { + rt->inclusive_specified = true; + if (parse_bool(target_inclusive, &dummy_bool)) + rt->target_inclusive = dummy_bool; + else + elog(ERROR, "Invalid value for '--recovery-target-inclusive' option '%s'", + target_inclusive); + } + + rt->target_tli = target_tli; + if (target_stop) + { + if ((strcmp(target_stop, "immediate") != 0) + && (strcmp(target_stop, "latest") != 0)) + elog(ERROR, "Invalid value for '--recovery-target' option '%s'", + target_stop); + + recovery_target_specified++; + rt->target_stop = target_stop; + } + + if (target_name) + { + recovery_target_specified++; + rt->target_name = target_name; + } + + if (target_action) + { + if ((strcmp(target_action, "pause") != 0) + && (strcmp(target_action, "promote") != 0) + && (strcmp(target_action, "shutdown") != 0)) + elog(ERROR, "Invalid value for '--recovery-target-action' option '%s'", + target_action); + + rt->target_action = target_action; + } + + /* More than one mutually exclusive option was defined. */ + if (recovery_target_specified > 1) + elog(ERROR, "At most one of '--recovery-target', '--recovery-target-name', " + "'--recovery-target-time', '--recovery-target-xid' or " + "'--recovery-target-lsn' options can be specified"); + + /* + * If none of the options is defined, '--recovery-target-inclusive' option + * is meaningless. + */ + if (!(rt->xid_string || rt->time_string || rt->lsn_string) && + rt->target_inclusive) + elog(ERROR, "The '--recovery-target-inclusive' option can be applied only when " + "either of '--recovery-target-time', '--recovery-target-xid' or " + "'--recovery-target-lsn' options is specified"); + + /* If none of the options is defined, '--recovery-target-action' is meaningless */ + if (rt->target_action && recovery_target_specified == 0) + elog(ERROR, "The '--recovery-target-action' option can be applied only when " + "either of '--recovery-target', '--recovery-target-time', '--recovery-target-xid', " + "'--recovery-target-lsn' or '--recovery-target-name' options is specified"); + + /* TODO: sanity for recovery-target-timeline */ + + return rt; +} + +/* Check that instance is suitable for incremental restore + * Depending on type of incremental restore requirements are differs. 
+ */ +void +check_incremental_compatibility(const char *pgdata, uint64 system_identifier, + IncrRestoreMode incremental_mode) +{ + uint64 system_id_pgdata; + bool success = true; + pid_t pid; + char backup_label[MAXPGPATH]; + + /* slurp pg_control and check that system ID is the same */ + /* check that instance is not running */ + /* if lsn_based, check that there is no backup_label files is around AND + * get redo point lsn from destination pg_control. + + * It is really important to be sure that pg_control is in cohesion with + * data files content, because based on pg_control information we will + * choose a backup suitable for lsn based incremental restore. + */ + + system_id_pgdata = get_system_identifier(pgdata); + + if (system_id_pgdata != instance_config.system_identifier) + { + elog(WARNING, "Backup catalog was initialized for system id %lu, " + "but destination directory system id is %lu", + system_identifier, system_id_pgdata); + success = false; + } + + /* check postmaster pid */ + pid = fio_check_postmaster(pgdata, FIO_DB_HOST); + + if (pid == 1) /* postmaster.pid is mangled */ + { + char pid_file[MAXPGPATH]; + + snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pgdata); + elog(WARNING, "Pid file \"%s\" is mangled, cannot determine whether postmaster is running or not", + pid_file); + success = false; + } + else if (pid > 1) /* postmaster is up */ + { + elog(WARNING, "Postmaster with pid %u is running in destination directory \"%s\"", + pid, pgdata); + success = false; + } + + /* + * TODO: maybe there should be some other signs, pointing to pg_control + * desynchronization with cluster state. + */ + if (incremental_mode == INCR_LSN) + { + snprintf(backup_label, MAXPGPATH, "%s/backup_label", pgdata); + if (fio_access(backup_label, F_OK, FIO_DB_HOST) == 0) + { + elog(WARNING, "Destination directory contains \"backup_control\" file. " + "This does NOT mean that you should delete this file and retry, only that " + "incremental restore in 'lsn' mode may produce incorrect result, when applied " + "to cluster with pg_control not synchronized with cluster state." + "Consider to use incremental restore in 'checksum' mode"); + success = false; + } + } + + if (!success) + elog(ERROR, "Incremental restore is impossible"); +} diff --git a/src/bin/pg_probackup/s_lock.cpp b/src/bin/pg_probackup/s_lock.cpp new file mode 100644 index 000000000..80a1545c9 --- /dev/null +++ b/src/bin/pg_probackup/s_lock.cpp @@ -0,0 +1,398 @@ +/*------------------------------------------------------------------------- + * + * s_lock.c + * Hardware-dependent implementation of spinlocks. + * + * When waiting for a contended spinlock we loop tightly for awhile, then + * delay using pg_usleep() and try again. Preferably, "awhile" should be a + * small multiple of the maximum time we expect a spinlock to be held. 100 + * iterations seems about right as an initial guess. However, on a + * uniprocessor the loop is a waste of cycles, while in a multi-CPU scenario + * it's usually better to spin a bit longer than to call the kernel, so we try + * to adapt the spin loop count depending on whether we seem to be in a + * uniprocessor or multiprocessor. + * + * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd + * be wrong; there are platforms where that can result in a "stuck + * spinlock" failure. This has been seen particularly on Alphas; it seems + * that the first TAS after returning from kernel space will always fail + * on that hardware. 
+ * + * Once we do decide to block, we use randomly increasing pg_usleep() + * delays. The first delay is 1 msec, then the delay randomly increases to + * about one second, after which we reset to 1 msec and start again. The + * idea here is that in the presence of heavy contention we need to + * increase the delay, else the spinlock holder may never get to run and + * release the lock. (Consider situation where spinlock holder has been + * nice'd down in priority by the scheduler --- it will not get scheduled + * until all would-be acquirers are sleeping, so if we always use a 1-msec + * sleep, there is a real possibility of starvation.) But we can't just + * clamp the delay to an upper bound, else it would take a long time to + * make a reasonable number of tries. + * + * We time out and declare error after NUM_DELAYS delays (thus, exactly + * that many tries). With the given settings, this will usually take 2 or + * so minutes. It seems better to fix the total number of tries (and thus + * the probability of unintended failure) than to fix the total time + * spent. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/storage/lmgr/s_lock.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "storage/s_lock.h" +#ifdef FRONTEND +#undef FRONTEND +#include "atomics.h" +#define FRONTEND +#else +#include "atomics.h" +#endif +#include "logger.h" + +#define MIN_SPINS_PER_DELAY 10 +#define MAX_SPINS_PER_DELAY 1000 +#define NUM_DELAYS 1000 +#define MIN_DELAY_USEC 1000L +#define MAX_DELAY_USEC 1000000L + + +slock_t dummy_spinlock; + +static int spins_per_delay = DEFAULT_SPINS_PER_DELAY; +extern int tas_sema(volatile slock_t *lock); + + +/* + * s_lock_stuck() - complain about a stuck spinlock + */ +static void +s_lock_stuck(const char *file, int line) +{ +#if defined(S_LOCK_TEST) + fprintf(stderr, + "\nStuck spinlock detected at %s:%d.\n", + file, line); + exit(1); +#else + elog(FATAL, "stuck spinlock detected at %s:%d", + file, line); +#endif +} + + +#ifdef init_spin_delay +#undef init_spin_delay +#endif +static void +init_spin_delay(SpinDelayStatus *status,const char *file, int line) +{ + status->spins = 0; + status->delays = 0; + status->cur_delay = 0; + status->file = file; + status->line = line; +} + +/* + * s_lock(lock) - platform-independent portion of waiting for a spinlock. + */ +int +s_lock(volatile slock_t *lock, const char *file, int line) +{ + SpinDelayStatus delayStatus; + + init_spin_delay(&delayStatus, file, line); + + while (TAS_SPIN(lock)) + { + perform_spin_delay(&delayStatus); + } + + finish_spin_delay(&delayStatus); + + return delayStatus.delays; +} + +#ifdef USE_DEFAULT_S_UNLOCK +void +s_unlock(volatile slock_t *lock) +{ +#ifdef TAS_ACTIVE_WORD + /* HP's PA-RISC */ + *TAS_ACTIVE_WORD(lock) = -1; +#else + *lock = 0; +#endif +} +#endif + +/* + * Wait while spinning on a contended spinlock. + */ +void +perform_spin_delay(SpinDelayStatus *status) +{ + /* CPU-specific delay each time through the loop */ + SPIN_DELAY(); + + /* Block the process every spins_per_delay tries */ + if (++(status->spins) >= spins_per_delay) + { + if (++(status->delays) > NUM_DELAYS) + s_lock_stuck(status->file, status->line); + + if (status->cur_delay == 0) /* first time to delay? 
*/ + status->cur_delay = MIN_DELAY_USEC; + + pg_usleep(status->cur_delay); + +#if defined(S_LOCK_TEST) + fprintf(stdout, "*"); + fflush(stdout); +#endif + + /* increase delay by a random fraction between 1X and 2X */ + status->cur_delay += (int) (status->cur_delay * + ((double) random() / (double) MAX_RANDOM_VALUE) + 0.5); + /* wrap back to minimum delay when max is exceeded */ + if (status->cur_delay > MAX_DELAY_USEC) + status->cur_delay = MIN_DELAY_USEC; + + status->spins = 0; + } +} + +/* + * After acquiring a spinlock, update estimates about how long to loop. + * + * If we were able to acquire the lock without delaying, it's a good + * indication we are in a multiprocessor. If we had to delay, it's a sign + * (but not a sure thing) that we are in a uniprocessor. Hence, we + * decrement spins_per_delay slowly when we had to delay, and increase it + * rapidly when we didn't. It's expected that spins_per_delay will + * converge to the minimum value on a uniprocessor and to the maximum + * value on a multiprocessor. + * + * Note: spins_per_delay is local within our current process. We want to + * average these observations across multiple backends, since it's + * relatively rare for this function to even get entered, and so a single + * backend might not live long enough to converge on a good value. That + * is handled by the two routines below. + */ +void +finish_spin_delay(SpinDelayStatus *status) +{ + if (status->cur_delay == 0) + { + /* we never had to delay */ + if (spins_per_delay < MAX_SPINS_PER_DELAY) + spins_per_delay = Min(spins_per_delay + 100, MAX_SPINS_PER_DELAY); + } + else + { + if (spins_per_delay > MIN_SPINS_PER_DELAY) + spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY); + } +} + +/* + * Set local copy of spins_per_delay during backend startup. + * + * NB: this has to be pretty fast as it is called while holding a spinlock + */ +void +set_spins_per_delay(int shared_spins_per_delay) +{ + spins_per_delay = shared_spins_per_delay; +} + +/* + * Update shared estimate of spins_per_delay during backend exit. + * + * NB: this has to be pretty fast as it is called while holding a spinlock + */ +int +update_spins_per_delay(int shared_spins_per_delay) +{ + /* + * We use an exponential moving average with a relatively slow adaption + * rate, so that noise in any one backend's result won't affect the shared + * value too much. As long as both inputs are within the allowed range, + * the result must be too, so we need not worry about clamping the result. + * + * We deliberately truncate rather than rounding; this is so that single + * adjustments inside a backend can affect the shared estimate (see the + * asymmetric adjustment rules above). + */ + return (shared_spins_per_delay * 15 + spins_per_delay) / 16; +} + + +/* + * Various TAS implementations that cannot live in s_lock.h as no inline + * definition exists (yet). + * In the future, get rid of tas.[cso] and fold it into this file. + * + * If you change something here, you will likely need to modify s_lock.h too, + * because the definitions for these are split between this file and s_lock.h. + */ + + +#ifdef HAVE_SPINLOCKS /* skip spinlocks if requested */ + + +#if defined(__GNUC__) + +/* + * All the gcc flavors that are not inlined + */ + + +/* + * Note: all the if-tests here probably ought to be testing gcc version + * rather than platform, but I don't have adequate info to know what to + * write. Ideally we'd flush all this in favor of the inline version. 
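[Illustrative note, not part of the patch] update_spins_per_delay() above blends a backend's local spins_per_delay into the shared estimate with a truncating moving average, (shared*15 + local)/16. The toy loop below only shows how quickly the shared value drifts toward one backend's observation; the starting numbers are arbitrary:

    #include <stdio.h>

    int main(void)
    {
        int shared = 100;     /* hypothetical shared estimate */
        int local  = 1000;    /* a backend that never had to delay */

        for (int exit_no = 1; exit_no <= 5; exit_no++)
        {
            shared = (shared * 15 + local) / 16;   /* same formula as update_spins_per_delay() */
            printf("after exit %d: shared = %d\n", exit_no, shared);
        }
        return 0;
    }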
+ */ +#if defined(__m68k__) && !defined(__linux__) +/* really means: extern int tas(slock_t* **lock); */ +static void +tas_dummy() +{ + __asm__ __volatile__( +#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(__ELF__) +/* no underscore for label and % for registers */ + "\ +.global tas \n\ +tas: \n\ + movel %sp@(0x4),%a0 \n\ + tas %a0@ \n\ + beq _success \n\ + moveq #-128,%d0 \n\ + rts \n\ +_success: \n\ + moveq #0,%d0 \n\ + rts \n" +#else + "\ +.global _tas \n\ +_tas: \n\ + movel sp@(0x4),a0 \n\ + tas a0@ \n\ + beq _success \n\ + moveq #-128,d0 \n\ + rts \n\ +_success: \n\ + moveq #0,d0 \n\ + rts \n" +#endif /* (__NetBSD__ || __OpenBSD__) && __ELF__ */ + ); +} +#endif /* __m68k__ && !__linux__ */ +#endif /* not __GNUC__ */ +#endif /* HAVE_SPINLOCKS */ + + + +/*****************************************************************************/ +#if defined(S_LOCK_TEST) + +/* + * test program for verifying a port's spinlock support. + */ + +struct test_lock_struct +{ + char pad1; + slock_t lock; + char pad2; +}; + +volatile struct test_lock_struct test_lock; + +int +main() +{ + srandom((unsigned int) time(NULL)); + + test_lock.pad1 = test_lock.pad2 = 0x44; + + S_INIT_LOCK(&test_lock.lock); + + if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44) + { + printf("S_LOCK_TEST: failed, declared datatype is wrong size\n"); + return 1; + } + + if (!S_LOCK_FREE(&test_lock.lock)) + { + printf("S_LOCK_TEST: failed, lock not initialized\n"); + return 1; + } + + S_LOCK(&test_lock.lock); + + if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44) + { + printf("S_LOCK_TEST: failed, declared datatype is wrong size\n"); + return 1; + } + + if (S_LOCK_FREE(&test_lock.lock)) + { + printf("S_LOCK_TEST: failed, lock not locked\n"); + return 1; + } + + S_UNLOCK(&test_lock.lock); + + if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44) + { + printf("S_LOCK_TEST: failed, declared datatype is wrong size\n"); + return 1; + } + + if (!S_LOCK_FREE(&test_lock.lock)) + { + printf("S_LOCK_TEST: failed, lock not unlocked\n"); + return 1; + } + + S_LOCK(&test_lock.lock); + + if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44) + { + printf("S_LOCK_TEST: failed, declared datatype is wrong size\n"); + return 1; + } + + if (S_LOCK_FREE(&test_lock.lock)) + { + printf("S_LOCK_TEST: failed, lock not re-locked\n"); + return 1; + } + + printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS); + printf(" exit with a 'stuck spinlock' message\n"); + printf(" if S_LOCK() and TAS() are working.\n"); + fflush(stdout); + + s_lock(&test_lock.lock, __FILE__, __LINE__); + + printf("S_LOCK_TEST: failed, lock not locked\n"); + return 1; +} + +#endif /* S_LOCK_TEST */ diff --git a/src/bin/pg_probackup/show.cpp b/src/bin/pg_probackup/show.cpp new file mode 100644 index 000000000..b4b63a9ce --- /dev/null +++ b/src/bin/pg_probackup/show.cpp @@ -0,0 +1,1116 @@ +/*------------------------------------------------------------------------- + * + * show.c: show backup information. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include +#include +#include + +#include "json.h" +#include "common/fe_memutils.h" + +#define half_rounded(x) (((x) + ((x) < 0 ? 
0 : 1)) / 2) + +/* struct to align fields printed in plain format */ +typedef struct ShowBackendRow +{ + const char *instance; + const char *version; + char backup_id[20]; + char recovery_time[100]; + const char *mode; + const char *wal_mode; + char tli[20]; + char duration[20]; + char data_bytes[20]; + char wal_bytes[20]; + char zratio[20]; + char start_lsn[20]; + char stop_lsn[20]; + const char *status; +} ShowBackendRow; + +/* struct to align fields printed in plain format */ +typedef struct ShowArchiveRow +{ + char tli[20]; + char parent_tli[20]; + char switchpoint[20]; + char min_segno[MAXFNAMELEN]; + char max_segno[MAXFNAMELEN]; + char n_segments[20]; + char size[20]; + char zratio[20]; + const char *status; + char n_backups[20]; +} ShowArchiveRow; + +static void show_instance_start(void); +static void show_instance_end(void); +static void show_instance(const char *instance_name, time_t requested_backup_id, bool show_name); +static void print_backup_json_object(PQExpBuffer buf, pgBackup *backup); +static int show_backup(const char *instance_name, time_t requested_backup_id); + +static void show_instance_plain(const char *instance_name, parray *backup_list, bool show_name); +static void show_instance_json(const char *instance_name, parray *backup_list); + +static void show_instance_archive(InstanceConfig *instance); +static void show_archive_plain(const char *instance_name, uint32 xlog_seg_size, + parray *timelines_list, bool show_name); +static void show_archive_json(const char *instance_name, uint32 xlog_seg_size, + parray *tli_list); + +static PQExpBufferData show_buf; +static bool first_instance = true; +static int32 json_level = 0; + +/* + * Entry point of pg_probackup SHOW subcommand. + */ +int +do_show(const char *instance_name, time_t requested_backup_id, bool show_archive) +{ + int i; + + if (instance_name == NULL && + requested_backup_id != INVALID_BACKUP_ID) + elog(ERROR, "You must specify --instance to use (-i, --backup-id) option"); + + if (show_archive && + requested_backup_id != INVALID_BACKUP_ID) + elog(ERROR, "You cannot specify --archive and (-i, --backup-id) options together"); + + /* + * if instance_name is not specified, + * show information about all instances in this backup catalog + */ + if (instance_name == NULL) + { + parray *instances = catalog_get_instance_list(); + + show_instance_start(); + for (i = 0; i < parray_num(instances); i++) + { + InstanceConfig *instance = (InstanceConfig *)parray_get(instances, i); + char backup_instance_path[MAXPGPATH]; + + sprintf(backup_instance_path, "%s/%s/%s", backup_path, BACKUPS_DIR, instance->name); + + if (show_archive) + show_instance_archive(instance); + else + show_instance(instance->name, INVALID_BACKUP_ID, true); + } + show_instance_end(); + + return 0; + } + /* always use */ + else if (show_format == SHOW_JSON || + requested_backup_id == INVALID_BACKUP_ID) + { + show_instance_start(); + + if (show_archive) + { + InstanceConfig *instance = readInstanceConfigFile(instance_name); + show_instance_archive(instance); + } + else + show_instance(instance_name, requested_backup_id, false); + + show_instance_end(); + + return 0; + } + else + { + if (show_archive) + { + InstanceConfig *instance = readInstanceConfigFile(instance_name); + show_instance_archive(instance); + } + else + show_backup(instance_name, requested_backup_id); + + return 0; + } +} + +void +pretty_size(int64 size, char *buf, size_t len) +{ + int64 limit = 10 * 1024; + int64 limit2 = limit * 2 - 1; + + /* minus means the size is invalid */ +// if (size < 
0) +// { +// strncpy(buf, "----", len); +// return; +// } + + if (size <= 0) + { + strncpy(buf, "0", len); + return; + } + + if (Abs(size) < limit) + snprintf(buf, len, "%dB", (int) size); + else + { + size >>= 9; + if (Abs(size) < limit2) + snprintf(buf, len, "%dkB", (int) half_rounded(size)); + else + { + size >>= 10; + if (Abs(size) < limit2) + snprintf(buf, len, "%dMB", (int) half_rounded(size)); + else + { + size >>= 10; + if (Abs(size) < limit2) + snprintf(buf, len, "%dGB", (int) half_rounded(size)); + else + { + size >>= 10; + snprintf(buf, len, "%dTB", (int) half_rounded(size)); + } + } + } + } +} + +void +pretty_time_interval(double time, char *buf, size_t len) +{ + int num_seconds = 0; + int milliseconds = 0; + int seconds = 0; + int minutes = 0; + int hours = 0; + int days = 0; + + num_seconds = (int) time; + + if (time <= 0) + { + strncpy(buf, "0", len); + return; + } + + days = num_seconds / (24 * 3600); + num_seconds %= (24 * 3600); + + hours = num_seconds / 3600; + num_seconds %= 3600; + + minutes = num_seconds / 60; + num_seconds %= 60; + + seconds = num_seconds; + milliseconds = (int)((time - (int) time) * 1000.0); + + if (days > 0) + { + snprintf(buf, len, "%dd:%dh", days, hours); + return; + } + + if (hours > 0) + { + snprintf(buf, len, "%dh:%dm", hours, minutes); + return; + } + + if (minutes > 0) + { + snprintf(buf, len, "%dm:%ds", minutes, seconds); + return; + } + + if (seconds > 0) + { + if (milliseconds > 0) + snprintf(buf, len, "%ds:%dms", seconds, milliseconds); + else + snprintf(buf, len, "%ds", seconds); + return; + } + + snprintf(buf, len, "%dms", milliseconds); + return; +} + +/* + * Initialize instance visualization. + */ +static void +show_instance_start(void) +{ + initPQExpBuffer(&show_buf); + + if (show_format == SHOW_PLAIN) + return; + + first_instance = true; + json_level = 0; + + appendPQExpBufferChar(&show_buf, '['); + json_level++; +} + +/* + * Finalize instance visualization. + */ +static void +show_instance_end(void) +{ + if (show_format == SHOW_JSON) + appendPQExpBufferStr(&show_buf, "\n]\n"); + + fputs(show_buf.data, stdout); + termPQExpBuffer(&show_buf); +} + +/* + * Show brief meta information about all backups in the backup instance. + */ +static void +show_instance(const char *instance_name, time_t requested_backup_id, bool show_name) +{ + parray *backup_list; + + backup_list = catalog_get_backup_list(instance_name, requested_backup_id); + + if (show_format == SHOW_PLAIN) + show_instance_plain(instance_name, backup_list, show_name); + else if (show_format == SHOW_JSON) + show_instance_json(instance_name, backup_list); + else + elog(ERROR, "Invalid show format %d", (int) show_format); + + /* cleanup */ + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); +} + +/* helper routine to print backup info as json object */ +static void +print_backup_json_object(PQExpBuffer buf, pgBackup *backup) +{ + TimeLineID parent_tli = 0; + char timestamp[100] = "----"; + char lsn[20]; + + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + json_add_value(buf, "id", base36enc(backup->start_time), json_level, + true); + + if (backup->parent_backup != 0) + json_add_value(buf, "parent-backup-id", + base36enc(backup->parent_backup), json_level, true); + + json_add_value(buf, "backup-mode", pgBackupGetBackupMode(backup), + json_level, true); + + json_add_value(buf, "wal", backup->stream ? 
"STREAM": "ARCHIVE", + json_level, true); + + json_add_value(buf, "compress-alg", + deparse_compress_alg(backup->compress_alg), json_level, + true); + + json_add_key(buf, "compress-level", json_level); + appendPQExpBuffer(buf, "%d", backup->compress_level); + + json_add_key(buf, "block-size", json_level); + appendPQExpBuffer(buf, "%u", backup->block_size); + + json_add_key(buf, "xlog-block-size", json_level); + appendPQExpBuffer(buf, "%u", backup->wal_block_size); + + json_add_key(buf, "checksum-version", json_level); + appendPQExpBuffer(buf, "%u", backup->checksum_version); + + json_add_value(buf, "program-version", backup->program_version, + json_level, true); + json_add_value(buf, "server-version", backup->server_version, + json_level, true); + + json_add_key(buf, "current-tli", json_level); + appendPQExpBuffer(buf, "%d", backup->tli); + + json_add_key(buf, "parent-tli", json_level); + + /* Only incremental backup can have Parent TLI */ + if (backup->parent_backup_link) + parent_tli = backup->parent_backup_link->tli; + + appendPQExpBuffer(buf, "%u", parent_tli); + + snprintf(lsn, lengthof(lsn), "%X/%X", + (uint32) (backup->start_lsn >> 32), (uint32) backup->start_lsn); + json_add_value(buf, "start-lsn", lsn, json_level, true); + + snprintf(lsn, lengthof(lsn), "%X/%X", + (uint32) (backup->stop_lsn >> 32), (uint32) backup->stop_lsn); + json_add_value(buf, "stop-lsn", lsn, json_level, true); + + time2iso(timestamp, lengthof(timestamp), backup->start_time); + json_add_value(buf, "start-time", timestamp, json_level, true); + + if (backup->end_time) + { + time2iso(timestamp, lengthof(timestamp), backup->end_time); + json_add_value(buf, "end-time", timestamp, json_level, true); + } + + json_add_key(buf, "recovery-xid", json_level); + appendPQExpBuffer(buf, XID_FMT, backup->recovery_xid); + + if (backup->recovery_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->recovery_time); + json_add_value(buf, "recovery-time", timestamp, json_level, true); + } + + if (backup->expire_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->expire_time); + json_add_value(buf, "expire-time", timestamp, json_level, true); + } + + if (backup->data_bytes != BYTES_INVALID) + { + json_add_key(buf, "data-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->data_bytes); + } + + if (backup->wal_bytes != BYTES_INVALID) + { + json_add_key(buf, "wal-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->wal_bytes); + } + + if (backup->uncompressed_bytes >= 0) + { + json_add_key(buf, "uncompressed-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->uncompressed_bytes); + } + + if (backup->uncompressed_bytes >= 0) + { + json_add_key(buf, "pgdata-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->pgdata_bytes); + } + + if (backup->external_dir_str) + json_add_value(buf, "external-dirs", backup->external_dir_str, + json_level, true); + + json_add_value(buf, "status", status2str(backup->status), json_level, + true); + + if (backup->note) + json_add_value(buf, "note", backup->note, + json_level, true); + + if (backup->content_crc != 0) + { + json_add_key(buf, "content-crc", json_level); + appendPQExpBuffer(buf, "%u", backup->content_crc); + } + + json_add(buf, JT_END_OBJECT, &json_level); +} + +/* + * Show detailed meta information about specified backup. 
+ */ +static int +show_backup(const char *instance_name, time_t requested_backup_id) +{ + int i; + pgBackup *backup = NULL; + parray *backups; + + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + /* Find requested backup */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(backups, i); + + /* found target */ + if (tmp_backup->start_time == requested_backup_id) + { + backup = tmp_backup; + break; + } + } + + if (backup == NULL) + { + // TODO for 3.0: we should ERROR out here. + elog(INFO, "Requested backup \"%s\" is not found.", + /* We do not need free base36enc's result, we exit anyway */ + base36enc(requested_backup_id)); + /* This is not error */ + return 0; + } + + if (show_format == SHOW_PLAIN) + pgBackupWriteControl(stdout, backup); + else + elog(ERROR, "Invalid show format %d", (int) show_format); + + /* cleanup */ + parray_walk(backups, pgBackupFree); + parray_free(backups); + + return 0; +} + +/* + * Show instance backups in plain format. + */ +static void +show_instance_plain(const char *instance_name, parray *backup_list, bool show_name) +{ +#define SHOW_FIELDS_COUNT 14 + int i; + const char *names[SHOW_FIELDS_COUNT] = + { "Instance", "Version", "ID", "Recovery Time", + "Mode", "WAL Mode", "TLI", "Time", "Data", "WAL", + "Zratio", "Start LSN", "Stop LSN", "Status" }; + const char *field_formats[SHOW_FIELDS_COUNT] = + { " %-*s ", " %-*s ", " %-*s ", " %-*s ", + " %-*s ", " %-*s ", " %-*s ", " %*s ", " %*s ", " %*s ", + " %*s ", " %-*s ", " %-*s ", " %-*s "}; + uint32 widths[SHOW_FIELDS_COUNT]; + uint32 widths_sum = 0; + ShowBackendRow *rows; + TimeLineID parent_tli = 0; + + for (i = 0; i < SHOW_FIELDS_COUNT; i++) + widths[i] = strlen(names[i]); + + rows = (ShowBackendRow *) palloc(parray_num(backup_list) * + sizeof(ShowBackendRow)); + + /* + * Fill row values and calculate maximum width of each field. + */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *)parray_get(backup_list, i); + ShowBackendRow *row = &rows[i]; + int cur = 0; + float zratio = 1; + + /* Instance */ + row->instance = instance_name; + widths[cur] = Max(widths[cur], strlen(row->instance)); + cur++; + + /* Version */ + row->version = backup->server_version[0] ? + backup->server_version : "----"; + widths[cur] = Max(widths[cur], strlen(row->version)); + cur++; + + /* ID */ + snprintf(row->backup_id, lengthof(row->backup_id), "%s", + base36enc(backup->start_time)); + widths[cur] = Max(widths[cur], strlen(row->backup_id)); + cur++; + + /* Recovery Time */ + if (backup->recovery_time != (time_t) 0) + time2iso(row->recovery_time, lengthof(row->recovery_time), + backup->recovery_time); + else + StrNCpy(row->recovery_time, "----", sizeof(row->recovery_time)); + widths[cur] = Max(widths[cur], strlen(row->recovery_time)); + cur++; + + /* Mode */ + row->mode = pgBackupGetBackupMode(backup); + widths[cur] = Max(widths[cur], strlen(row->mode)); + cur++; + + /* WAL mode*/ + row->wal_mode = backup->stream ? "STREAM": "ARCHIVE"; + widths[cur] = Max(widths[cur], strlen(row->wal_mode)); + cur++; + + /* Current/Parent TLI */ + if (backup->parent_backup_link != NULL) + parent_tli = backup->parent_backup_link->tli; + + snprintf(row->tli, lengthof(row->tli), "%u/%u", + backup->tli, + backup->backup_mode == BACKUP_MODE_FULL ? 
0 : parent_tli); + widths[cur] = Max(widths[cur], strlen(row->tli)); + cur++; + + /* Time */ + if (backup->status == BACKUP_STATUS_RUNNING) + pretty_time_interval(difftime(current_time, backup->start_time), + row->duration, lengthof(row->duration)); + else if (backup->merge_time != (time_t) 0) + pretty_time_interval(difftime(backup->end_time, backup->merge_time), + row->duration, lengthof(row->duration)); + else if (backup->end_time != (time_t) 0) + pretty_time_interval(difftime(backup->end_time, backup->start_time), + row->duration, lengthof(row->duration)); + else + StrNCpy(row->duration, "----", sizeof(row->duration)); + widths[cur] = Max(widths[cur], strlen(row->duration)); + cur++; + + /* Data */ + pretty_size(backup->data_bytes, row->data_bytes, + lengthof(row->data_bytes)); + widths[cur] = Max(widths[cur], strlen(row->data_bytes)); + cur++; + + /* WAL */ + pretty_size(backup->wal_bytes, row->wal_bytes, + lengthof(row->wal_bytes)); + widths[cur] = Max(widths[cur], strlen(row->wal_bytes)); + cur++; + + /* Zratio (compression ratio) */ + if (backup->uncompressed_bytes != BYTES_INVALID && + (backup->uncompressed_bytes > 0 && backup->data_bytes > 0)) + { + zratio = (float)backup->uncompressed_bytes / (backup->data_bytes); + snprintf(row->zratio, lengthof(row->zratio), "%.2f", zratio); + } + else + snprintf(row->zratio, lengthof(row->zratio), "%.2f", zratio); + + widths[cur] = Max(widths[cur], strlen(row->zratio)); + cur++; + + /* Start LSN */ + snprintf(row->start_lsn, lengthof(row->start_lsn), "%X/%X", + (uint32) (backup->start_lsn >> 32), + (uint32) backup->start_lsn); + widths[cur] = Max(widths[cur], strlen(row->start_lsn)); + cur++; + + /* Stop LSN */ + snprintf(row->stop_lsn, lengthof(row->stop_lsn), "%X/%X", + (uint32) (backup->stop_lsn >> 32), + (uint32) backup->stop_lsn); + widths[cur] = Max(widths[cur], strlen(row->stop_lsn)); + cur++; + + /* Status */ + row->status = status2str(backup->status); + widths[cur] = Max(widths[cur], strlen(row->status)); + } + + for (i = 0; i < SHOW_FIELDS_COUNT; i++) + widths_sum += widths[i] + 2 /* two space */; + + if (show_name) + appendPQExpBuffer(&show_buf, "\nBACKUP INSTANCE '%s'\n", instance_name); + + /* + * Print header. + */ + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); + + for (i = 0; i < SHOW_FIELDS_COUNT; i++) + { + appendPQExpBuffer(&show_buf, field_formats[i], widths[i], names[i]); + } + appendPQExpBufferChar(&show_buf, '\n'); + + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); + + /* + * Print values. 
+ */ + for (i = 0; i < parray_num(backup_list); i++) + { + ShowBackendRow *row = &rows[i]; + int cur = 0; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->instance); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->version); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->backup_id); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->recovery_time); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->mode); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->wal_mode); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->tli); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->duration); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->data_bytes); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->wal_bytes); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->zratio); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->start_lsn); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->stop_lsn); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->status); + cur++; + + appendPQExpBufferChar(&show_buf, '\n'); + } + + pfree(rows); +} + +/* + * Show instance backups in json format. + */ +static void +show_instance_json(const char *instance_name, parray *backup_list) +{ + int i; + PQExpBuffer buf = &show_buf; + + if (!first_instance) + appendPQExpBufferChar(buf, ','); + + /* Begin of instance object */ + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + json_add_value(buf, "instance", instance_name, json_level, true); + json_add_key(buf, "backups", json_level); + + /* + * List backups. 
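+ * Each element is a JSON object emitted by print_backup_json_object(); commas between
+ * elements are added by hand because the output is built as raw text.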
+ */ + json_add(buf, JT_BEGIN_ARRAY, &json_level); + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *)parray_get(backup_list, i); + + if (i != 0) + appendPQExpBufferChar(buf, ','); + + print_backup_json_object(buf, backup); + } + + /* End of backups */ + json_add(buf, JT_END_ARRAY, &json_level); + + /* End of instance object */ + json_add(buf, JT_END_OBJECT, &json_level); + + first_instance = false; +} + +/* + * show information about WAL archive of the instance + */ +static void +show_instance_archive(InstanceConfig *instance) +{ + parray *timelineinfos; + + timelineinfos = catalog_get_timelines(instance); + + if (show_format == SHOW_PLAIN) + show_archive_plain(instance->name, instance->xlog_seg_size, timelineinfos, true); + else if (show_format == SHOW_JSON) + show_archive_json(instance->name, instance->xlog_seg_size, timelineinfos); + else + elog(ERROR, "Invalid show format %d", (int) show_format); +} + +static void +show_archive_plain(const char *instance_name, uint32 xlog_seg_size, + parray *tli_list, bool show_name) +{ + char segno_tmp[MAXFNAMELEN]; + parray *actual_tli_list = parray_new(); +#define SHOW_ARCHIVE_FIELDS_COUNT 10 + int i; + const char *names[SHOW_ARCHIVE_FIELDS_COUNT] = + { "TLI", "Parent TLI", "Switchpoint", + "Min Segno", "Max Segno", "N segments", "Size", "Zratio", "N backups", "Status"}; + const char *field_formats[SHOW_ARCHIVE_FIELDS_COUNT] = + { " %-*s ", " %-*s ", " %-*s ", " %-*s ", + " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s "}; + uint32 widths[SHOW_ARCHIVE_FIELDS_COUNT]; + uint32 widths_sum = 0; + ShowArchiveRow *rows; + + for (i = 0; i < SHOW_ARCHIVE_FIELDS_COUNT; i++) + widths[i] = strlen(names[i]); + + /* Ignore empty timelines */ + for (i = 0; i < parray_num(tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i); + + if (tlinfo->n_xlog_files > 0) + parray_append(actual_tli_list, tlinfo); + } + + rows = (ShowArchiveRow *) gs_palloc0(parray_num(actual_tli_list) * + sizeof(ShowArchiveRow)); + + /* + * Fill row values and calculate maximum width of each field. 
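+ * Only timelines with at least one WAL segment are shown; empty ones were filtered
+ * out into actual_tli_list above.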
+ */ + for (i = 0; i < parray_num(actual_tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(actual_tli_list, i); + ShowArchiveRow *row = &rows[i]; + int cur = 0; + float zratio = 0; + + /* TLI */ + snprintf(row->tli, lengthof(row->tli), "%u", + tlinfo->tli); + widths[cur] = Max(widths[cur], strlen(row->tli)); + cur++; + + /* Parent TLI */ + snprintf(row->parent_tli, lengthof(row->parent_tli), "%u", + tlinfo->parent_tli); + widths[cur] = Max(widths[cur], strlen(row->parent_tli)); + cur++; + + /* Switchpoint LSN */ + snprintf(row->switchpoint, lengthof(row->switchpoint), "%X/%X", + (uint32) (tlinfo->switchpoint >> 32), + (uint32) tlinfo->switchpoint); + widths[cur] = Max(widths[cur], strlen(row->switchpoint)); + cur++; + + /* Min Segno */ + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->begin_segno, xlog_seg_size); + snprintf(row->min_segno, lengthof(row->min_segno), "%s",segno_tmp); + + widths[cur] = Max(widths[cur], strlen(row->min_segno)); + cur++; + + /* Max Segno */ + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->end_segno, xlog_seg_size); + snprintf(row->max_segno, lengthof(row->max_segno), "%s", segno_tmp); + + widths[cur] = Max(widths[cur], strlen(row->max_segno)); + cur++; + + /* N files */ + snprintf(row->n_segments, lengthof(row->n_segments), "%lu", + tlinfo->n_xlog_files); + widths[cur] = Max(widths[cur], strlen(row->n_segments)); + cur++; + + /* Size */ + pretty_size(tlinfo->size, row->size, + lengthof(row->size)); + widths[cur] = Max(widths[cur], strlen(row->size)); + cur++; + + /* Zratio (compression ratio) */ + if (tlinfo->size != 0) + zratio = ((float)xlog_seg_size*tlinfo->n_xlog_files) / tlinfo->size; + + snprintf(row->zratio, lengthof(row->n_segments), "%.2f", zratio); + widths[cur] = Max(widths[cur], strlen(row->zratio)); + cur++; + + /* N backups */ + snprintf(row->n_backups, lengthof(row->n_backups), "%lu", + tlinfo->backups?parray_num(tlinfo->backups):0); + widths[cur] = Max(widths[cur], strlen(row->n_backups)); + cur++; + + /* Status */ + if (tlinfo->lost_segments == NULL) + row->status = "OK"; + else + row->status = "DEGRADED"; + widths[cur] = Max(widths[cur], strlen(row->status)); + cur++; + } + + for (i = 0; i < SHOW_ARCHIVE_FIELDS_COUNT; i++) + widths_sum += widths[i] + 2 /* two space */; + + if (show_name) + appendPQExpBuffer(&show_buf, "\nARCHIVE INSTANCE '%s'\n", instance_name); + + /* + * Print header. + */ + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); + + for (i = 0; i < SHOW_ARCHIVE_FIELDS_COUNT; i++) + { + appendPQExpBuffer(&show_buf, field_formats[i], widths[i], names[i]); + } + appendPQExpBufferChar(&show_buf, '\n'); + + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); + + /* + * Print values. 
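+ * Rows are printed newest timeline first (the list is walked in reverse).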
+ */ + for (i = parray_num(actual_tli_list) - 1; i >= 0; i--) + { + ShowArchiveRow *row = &rows[i]; + int cur = 0; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->tli); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->parent_tli); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->switchpoint); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->min_segno); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->max_segno); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->n_segments); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->size); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->zratio); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->n_backups); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->status); + cur++; + appendPQExpBufferChar(&show_buf, '\n'); + } + + pfree(rows); + //TODO: free timelines +} + +static void +show_archive_json(const char *instance_name, uint32 xlog_seg_size, + parray *tli_list) +{ + int i,j; + PQExpBuffer buf = &show_buf; + parray *actual_tli_list = parray_new(); + char segno_tmp[MAXFNAMELEN]; + + if (!first_instance) + appendPQExpBufferChar(buf, ','); + + /* Begin of instance object */ + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + json_add_value(buf, "instance", instance_name, json_level, true); + json_add_key(buf, "timelines", json_level); + + /* Ignore empty timelines */ + + for (i = 0; i < parray_num(tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i); + + if (tlinfo->n_xlog_files > 0) + parray_append(actual_tli_list, tlinfo); + } + + /* + * List timelines. 
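+ * As in the plain format, timelines are emitted newest first.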
+ */ + json_add(buf, JT_BEGIN_ARRAY, &json_level); + + for (i = parray_num(actual_tli_list) - 1; i >= 0; i--) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(actual_tli_list, i); + char tmp_buf[MAXFNAMELEN]; + float zratio = 0; + + if (i != (parray_num(actual_tli_list) - 1)) + appendPQExpBufferChar(buf, ','); + + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + json_add_key(buf, "tli", json_level); + appendPQExpBuffer(buf, "%u", tlinfo->tli); + + json_add_key(buf, "parent-tli", json_level); + appendPQExpBuffer(buf, "%u", tlinfo->parent_tli); + + snprintf(tmp_buf, lengthof(tmp_buf), "%X/%X", + (uint32) (tlinfo->switchpoint >> 32), (uint32) tlinfo->switchpoint); + json_add_value(buf, "switchpoint", tmp_buf, json_level, true); + + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->begin_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "min-segno", tmp_buf, json_level, true); + + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->end_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "max-segno", tmp_buf, json_level, true); + + json_add_key(buf, "n-segments", json_level); + appendPQExpBuffer(buf, "%lu", tlinfo->n_xlog_files); + + json_add_key(buf, "size", json_level); + appendPQExpBuffer(buf, "%lu", tlinfo->size); + + json_add_key(buf, "zratio", json_level); + if (tlinfo->size != 0) + zratio = ((float)xlog_seg_size*tlinfo->n_xlog_files) / tlinfo->size; + appendPQExpBuffer(buf, "%.2f", zratio); + + if (tlinfo->closest_backup != NULL) + snprintf(tmp_buf, lengthof(tmp_buf), "%s", + base36enc(tlinfo->closest_backup->start_time)); + else + snprintf(tmp_buf, lengthof(tmp_buf), "%s", ""); + + json_add_value(buf, "closest-backup-id", tmp_buf, json_level, true); + + if (tlinfo->lost_segments == NULL) + json_add_value(buf, "status", "OK", json_level, true); + else + json_add_value(buf, "status", "DEGRADED", json_level, true); + + json_add_key(buf, "lost-segments", json_level); + + if (tlinfo->lost_segments != NULL) + { + json_add(buf, JT_BEGIN_ARRAY, &json_level); + + for (j = 0; j < parray_num(tlinfo->lost_segments); j++) + { + xlogInterval *lost_segments = (xlogInterval *) parray_get(tlinfo->lost_segments, j); + + if (j != 0) + appendPQExpBufferChar(buf, ','); + + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + GetXLogFileName(segno_tmp, tlinfo->tli, lost_segments->begin_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "begin-segno", tmp_buf, json_level, true); + + GetXLogFileName(segno_tmp, tlinfo->tli, lost_segments->end_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "end-segno", tmp_buf, json_level, true); + json_add(buf, JT_END_OBJECT, &json_level); + } + json_add(buf, JT_END_ARRAY, &json_level); + } + else + appendPQExpBuffer(buf, "[]"); + + json_add_key(buf, "backups", json_level); + + if (tlinfo->backups != NULL) + { + json_add(buf, JT_BEGIN_ARRAY, &json_level); + for (j = 0; j < parray_num(tlinfo->backups); j++) + { + pgBackup *backup = (pgBackup *)parray_get(tlinfo->backups, j); + + if (j != 0) + appendPQExpBufferChar(buf, ','); + + print_backup_json_object(buf, backup); + } + + json_add(buf, JT_END_ARRAY, &json_level); + } + else + appendPQExpBuffer(buf, "[]"); + + /* End of timeline */ + json_add(buf, JT_END_OBJECT, &json_level); + } + + /* End of timelines object */ + json_add(buf, JT_END_ARRAY, &json_level); + + /* End of instance object */ + json_add(buf, JT_END_OBJECT, 
&json_level); + + first_instance = false; +} diff --git a/src/bin/pg_probackup/thread.cpp b/src/bin/pg_probackup/thread.cpp new file mode 100644 index 000000000..5ceee068d --- /dev/null +++ b/src/bin/pg_probackup/thread.cpp @@ -0,0 +1,109 @@ +/*------------------------------------------------------------------------- + * + * thread.c: - multi-platform pthread implementations. + * + * Copyright (c) 2018-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "thread.h" + +bool thread_interrupted = false; + +#ifdef WIN32 +DWORD main_tid = 0; +#else +pthread_t main_tid = 0; +#endif +#ifdef WIN32 +#include + +typedef struct win32_pthread +{ + HANDLE handle; + void *(*routine) (void *); + void *arg; + void *result; +} win32_pthread; + +static long mutex_initlock = 0; + +static unsigned __stdcall +win32_pthread_run(void *arg) +{ + win32_pthread *th = (win32_pthread *)arg; + + th->result = th->routine(th->arg); + + return 0; +} + +int +pthread_create(pthread_t *thread, + pthread_attr_t *attr, + void *(*start_routine) (void *), + void *arg) +{ + int save_errno; + win32_pthread *th; + + th = (win32_pthread *)pg_malloc(sizeof(win32_pthread)); + th->routine = start_routine; + th->arg = arg; + th->result = NULL; + + th->handle = (HANDLE)_beginthreadex(NULL, 0, win32_pthread_run, th, 0, NULL); + if (th->handle == NULL) + { + save_errno = errno; + free(th); + return save_errno; + } + + *thread = th; + return 0; +} + +int +pthread_join(pthread_t th, void **thread_return) +{ + if (th == NULL || th->handle == NULL) + return errno = EINVAL; + + if (WaitForSingleObject(th->handle, INFINITE) != WAIT_OBJECT_0) + { + _dosmaperr(GetLastError()); + return errno; + } + + if (thread_return) + *thread_return = th->result; + + CloseHandle(th->handle); + free(th); + return 0; +} + +#endif /* WIN32 */ + +int +pthread_lock(pthread_mutex_t *mp) +{ +#ifdef WIN32 + if (*mp == NULL) + { + while (InterlockedExchange(&mutex_initlock, 1) == 1) + /* loop, another thread own the lock */ ; + if (*mp == NULL) + { + if (pthread_mutex_init(mp, NULL)) + return -1; + } + InterlockedExchange(&mutex_initlock, 0); + } +#endif + return pthread_mutex_lock(mp); +} diff --git a/src/bin/pg_probackup/thread.h b/src/bin/pg_probackup/thread.h new file mode 100644 index 000000000..2eaa5fb45 --- /dev/null +++ b/src/bin/pg_probackup/thread.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * thread.h: - multi-platform pthread implementations. 
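+ * On Windows the pthread_* names are mapped onto native Win32 threads; on other
+ * platforms the system pthread library is used directly.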
+ * + * Copyright (c) 2018-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef PROBACKUP_THREAD_H +#define PROBACKUP_THREAD_H + +#ifdef WIN32 +#include "postgres_fe.h" +#include "port/pthread-win32.h" + +/* Use native win32 threads on Windows */ +typedef struct win32_pthread *pthread_t; +typedef int pthread_attr_t; + +#define PTHREAD_MUTEX_INITIALIZER NULL //{ NULL, 0 } +#define PTHREAD_ONCE_INIT false + +extern int pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine) (void *), void *arg); +extern int pthread_join(pthread_t th, void **thread_return); +#else +/* Use platform-dependent pthread capability */ +#include +#endif + +#ifdef WIN32 +extern DWORD main_tid; +#else +extern pthread_t main_tid; +#endif + +extern bool thread_interrupted; + +extern int pthread_lock(pthread_mutex_t *mp); + +#endif /* PROBACKUP_THREAD_H */ diff --git a/src/bin/pg_probackup/util.cpp b/src/bin/pg_probackup/util.cpp new file mode 100644 index 000000000..90fb5e723 --- /dev/null +++ b/src/bin/pg_probackup/util.cpp @@ -0,0 +1,591 @@ +/*------------------------------------------------------------------------- + * + * util.c: log messages to log file or stderr, and misc code. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include "catalog/pg_control.h" + +#include + +#include + +#include +#include "common/fe_memutils.h" + +static const char *statusName[] = +{ + "UNKNOWN", + "OK", + "ERROR", + "RUNNING", + "MERGING", + "MERGED", + "DELETING", + "DELETED", + "DONE", + "ORPHAN", + "CORRUPT" +}; + +const char * +base36enc(long unsigned int value) +{ + const char base36[36 + 1] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + /* log(2**64) / log(36) = 12.38 => max 13 char + '\0' */ + static char buffer[14]; + unsigned int offset = sizeof(buffer); + + buffer[--offset] = '\0'; + do { + buffer[--offset] = base36[value % 36]; + } while (value /= 36); + + return &buffer[offset]; +} + +/* + * Same as base36enc(), but the result must be released by the user. + */ +char * +base36enc_dup(long unsigned int value) +{ + const char base36[36 + 1] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + /* log(2**64) / log(36) = 12.38 => max 13 char + '\0' */ + char buffer[14]; + unsigned int offset = sizeof(buffer); + + buffer[--offset] = '\0'; + do { + buffer[--offset] = base36[value % 36]; + } while (value /= 36); + + return strdup(&buffer[offset]); +} + +long unsigned int +base36dec(const char *text) +{ + return strtoul(text, NULL, 36); +} + +/* + * Verify control file contents in the buffer src, and copy it to *ControlFile. 
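+ * The buffer is expected to be exactly PG_CONTROL_SIZE bytes; only the leading
+ * sizeof(ControlFileData) bytes are copied out of it.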
+ */ +static void +digestControlFile(ControlFileData *ControlFile, char *src, size_t size) +{ + if (size != PG_CONTROL_SIZE) + elog(ERROR, "unexpected control file size %d, expected %d", + (int) size, PG_CONTROL_SIZE); + + memcpy(ControlFile, src, sizeof(ControlFileData)); +} + +/* + * Write ControlFile to pg_control + */ +static void +writeControlFile(ControlFileData *ControlFile, const char *path, fio_location location) +{ + int fd; + char *buffer = NULL; + + buffer = (char *)pg_malloc(PG_CONTROL_SIZE); + memcpy(buffer, ControlFile, sizeof(ControlFileData)); + + /* Write pg_control */ + fd = fio_open(path, + O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, location); + + if (fd < 0) + elog(ERROR, "Failed to open file: %s", path); + + if (fio_write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE) + elog(ERROR, "Failed to overwrite file: %s", path); + + if (fio_flush(fd) != 0) + elog(ERROR, "Failed to sync file: %s", path); + + fio_close(fd); + pg_free(buffer); +} + +/* + * Utility shared by backup and restore to fetch the current timeline + * used by a node. + */ +TimeLineID +get_current_timeline(PGconn *conn) +{ + + PGresult *res; + TimeLineID tli = 0; + char *val; + + res = pgut_execute_extended(conn, + "SELECT timeline_id FROM pg_control_checkpoint()", 0, NULL, true, true); + + if (PQresultStatus(res) == PGRES_TUPLES_OK) + val = PQgetvalue(res, 0, 0); + else + return get_current_timeline_from_control(false); + + if (!parse_uint32(val, &tli, 0)) + { + PQclear(res); + elog(WARNING, "Invalid value of timeline_id %s", val); + + /* TODO 3.0 remove it and just error out */ + return get_current_timeline_from_control(false); + } + + return tli; +} + +/* Get timeline from pg_control file */ +TimeLineID +get_current_timeline_from_control(bool safe) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, + safe, FIO_DB_HOST); + if (safe && buffer == NULL) + return 0; + + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.checkPointCopy.ThisTimeLineID; +} + +/* + * Get last check point record ptr from pg_tonrol. + */ +XLogRecPtr +get_checkpoint_location(PGconn *conn) +{ +#if PG_VERSION_NUM >= 90600 + PGresult *res; + uint32 lsn_hi; + uint32 lsn_lo; + XLogRecPtr lsn; + +#if PG_VERSION_NUM >= 100000 + res = pgut_execute(conn, + "SELECT checkpoint_lsn FROM pg_catalog.pg_control_checkpoint()", + 0, NULL); +#else + res = pgut_execute(conn, + "SELECT checkpoint_location FROM pg_catalog.pg_control_checkpoint()", + 0, NULL); +#endif + XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo); + PQclear(res); + /* Calculate LSN */ + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; + + return lsn; +#else + char *buffer; + size_t size; + ControlFileData ControlFile; + + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.checkPoint; +#endif +} + +uint64 +get_system_identifier(const char *pgdata_path) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... 
*/ + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + if (buffer == NULL) + return 0; + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.system_identifier; +} + +uint64 +get_remote_system_identifier(PGconn *conn) +{ +#if PG_VERSION_NUM >= 90600 + PGresult *res; + uint64 system_id_conn; + char *val; + + res = pgut_execute(conn, + "SELECT system_identifier FROM pg_catalog.pg_control_system()", + 0, NULL); + val = PQgetvalue(res, 0, 0); + if (!parse_uint64(val, &system_id_conn, 0)) + { + PQclear(res); + elog(ERROR, "%s is not system_identifier", val); + } + PQclear(res); + + return system_id_conn; +#else + char *buffer; + size_t size; + ControlFileData ControlFile; + + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.system_identifier; +#endif +} + +uint32 +get_xlog_seg_size(char *pgdata_path) +{ +#if PG_VERSION_NUM >= 110000 + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.xlog_seg_size; +#else + return (uint32) XLOG_SEG_SIZE; +#endif +} + +uint32 +get_data_checksum_version(bool safe) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, + safe, FIO_DB_HOST); + if (buffer == NULL) + return 0; + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return 0; // ControlFile.data_checksum_version; +} + +pg_crc32c +get_pgcontrol_checksum(const char *pgdata_path) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_BACKUP_HOST); + + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.crc; +} + +void +get_redo(const char *pgdata_path, RedoParams *redo) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + redo->lsn = ControlFile.checkPointCopy.redo; + redo->tli = ControlFile.checkPointCopy.ThisTimeLineID; + + if (ControlFile.minRecoveryPoint > 0 && + ControlFile.minRecoveryPoint < redo->lsn) + { + redo->lsn = ControlFile.minRecoveryPoint; + } + + if (ControlFile.backupStartPoint > 0 && + ControlFile.backupStartPoint < redo->lsn) + { + redo->lsn = ControlFile.backupStartPoint; + redo->tli = ControlFile.checkPointCopy.ThisTimeLineID; + } + +} + +/* + * Rewrite minRecoveryPoint of pg_control in backup directory. minRecoveryPoint + * 'as-is' is not to be trusted. 
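+ * It is overwritten with stop_backup_lsn and the pg_control CRC is recomputed
+ * before the copy is written into the backup.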
+ */ +void +set_min_recovery_point(pgFile *file, const char *backup_path, + XLogRecPtr stop_backup_lsn) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + char fullpath[MAXPGPATH]; + + /* First fetch file content */ + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + digestControlFile(&ControlFile, buffer, size); + + elog(LOG, "Current minRecPoint %X/%X", + (uint32) (ControlFile.minRecoveryPoint >> 32), + (uint32) ControlFile.minRecoveryPoint); + + elog(LOG, "Setting minRecPoint to %X/%X", + (uint32) (stop_backup_lsn >> 32), + (uint32) stop_backup_lsn); + + ControlFile.minRecoveryPoint = stop_backup_lsn; + + /* Update checksum in pg_control header */ + INIT_CRC32C(ControlFile.crc); + COMP_CRC32C(ControlFile.crc, (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(ControlFile.crc); + + /* overwrite pg_control */ + snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); + writeControlFile(&ControlFile, fullpath, FIO_LOCAL_HOST); + + /* Update pg_control checksum in backup_list */ + file->crc = ControlFile.crc; + + pg_free(buffer); +} + +/* + * Copy pg_control file to backup. We do not apply compression to this file. + */ +void +copy_pgcontrol_file(const char *from_fullpath, fio_location from_location, + const char *to_fullpath, fio_location to_location, pgFile *file) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + buffer = slurpFile(from_fullpath, "", &size, false, from_location); + + digestControlFile(&ControlFile, buffer, size); + + file->crc = ControlFile.crc; + file->read_size = size; + file->write_size = size; + file->uncompressed_size = size; + + writeControlFile(&ControlFile, to_fullpath, to_location); + + pg_free(buffer); +} + +/* + * Parse string representation of the server version. + */ +uint32 +parse_server_version(const char *server_version_str) +{ + int nfields; + uint32 result = 0; + int major_version = 0; + int minor_version = 0; + + nfields = sscanf(server_version_str, "%d.%d", &major_version, &minor_version); + if (nfields == 2) + { + /* Server version lower than 10 */ + if (major_version > 10) + elog(ERROR, "Server version format doesn't match major version %d", major_version); + result = major_version * 10000 + minor_version * 100; + } + else if (nfields == 1) + { + if (major_version < 10) + elog(ERROR, "Server version format doesn't match major version %d", major_version); + result = major_version * 10000; + } + else + elog(ERROR, "Unknown server version format %s", server_version_str); + + return result; +} + +/* + * Parse string representation of the program version. 
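+ * "major.minor.micro" is packed as major * 10000 + minor * 100 + micro,
+ * e.g. "2.4.2" becomes 20402.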
+ */ +uint32 +parse_program_version(const char *program_version) +{ + int nfields; + int major = 0, + minor = 0, + micro = 0; + uint32 result = 0; + + if (program_version == NULL || program_version[0] == '\0') + return 0; + + nfields = sscanf(program_version, "%d.%d.%d", &major, &minor, µ); + if (nfields == 3) + result = major * 10000 + minor * 100 + micro; + else + elog(ERROR, "Unknown program version format %s", program_version); + + return result; +} + +const char * +status2str(BackupStatus status) +{ + if (status < BACKUP_STATUS_INVALID || BACKUP_STATUS_CORRUPT < status) + return "UNKNOWN"; + + return statusName[status]; +} + +BackupStatus +str2status(const char *status) +{ + for (int i = 0; i <= BACKUP_STATUS_CORRUPT; i++) + { + if (pg_strcasecmp(status, statusName[i]) == 0) { + return (BackupStatus)i; + } + } + return BACKUP_STATUS_INVALID; +} + +bool +datapagemap_is_set(datapagemap_t *map, BlockNumber blkno) +{ + int offset; + int bitno; + + offset = blkno / 8; + bitno = blkno % 8; + + /* enlarge or create bitmap if needed */ + if (map->bitmapsize <= offset) + { + int oldsize = map->bitmapsize; + int newsize; + + /* + * The minimum to hold the new bit is offset + 1. But add some + * headroom, so that we don't need to repeatedly enlarge the bitmap in + * the common case that blocks are modified in order, from beginning + * of a relation to the end. + */ + newsize = offset + 1; + newsize += 10; + + map->bitmap = (unsigned char *)pg_realloc(map->bitmap, newsize); + + /* zero out the newly allocated region */ + memset(&map->bitmap[oldsize], 0, newsize - oldsize); + + map->bitmapsize = newsize; + } + + //datapagemap_print(map); + + /* check the bit */ + return map->bitmap[offset] & (1 << bitno); +} + +/* + * A debugging aid. Prints out the contents of the page map. + */ +void +datapagemap_print_debug(datapagemap_t *map) +{ + datapagemap_iterator_t *iter; + BlockNumber blocknum; + + iter = datapagemap_iterate(map); + while (datapagemap_next(iter, &blocknum)) + elog(INFO, " block %u", blocknum); + + pg_free(iter); +} + +/* + * Return pid of postmaster process running in given pgdata. + * Return 0 if there is none. + * Return 1 if postmaster.pid is mangled. + */ +pid_t +check_postmaster(const char *pgdata) +{ + FILE *fp; + pid_t pid; + char pid_file[MAXPGPATH]; + + snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pgdata); + + fp = fopen(pid_file, "r"); + if (fp == NULL) + { + /* No pid file, acceptable*/ + if (errno == ENOENT) + return 0; + else + elog(ERROR, "Cannot open file \"%s\": %s", + pid_file, strerror(errno)); + } + + if (fscanf(fp, "%i", &pid) != 1) + { + /* something is wrong with the file content */ + pid = 1; + } + + if (pid > 1) + { + if (kill(pid, 0) != 0) + { + /* process no longer exists */ + if (errno == ESRCH) + pid = 0; + else + elog(ERROR, "Failed to send signal 0 to a process %d: %s", + pid, strerror(errno)); + } + } + + fclose(fp); + return pid; +} diff --git a/src/bin/pg_probackup/validate.cpp b/src/bin/pg_probackup/validate.cpp new file mode 100644 index 000000000..7b09a13af --- /dev/null +++ b/src/bin/pg_probackup/validate.cpp @@ -0,0 +1,682 @@ +/*------------------------------------------------------------------------- + * + * validate.c: validate backup files. + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include +#include + +#include "thread.h" +#include "common/fe_memutils.h" + +static void *pgBackupValidateFiles(void *arg); +static void do_validate_instance(void); + +static bool corrupted_backup_found = false; +static bool skipped_due_to_lock = false; + +typedef struct +{ + const char *base_path; + parray *files; + bool corrupted; + XLogRecPtr stop_lsn; + uint32 checksum_version; + uint32 backup_version; + BackupMode backup_mode; + const char *external_prefix; + HeaderMap *hdr_map; + + /* + * Return value from the thread. + * 0 means there is no error, 1 - there is an error. + */ + int ret; +} validate_files_arg; + +/* + * Validate backup files. + * TODO: partial validation. + */ +void +pgBackupValidate(pgBackup *backup, pgRestoreParams *params) +{ + char base_path[MAXPGPATH]; + char external_prefix[MAXPGPATH]; + parray *files = NULL; + bool corrupted = false; + bool validation_isok = true; + /* arrays with meta info for multi threaded validate */ + pthread_t *threads; + validate_files_arg *threads_args; + int i; + + /* Check backup program version */ + if (parse_program_version(backup->program_version) > parse_program_version(PROGRAM_VERSION)) + elog(ERROR, "pg_probackup binary version is %s, but backup %s version is %s. " + "pg_probackup do not guarantee to be forward compatible. " + "Please upgrade pg_probackup binary.", + PROGRAM_VERSION, base36enc(backup->start_time), backup->program_version); + + /* Check backup server version */ + if (strcmp(backup->server_version, PG_MAJORVERSION) != 0) + elog(ERROR, "Backup %s has server version %s, but current pg_probackup binary " + "compiled with server version %s", + base36enc(backup->start_time), backup->server_version, PG_MAJORVERSION); + + if (backup->status == BACKUP_STATUS_RUNNING) + { + elog(WARNING, "Backup %s has status %s, change it to ERROR and skip validation", + base36enc(backup->start_time), status2str(backup->status)); + write_backup_status(backup, BACKUP_STATUS_ERROR, instance_name, true); + corrupted_backup_found = true; + return; + } + + /* Revalidation is attempted for DONE, ORPHAN and CORRUPT backups */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE && + backup->status != BACKUP_STATUS_ORPHAN && + backup->status != BACKUP_STATUS_MERGING && + backup->status != BACKUP_STATUS_CORRUPT) + { + elog(WARNING, "Backup %s has status %s. 
Skip validation.", + base36enc(backup->start_time), status2str(backup->status)); + corrupted_backup_found = true; + return; + } + + /* additional sanity */ + if (backup->backup_mode == BACKUP_MODE_FULL && + backup->status == BACKUP_STATUS_MERGING) + { + elog(WARNING, "Full backup %s has status %s, skip validation", + base36enc(backup->start_time), status2str(backup->status)); + return; + } + + if (backup->status == BACKUP_STATUS_OK || backup->status == BACKUP_STATUS_DONE || + backup->status == BACKUP_STATUS_MERGING) + elog(INFO, "Validating backup %s", base36enc(backup->start_time)); + else + elog(INFO, "Revalidating backup %s", base36enc(backup->start_time)); + + if (backup->backup_mode != BACKUP_MODE_FULL && + backup->backup_mode != BACKUP_MODE_DIFF_PTRACK) + elog(WARNING, "Invalid backup_mode of backup %s", base36enc(backup->start_time)); + + join_path_components(base_path, backup->root_dir, DATABASE_DIR); + join_path_components(external_prefix, backup->root_dir, EXTERNAL_DIR); + files = get_backup_filelist(backup, false); + + if (!files) + { + elog(WARNING, "Backup %s file list is corrupted", base36enc(backup->start_time)); + backup->status = BACKUP_STATUS_CORRUPT; + write_backup_status(backup, BACKUP_STATUS_CORRUPT, instance_name, true); + return; + } + + /* setup threads */ + for (i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + pg_atomic_clear_flag(&file->lock); + } + + /* init thread args with own file lists */ + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (validate_files_arg *) + palloc(sizeof(validate_files_arg) * num_threads); + + /* Validate files */ + thread_interrupted = false; + for (i = 0; i < num_threads; i++) + { + validate_files_arg *arg = &(threads_args[i]); + + arg->base_path = base_path; + arg->files = files; + arg->corrupted = false; + arg->backup_mode = backup->backup_mode; + arg->stop_lsn = backup->stop_lsn; + arg->checksum_version = backup->checksum_version; + arg->backup_version = parse_program_version(backup->program_version); + arg->external_prefix = external_prefix; + arg->hdr_map = &(backup->hdr_map); + /* By default there are some error */ + threads_args[i].ret = 1; + + pthread_create(&threads[i], NULL, pgBackupValidateFiles, arg); + } + + /* Wait theads */ + for (i = 0; i < num_threads; i++) + { + validate_files_arg *arg = &(threads_args[i]); + + pthread_join(threads[i], NULL); + if (arg->corrupted) + corrupted = true; + if (arg->ret == 1) + validation_isok = false; + } + if (!validation_isok) + elog(ERROR, "Data files validation failed"); + + pfree(threads); + pfree(threads_args); + + /* cleanup */ + parray_walk(files, pgFileFree); + parray_free(files); + cleanup_header_map(&(backup->hdr_map)); + + /* Update backup status */ + if (corrupted) + backup->status = BACKUP_STATUS_CORRUPT; + write_backup_status(backup, corrupted ? 
BACKUP_STATUS_CORRUPT : + BACKUP_STATUS_OK, instance_name, true); + + if (corrupted) + elog(WARNING, "Backup %s data files are corrupted", base36enc(backup->start_time)); + else + elog(INFO, "Backup %s data files are valid", base36enc(backup->start_time)); + + /* Issue #132 kludge */ + if (!corrupted && + ((parse_program_version(backup->program_version) == 20104)|| + (parse_program_version(backup->program_version) == 20105)|| + (parse_program_version(backup->program_version) == 20201))) + { + char path[MAXPGPATH]; + + //pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); + join_path_components(path, backup->root_dir, DATABASE_FILE_LIST); + + if (pgFileSize(path) >= (BLCKSZ*500)) + { + elog(WARNING, "Backup %s is a victim of metadata corruption. " + "Additional information can be found here: " + "https://github.com/postgrespro/pg_probackup/issues/132", + base36enc(backup->start_time)); + backup->status = BACKUP_STATUS_CORRUPT; + write_backup_status(backup, BACKUP_STATUS_CORRUPT, instance_name, true); + } + + } +} + +/* + * Validate files in the backup. + * NOTE: If file is not valid, do not use ERROR log message, + * rather throw a WARNING and set arguments->corrupted = true. + * This is necessary to update backup status. + */ +static void * +pgBackupValidateFiles(void *arg) +{ + int i; + validate_files_arg *arguments = (validate_files_arg *)arg; + int num_files = parray_num(arguments->files); + pg_crc32 crc; + + for (i = 0; i < num_files; i++) + { + struct stat st; + pgFile *file = (pgFile *) parray_get(arguments->files, i); + char file_fullpath[MAXPGPATH]; + + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during validate"); + + /* Validate only regular files */ + if (!S_ISREG(file->mode)) + continue; + + if (!pg_atomic_test_set_flag(&file->lock)) + continue; + + if (progress) + elog(INFO, "Progress: (%d/%d). Validate file \"%s\"", + i + 1, num_files, file->rel_path); + + /* + * Skip files which has no data, because they + * haven't changed between backups. + */ + if (file->write_size == BYTES_INVALID) + { + /* TODO: lookup corresponding merge bug */ + if (arguments->backup_mode == BACKUP_MODE_FULL) + { + /* It is illegal for file in FULL backup to have BYTES_INVALID */ + elog(WARNING, "Backup file \"%s\" has invalid size. Possible metadata corruption.", + file->rel_path); + arguments->corrupted = true; + break; + } + else + continue; + } + + /* no point in trying to open empty file */ + if (file->write_size == 0) + continue; + + if (file->external_dir_num) + { + char temp[MAXPGPATH]; + + makeExternalDirPathByNum(temp, arguments->external_prefix, file->external_dir_num); + join_path_components(file_fullpath, temp, file->rel_path); + } + else + join_path_components(file_fullpath, arguments->base_path, file->rel_path); + + /* TODO: it is redundant to check file existence using stat */ + if (stat(file_fullpath, &st) == -1) + { + if (errno == ENOENT) + elog(WARNING, "Backup file \"%s\" is not found", file_fullpath); + else + elog(WARNING, "Cannot stat backup file \"%s\": %s", + file_fullpath, strerror(errno)); + arguments->corrupted = true; + break; + } + + if (file->write_size != st.st_size) + { + elog(WARNING, "Invalid size of backup file \"%s\" : " INT64_FORMAT ". Expected %lu", + file_fullpath, (unsigned long) st.st_size, file->write_size); + arguments->corrupted = true; + break; + } + + /* + * If option skip-block-validation is set, compute only file-level CRC for + * datafiles, otherwise check them block by block. 
+ * Currently we don't compute checksums for + * cfs_compressed data files, so skip block validation for them. + */ + if (!file->is_datafile || skip_block_validation || file->is_cfs) + { + /* + * Pre 2.0.22 we use CRC-32C, but in newer version of pg_probackup we + * use CRC-32. + * + * pg_control stores its content and checksum of the content, calculated + * using CRC-32C. If we calculate checksum of the whole pg_control using + * CRC-32C we get same checksum constantly. It might be because of the + * CRC-32C algorithm. + * To avoid this problem we need to use different algorithm, CRC-32 in + * this case. + * + * Starting from 2.0.25 we calculate crc of pg_control differently. + */ + if (arguments->backup_version >= 20025 && + strcmp(file->name, "pg_control") == 0 && + !file->external_dir_num) + crc = get_pgcontrol_checksum(arguments->base_path); + else + crc = pgFileGetCRC(file_fullpath, + arguments->backup_version <= 20021 || + arguments->backup_version >= 20025, + false); + if (crc != file->crc) + { + elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", + file_fullpath, crc, file->crc); + // arguments->corrupted = true; + } + } + else + { + /* + * validate relation block by block + * check page headers, checksums (if enabled) + * and compute checksum of the file + */ + if (!validate_file_pages(file, file_fullpath, arguments->stop_lsn, + arguments->checksum_version, + arguments->backup_version, + arguments->hdr_map)) + arguments->corrupted = true; + } + } + + /* Data files validation is successful */ + arguments->ret = 0; + + return NULL; +} + +/* + * Validate all backups in the backup catalog. + * If --instance option was provided, validate only backups of this instance. + */ +int +do_validate_all(void) +{ + corrupted_backup_found = false; + skipped_due_to_lock = false; + + if (instance_name == NULL) + { + /* Show list of instances */ + char path[MAXPGPATH]; + DIR *dir; + struct dirent *dent; + + /* open directory and list contents */ + join_path_components(path, backup_path, BACKUPS_DIR); + dir = opendir(path); + if (dir == NULL) + elog(ERROR, "cannot open directory \"%s\": %s", path, strerror(errno)); + + errno = 0; + while ((dent = readdir(dir))) + { + char conf_path[MAXPGPATH]; + char child[MAXPGPATH]; + struct stat st; + + /* skip entries point current dir or parent dir */ + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0) + continue; + + join_path_components(child, path, dent->d_name); + + if (lstat(child, &st) == -1) + elog(ERROR, "cannot stat file \"%s\": %s", child, strerror(errno)); + + if (!S_ISDIR(st.st_mode)) + continue; + + /* + * Initialize instance configuration. 
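+ * Every subdirectory under <backup_path>/backups is treated as an instance:
+ * derive its backup and WAL archive paths and read its catalog configuration
+ * before validating its backups.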
+ */ + instance_name = dent->d_name; + sprintf(backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance_name); + join_path_components(conf_path, backup_instance_path, + BACKUP_CATALOG_CONF_FILE); + if (config_read_opt(conf_path, instance_options, ERROR, false, + true) == 0) + { + elog(WARNING, "Configuration file \"%s\" is empty", conf_path); + corrupted_backup_found = true; + continue; + } + + do_validate_instance(); + } + } + else + { + do_validate_instance(); + } + + /* TODO: Probably we should have different exit code for every condition + * and they combination: + * 0 - all backups are valid + * 1 - some backups are corrupt + * 2 - some backups where skipped due to concurrent locks + * 3 - some backups are corrupt and some are skipped due to concurrent locks + */ + + if (skipped_due_to_lock) + elog(WARNING, "Some backups weren't locked and they were skipped"); + + if (corrupted_backup_found) + { + elog(WARNING, "Some backups are not valid"); + return 1; + } + + if (!skipped_due_to_lock && !corrupted_backup_found) + elog(INFO, "All backups are valid"); + + return 0; +} + +/* + * Validate all backups in the given instance of the backup catalog. + */ +static void +do_validate_instance(void) +{ + int i; + int j; + parray *backups; + pgBackup *current_backup = NULL; + + elog(INFO, "Validate backups of the instance '%s'", instance_name); + + /* Get list of all backups sorted in order of descending start time */ + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + /* Examine backups one by one and validate them */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *base_full_backup; + + current_backup = (pgBackup *) parray_get(backups, i); + + /* Find ancestor for incremental backup */ + if (current_backup->backup_mode != BACKUP_MODE_FULL) + { + pgBackup *tmp_backup = NULL; + int result; + + result = scan_parent_chain(current_backup, &tmp_backup); + + /* chain is broken */ + if (result == ChainIsBroken) + { + char *parent_backup_id; + /* determine missing backup ID */ + + parent_backup_id = base36enc_dup(tmp_backup->parent_backup); + corrupted_backup_found = true; + + /* orphanize current_backup */ + if (current_backup->status == BACKUP_STATUS_OK || + current_backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(current_backup, BACKUP_STATUS_ORPHAN, instance_name, true); + elog(WARNING, "Backup %s is orphaned because his parent %s is missing", + base36enc(current_backup->start_time), + parent_backup_id); + } + else + { + elog(WARNING, "Backup %s has missing parent %s", + base36enc(current_backup->start_time), parent_backup_id); + } + pg_free(parent_backup_id); + continue; + } + /* chain is whole, but at least one parent is invalid */ + else if (result == ChainIsInvalid) + { + /* Oldest corrupt backup has a chance for revalidation */ + if (current_backup->start_time != tmp_backup->start_time) + { + char *backup_id = base36enc_dup(tmp_backup->start_time); + /* orphanize current_backup */ + if (current_backup->status == BACKUP_STATUS_OK || + current_backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(current_backup, BACKUP_STATUS_ORPHAN, instance_name, true); + elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", + base36enc(current_backup->start_time), backup_id, + status2str(tmp_backup->status)); + } + else + { + elog(WARNING, "Backup %s has parent %s with status: %s", + base36enc(current_backup->start_time), backup_id, + 
status2str(tmp_backup->status)); + } + pg_free(backup_id); + continue; + } + base_full_backup = find_parent_full_backup(current_backup); + + /* sanity */ + if (!base_full_backup) + elog(ERROR, "Parent full backup for the given backup %s was not found", + base36enc(current_backup->start_time)); + } + /* chain is whole, all parents are valid at first glance, + * current backup validation can proceed + */ + else + base_full_backup = tmp_backup; + } + else + base_full_backup = current_backup; + + /* Do not interrupt, validate the next backup */ + if (!lock_backup(current_backup, true)) + { + elog(WARNING, "Cannot lock backup %s directory, skip validation", + base36enc(current_backup->start_time)); + skipped_due_to_lock = true; + continue; + } + /* Valiate backup files*/ + pgBackupValidate(current_backup, NULL); + + /* Validate corresponding WAL files */ + if (current_backup->status == BACKUP_STATUS_OK) + validate_wal(current_backup, arclog_path, 0, + 0, 0, base_full_backup->tli, + instance_config.xlog_seg_size); + + /* + * Mark every descendant of corrupted backup as orphan + */ + if (current_backup->status != BACKUP_STATUS_OK) + { + char *current_backup_id; + /* This is ridiculous but legal. + * PAGE_b2 <- OK + * PAGE_a2 <- OK + * PAGE_b1 <- ORPHAN + * PAGE_a1 <- CORRUPT + * FULL <- OK + */ + + corrupted_backup_found = true; + current_backup_id = base36enc_dup(current_backup->start_time); + + for (j = i - 1; j >= 0; j--) + { + pgBackup *backup = (pgBackup *) parray_get(backups, j); + + if (is_parent(current_backup->start_time, backup, false)) + { + if (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(backup, BACKUP_STATUS_ORPHAN, instance_name, true); + + elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", + base36enc(backup->start_time), + current_backup_id, + status2str(current_backup->status)); + } + } + } + free(current_backup_id); + } + + /* For every OK backup we try to revalidate all his ORPHAN descendants. */ + if (current_backup->status == BACKUP_STATUS_OK) + { + /* revalidate all ORPHAN descendants + * be very careful not to miss a missing backup + * for every backup we must check that he is descendant of current_backup + */ + for (j = i - 1; j >= 0; j--) + { + pgBackup *backup = (pgBackup *) parray_get(backups, j); + pgBackup *tmp_backup = NULL; + int result; + + //PAGE_b2 ORPHAN + //PAGE_b1 ORPHAN ----- + //PAGE_a5 ORPHAN | + //PAGE_a4 CORRUPT | + //PAGE_a3 missing | + //PAGE_a2 missing | + //PAGE_a1 ORPHAN | + //PAGE OK <- we are here<-| + //FULL OK + + if (is_parent(current_backup->start_time, backup, false)) + { + /* Revalidation make sense only if parent chain is whole. + * is_parent() do not guarantee that. 
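+ * scan_parent_chain() below reports whether the chain is broken,
+ * contains an invalid backup, or is whole.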
+ */ + result = scan_parent_chain(backup, &tmp_backup); + + if (result == ChainIsInvalid) + { + /* revalidation make sense only if oldest invalid backup is current_backup + */ + + if (tmp_backup->start_time != backup->start_time) + continue; + + if (backup->status == BACKUP_STATUS_ORPHAN) + { + /* Do not interrupt, validate the next backup */ + if (!lock_backup(backup, true)) + { + elog(WARNING, "Cannot lock backup %s directory, skip validation", + base36enc(backup->start_time)); + skipped_due_to_lock = true; + continue; + } + /* Revalidate backup files*/ + pgBackupValidate(backup, NULL); + + if (backup->status == BACKUP_STATUS_OK) + { + + /* Revalidation successful, validate corresponding WAL files */ + validate_wal(backup, arclog_path, 0, + 0, 0, current_backup->tli, + instance_config.xlog_seg_size); + } + } + + if (backup->status != BACKUP_STATUS_OK) + { + corrupted_backup_found = true; + continue; + } + } + } + } + } + } + + /* cleanup */ + parray_walk(backups, pgBackupFree); + parray_free(backups); +} diff --git a/src/common/backend/catalog/builtin_funcs.ini b/src/common/backend/catalog/builtin_funcs.ini index a7322d6e7..c066b866b 100644 --- a/src/common/backend/catalog/builtin_funcs.ini +++ b/src/common/backend/catalog/builtin_funcs.ini @@ -6227,10 +6227,18 @@ "pg_conf_load_time", 1, AddBuiltinFunc(_0(2034), _1("pg_conf_load_time"), _2(0), _3(true), _4(false), _5(pg_conf_load_time), _6(1184), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14('f'), _15(false), _16(false), _17('s'), _18(0), _19(0), _20(NULL), _21(NULL), _22(NULL), _23(NULL), _24("pg_conf_load_time"), _25(NULL), _26(NULL), _27(NULL), _28(0), _29(false), _30(NULL), _31(false)) ), + AddFuncGroup( + "pg_control_checkpoint", 1, + AddBuiltinFunc(_0(3442), _1("pg_control_checkpoint"), _2(0), _3(true), _4(true), _5(pg_control_checkpoint), _6(2249), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(12), _12(0), _13(0), _14('f'), _15(false), _16(false), _17('v'), _18(0), _19(0), _20(12, 20, 20, 25, 23, 16, 26, 28, 28, 28, 26, 28, 1184), _21(12, 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o'), _22(12, "checkpoint_lsn", "redo_lsn", "redo_wal_file", "timeline_id", "full_page_writes", "next_oid", "next_multixact_id", "next_multi_offset", "oldest_xid", "oldest_xid_dbid", "oldest_active_xid", "checkpoint_time"), _23(NULL), _24("pg_control_checkpoint"), _25(NULL), _26(NULL), _27(NULL), _28(0), _29(false), _30(NULL), _31(false)) + ), AddFuncGroup( "pg_control_group_config", 1, AddBuiltinFunc(_0(3843), _1("pg_control_group_config"), _2(0), _3(true), _4(false), _5(pg_control_group_config), _6(25), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14('f'), _15(false), _16(false), _17('s'), _18(0), _19(0), _20(NULL), _21(NULL), _22(NULL), _23(NULL), _24("pg_control_group_config"), _25(NULL), _26(NULL), _27(NULL), _28(0), _29(false), _30(NULL), _31(false)) ), + AddFuncGroup( + "pg_control_system", 1, + AddBuiltinFunc(_0(3441), _1("pg_control_system"), _2(0), _3(true), _4(true), _5(pg_control_system), _6(2249), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(5), _12(0), _13(0), _14('f'), _15(false), _16(false), _17('v'), _18(0), _19(0), _20(4, 23, 23, 20, 1184), _21(4, 'o', 'o', 'o', 'o'), _22(4, "pg_control_version", "catalog_version_no", "system_identifier", "pg_control_last_modified"), _23(NULL), _24("pg_control_system"), 
_25(NULL), _26(NULL), _27(NULL), _28(0), _29(false), _30(NULL), _31(false)) + ), AddFuncGroup( "pg_conversion_is_visible", 1, AddBuiltinFunc(_0(2093), _1("pg_conversion_is_visible"), _2(1), _3(true), _4(false), _5(pg_conversion_is_visible), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14('f'), _15(false), _16(false), _17('s'), _18(0), _19(1, 26), _20(NULL), _21(NULL), _22(NULL), _23(NULL), _24("pg_conversion_is_visible"), _25(NULL), _26(NULL), _27(NULL), _28(0), _29(false), _30(NULL), _31(false)) diff --git a/src/common/backend/utils/misc/Makefile b/src/common/backend/utils/misc/Makefile index 9da714b5f..13e72905a 100755 --- a/src/common/backend/utils/misc/Makefile +++ b/src/common/backend/utils/misc/Makefile @@ -22,7 +22,7 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = guc.o help_config.o pg_rusage.o ps_status.o superuser.o tzparser.o \ - rbtree.o anls_opt.o sec_rls_utils.o + rbtree.o anls_opt.o sec_rls_utils.o pg_controldata.o # This location might depend on the installation directories. Therefore # we can't subsitute it into pg_config.h. diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index 6ded1a2a3..e3c449c87 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -4321,7 +4321,6 @@ static void init_configure_names_bool() NULL, NULL }, -#ifdef ENABLE_MULTIPLE_NODES { { "enable_cbm_tracking", @@ -4336,7 +4335,6 @@ static void init_configure_names_bool() NULL, NULL }, -#endif { { "enable_copy_server_files", @@ -12347,7 +12345,6 @@ static void init_single_node_unsupport_guc() u_sess->attr.attr_sql.enable_agg_pushdown_for_cooperation_analysis = true; u_sess->attr.attr_common.enable_tsdb = false; u_sess->attr.attr_sql.acceleration_with_compute_pool = false; - u_sess->attr.attr_storage.enable_cbm_tracking = false; u_sess->attr.attr_sql.enable_constraint_optimization = true; u_sess->attr.attr_sql.enable_csqual_pushdown = true; u_sess->attr.attr_sql.enable_hadoop_env = false; diff --git a/src/common/backend/utils/misc/pg_controldata.cpp b/src/common/backend/utils/misc/pg_controldata.cpp new file mode 100644 index 000000000..2c38d7789 --- /dev/null +++ b/src/common/backend/utils/misc/pg_controldata.cpp @@ -0,0 +1,277 @@ +/* ------------------------------------------------------------------------- + * + * pg_controldata.cpp + * + * Routines to expose the contents of the control data file via + * a set of SQL functions. 
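+ * pg_control_system() and pg_control_checkpoint() read global/pg_control in the
+ * data directory and return selected fields as a single-row result,
+ * e.g. SELECT * FROM pg_control_checkpoint();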
+ * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/misc/pg_controldata.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +// #include "access/htup_details.h" +#include "access/transam.h" +#include "access/xlog_internal.h" +#include "access/xlog.h" +#include "catalog/pg_control.h" +#include "catalog/pg_type.h" +// #include "common/controldata_utils.h" +#include "utils/elog.h" +#include "catalog/pg_control.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/pg_lsn.h" +#include "utils/timestamp.h" + +ControlFileData* get_controlfile(const char *DataDir, bool *crc_ok_p); + +static inline char* getDataDir() +{ + if (t_thrd.proc_cxt.DataDir != NULL && strlen(t_thrd.proc_cxt.DataDir) > 0) { + return t_thrd.proc_cxt.DataDir; + } else { + char *dataDir = getenv("PGDATA"); + if (dataDir == NULL) { + ereport(ERROR, (errmsg("Cannot open control file"))); + return NULL; + } + return dataDir; + } +} + +Datum pg_control_system(PG_FUNCTION_ARGS) +{ + Datum values[4]; + bool nulls[4]; + TupleDesc tupdesc; + HeapTuple htup; + ControlFileData *ControlFile; + bool crc_ok; + + /* + * Construct a tuple descriptor for the result row. This must match this + * function's pg_proc entry! + */ + tupdesc = CreateTemplateTupleDesc(4, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pg_control_version", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "catalog_version_no", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "system_identifier", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "pg_control_last_modified", TIMESTAMPTZOID, -1, 0); + tupdesc = BlessTupleDesc(tupdesc); + + /* read the control file */ + ControlFile = get_controlfile(getDataDir(), &crc_ok); + if (!crc_ok) + ereport(ERROR, + (errmsg("calculated CRC checksum does not match value stored in file"))); + + values[0] = Int32GetDatum(ControlFile->pg_control_version); + nulls[0] = false; + + values[1] = Int32GetDatum(ControlFile->catalog_version_no); + nulls[1] = false; + + values[2] = Int64GetDatum(ControlFile->system_identifier); + nulls[2] = false; + + values[3] = TimestampTzGetDatum(time_t_to_timestamptz(ControlFile->time)); + nulls[3] = false; + + htup = heap_form_tuple(tupdesc, values, nulls); + + PG_RETURN_DATUM(HeapTupleGetDatum(htup)); +} + +/* + * get_controlfile() + * + * Get controlfile values. The result is returned as a palloc'd copy of the + * control file data. + * + * crc_ok_p can be used by the caller to see whether the CRC of the control + * file data is correct. 
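+ * In backend builds read failures are reported with ereport(ERROR);
+ * FRONTEND builds log the error and exit.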
+ */ +ControlFileData* get_controlfile(const char *DataDir, bool *crc_ok_p) +{ + ControlFileData *ControlFile; + int fd; + char ControlFilePath[MAXPGPATH]; + pg_crc32c crc; + int r; + + AssertArg(crc_ok_p); + + ControlFile = (ControlFileData*)palloc(sizeof(ControlFileData)); + snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir); + +#ifndef FRONTEND + if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR)) == -1) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", ControlFilePath))); +#else + if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1) + { + pg_log_fatal("could not open file \"%s\" for reading: %m", + ControlFilePath); + exit(EXIT_FAILURE); + } +#endif + + r = read(fd, ControlFile, sizeof(ControlFileData)); + if (r != sizeof(ControlFileData)) + { + if (r < 0) +#ifndef FRONTEND + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", ControlFilePath))); +#else + { + pg_log_fatal("could not read file \"%s\": %m", ControlFilePath); + exit(EXIT_FAILURE); + } +#endif + else +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not read file \"%s\": read %d of %zu", + ControlFilePath, r, sizeof(ControlFileData)))); +#else + { + pg_log_fatal("could not read file \"%s\": read %d of %zu", ControlFilePath, r, sizeof(ControlFileData)); + exit(EXIT_FAILURE); + } +#endif + } + +#ifndef FRONTEND + if (CloseTransientFile(fd)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", ControlFilePath))); +#else + if (close(fd)) + { + pg_log_fatal("could not close file \"%s\": %m", ControlFilePath); + exit(EXIT_FAILURE); + } +#endif + + /* Check the CRC. */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(crc); + + *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc); + + /* Make sure the control file is valid byte order. */ + if (ControlFile->pg_control_version % 65536 == 0 && + ControlFile->pg_control_version / 65536 != 0) +#ifndef FRONTEND + elog(ERROR, _("byte ordering mismatch")); +#else + pg_log_warning("possible byte ordering mismatch\n" + "The byte ordering used to store the pg_control file might not match the one\n" + "used by this program. In that case the results below would be incorrect, and\n" + "the PostgreSQL installation would be incompatible with this data directory."); +#endif + + return ControlFile; +} + +Datum pg_control_checkpoint(PG_FUNCTION_ARGS) +{ + Datum values[12]; + bool nulls[12]; + TupleDesc tupdesc; + HeapTuple htup; + ControlFileData *controlFile; + XLogSegNo segno; + char xlogfilename[MAXFNAMELEN]; + bool crc_ok; + + /* + * Construct a tuple descriptor for the result row. This must match this + * function's pg_proc entry! 
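+ * (The 12 columns below must stay in sync with the pg_control_checkpoint
+ * entry registered in builtin_funcs.ini by this patch.)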
+ */ + tupdesc = CreateTemplateTupleDesc(12, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "checkpoint_lsn", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "redo_lsn", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "redo_wal_file", TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "timeline_id", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "full_page_writes", BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "next_oid", OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "next_multixact_id", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "next_multi_offset", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "oldest_xid", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "oldest_xid_dbid", OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "oldest_active_xid", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "checkpoint_time", TIMESTAMPTZOID, -1, 0); + tupdesc = BlessTupleDesc(tupdesc); + + /* Read the control file. */ + controlFile = get_controlfile(getDataDir(), &crc_ok); + if (!crc_ok) + ereport(ERROR, + (errmsg("calculated CRC checksum does not match value stored in file"))); + + /* + * Calculate name of the WAL file containing the latest checkpoint's REDO + * start point. + */ + XLByteToSeg(controlFile->checkPointCopy.redo, segno); + + XLogFileName(xlogfilename, controlFile->checkPointCopy.ThisTimeLineID, segno); + + /* Populate the values and null arrays */ + values[0] = LSNGetDatum(controlFile->checkPoint); + nulls[0] = false; + + values[1] = LSNGetDatum(controlFile->checkPointCopy.redo); + nulls[1] = false; + + values[2] = CStringGetTextDatum(xlogfilename); + nulls[2] = false; + + values[3] = Int32GetDatum(controlFile->checkPointCopy.ThisTimeLineID); + nulls[3] = false; + + values[4] = BoolGetDatum(controlFile->checkPointCopy.fullPageWrites); + nulls[4] = false; + + values[5] = ObjectIdGetDatum(controlFile->checkPointCopy.nextOid); + nulls[5] = false; + + values[6] = TransactionIdGetDatum(controlFile->checkPointCopy.nextMulti); + nulls[6] = false; + + values[7] = TransactionIdGetDatum(controlFile->checkPointCopy.nextMultiOffset); + nulls[7] = false; + + values[8] = TransactionIdGetDatum(controlFile->checkPointCopy.oldestXid); + nulls[8] = false; + + values[9] = ObjectIdGetDatum(controlFile->checkPointCopy.oldestXidDB); + nulls[9] = false; + + values[10] = TransactionIdGetDatum(controlFile->checkPointCopy.oldestActiveXid); + nulls[10] = false; + + values[11] = TimestampTzGetDatum(time_t_to_timestamptz(controlFile->checkPointCopy.time)); + nulls[11] = false; + + htup = heap_form_tuple(tupdesc, values, nulls); + + PG_RETURN_DATUM(HeapTupleGetDatum(htup)); +} \ No newline at end of file diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 7b9d36e5b..a38c46db6 100755 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -3631,6 +3631,7 @@ int ProcessStartupPacket(Port* port, bool SSLdone) int channel_adapt = 0, i = 0; if (!IS_PGXC_COORDINATOR) { +#ifdef ENABLE_MULTIPLE_NODES if (NORMAL_MODE == hashmdata->current_mode) { if (!u_sess->proc_cxt.clientIsGsBasebackup && !AM_WAL_DB_SENDER) { ereport(elevel, @@ -3638,6 +3639,7 @@ int ProcessStartupPacket(Port* port, bool SSLdone) "could not accept HA connection."))); } } +#endif for (i = 1; i < MAX_REPLNODE_NUM; i++) { if (t_thrd.postmaster_cxt.ReplConnArray[i] 
!= NULL && diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index f316d54b6..1516ab8a2 100755 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -62,6 +62,26 @@ securec_check_ss(nRet, "\0", "\0"); \ } while (0) #else +#define XLOG_FNAME_LEN 24 +#define IsXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN) + +#define IsPartialXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".partial") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".partial") == 0) + +#define IsTLHistoryFileName(fname) \ + (strlen(fname) == 8 + strlen(".history") && \ + strspn(fname, "0123456789ABCDEF") == 8 && \ + strcmp((fname) + 8, ".history") == 0) + +#define IsBackupHistoryFileName(fname) \ + (strlen(fname) > XLOG_FNAME_LEN && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + strlen(fname) - strlen(".backup"), ".backup") == 0) + #define XLogFileName(fname, tli, logSegNo) \ do { \ int nRet; \ diff --git a/src/include/pg_getopt.h b/src/include/pg_getopt.h new file mode 100644 index 000000000..639a1613c --- /dev/null +++ b/src/include/pg_getopt.h @@ -0,0 +1,56 @@ +/* + * Postgres files that use getopt(3) always include this file. + * We must cope with three different scenarios: + * 1. We're using the platform's getopt(), and we should just import the + * appropriate declarations. + * 2. The platform lacks getopt(), and we must declare everything. + * 3. The platform has getopt(), but we're not using it because we don't + * like its behavior. The declarations we make here must be compatible + * with both the platform's getopt() and our src/port/getopt.c. + * + * Portions Copyright (c) 1987, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Portions Copyright (c) 2003-2019, PostgreSQL Global Development Group + * + * src/include/pg_getopt.h + */ +#ifndef PG_GETOPT_H +#define PG_GETOPT_H + +/* POSIX says getopt() is provided by unistd.h */ +#include + +/* rely on the system's getopt.h if present */ +#ifdef HAVE_GETOPT_H +#include +#endif + +/* + * If we have , assume it declares these variables, else do that + * ourselves. (We used to just declare them unconditionally, but Cygwin + * doesn't like that.) + */ +#ifndef HAVE_GETOPT_H + +extern char *optarg; +extern int optind; +extern int opterr; +extern int optopt; + +#endif /* HAVE_GETOPT_H */ + +/* + * Some platforms have optreset but fail to declare it in , so cope. + * Cygwin, however, doesn't like this either. 
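+ * The declaration below is therefore guarded by HAVE_INT_OPTRESET and skipped on Cygwin.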
+ */ +#if defined(HAVE_INT_OPTRESET) && !defined(__CYGWIN__) +extern int optreset; +#endif + +/* Provide getopt() declaration if the platform doesn't have it */ +#ifndef HAVE_GETOPT +extern int getopt(int nargc, char *const *nargv, const char *ostr); +#endif + +#endif /* PG_GETOPT_H */ diff --git a/src/include/pgtar.h b/src/include/pgtar.h new file mode 100644 index 000000000..e69d87d88 --- /dev/null +++ b/src/include/pgtar.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * pgtar.h + * Functions for manipulating tarfile datastructures (src/port/tar.c) + * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/pgtar.h + * + *------------------------------------------------------------------------- + */ + +enum tarError +{ + TAR_OK = 0, + TAR_NAME_TOO_LONG, + TAR_SYMLINK_TOO_LONG +}; + +extern enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, + pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime); +extern uint64 read_tar_number(const char *s, int len); +extern void print_tar_number(char *s, int len, uint64 val); +extern int tarChecksum(char *header); diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 42cd13384..cdf754e0f 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -179,6 +179,11 @@ typedef HeapPageHeaderData* HeapPageHeader; #define SizeOfHeapPageUpgradeData MAXALIGN(offsetof(HeapPageHeaderData, pd_linp) - offsetof(PageHeaderData, pd_linp)) +#define PageXLogRecPtrGet(val) \ + ((uint64) (val).xlogid << 32 | (val).xrecoff) +#define PageXLogRecPtrSet(ptr, lsn) \ + ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn)) + /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. 
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 62a02bccd..c921a59fb 100755 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -1268,6 +1268,10 @@ extern Datum show_config_by_name(PG_FUNCTION_ARGS); extern Datum set_config_by_name(PG_FUNCTION_ARGS); extern Datum show_all_settings(PG_FUNCTION_ARGS); +/* pg_controldata.cpp */ +extern Datum pg_control_system(PG_FUNCTION_ARGS); +extern Datum pg_control_checkpoint(PG_FUNCTION_ARGS); + /* lockfuncs.c */ extern Datum pg_lock_status(PG_FUNCTION_ARGS); extern Datum pg_advisory_lock_int8(PG_FUNCTION_ARGS); diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h index 1d89b6def..fb261458c 100644 --- a/src/include/utils/pg_crc.h +++ b/src/include/utils/pg_crc.h @@ -49,7 +49,8 @@ typedef uint32 pg_crc32; * * using CRC32C instead */ - +#define INIT_TRADITIONAL_CRC32(crc) ((crc) = 0xFFFFFFFF) +#define FIN_TRADITIONAL_CRC32(crc) ((crc) ^= 0xFFFFFFFF) /* Initialize a CRC accumulator */ #define INIT_CRC32(crc) ((crc) = 0xFFFFFFFF) @@ -58,7 +59,7 @@ typedef uint32 pg_crc32; /* Check for equality of two CRCs */ #define EQ_CRC32(c1, c2) ((c1) == (c2)) - +#define EQ_TRADITIONAL_CRC32(c1, c2) ((c1) == (c2)) /* Accumulate some (more) bytes into a CRC */ #define COMP_CRC32(crc, data, len) \ do { \ @@ -74,6 +75,23 @@ typedef uint32 pg_crc32; /* Constant table for CRC calculation */ extern CRCDLLIMPORT uint32 pg_crc32_table[]; +#define COMP_TRADITIONAL_CRC32(crc, data, len) \ + COMP_CRC32_NORMAL_TABLE(crc, data, len, pg_crc32_table) + +/* Sarwate's algorithm, for use with a "normal" lookup table */ +#define COMP_CRC32_NORMAL_TABLE(crc, data, len, table) \ +do { \ + const unsigned char *__data = (const unsigned char *) (data); \ + uint32 __len = (len); \ +\ + while (__len-- > 0) \ + { \ + int __tab_index = ((int) (crc) ^ *__data++) & 0xFF; \ + (crc) = table[__tab_index] ^ ((crc) >> 8); \ + } \ +} while (0) + + #ifdef PROVIDE_64BIT_CRC /* diff --git a/src/lib/pgcommon/fe_memutils.cpp b/src/lib/pgcommon/fe_memutils.cpp index 077efef75..5f22201fb 100644 --- a/src/lib/pgcommon/fe_memutils.cpp +++ b/src/lib/pgcommon/fe_memutils.cpp @@ -46,6 +46,12 @@ void* pg_malloc(size_t size) return pg_malloc_internal(size, 0); } +void * +pg_malloc0(size_t size) +{ + return pg_malloc_internal(size, MCXT_ALLOC_ZERO); +} + void* pg_realloc(void* ptr, size_t size) { void* tmp = NULL; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 882d64f95..e71af0553 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1933,6 +1933,8 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 3332 | get_local_prepared_xact 3333 | get_remote_prepared_xacts 3334 | global_clean_prepared_xacts + 3441 | pg_control_system + 3442 | pg_control_checkpoint 3452 | bytea_sortsupport 3454 | pg_filenode_relation 3464 | gs_encrypt_aes128 @@ -2639,7 +2641,7 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 9016 | pg_advisory_lock 9017 | pgxc_unlock_for_sp_database 9999 | pg_test_err_contain_err -(2277 rows) +(2279 rows) -- **************** pg_cast **************** -- Catch bogus values in pg_cast columns (other than cases detected by diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index e43285c01..d1c367d30 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -17,6 +17,7 @@ SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%' ORDER 
BY name; enable_bitmapscan | on enable_bloom_filter | on enable_broadcast | on + enable_cbm_tracking | off enable_change_hjcost | off enable_codegen | on enable_codegen_print | off @@ -83,7 +84,7 @@ SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%' ORDER BY name; enable_vector_engine | on enable_wdr_snapshot | off enable_xlog_prune | on -(79 rows) +(80 rows) CREATE TABLE foo2(fooid int, f2 int); INSERT INTO foo2 VALUES(1, 11); diff --git a/src/test/regress/expected/single_node_opr_sanity.out b/src/test/regress/expected/single_node_opr_sanity.out index eeedc0bc8..db09ee78c 100755 --- a/src/test/regress/expected/single_node_opr_sanity.out +++ b/src/test/regress/expected/single_node_opr_sanity.out @@ -1972,6 +1972,8 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 3332 | get_local_prepared_xact 3333 | get_remote_prepared_xacts 3334 | global_clean_prepared_xacts + 3441 | pg_control_system + 3442 | pg_control_checkpoint 3452 | bytea_sortsupport 3454 | pg_filenode_relation 3464 | gs_encrypt_aes128 @@ -2678,7 +2680,7 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 9016 | pg_advisory_lock 9017 | pgxc_unlock_for_sp_database 9999 | pg_test_err_contain_err -(2277 rows) +(2279 rows) -- Check prokind select count(*) from pg_proc where prokind = 'a'; @@ -2696,7 +2698,7 @@ select count(*) from pg_proc where prokind = 'w'; select count(*) from pg_proc where prokind = 'f'; count ------- - 3151 + 3153 (1 row) select count(*) from pg_proc where prokind = 'p';