11112 lines
406 KiB
C++
Executable File
11112 lines
406 KiB
C++
Executable File
/* -------------------------------------------------------------------------
|
|
*
|
|
* postmaster.cpp
|
|
* This program acts as a clearing house for requests to the
|
|
* POSTGRES system. Frontend programs send a startup message
|
|
* to the Postmaster and the postmaster uses the info in the
|
|
* message to setup a backend process.
|
|
*
|
|
* The postmaster also manages system-wide operations such as
|
|
* startup and shutdown. The postmaster itself doesn't do those
|
|
* operations, mind you --- it just forks off a subprocess to do them
|
|
* at the right times. It also takes care of resetting the system
|
|
* if a backend crashes.
|
|
*
|
|
* The postmaster process creates the shared memory and semaphore
|
|
* pools during startup, but as a rule does not touch them itself.
|
|
* In particular, it is not a member of the PGPROC array of backends
|
|
* and so it cannot participate in lock-manager operations. Keeping
|
|
* the postmaster away from shared memory operations makes it simpler
|
|
* and more reliable. The postmaster is almost always able to recover
|
|
* from crashes of individual backends by resetting shared memory;
|
|
* if it did much with shared memory then it would be prone to crashing
|
|
* along with the backends.
|
|
*
|
|
* When a request message is received, we now fork() immediately.
|
|
* The child process performs authentication of the request, and
|
|
* then becomes a backend if successful. This allows the auth code
|
|
* to be written in a simple single-threaded style (as opposed to the
|
|
* crufty "poor man's multitasking" code that used to be needed).
|
|
* More importantly, it ensures that blockages in non-multithreaded
|
|
* libraries like SSL or PAM cannot cause denial of service to other
|
|
* clients.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
* Portions Copyright (c) 2010-2012 Postgres-XC Development Group
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/gausskernel/process/postmaster/postmaster.cpp
|
|
*
|
|
* NOTES
|
|
*
|
|
* Initialization:
|
|
* The Postmaster sets up shared memory data structures
|
|
* for the backends.
|
|
*
|
|
* Synchronization:
|
|
* The Postmaster shares memory with the backends but should avoid
|
|
* touching shared memory, so as not to become stuck if a crashing
|
|
* backend screws up locks or shared memory. Likewise, the Postmaster
|
|
* should never block on messages from frontend clients.
|
|
*
|
|
* Garbage Collection:
|
|
* The Postmaster cleans up after backends if they have an emergency
|
|
* exit and/or core dump.
|
|
*
|
|
* Error Reporting:
|
|
* Use write_stderr() only for reporting "interactive" errors
|
|
* (essentially, bogus arguments on the command line). Once the
|
|
* postmaster is launched, use ereport().
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
#include "gs_bbox.h"
|
|
#include <sys/wait.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <openssl/rand.h>
|
|
#include <sys/param.h>
|
|
#include <arpa/inet.h>
|
|
#include <sys/resource.h>
|
|
|
|
#ifdef HAVE_POLL_H
|
|
#include <poll.h>
|
|
#endif
|
|
|
|
#include "access/cbmparsexlog.h"
|
|
#include "access/obs/obs_am.h"
|
|
#include "access/transam.h"
|
|
#include "access/xlog.h"
|
|
#include "access/xact.h"
|
|
#include "bootstrap/bootstrap.h"
|
|
#include "commands/matview.h"
|
|
#include "catalog/pg_control.h"
|
|
#include "dbmind/hypopg_index.h"
|
|
#include "instruments/instr_unique_sql.h"
|
|
#include "instruments/instr_user.h"
|
|
#include "instruments/percentile.h"
|
|
#include "instruments/ash.h"
|
|
#include "instruments/capture_view.h"
|
|
#include "opfusion/opfusion_util.h"
|
|
#include "instruments/instr_slow_query.h"
|
|
#include "instruments/instr_statement.h"
|
|
|
|
#include "lib/dllist.h"
|
|
#include "libpq/auth.h"
|
|
#include "libpq/ip.h"
|
|
#include "libpq/libpq.h"
|
|
#include "libpq/pqsignal.h"
|
|
#include "miscadmin.h"
|
|
#ifdef PGXC
|
|
#include "pgxc/csnminsync.h"
|
|
#include "pgxc/pgxc.h"
|
|
/* COORD */
|
|
#include "pgxc/locator.h"
|
|
#include "nodes/nodes.h"
|
|
#include "nodes/memnodes.h"
|
|
#include "pgxc/poolmgr.h"
|
|
#include "access/gtm.h"
|
|
#endif
|
|
#include "pgstat.h"
|
|
#include "instruments/snapshot.h"
|
|
#include "pgaudit.h"
|
|
#include "job/job_scheduler.h"
|
|
#include "job/job_worker.h"
|
|
#include "postmaster/autovacuum.h"
|
|
#include "postmaster/pagewriter.h"
|
|
#include "postmaster/fork_process.h"
|
|
#include "postmaster/pgarch.h"
|
|
#include "postmaster/postmaster.h"
|
|
#include "postmaster/syslogger.h"
|
|
#include "postmaster/alarmchecker.h"
|
|
#include "postmaster/aiocompleter.h"
|
|
#include "postmaster/fencedudf.h"
|
|
#include "postmaster/barrier_creator.h"
|
|
#include "replication/heartbeat.h"
|
|
#include "replication/catchup.h"
|
|
#include "replication/dataqueue.h"
|
|
#include "replication/datasender.h"
|
|
#include "replication/datasender_private.h"
|
|
#include "replication/datareceiver.h"
|
|
#include "replication/replicainternal.h"
|
|
#include "replication/walsender.h"
|
|
#include "replication/walsender_private.h"
|
|
#include "replication/walreceiver.h"
|
|
#include "postmaster/bgwriter.h"
|
|
#include "postmaster/cbmwriter.h"
|
|
#include "postmaster/remoteservice.h"
|
|
#include "postmaster/startup.h"
|
|
#include "postmaster/twophasecleaner.h"
|
|
#include "postmaster/licensechecker.h"
|
|
#include "postmaster/walwriter.h"
|
|
#include "postmaster/walwriterauxiliary.h"
|
|
#include "postmaster/lwlockmonitor.h"
|
|
#include "replication/walreceiver.h"
|
|
#include "replication/datareceiver.h"
|
|
#include "replication/slot.h"
|
|
#include "storage/fd.h"
|
|
#include "storage/ipc.h"
|
|
#include "storage/pg_shmem.h"
|
|
#include "storage/lock/pg_sema.h"
|
|
#include "storage/pmsignal.h"
|
|
#include "storage/proc.h"
|
|
#include "storage/remote_read.h"
|
|
#include "tcop/tcopprot.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/datetime.h"
|
|
#include "utils/guc.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/ps_status.h"
|
|
#include "utils/plog.h"
|
|
#include "utils/zfiles.h"
|
|
#include "utils/inval.h"
|
|
#ifdef PGXC
|
|
#include "utils/resowner.h"
|
|
#include "workload/cpwlm.h"
|
|
#include "workload/workload.h"
|
|
#endif
|
|
|
|
#ifdef EXEC_BACKEND
|
|
#include "storage/spin.h"
|
|
#endif
|
|
|
|
#include "access/dfs/dfs_insert.h"
|
|
#include "access/twophase.h"
|
|
#include "alarm/alarm.h"
|
|
#include "auditfuncs.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "common/config/cm_config.h"
|
|
#include "distributelayer/streamMain.h"
|
|
#include "executor/execStream.h"
|
|
#include "funcapi.h"
|
|
#include "gs_thread.h"
|
|
#include "gssignal/gs_signal.h"
|
|
#include "libcomm/libcomm.h"
|
|
#include "libpq/pqformat.h"
|
|
#include "postmaster/startup.h"
|
|
#include "storage/spin.h"
|
|
#include "threadpool/threadpool.h"
|
|
#include "utils/guc.h"
|
|
#include "utils/guc_tables.h"
|
|
#include "utils/mmpool.h"
|
|
#include "libcomm/libcomm.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "funcapi.h"
|
|
#include "utils/memprot.h"
|
|
#include "pgstat.h"
|
|
|
|
#include "distributelayer/streamMain.h"
|
|
#include "distributelayer/streamProducer.h"
|
|
#include "eSDKOBS.h"
|
|
#include "cjson/cJSON.h"
|
|
|
|
#include "tcop/stmt_retry.h"
|
|
#include "gaussdb_version.h"
|
|
#include "hotpatch/hotpatch.h"
|
|
#include "hotpatch/hotpatch_backend.h"
|
|
// Parallel recovery
|
|
#include "access/parallel_recovery/page_redo.h"
|
|
#include "access/multi_redo_api.h"
|
|
#include "postmaster/postmaster.h"
|
|
#include "access/parallel_recovery/dispatcher.h"
|
|
#include "utils/distribute_test.h"
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
#include "tsdb/compaction/compaction_entry.h"
|
|
#include "tsdb/compaction/compaction_worker_entry.h"
|
|
#include "tsdb/compaction/session_control.h"
|
|
#include "tsdb/compaction/compaction_auxiliary_entry.h"
|
|
#include "tsdb/cache/tags_cachemgr.h"
|
|
#include "tsdb/cache/part_cachemgr.h"
|
|
#include "tsdb/cache/partid_cachemgr.h"
|
|
#include "tsdb/storage/part.h"
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
#include "streaming/launcher.h"
|
|
#include "streaming/init.h"
|
|
#ifdef ENABLE_MOT
|
|
#include "storage/mot/mot_fdw.h"
|
|
#endif
|
|
#include "executor/nodeExtensible.h"
|
|
|
|
#ifdef ENABLE_UT
|
|
#define static
|
|
#endif
|
|
|
|
|
|
extern void auto_explain_init(void);
|
|
extern int S3_init();
|
|
static const int RECOVERY_PARALLELISM_DEFAULT = 1;
|
|
|
|
/* flag to get logic cluster name for dn alarm */
|
|
static bool isNeedGetLCName = true;
|
|
/* logic cluster name list file name */
|
|
#define LOGIC_CLUSTER_LIST "logic_cluster_name.txt"
|
|
/* judge whether a char is digital */
|
|
#define isDigital(_ch) (((_ch) >= '0') && ((_ch) <= '9'))
|
|
|
|
#define IS_FD_TO_RECV_GSSOCK(fd) \
|
|
((fd) == t_thrd.postmaster_cxt.sock_for_libcomm || (fd) == t_thrd.libpq_cxt.listen_fd_for_recv_flow_ctrl)
|
|
|
|
/* This include is only here because of the SSL multithread initialization of the OpenSSL component */
|
|
#include "ssl/gs_openssl_client.h"
|
|
|
|
#define MAXLISTEN 64
|
|
|
|
#define PM_BUSY_ALARM_USED_US 30000000L
|
|
#define PM_BUSY_ALARM_US 1000000L
|
|
#define PM_POLL_TIMEOUT_SECOND 20
|
|
/*
 * Parenthesized so the whole product is evaluated as one unit wherever the
 * macro is expanded (e.g. as the right-hand operand of '/' or '%').
 * NOTE(review): with SECS_PER_MINUTE == 60 this is 58 hours in microseconds,
 * not minutes -- confirm the intended unit against the users of this macro.
 */
#define PM_POLL_TIMEOUT_MINUTE (58 * SECS_PER_MINUTE * 60 * 1000000L)
|
|
#define CHECK_TIMES 10
|
|
#define SIGBUS_MCEERR_AR 4
|
|
#define SIGBUS_MCEERR_AO 5
|
|
static char gaussdb_state_file[MAXPGPATH] = {0};
|
|
|
|
uint32 noProcLogicTid = 0;
|
|
|
|
volatile int Shutdown = NoShutdown;
|
|
|
|
extern void gs_set_hs_shm_data(HaShmemData* ha_shm_data);
|
|
extern void ReaperBackendMain();
|
|
|
|
#define EXTERN_SLOTS_NUM 17
|
|
volatile PMState pmState = PM_INIT;
|
|
bool dummyStandbyMode = false;
|
|
volatile uint64 sync_system_identifier = 0;
|
|
bool FencedUDFMasterMode = false;
|
|
|
|
extern char* optarg;
|
|
extern int optind, opterr;
|
|
|
|
#ifdef HAVE_INT_OPTRESET
|
|
extern int optreset; /* might not be declared by system headers */
|
|
#endif
|
|
|
|
#ifdef USE_BONJOUR
|
|
static DNSServiceRef bonjour_sdref = NULL;
|
|
#endif
|
|
|
|
/* for backtrace function */
|
|
pthread_mutex_t bt_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
pthread_rwlock_t hba_rwlock = PTHREAD_RWLOCK_INITIALIZER;
|
|
|
|
extern bool data_catchup;
|
|
extern bool wal_catchup;
|
|
|
|
char g_bbox_dump_path[1024] = {0};
|
|
|
|
#define CHECK_FOR_PROCDIEPENDING() \
|
|
do { \
|
|
if (t_thrd.int_cxt.ProcDiePending) { \
|
|
if (t_thrd.storage_cxt.cancel_from_timeout) { \
|
|
ereport(FATAL, \
|
|
(errcode(ERRCODE_QUERY_CANCELED), \
|
|
errmsg("terminate because pooler connect timeout(%ds) when process startup packet", \
|
|
u_sess->attr.attr_network.PoolerConnectTimeout))); \
|
|
} else { \
|
|
proc_exit(1); \
|
|
} \
|
|
} \
|
|
} while (0)
|
|
|
|
#define DataRcvIsOnline() \
|
|
((IS_DN_DUMMY_STANDYS_MODE() ? (g_instance.pid_cxt.DataReceiverPID != 0 && t_thrd.datareceiver_cxt.DataRcv && \
|
|
t_thrd.datareceiver_cxt.DataRcv->isRuning) \
|
|
: true))
|
|
|
|
#define IsCascadeStandby() \
|
|
(t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE && \
|
|
t_thrd.postmaster_cxt.HaShmData->is_cascade_standby)
|
|
|
|
/*
|
|
 * postmaster.cpp - function prototypes
|
|
*/
|
|
static void CloseServerPorts(int status, Datum arg);
|
|
static void getInstallationPaths(const char* argv0);
|
|
static void checkDataDir(void);
|
|
static void CheckGUCConflicts(void);
|
|
static Port* ConnCreateToRecvGssock(pollfd* ufds, int idx, int* nSockets);
|
|
static Port* ConnCreate(int serverFd);
|
|
static void reset_shared(int port);
|
|
static void SIGHUP_handler(SIGNAL_ARGS);
|
|
void SIGBUS_handler(SIGNAL_ARGS);
|
|
static void pmdie(SIGNAL_ARGS);
|
|
static void startup_alarm(SIGNAL_ARGS);
|
|
static void SetWalsndsNodeState(ClusterNodeState requester, ClusterNodeState others);
|
|
static void ProcessDemoteRequest(void);
|
|
static void reaper(SIGNAL_ARGS);
|
|
static void sigusr1_handler(SIGNAL_ARGS);
|
|
static void dummy_handler(SIGNAL_ARGS);
|
|
static void CleanupBackend(ThreadId pid, int exitstatus);
|
|
static const char* GetProcName(ThreadId pid);
|
|
static void LogChildExit(int lev, const char* procname, ThreadId pid, int exitstatus);
|
|
static void PostmasterStateMachineReadOnly(void);
|
|
static void PostmasterStateMachine(void);
|
|
static void BackendInitialize(Port* port);
|
|
static int BackendRun(Port* port);
|
|
static int ServerLoop(void);
|
|
static int BackendStartup(Port* port, bool isConnectHaPort);
|
|
static void processCancelRequest(Port* port, void* pkt);
|
|
static void processStopRequest(Port* port, void* pkt);
|
|
#ifndef HAVE_POLL
|
|
static int initMasks(fd_set* rmask);
|
|
#endif
|
|
static int initPollfd(struct pollfd* ufds);
|
|
static void report_fork_failure_to_client(Port* port, int errnum, const char* specialErrorInfo = NULL);
|
|
void signal_child(ThreadId pid, int signal, int be_mode = -1);
|
|
static bool SignalSomeChildren(int signal, int targets);
|
|
|
|
static bool IsChannelAdapt(Port* port, ReplConnInfo* repl);
|
|
static bool IsLocalPort(Port* port);
|
|
static bool IsLocalIp(const char* address);
|
|
static bool IsInplicitIp(const char* address);
|
|
static int NeedPoolerPort(const char* hostName);
|
|
static void SetHaShmemData(void);
|
|
static bool IsAlreadyListen(const char* ip, int port);
|
|
static void IntArrayRegulation(int array[], int len, int def);
|
|
static void ListenSocketRegulation(void);
|
|
static bool ParseHaListenAddr(LISTEN_ADDRS* pListenList);
|
|
static void CreateHaListenSocket(void);
|
|
static void RemoteHostInitilize(Port* port);
|
|
static int StartupPacketInitialize(Port* port);
|
|
static void PsDisplayInitialize(Port* port);
|
|
|
|
static ServerMode get_cur_mode(void);
|
|
static int get_cur_repl_num(void);
|
|
|
|
static void PMReadDBStateFile(GaussState* state);
|
|
static void PMSetDBStateFile(GaussState* state);
|
|
static void PMUpdateDBState(DbState db_state, ServerMode mode, int conn_num);
|
|
static void PMUpdateDBStateLSN(void);
|
|
|
|
static void PMUpdateDBStateHaRebuildReason(void);
|
|
|
|
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
|
|
static void StartPgjobWorker(void);
|
|
static void StartPoolCleaner(void);
|
|
static void StartCleanStatement(void);
|
|
|
|
static void check_and_reset_ha_listen_port(void);
|
|
static void* cJSON_internal_malloc(size_t size);
|
|
static bool NeedHeartbeat();
|
|
static ServerMode GetHaShmemMode(void);
|
|
|
|
bool PMstateIsRun(void);
|
|
|
|
/*
|
|
* Possible types of a backend. These are OR-able request flag bits
|
|
* for SignalSomeChildren() and CountChildren().
|
|
*/
|
|
#define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
|
|
#define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
|
|
#define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
|
|
#define BACKEND_TYPE_DATASND 0x0008 /* datasender process */
|
|
#define BACKEND_TYPE_TEMPBACKEND \
|
|
0x0010 /* temp thread processing cancel signal \
|
|
or stream connection */
|
|
#define BACKEND_TYPE_ALL 0x001F /* OR of all the above */
|
|
|
|
#define GTM_LITE_CN (GTM_LITE_MODE && IS_PGXC_COORDINATOR)
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
#define START_BARRIER_CREATOR IS_PGXC_COORDINATOR
|
|
#else
|
|
#define START_BARRIER_CREATOR IS_PGXC_DATANODE
|
|
#endif
|
|
|
|
static int CountChildren(int target);
|
|
static bool CreateOptsFile(int argc, const char* argv[], const char* fullprogname);
|
|
static void UpdateOptsFile(void);
|
|
static void StartAutovacuumWorker(void);
|
|
static ThreadId StartCatchupWorker(void);
|
|
static void InitPostmasterDeathWatchHandle(void);
|
|
static void NotifyShutdown(void);
|
|
static void NotifyProcessActive(void);
|
|
static int init_stream_comm(void);
|
|
|
|
int GaussDbThreadMain(knl_thread_arg* arg);
|
|
const char* GetThreadName(knl_thread_role role);
|
|
|
|
#ifdef EXEC_BACKEND
|
|
|
|
typedef int InheritableSocket;
|
|
typedef struct LWLock LWLock; /* ugly kluge */
|
|
|
|
/*
|
|
* Structure contains all variables passed to exec:ed backends
|
|
*/
|
|
typedef struct {
|
|
Port port;
|
|
pgsocket portsocket;
|
|
char DataDir[MAXPGPATH];
|
|
pgsocket ListenSocket[MAXLISTEN];
|
|
long MyCancelKey;
|
|
int MyPMChildSlot;
|
|
#ifndef WIN32
|
|
unsigned long UsedShmemSegID;
|
|
#else
|
|
HANDLE UsedShmemSegID;
|
|
#endif
|
|
void* UsedShmemSegAddr;
|
|
slock_t* ShmemLock;
|
|
VariableCache ShmemVariableCache;
|
|
LWLock* mainLWLockArray;
|
|
PMSignalData* PMSignalState;
|
|
|
|
char LocalAddrList[MAXLISTEN][IP_LEN];
|
|
int LocalIpNum;
|
|
HaShmemData* HaShmData;
|
|
|
|
TimestampTz PgStartTime;
|
|
TimestampTz PgReloadTime;
|
|
pg_time_t first_syslogger_file_time;
|
|
bool redirection_done;
|
|
bool IsBinaryUpgrade;
|
|
int max_safe_fds;
|
|
int max_files_per_process;
|
|
int max_userdatafiles;
|
|
int postmaster_alive_fds[2];
|
|
int syslogPipe[2];
|
|
char my_exec_path[MAXPGPATH];
|
|
char pkglib_path[MAXPGPATH];
|
|
Oid myTempNamespace;
|
|
Oid myTempToastNamespace;
|
|
bool comm_ipc_log;
|
|
} BackendParameters;
|
|
|
|
static BackendParameters backend_save_para;
|
|
|
|
static void read_backend_variables(char* id, Port* port);
|
|
static void restore_backend_variables(BackendParameters* param, Port* port);
|
|
#ifndef WIN32
|
|
static bool save_backend_variables(BackendParameters* param, Port* port);
|
|
#else
|
|
static bool save_backend_variables(BackendParameters* param, Port* port, HANDLE childProcess, pid_t childPid);
|
|
#endif
|
|
#endif /* EXEC_BACKEND */
|
|
|
|
static void BackendArrayAllocation(void);
|
|
static void BackendArrayRemove(Backend* bn);
|
|
|
|
PMStateInfo pmStateDescription[] = {{PM_INIT, "PM_INIT"},
|
|
{PM_STARTUP, "PM_STARTUP"},
|
|
{PM_RECOVERY, "PM_RECOVERY"},
|
|
{PM_HOT_STANDBY, "PM_HOT_STANDBY"},
|
|
{PM_RUN, "PM_RUN"},
|
|
{PM_WAIT_BACKUP, "PM_WAIT_BACKUP"},
|
|
{PM_WAIT_READONLY, "PM_WAIT_READONLY"},
|
|
{PM_WAIT_BACKENDS, "PM_WAIT_BACKENDS"},
|
|
{PM_SHUTDOWN, "PM_SHUTDOWN"},
|
|
{PM_SHUTDOWN_2, "PM_SHUTDOWN_2"},
|
|
{PM_WAIT_DEAD_END, "PM_WAIT_DEAD_END"},
|
|
{PM_NO_CHILDREN, "PM_NO_CHILDREN"}};
|
|
|
|
/* convert PM state to string */
|
|
const char* GetPMState(const PMState pmStateCode)
|
|
{
|
|
uint32_t count;
|
|
for (count = 0; count < lengthof(pmStateDescription); count++) {
|
|
if (pmStateCode == pmStateDescription[count].pmState) {
|
|
return pmStateDescription[count].pmStateMsg;
|
|
}
|
|
}
|
|
return "UNKOWN";
|
|
}
|
|
|
|
/*
|
|
* While adding a new node to the cluster we need to restore the schema of
|
|
* an existing database to the new node.
|
|
* If the new node is a datanode and we connect directly to it,
|
|
* it does not allow DDL, because it is in read only mode &
|
|
* If the new node is a coordinator it will send DDLs to all the other
|
|
* coordinators which we do not want it to do
|
|
* To provide ability to restore on the new node a new command line
|
|
* argument is provided called --restoremode
|
|
* It is to be provided in place of --coordinator OR --datanode.
|
|
* In restore mode both coordinator and datanode are internally
|
|
* treated as a datanode.
|
|
*/
|
|
bool isRestoreMode = false;
|
|
|
|
/*
|
|
*The securitymode is to satisfy the security request of database on
|
|
*cloud environment. In securitymode, some operations are carried out
|
|
*to satisfy the security request.
|
|
*/
|
|
bool isSecurityMode = false;
|
|
|
|
#define SECURITY_MODE_NAME "securitymode"
|
|
|
|
#define SECURITY_MODE_NAME_LEN 12
|
|
|
|
bool isSingleMode = false;
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
#define StartPoolManager() PoolManagerInit()
|
|
#endif
|
|
|
|
/* Macros to check exit status of a child process */
|
|
#define EXIT_STATUS_0(st) ((st) == 0)
|
|
#define EXIT_STATUS_1(st) (1 == (st))
|
|
|
|
/*
|
|
* @hdfs
|
|
* deleteHdfsUser() function is used to clean Hdfs User List and
|
|
 * release allocated memory
|
|
*/
|
|
extern void deleteHdfsUser();
|
|
|
|
extern void CodeGenProcessInitialize();
|
|
extern void CodeGenProcessTearDown();
|
|
|
|
extern void CPmonitorMain(void);
|
|
|
|
extern void load_searchserver_library();
|
|
|
|
/*
|
|
* Protocol versions we supported:
|
|
* For x.y, if y < 50, it's from the postgres's front end.
|
|
 * And for now, we only support versions: 2.0, 3.0, 3.50, 3.51
|
|
* Attention:
|
|
 * For protocol version x.y, x and y are each integers, and x.y is not a float.
|
|
* Which means that 3.50 != 3.5, 3.5 == 3.05
|
|
*
|
|
* Version desc:
|
|
* 2.0 and 3.0 are from postgres.
|
|
* 3.50: backend does not send signature to frontend anymore while authorization.
|
|
 * 3.51: backend sends iteration to frontend in the authentication.
|
|
*/
|
|
const unsigned short protoVersionList[][2] = {{2, 0}, {3, 0}, {3, 50}, {3, 51}};
|
|
/* transparent encryption database encryption key. */
|
|
extern bool getAndCheckTranEncryptDEK();
|
|
|
|
/*
 * Store the given server mode in t_thrd.xlog_cxt.server_mode.
 *
 * mode: the new server mode to record.
 */
void SetServerMode(ServerMode mode)
{
    t_thrd.xlog_cxt.server_mode = mode;
}
|
|
|
|
/*
 * Return the value of t_thrd.xlog_cxt.server_mode, cast to ServerMode.
 */
ServerMode GetServerMode()
{
    const ServerMode currentMode = (ServerMode)t_thrd.xlog_cxt.server_mode;
    return currentMode;
}
|
|
|
|
void ReportAlarmAbnormalDataHAInstListeningSocket()
|
|
{
|
|
Alarm alarmItem[1];
|
|
AlarmAdditionalParam tempAdditionalParam;
|
|
|
|
// Initialize the alarm item
|
|
AlarmItemInitialize(alarmItem, ALM_AI_AbnormalDataHAInstListeningSocket, ALM_AS_Reported, NULL);
|
|
// fill the alarm message
|
|
WriteAlarmAdditionalInfo(&tempAdditionalParam,
|
|
g_instance.attr.attr_common.PGXCNodeName,
|
|
"",
|
|
"",
|
|
alarmItem,
|
|
ALM_AT_Fault,
|
|
g_instance.attr.attr_common.PGXCNodeName);
|
|
// report the alarm
|
|
AlarmReporter(alarmItem, ALM_AT_Fault, &tempAdditionalParam);
|
|
}
|
|
|
|
void ReportResumeAbnormalDataHAInstListeningSocket()
|
|
{
|
|
Alarm alarmItem[1];
|
|
AlarmAdditionalParam tempAdditionalParam;
|
|
|
|
// Initialize the alarm item
|
|
AlarmItemInitialize(alarmItem, ALM_AI_AbnormalDataHAInstListeningSocket, ALM_AS_Normal, NULL);
|
|
// fill the alarm message
|
|
WriteAlarmAdditionalInfo(
|
|
&tempAdditionalParam, g_instance.attr.attr_common.PGXCNodeName, "", "", alarmItem, ALM_AT_Resume);
|
|
// report the alarm
|
|
AlarmReporter(alarmItem, ALM_AT_Resume, &tempAdditionalParam);
|
|
}
|
|
|
|
void SetFlagForGetLCName(bool falg)
|
|
{
|
|
isNeedGetLCName = falg;
|
|
}
|
|
|
|
/*
 * Parse a node id from a string of decimal digits.
 *
 * nodeIdStr: NUL-terminated string expected to contain only '0'-'9'.
 *
 * Returns the parsed value. Returns 0 when nodeIdStr is NULL or empty,
 * contains any non-digit character, or denotes a value that does not fit
 * into uint32.
 */
uint32 GetNodeId(const char* nodeIdStr)
{
    if (nodeIdStr == NULL) {
        return 0;
    }

    const uint32 maxNodeId = (uint32)-1;
    uint32 nodeId = 0;

    for (const char* cursor = nodeIdStr; *cursor != '\0'; cursor++) {
        if (!isDigital(*cursor)) {
            return 0;
        }

        const uint32 digit = (uint32)(*cursor - '0');

        /*
         * Reject out-of-range input instead of relying on atoi(), whose
         * behavior is undefined when the value cannot be represented.
         */
        if (nodeId > (maxNodeId - digit) / 10) {
            return 0;
        }
        nodeId = nodeId * 10 + digit;
    }

    return nodeId;
}
|
|
|
|
/*
|
|
* "Safe" wrapper around strdup()
|
|
*/
|
|
char* pg_strdup(const char* string)
|
|
{
|
|
char* tmp = NULL;
|
|
|
|
if (NULL == string) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OPERATION),
|
|
errmsg("pg_strdup: cannot duplicate null pointer (internal error)\n")));
|
|
}
|
|
tmp = strdup(string);
|
|
if (NULL == tmp) {
|
|
ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("out of memory\n")));
|
|
}
|
|
return tmp;
|
|
}
|
|
|
|
/* Init the hash table to save the debug query id and the used space */
|
|
static void InitDnHashTable(void)
|
|
{
|
|
HASHCTL hctl;
|
|
errno_t rc = 0;
|
|
|
|
rc = memset_s(&hctl, sizeof(HASHCTL), 0, sizeof(HASHCTL));
|
|
securec_check(rc, "", "");
|
|
|
|
hctl.keysize = sizeof(uint64);
|
|
hctl.entrysize = sizeof(DnUsedSpaceHashEntry);
|
|
hctl.hash = tag_hash;
|
|
hctl.hcxt = g_instance.instance_context;
|
|
|
|
g_instance.comm_cxt.usedDnSpace = hash_create("SQL used space on datanode",
|
|
g_instance.attr.attr_network.MaxConnections,
|
|
&hctl,
|
|
HASH_ELEM | HASH_FUNCTION | HASH_SHRCTX);
|
|
}
|
|
|
|
/*
|
|
*Get master node id from node name
|
|
*/
|
|
uint32 GetMasterIdByNodeName()
|
|
{
|
|
uint32 node_id = 0;
|
|
char* nodeName = pg_strdup(g_instance.attr.attr_common.PGXCNodeName);
|
|
const char delims[] = "_";
|
|
char* ptr = NULL;
|
|
char* outer_ptr = NULL;
|
|
ptr = strtok_r(nodeName, delims, &outer_ptr);
|
|
if (ptr != NULL) {
|
|
ptr = strtok_r(NULL, delims, &outer_ptr);
|
|
}
|
|
|
|
if (ptr != NULL) {
|
|
node_id = GetNodeId(ptr);
|
|
}
|
|
|
|
if (nodeName != NULL) {
|
|
free(nodeName);
|
|
nodeName = NULL;
|
|
}
|
|
return node_id;
|
|
}
|
|
|
|
/*
|
|
*Get Logic Cluster From LC StaticConfig file for given node
|
|
*
|
|
*return true if success else return false
|
|
*/
|
|
bool GetLogicClusterFromConfig(const char* filepath, char* lcname)
|
|
{
|
|
g_logicClusterName = lcname;
|
|
g_datanodeid = GetMasterIdByNodeName();
|
|
if (g_datanodeid == 0) {
|
|
ereport(LOG, (errmsg("cannot get node id of datanode %s\n", g_instance.attr.attr_common.PGXCNodeName)));
|
|
return false;
|
|
}
|
|
|
|
int status = 0;
|
|
int err_no = 0;
|
|
status = read_logic_cluster_config_files(filepath, &err_no);
|
|
switch (status) {
|
|
case OPEN_FILE_ERROR:
|
|
case READ_FILE_ERROR:
|
|
ereport(LOG, (errmsg("could not open or read logic cluster static config files: %s", gs_strerror(err_no))));
|
|
return false;
|
|
case OUT_OF_MEMORY:
|
|
ereport(LOG, (errmsg("out of memory")));
|
|
return false;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (g_logicClusterName[0] == '\0') {
|
|
int ret = snprintf_s(g_logicClusterName, CLUSTER_NAME_LEN, CLUSTER_NAME_LEN - 1, "elastic_group");
|
|
securec_check_ss_c(ret, "\0", "\0");
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
*Get logic cluster name for datanode alarm
|
|
*
|
|
*if exist logic_cluster_name.txt get logic cluster from LC static config
|
|
*else reset logic cluster name
|
|
*/
|
|
void GetLogicClusterForAlarm(char* lcname)
|
|
{
|
|
if (!IS_PGXC_DATANODE || !isNeedGetLCName)
|
|
return;
|
|
|
|
int ret;
|
|
char LCListfile[MAXPGPATH] = {'\0'};
|
|
char* gausshome = getGaussHome();
|
|
if (gausshome == NULL) {
|
|
return;
|
|
}
|
|
if (*gausshome == '\0') {
|
|
pfree(gausshome);
|
|
return;
|
|
}
|
|
|
|
ret = snprintf_s(LCListfile, MAXPGPATH, MAXPGPATH - 1, "%s/bin/%s", gausshome, LOGIC_CLUSTER_LIST);
|
|
securec_check_ss_c(ret, "\0", "\0");
|
|
pfree(gausshome);
|
|
gausshome = NULL;
|
|
|
|
if (0 == access(LCListfile, F_OK)) {
|
|
/* if false we need reset the logic cluster name */
|
|
if (!GetLogicClusterFromConfig(LCListfile, lcname))
|
|
lcname[0] = '\0';
|
|
else
|
|
SetFlagForGetLCName(false);
|
|
} else
|
|
lcname[0] = '\0';
|
|
}
|
|
|
|
/*
|
|
* get_coredump_pattern_path - get the core dump path from the file "/proc/sys/kernel/core_pattern"
|
|
*/
|
|
void get_coredump_pattern_path(char* path, Size len)
|
|
{
|
|
FILE* fp = NULL;
|
|
char* p = NULL;
|
|
struct stat stat_buf;
|
|
|
|
if (NULL == (fp = fopen("/proc/sys/kernel/core_pattern", "r"))) {
|
|
write_stderr("cannot open file: /proc/sys/kernel/core_pattern.\n");
|
|
return;
|
|
}
|
|
|
|
if (NULL == fgets(path, len, fp)) {
|
|
fclose(fp);
|
|
write_stderr("failed to get the core pattern path.\n ");
|
|
return;
|
|
}
|
|
fclose(fp);
|
|
|
|
if ((p = strrchr(path, '/')) == NULL) {
|
|
*path = '\0';
|
|
} else {
|
|
*(++p) = '\0';
|
|
}
|
|
|
|
if (0 != stat(path, &stat_buf) || !S_ISDIR(stat_buf.st_mode) || 0 != access(path, W_OK)) {
|
|
write_stderr("The core dump path is an invalid directory\n");
|
|
*path = '\0';
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Only update gaussdb.state file's state field.
|
|
*
|
|
* PARAMETERS:
|
|
* state: INPUT new state
|
|
* RETURN:
|
|
* true if success, otherwise false.
|
|
*
|
|
* NOTE: unsafe function is not expected here since it is referred in signal handler.
|
|
*/
|
|
bool SetDBStateFileState(DbState state, bool optional)
|
|
{
|
|
/* do nothing while core dump be appeared so early. */
|
|
if (strlen(gaussdb_state_file) > 0) {
|
|
char temppath[MAXPGPATH] = {0};
|
|
GaussState s;
|
|
|
|
/* zero it in case gaussdb.state doesn't exist. */
|
|
int rc = memset_s(&s, sizeof(GaussState), 0, sizeof(GaussState));
|
|
securec_check_c(rc, "\0", "\0");
|
|
|
|
rc = snprintf_s(temppath, MAXPGPATH, MAXPGPATH - 1, "%s.temp", gaussdb_state_file);
|
|
securec_check_intval(rc, , false);
|
|
|
|
/* Write the new content into a temp file and rename it at last. */
|
|
int fd = open(gaussdb_state_file, O_RDONLY);
|
|
if (fd == -1) {
|
|
if (errno == ENOENT && optional) {
|
|
write_stderr("gaussdb.state does not exist, and skipt setting since it is optional.");
|
|
return true;
|
|
} else {
|
|
write_stderr("Failed to open gaussdb.state.temp: %d", errno);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/* Read old content from file. */
|
|
int len = read(fd, &s, sizeof(GaussState));
|
|
if (len != sizeof(GaussState)) {
|
|
write_stderr("Failed to read gaussdb.state: %d", errno);
|
|
(void)close(fd);
|
|
return false;
|
|
}
|
|
|
|
if (close(fd) != 0) {
|
|
write_stderr("Failed to close gaussdb.state: %d", errno);
|
|
return false;
|
|
}
|
|
|
|
/* replace state with the new value. */
|
|
s.state = state;
|
|
|
|
fd = open(temppath, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR);
|
|
if (fd == -1) {
|
|
write_stderr("Failed to open gaussdb.state.temp: %d", errno);
|
|
return false;
|
|
}
|
|
|
|
len = write(fd, &s, sizeof(GaussState));
|
|
if (len != sizeof(GaussState)) {
|
|
write_stderr("Failed to write gaussdb.state.temp: %d", errno);
|
|
(void)close(fd);
|
|
return false;
|
|
}
|
|
|
|
if (close(fd) != 0) {
|
|
write_stderr("Failed to close gaussdb.state.temp: %d", errno);
|
|
return false;
|
|
}
|
|
|
|
if (rename(temppath, gaussdb_state_file) != 0) {
|
|
write_stderr("Failed to rename gaussdb.state.temp: %s", gs_strerror(errno));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void gs_hotpatch_log_callback(int level, char* logstr)
|
|
{
|
|
ereport(level, (errmsg("%s", logstr)));
|
|
}
|
|
|
|
void signal_sysloger_flush(void)
|
|
{
|
|
if (t_thrd.postmaster_cxt.redirection_done == false) {
|
|
fflush(stdout);
|
|
fflush(stderr);
|
|
} else if (g_instance.pid_cxt.SysLoggerPID != 0) {
|
|
set_flag_to_flush_buffer();
|
|
signal_child(g_instance.pid_cxt.SysLoggerPID, SIGUSR1);
|
|
pg_usleep(100000);
|
|
}
|
|
}
|
|
|
|
void SetShmemCxt(void)
|
|
{
|
|
int thread_pool_worker_num = 0;
|
|
|
|
if (g_threadPoolControler != NULL) {
|
|
thread_pool_worker_num = g_threadPoolControler->GetThreadNum();
|
|
g_instance.shmem_cxt.ThreadPoolGroupNum = g_threadPoolControler->GetGroupNum();
|
|
} else {
|
|
g_instance.shmem_cxt.ThreadPoolGroupNum = 0;
|
|
}
|
|
|
|
/* Keep enough slot for thread pool. */
|
|
g_instance.shmem_cxt.MaxConnections =
|
|
Max((g_instance.attr.attr_network.MaxConnections + g_instance.attr.attr_network.maxInnerToolConnections), thread_pool_worker_num);
|
|
|
|
g_instance.shmem_cxt.MaxBackends = g_instance.shmem_cxt.MaxConnections +
|
|
g_instance.attr.attr_sql.job_queue_processes +
|
|
g_instance.attr.attr_storage.autovacuum_max_workers +
|
|
AUXILIARY_BACKENDS +
|
|
AV_LAUNCHER_PROCS;
|
|
g_instance.shmem_cxt.MaxReserveBackendId = g_instance.attr.attr_sql.job_queue_processes +
|
|
g_instance.attr.attr_storage.autovacuum_max_workers +
|
|
(thread_pool_worker_num * STREAM_RESERVE_PROC_TIMES) +
|
|
AUXILIARY_BACKENDS +
|
|
AV_LAUNCHER_PROCS;
|
|
|
|
Assert(g_instance.shmem_cxt.MaxBackends <= MAX_BACKENDS);
|
|
}
|
|
|
|
static void print_port_info()
|
|
{
|
|
FILE* fp = NULL;
|
|
StringInfoData strinfo;
|
|
initStringInfo(&strinfo);
|
|
char buf[MAXPGPATH];
|
|
|
|
appendStringInfo(&strinfo, "lsof -i:%d", g_instance.attr.attr_network.PostPortNumber);
|
|
fp = popen(strinfo.data, "r");
|
|
if (fp == NULL) {
|
|
ereport(LOG, (errmsg("Unable to use 'lsof' to read port info.")));
|
|
} else {
|
|
ereport(LOG, (errmsg("exec cmd: %s", strinfo.data)));
|
|
while (fgets(buf, sizeof(buf), fp) != NULL) {
|
|
ereport(LOG, (errmsg("<lsof>:%s", buf)));
|
|
}
|
|
pclose(fp);
|
|
}
|
|
|
|
resetStringInfo(&strinfo);
|
|
appendStringInfo(&strinfo, "netstat -anp | grep %d", g_instance.attr.attr_network.PostPortNumber);
|
|
fp = popen(strinfo.data, "r");
|
|
if (fp == NULL) {
|
|
ereport(LOG, (errmsg("Unable to use 'netstat' to read port info.")));
|
|
} else {
|
|
ereport(LOG, (errmsg("exec cmd: %s", strinfo.data)));
|
|
while (fgets(buf, sizeof(buf), fp) != NULL) {
|
|
ereport(LOG, (errmsg("<netstat>:%s", buf)));
|
|
}
|
|
pclose(fp);
|
|
}
|
|
|
|
pfree_ext(strinfo.data);
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Postmaster main entry point
|
|
*/
|
|
int PostmasterMain(int argc, char* argv[])
|
|
{
|
|
int opt;
|
|
int status = STATUS_OK;
|
|
char* output_config_variable = NULL;
|
|
char* userDoption = NULL;
|
|
bool listen_addr_saved = false;
|
|
int use_pooler_port = -1;
|
|
int i;
|
|
GaussState state;
|
|
OptParseContext optCtxt;
|
|
errno_t rc = 0;
|
|
|
|
t_thrd.proc_cxt.MyProcPid = PostmasterPid = gs_thread_self();
|
|
|
|
t_thrd.proc_cxt.MyStartTime = time(NULL);
|
|
|
|
IsPostmasterEnvironment = true;
|
|
|
|
t_thrd.proc_cxt.MyProgName = "gaussmaster";
|
|
|
|
/*
|
|
* for security, no dir or file created can be group or other accessible
|
|
*/
|
|
umask(S_IRWXG | S_IRWXO);
|
|
|
|
/*
|
|
* Fire up essential subsystems: memory management
|
|
*/
|
|
|
|
/*
|
|
* By default, palloc() requests in the postmaster will be allocated in
|
|
* the t_thrd.mem_cxt.postmaster_mem_cxt, which is space that can be recycled by backends.
|
|
* Allocated data that needs to be available to backends should be
|
|
* allocated in t_thrd.top_mem_cxt.
|
|
*/
|
|
t_thrd.mem_cxt.postmaster_mem_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt,
|
|
"Postmaster",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
|
MemoryContextSwitchTo(t_thrd.mem_cxt.postmaster_mem_cxt);
|
|
|
|
/**
|
|
* initialize version info.
|
|
*/
|
|
initialize_feature_flags();
|
|
|
|
/*
|
|
* @OBS
|
|
* Create a global OBS CA object shared among threads
|
|
*/
|
|
initOBSCacheObject();
|
|
|
|
S3_init();
|
|
|
|
/* set memory manager for minizip libs */
|
|
pm_set_unzip_memfuncs();
|
|
|
|
/* set memory manager for cJSON */
|
|
cJSON_Hooks hooks = {cJSON_internal_malloc, cJSON_internal_free};
|
|
cJSON_InitHooks(&hooks);
|
|
|
|
#ifdef ENABLE_LLVM_COMPILE
|
|
/*
|
|
* Prepare codegen environment.
|
|
*/
|
|
CodeGenProcessInitialize();
|
|
#endif
|
|
|
|
/* Initialize paths to installation files */
|
|
getInstallationPaths(argv[0]);
|
|
|
|
/*
|
|
* Options setup
|
|
*/
|
|
InitializeGUCOptions();
|
|
|
|
/*
|
|
*Initialize Callback function type of cb_for_getlc
|
|
*/
|
|
SetcbForGetLCName(GetLogicClusterForAlarm);
|
|
|
|
optCtxt.opterr = 1;
|
|
|
|
/*
|
|
* Parse command-line options. CAUTION: keep this in sync with
|
|
* tcop/postgres.c (the option sets should not conflict) and with the
|
|
* common help() function in main/main.c.
|
|
*/
|
|
initOptParseContext(&optCtxt);
|
|
while ((opt = getopt_r(argc, argv, "A:B:bc:C:D:d:EeFf:h:ijk:lM:N:nOo:Pp:Rr:S:sTt:u:W:-:", &optCtxt)) != -1) {
|
|
switch (opt) {
|
|
case 'A':
|
|
SetConfigOption("debug_assertions", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'B':
|
|
SetConfigOption("shared_buffers", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'b':
|
|
/* Undocumented flag used for binary upgrades */
|
|
u_sess->proc_cxt.IsBinaryUpgrade = true;
|
|
break;
|
|
|
|
case 'C':
|
|
output_config_variable = optCtxt.optarg;
|
|
break;
|
|
|
|
case 'D':
|
|
userDoption = optCtxt.optarg;
|
|
break;
|
|
|
|
case 'd':
|
|
set_debug_options(atoi(optCtxt.optarg), PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'E':
|
|
SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'e':
|
|
SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'F':
|
|
SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'f':
|
|
if (!set_plan_disabling_options(optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV)) {
|
|
write_stderr("%s: invalid argument for option -f: \"%s\"\n", progname, optCtxt.optarg);
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
break;
|
|
|
|
case 'h':
|
|
SetConfigOption("listen_addresses", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'i':
|
|
SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'j':
|
|
/* only used by interactive backend */
|
|
break;
|
|
|
|
case 'k':
|
|
SetConfigOption("unix_socket_directory", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'l':
|
|
SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'M':
|
|
if (0 == strncmp(optCtxt.optarg, "primary", strlen("primary")) &&
|
|
'\0' == optCtxt.optarg[strlen("primary")]) {
|
|
t_thrd.xlog_cxt.server_mode = PRIMARY_MODE;
|
|
} else if (0 == strncmp(optCtxt.optarg, "standby", strlen("standby")) &&
|
|
'\0' == optCtxt.optarg[strlen("standby")]) {
|
|
t_thrd.xlog_cxt.server_mode = STANDBY_MODE;
|
|
} else if (0 == strncmp(optCtxt.optarg, "pending", strlen("pending")) &&
|
|
'\0' == optCtxt.optarg[strlen("pending")]) {
|
|
t_thrd.xlog_cxt.server_mode = PENDING_MODE;
|
|
} else if (0 == strncmp(optCtxt.optarg, "normal", strlen("normal")) &&
|
|
'\0' == optCtxt.optarg[strlen("normal")]) {
|
|
t_thrd.xlog_cxt.server_mode = NORMAL_MODE;
|
|
} else if (0 == strncmp(optCtxt.optarg, "cascade_standby", strlen("cascade_standby")) &&
|
|
'\0' == optCtxt.optarg[strlen("cascade_standby")]) {
|
|
t_thrd.xlog_cxt.server_mode = STANDBY_MODE;
|
|
t_thrd.xlog_cxt.is_cascade_standby = true;
|
|
} else {
|
|
ereport(FATAL, (errmsg("the options of -M is not recognized")));
|
|
}
|
|
break;
|
|
|
|
case 'N':
|
|
SetConfigOption("max_connections", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'n':
|
|
/* Don't reinit shared mem after abnormal exit */
|
|
ereport(FATAL, (errmsg("the options of -n is deprecated")));
|
|
break;
|
|
|
|
case 'O':
|
|
SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'o':
|
|
/* Other options to pass to the backend on the command line */
|
|
rc = snprintf_s(g_instance.ExtraOptions + strlen(g_instance.ExtraOptions),
|
|
sizeof(g_instance.ExtraOptions) - strlen(g_instance.ExtraOptions),
|
|
sizeof(g_instance.ExtraOptions) - strlen(g_instance.ExtraOptions) - 1,
|
|
" %s",
|
|
optCtxt.optarg);
|
|
securec_check_ss(rc, "", "");
|
|
break;
|
|
|
|
case 'P':
|
|
SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'p':
|
|
SetConfigOption("port", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
case 'R':
|
|
/* indicate run as xlogreceiver. Only used with -M standby */
|
|
dummyStandbyMode = true;
|
|
break;
|
|
#endif
|
|
case 'r':
|
|
/* only used by single-user backend */
|
|
break;
|
|
|
|
case 'S':
|
|
SetConfigOption("work_mem", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 's':
|
|
SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'T':
|
|
|
|
/*
|
|
* In the event that some backend dumps core, send SIGSTOP,
|
|
* rather than SIGQUIT, to all its peers. This lets the wily
|
|
* post_hacker collect core dumps from everyone.
|
|
*/
|
|
ereport(FATAL, (errmsg("the options of -T is deprecated")));
|
|
break;
|
|
|
|
case 't': {
|
|
const char* tmp = get_stats_option_name(optCtxt.optarg);
|
|
|
|
if (tmp != NULL) {
|
|
SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
|
|
} else {
|
|
write_stderr("%s: invalid argument for option -t: \"%s\"\n", progname, optCtxt.optarg);
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case 'u':
|
|
/* Undocumented version used for inplace or online upgrades */
|
|
errno = 0;
|
|
pg_atomic_write_u32(&WorkingGrandVersionNum, (uint32)strtoul(optCtxt.optarg, NULL, 10));
|
|
if (errno != 0 || pg_atomic_read_u32(&WorkingGrandVersionNum) > GRAND_VERSION_NUM) {
|
|
write_stderr("%s: invalid argument for option -u: \"%s\", GRAND_VERSION_NUM is %u\n",
|
|
progname,
|
|
optCtxt.optarg,
|
|
(uint32)GRAND_VERSION_NUM);
|
|
ExitPostmaster(1);
|
|
} else if (pg_atomic_read_u32(&WorkingGrandVersionNum) == INPLACE_UPGRADE_PRECOMMIT_VERSION) {
|
|
pg_atomic_write_u32(&WorkingGrandVersionNum, GRAND_VERSION_NUM);
|
|
InplaceUpgradePrecommit = true;
|
|
g_instance.comm_cxt.force_cal_space_info = true;
|
|
}
|
|
break;
|
|
|
|
case 'W':
|
|
SetConfigOption("post_auth_delay", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
break;
|
|
|
|
case 'c':
|
|
case '-': {
|
|
char* name = NULL;
|
|
char* value = NULL;
|
|
|
|
ParseLongOption(optCtxt.optarg, &name, &value);
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
if (opt == '-' && (strcmp(name, "coordinator") == 0 || strcmp(name, "datanode") == 0)) {
|
|
ereport(FATAL,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("Single node mode: must start as single node (--single_node)\n")));
|
|
}
|
|
#endif
|
|
/* A Coordinator is being activated */
|
|
if (name != NULL && strcmp(name, "coordinator") == 0 && value == NULL)
|
|
g_instance.role = VCOORDINATOR;
|
|
else if (name != NULL && strcmp(name, "datanode") == 0 && value == NULL)
|
|
g_instance.role = VDATANODE;
|
|
/* A SingleDN mode is being activated */
|
|
else if (name != NULL && strcmp(name, "single_node") == 0 && value == NULL) {
|
|
g_instance.role = VSINGLENODE;
|
|
useLocalXid = true;
|
|
} else if (name != NULL && strcmp(name, "restoremode") == 0 && value == NULL) {
|
|
/*
|
|
* In restore mode both coordinator and datanode
|
|
* are internally treated as datanodes
|
|
*/
|
|
isRestoreMode = true;
|
|
g_instance.role = VDATANODE;
|
|
} else if (name != NULL && 0 == strncasecmp(name, "fenced", strlen(name)) && value == NULL)
|
|
FencedUDFMasterMode = true;
|
|
else if (name != NULL && strlen(name) == SECURITY_MODE_NAME_LEN &&
|
|
strncmp(name, SECURITY_MODE_NAME, SECURITY_MODE_NAME_LEN) == 0 && value == NULL) {
|
|
/* In securitymode, safety strategy is opened */
|
|
isSecurityMode = true;
|
|
} else {
|
|
/* default case */
|
|
if (value == NULL) {
|
|
if (opt == '-')
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR), errmsg("--%s requires a value", optCtxt.optarg)));
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR), errmsg("-c %s requires a value", optCtxt.optarg)));
|
|
}
|
|
|
|
SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
|
|
}
|
|
pfree(name);
|
|
|
|
if (value != NULL) {
|
|
pfree(value);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
write_stderr("Try \"%s --help\" for more information.\n", progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
}
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
/* single_node mode default role is VSINGLENODE and must be it */
|
|
if (g_instance.role == VUNKNOWN) {
|
|
g_instance.role = VSINGLENODE;
|
|
useLocalXid = true;
|
|
} else if (g_instance.role != VSINGLENODE) {
|
|
write_stderr("Single node mode: must start as single node.\n");
|
|
ExitPostmaster(1);
|
|
}
|
|
#else
|
|
|
|
if (!IS_PGXC_COORDINATOR && !IS_PGXC_DATANODE && !FencedUDFMasterMode) {
|
|
write_stderr(
|
|
"%s: Postgres-XC: must start as either a Coordinator (--coordinator) or Datanode (--datanode)\n", progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
#endif
|
|
/*
|
|
* Postmaster accepts no non-option switch arguments.
|
|
*/
|
|
if (optCtxt.optind < argc) {
|
|
write_stderr("%s: invalid argument: \"%s\"\n", progname, argv[optCtxt.optind]);
|
|
write_stderr("Try \"%s --help\" for more information.\n", progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
/*
|
|
* Locate the proper configuration files and data directory, and read
|
|
* postgresql.conf for the first time.
|
|
*/
|
|
if (FencedUDFMasterMode) {
|
|
/* disable bbox for fenced UDF process */
|
|
SetConfigOption("enable_bbox_dump", "false", PGC_POSTMASTER, PGC_S_ARGV);
|
|
} else if (!SelectConfigFiles(userDoption, progname)) {
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
if ((g_instance.attr.attr_security.transparent_encrypted_string != NULL &&
|
|
g_instance.attr.attr_security.transparent_encrypted_string[0] != '\0') &&
|
|
(g_instance.attr.attr_common.transparent_encrypt_kms_url != NULL &&
|
|
g_instance.attr.attr_common.transparent_encrypt_kms_url[0] != '\0') &&
|
|
(g_instance.attr.attr_security.transparent_encrypt_kms_region != NULL &&
|
|
g_instance.attr.attr_security.transparent_encrypt_kms_region[0] != '\0')) {
|
|
isSecurityMode = true;
|
|
}
|
|
|
|
/*
|
|
* Initialize Postmaster level GUC option.
|
|
*
|
|
* Note that some guc can't get right value because of some global var not init,
|
|
* for example single_node mode,
|
|
* so need this function to init postmaster level guc.
|
|
*/
|
|
InitializePostmasterGUC();
|
|
|
|
t_thrd.myLogicTid = noProcLogicTid + POSTMASTER_LID;
|
|
if (output_config_variable != NULL) {
|
|
/*
|
|
* permission is handled because the user is reading inside the data
|
|
* dir
|
|
*/
|
|
puts(GetConfigOption(output_config_variable, false, false));
|
|
ExitPostmaster(0);
|
|
}
|
|
|
|
InitializeNumLwLockPartitions();
|
|
|
|
noProcLogicTid = GLOBAL_ALL_PROCS;
|
|
|
|
/* Run as FencedUDF master */
|
|
if (FencedUDFMasterMode) {
|
|
/* t_thrd.proc_cxt.DataDir must be set */
|
|
if (userDoption != NULL && userDoption[0] == '/') {
|
|
if (chdir(userDoption) == -1)
|
|
ExitPostmaster(1);
|
|
SetDataDir(userDoption);
|
|
} else {
|
|
ExitPostmaster(1);
|
|
}
|
|
/* init thread args pool for every sub thread except the signal monitor */
|
|
gs_thread_args_pool_init(GLOBAL_ALL_PROCS + EXTERN_SLOTS_NUM, sizeof(BackendParameters));
|
|
/* Init signal manage struct */
|
|
gs_signal_slots_init(GLOBAL_ALL_PROCS + EXTERN_SLOTS_NUM);
|
|
gs_signal_startup_siginfo("PostmasterMain");
|
|
|
|
gs_signal_monitor_startup();
|
|
} else {
|
|
/* Verify that t_thrd.proc_cxt.DataDir looks reasonable */
|
|
checkDataDir();
|
|
|
|
/* And switch working directory into it */
|
|
ChangeToDataDir();
|
|
/*
|
|
* Check for invalid combinations of GUC settings.
|
|
*/
|
|
CheckGUCConflicts();
|
|
|
|
/* Set parallel recovery config */
|
|
ConfigRecoveryParallelism();
|
|
/*
|
|
* Other one-time internal sanity checks can go here, if they are fast.
|
|
* (Put any slow processing further down, after postmaster.pid creation.)
|
|
*/
|
|
if (!CheckDateTokenTables()) {
|
|
write_stderr("%s: invalid datetoken tables, please fix\n", progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
/*
|
|
* Now that we are done processing the postmaster arguments, reset
|
|
* getopt(3) library so that it will work correctly in subprocesses.
|
|
*/
|
|
optCtxt.optind = 1;
|
|
#ifdef HAVE_INT_OPTRESET
|
|
optreset = 1; /* some systems need this too */
|
|
#endif
|
|
|
|
int rc1 = snprintf_s(
|
|
gaussdb_state_file, sizeof(gaussdb_state_file), MAXPGPATH - 1, "%s/gaussdb.state", t_thrd.proc_cxt.DataDir);
|
|
securec_check_intval(rc1, , -1);
|
|
gaussdb_state_file[MAXPGPATH - 1] = '\0';
|
|
if (!SetDBStateFileState(UNKNOWN_STATE, true)) {
|
|
write_stderr("Failed to set gaussdb.state with UNKNOWN_STATE");
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
/* For debugging: display postmaster environment */
|
|
{
|
|
extern char** environ;
|
|
char** p;
|
|
|
|
ereport(DEBUG3, (errmsg_internal("%s: PostmasterMain: initial environment dump:", progname)));
|
|
ereport(DEBUG3, (errmsg_internal("-----------------------------------------")));
|
|
|
|
for (p = environ; *p; ++p)
|
|
ereport(DEBUG3, (errmsg_internal("\t%s", *p)));
|
|
|
|
ereport(DEBUG3, (errmsg_internal("-----------------------------------------")));
|
|
}
|
|
|
|
rc = memcpy_s(g_alarmComponentPath, MAXPGPATH - 1, Alarm_component, strlen(Alarm_component));
|
|
securec_check_c(rc, "\0", "\0");
|
|
g_alarmReportInterval = AlarmReportInterval;
|
|
AlarmEnvInitialize();
|
|
|
|
/* check if the dek for transparent is correct */
|
|
if (IS_PGXC_COORDINATOR || IS_PGXC_DATANODE) {
|
|
if (!getAndCheckTranEncryptDEK()) {
|
|
ExitPostmaster(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Create lockfile for data directory.
|
|
*
|
|
* We want to do this before we try to grab the input sockets, because the
|
|
* data directory interlock is more reliable than the socket-file
|
|
* interlock (thanks to whoever decided to put socket files in /tmp :-().
|
|
* For the same reason, it's best to grab the TCP socket(s) before the
|
|
* Unix socket.
|
|
*/
|
|
CreateDataDirLockFile(true);
|
|
|
|
/* Module load callback */
|
|
pgaudit_agent_init();
|
|
auto_explain_init();
|
|
|
|
/*
|
|
* process any libraries that should be preloaded at postmaster start
|
|
*/
|
|
process_shared_preload_libraries();
|
|
|
|
/*
|
|
* Establish input sockets.
|
|
*/
|
|
for (i = 0; i < MAXLISTEN; i++)
|
|
t_thrd.postmaster_cxt.ListenSocket[i] = PGINVALID_SOCKET;
|
|
|
|
if (g_instance.attr.attr_network.ListenAddresses && !dummyStandbyMode) {
|
|
char* rawstring = NULL;
|
|
List* elemlist = NULL;
|
|
ListCell* l = NULL;
|
|
int success = 0;
|
|
|
|
/* Need a modifiable copy of g_instance.attr.attr_network.ListenAddresses */
|
|
rawstring = pstrdup(g_instance.attr.attr_network.ListenAddresses);
|
|
|
|
/* Parse string into list of identifiers */
|
|
if (!SplitIdentifierString(rawstring, ',', &elemlist)) {
|
|
/* syntax error in list */
|
|
ereport(FATAL,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid list syntax for \"listen_addresses\"")));
|
|
}
|
|
|
|
foreach (l, elemlist) {
|
|
char* curhost = (char*)lfirst(l);
|
|
|
|
if (strcmp(curhost, "*") == 0)
|
|
status = StreamServerPort(AF_UNSPEC,
|
|
NULL,
|
|
(unsigned short)g_instance.attr.attr_network.PostPortNumber,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
true,
|
|
true,
|
|
false);
|
|
else
|
|
status = StreamServerPort(AF_UNSPEC,
|
|
curhost,
|
|
(unsigned short)g_instance.attr.attr_network.PostPortNumber,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
true,
|
|
true,
|
|
false);
|
|
|
|
if (status == STATUS_OK)
|
|
success++;
|
|
else {
|
|
print_port_info();
|
|
ereport(FATAL,
|
|
(errmsg("could not create listen socket for \"%s:%d\"",
|
|
curhost,
|
|
g_instance.attr.attr_network.PostPortNumber)));
|
|
}
|
|
|
|
/* At present, we do not listen replconn channels under NORMAL_MODE, so pooler port is needed */
|
|
use_pooler_port = NeedPoolerPort(curhost);
|
|
if (t_thrd.xlog_cxt.server_mode == NORMAL_MODE || use_pooler_port == -1) {
|
|
/* In om and other maintenance tools, pooler port is hardwired to be gsql port plus one */
|
|
if (g_instance.attr.attr_network.PoolerPort != (g_instance.attr.attr_network.PostPortNumber + 1)) {
|
|
ereport(FATAL, (errmsg("pooler_port must equal to gsql listen port plus one!")));
|
|
}
|
|
|
|
if (strcmp(curhost, "*") == 0) {
|
|
status = StreamServerPort(AF_UNSPEC,
|
|
NULL,
|
|
(unsigned short)g_instance.attr.attr_network.PoolerPort,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
false,
|
|
false,
|
|
false);
|
|
} else {
|
|
status = StreamServerPort(AF_UNSPEC,
|
|
curhost,
|
|
(unsigned short)g_instance.attr.attr_network.PoolerPort,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
false,
|
|
false,
|
|
false);
|
|
}
|
|
|
|
if (status != STATUS_OK)
|
|
ereport(FATAL,
|
|
(errmsg("could not create ha listen socket for \"%s:%d\"",
|
|
curhost,
|
|
g_instance.attr.attr_network.PoolerPort)));
|
|
|
|
/*
|
|
* Record the first successful host addr which does not mean 'localhost' in lockfile.
|
|
* Internal maintenance tools, such as cm_agent and gs_ctl, will use that host for connecting cn.
|
|
*/
|
|
if (!listen_addr_saved && !IsInplicitIp(curhost)) {
|
|
AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
|
|
listen_addr_saved = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!success && list_length(elemlist))
|
|
ereport(FATAL, (errmsg("could not create any TCP/IP sockets")));
|
|
|
|
list_free(elemlist);
|
|
pfree(rawstring);
|
|
}
|
|
|
|
if (t_thrd.xlog_cxt.server_mode != NORMAL_MODE) {
|
|
int i = 0;
|
|
int success = 0;
|
|
|
|
for (i = 1; i < MAX_REPLNODE_NUM; i++) {
|
|
if (t_thrd.postmaster_cxt.ReplConnArray[i] != NULL) {
|
|
if (!listen_addr_saved &&
|
|
!IsInplicitIp(t_thrd.postmaster_cxt.ReplConnArray[i]->localhost)) {
|
|
AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR,
|
|
t_thrd.postmaster_cxt.ReplConnArray[i]->localhost);
|
|
listen_addr_saved = true;
|
|
}
|
|
if (IsAlreadyListen(t_thrd.postmaster_cxt.ReplConnArray[i]->localhost,
|
|
t_thrd.postmaster_cxt.ReplConnArray[i]->localport)) {
|
|
success++;
|
|
continue;
|
|
}
|
|
|
|
status = StreamServerPort(AF_UNSPEC,
|
|
t_thrd.postmaster_cxt.ReplConnArray[i]->localhost,
|
|
(unsigned short)t_thrd.postmaster_cxt.ReplConnArray[i]->localport,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
false,
|
|
false,
|
|
false);
|
|
if (status == STATUS_OK) {
|
|
success++;
|
|
} else {
|
|
ReportAlarmAbnormalDataHAInstListeningSocket();
|
|
ereport(FATAL,
|
|
(errmsg("could not create Ha listen socket for ReplConnInfoArr[%d]\"%s:%d\"",
|
|
i,
|
|
t_thrd.postmaster_cxt.ReplConnArray[i]->localhost,
|
|
t_thrd.postmaster_cxt.ReplConnArray[i]->localport)));
|
|
}
|
|
}
|
|
}
|
|
if (success == 0) {
|
|
ReportAlarmAbnormalDataHAInstListeningSocket();
|
|
|
|
ereport(WARNING, (errmsg("could not create any HA TCP/IP sockets")));
|
|
}
|
|
ReportResumeAbnormalDataHAInstListeningSocket();
|
|
}
|
|
|
|
#ifdef USE_BONJOUR
|
|
|
|
/* Register for Bonjour only if we opened TCP socket(s) */
|
|
if (g_instance.attr.attr_common.enable_bonjour && t_thrd.postmaster_cxt.ListenSocket[0] != PGINVALID_SOCKET) {
|
|
DNSServiceErrorType err;
|
|
|
|
/*
|
|
* We pass 0 for interface_index, which will result in registering on
|
|
* all "applicable" interfaces. It's not entirely clear from the
|
|
* DNS-SD docs whether this would be appropriate if we have bound to
|
|
* just a subset of the available network interfaces.
|
|
*/
|
|
err = DNSServiceRegister(&bonjour_sdref,
|
|
0,
|
|
0,
|
|
g_instance.attr.attr_common.bonjour_name,
|
|
"_postgresql._tcp.",
|
|
NULL,
|
|
NULL,
|
|
htons(g_instance.attr.attr_network.PostPortNumber),
|
|
0,
|
|
NULL,
|
|
NULL,
|
|
NULL);
|
|
|
|
if (err != kDNSServiceErr_NoError)
|
|
ereport(LOG, (errmsg("DNSServiceRegister() failed: error code %ld", (long)err)));
|
|
|
|
/*
|
|
* We don't bother to read the mDNS daemon's reply, and we expect that
|
|
* it will automatically terminate our registration when the socket is
|
|
* closed at postmaster termination. So there's nothing more to be
|
|
* done here. However, the bonjour_sdref is kept around so that
|
|
* forked children can close their copies of the socket.
|
|
*/
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_UNIX_SOCKETS
|
|
if (!dummyStandbyMode) {
|
|
/* unix socket for gsql port */
|
|
status = StreamServerPort(AF_UNIX,
|
|
NULL,
|
|
(unsigned short)g_instance.attr.attr_network.PostPortNumber,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
false,
|
|
true,
|
|
false);
|
|
|
|
if (status != STATUS_OK)
|
|
ereport(FATAL,
|
|
(errmsg("could not create Unix-domain socket for \"%s:%d\"",
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
g_instance.attr.attr_network.PostPortNumber)));
|
|
|
|
/* unix socket for ha port */
|
|
status = StreamServerPort(AF_UNIX,
|
|
NULL,
|
|
(unsigned short)g_instance.attr.attr_network.PoolerPort,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
false,
|
|
false,
|
|
false);
|
|
|
|
if (status != STATUS_OK)
|
|
ereport(FATAL,
|
|
(errmsg("could not create Unix-domain socket for \"%s:%d\"",
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
g_instance.attr.attr_network.PoolerPort)));
|
|
|
|
/*
|
|
* create listened unix domain socket to receive gs_sock
|
|
* from receiver_loop thread.
|
|
* NOTE: as we will use global variable sock_path to init libcomm later,
|
|
* so this socket must be the last created unix domain socket.
|
|
* otherwise, the sock_path will be replaced by another path.
|
|
*/
|
|
if (g_instance.attr.attr_storage.comm_cn_dn_logic_conn && !isRestoreMode && !IS_SINGLE_NODE) {
|
|
status = StreamServerPort(AF_UNIX,
|
|
NULL,
|
|
(unsigned short)g_instance.attr.attr_network.comm_sctp_port,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
false,
|
|
true,
|
|
true);
|
|
|
|
if (status != STATUS_OK)
|
|
ereport(WARNING, (errmsg("could not create Unix-domain for comm socket")));
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* check that we have some socket to listen on
|
|
*/
|
|
if (t_thrd.postmaster_cxt.ListenSocket[0] == PGINVALID_SOCKET) {
|
|
ereport(FATAL, (errmsg("no socket created for listening")));
|
|
}
|
|
|
|
/*
|
|
* Set up an on_proc_exit function that's charged with closing the sockets
|
|
* again at postmaster shutdown. You might think we should have done this
|
|
* earlier, but we want it to run before not after the proc_exit callback
|
|
* that will remove the Unix socket file.
|
|
*/
|
|
on_proc_exit(CloseServerPorts, 0);
|
|
|
|
/*
|
|
* If no valid TCP ports, write an empty line for listen address,
|
|
* indicating the Unix socket must be used. Note that this line is not
|
|
* added to the lock file until there is a socket backing it.
|
|
*/
|
|
if (!listen_addr_saved) {
|
|
AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
|
|
ereport(WARNING, (errmsg("No explicit IP is configured for listen_addresses GUC.")));
|
|
}
|
|
|
|
if (g_instance.attr.attr_common.enable_thread_pool) {
|
|
/* No need to start thread pool for dummy standby node. */
|
|
if (!g_instance.attr.attr_storage.comm_cn_dn_logic_conn && !dummyStandbyMode) {
|
|
g_threadPoolControler = (ThreadPoolControler*)
|
|
New(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR)) ThreadPoolControler();
|
|
g_threadPoolControler->SetThreadPoolInfo();
|
|
} else {
|
|
g_instance.attr.attr_common.enable_thread_pool = false;
|
|
g_threadPoolControler = NULL;
|
|
}
|
|
}
|
|
|
|
InitGlobalBcm();
|
|
|
|
/*
|
|
* Set up shared memory and semaphores.
|
|
*/
|
|
reset_shared(g_instance.attr.attr_network.PostPortNumber);
|
|
|
|
/* Alloc array for backend record. */
|
|
BackendArrayAllocation();
|
|
|
|
/* init thread args pool for every sub thread except the signal monitor */
|
|
gs_thread_args_pool_init(GLOBAL_ALL_PROCS + EXTERN_SLOTS_NUM, sizeof(BackendParameters));
|
|
// 1.init signal manage struct
|
|
//
|
|
gs_signal_slots_init(GLOBAL_ALL_PROCS + EXTERN_SLOTS_NUM);
|
|
gs_signal_startup_siginfo("PostmasterMain");
|
|
|
|
gs_signal_monitor_startup();
|
|
|
|
/*
|
|
* Estimate number of openable files. This must happen after setting up
|
|
* semaphores, because on some platforms semaphores count as open files.
|
|
*/
|
|
SetHaShmemData();
|
|
|
|
rc = memset_s(&state, sizeof(state), 0, sizeof(state));
|
|
securec_check(rc, "", "");
|
|
state.conn_num = t_thrd.postmaster_cxt.HaShmData->repl_list_num;
|
|
state.mode = t_thrd.postmaster_cxt.HaShmData->current_mode;
|
|
state.state = STARTING_STATE;
|
|
state.lsn = 0;
|
|
state.term = 0;
|
|
state.sync_stat = false;
|
|
state.ha_rebuild_reason = NONE_REBUILD;
|
|
PMSetDBStateFile(&state);
|
|
ereport(LOG,
|
|
(errmsg("create gaussdb state file success: db state(STARTING_STATE), server mode(%s)",
|
|
wal_get_role_string(t_thrd.postmaster_cxt.HaShmData->current_mode))));
|
|
set_max_safe_fds();
|
|
|
|
/*
|
|
* Set reference point for stack-depth checking.
|
|
*/
|
|
set_stack_base();
|
|
|
|
/*
|
|
* Initialize the list of active backends.
|
|
*/
|
|
g_instance.backend_list = DLNewList();
|
|
|
|
/*
|
|
* Initialize pipe (or process handle on Windows) that allows children to
|
|
* wake up from sleep on postmaster death.
|
|
*/
|
|
InitPostmasterDeathWatchHandle();
|
|
|
|
#ifdef WIN32
|
|
|
|
/*
|
|
* Initialize I/O completion port used to deliver list of dead children.
|
|
*/
|
|
win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
|
|
|
|
if (win32ChildQueue == NULL)
|
|
ereport(FATAL, (errmsg("could not create I/O completion port for child queue")));
|
|
|
|
#endif
|
|
|
|
/*
|
|
* Record postmaster options. We delay this till now to avoid recording
|
|
* bogus options (eg, NBuffers too high for available memory).
|
|
*/
|
|
if (!CreateOptsFile(argc, (const char**)argv, (const char*)my_exec_path))
|
|
ExitPostmaster(1);
|
|
|
|
#ifdef EXEC_BACKEND
|
|
/* Write out nondefault GUC settings for child processes to use */
|
|
write_nondefault_variables(PGC_POSTMASTER);
|
|
#endif
|
|
}
|
|
#if defined (ENABLE_MULTIPLE_NODES) || defined (ENABLE_PRIVATEGAUSS)
|
|
/* init hotpatch */
|
|
if (hotpatch_remove_signal_file(t_thrd.proc_cxt.DataDir) == HP_OK) {
|
|
int ret;
|
|
ret = hotpatch_init(t_thrd.proc_cxt.DataDir, (HOTPATCH_LOG_FUNC)gs_hotpatch_log_callback);
|
|
if (ret != HP_OK) {
|
|
write_stderr("hotpatch init failed ret is %d!\n", ret);
|
|
}
|
|
}
|
|
#endif
|
|
/*
|
|
* Write the external PID file if requested
|
|
*/
|
|
if (g_instance.attr.attr_common.external_pid_file) {
|
|
FILE* fpidfile = fopen(g_instance.attr.attr_common.external_pid_file, "w");
|
|
|
|
if (fpidfile != NULL) {
|
|
fprintf(fpidfile, "%lu\n", t_thrd.proc_cxt.MyProcPid);
|
|
fclose(fpidfile);
|
|
/* Should we remove the pid file on postmaster exit? */
|
|
|
|
/* Make PID file world readable */
|
|
if (chmod(g_instance.attr.attr_common.external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
|
|
write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
|
|
progname,
|
|
g_instance.attr.attr_common.external_pid_file,
|
|
gs_strerror(errno));
|
|
} else
|
|
write_stderr("%s: could not write external PID file \"%s\": %s\n",
|
|
progname,
|
|
g_instance.attr.attr_common.external_pid_file,
|
|
gs_strerror(errno));
|
|
}
|
|
|
|
/* If start with fenced mode, we just startup as fenced mode */
|
|
if (FencedUDFMasterMode) {
|
|
/*
|
|
* If enabled, start up syslogger collection subprocess
|
|
*/
|
|
g_instance.attr.attr_common.Logging_collector = true;
|
|
g_instance.pid_cxt.SysLoggerPID = SysLogger_Start();
|
|
FencedUDFMasterMain(0, NULL);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Set up signal handlers for the postmaster process.
|
|
*
|
|
* CAUTION: when changing this list, check for side-effects on the signal
|
|
* handling setup of child processes. See tcop/postgres.c,
|
|
* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
|
|
* postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
|
|
* postmaster/syslogger.c and postmaster/checkpointer.c.
|
|
*/
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
gs_signal_block_sigusr2();
|
|
|
|
(void)gspqsignal(SIGHUP, SIGHUP_handler); /* reread config file and have
|
|
* children do same */
|
|
(void)gspqsignal(SIGINT, pmdie); /* send SIGTERM and shut down */
|
|
(void)gspqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */
|
|
(void)gspqsignal(SIGTERM, pmdie); /* wait for children and shut down */
|
|
|
|
pqsignal(SIGALRM, SIG_IGN); /* ignored */
|
|
pqsignal(SIGPIPE, SIG_IGN); /* ignored */
|
|
pqsignal(SIGFPE, FloatExceptionHandler);
|
|
|
|
(void)gspqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
|
|
(void)gspqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
|
|
(void)gspqsignal(SIGCHLD, reaper); /* handle child termination */
|
|
(void)gspqsignal(SIGTTIN, SIG_IGN); /* ignored */
|
|
(void)gspqsignal(SIGTTOU, SIG_IGN); /* ignored */
|
|
|
|
/* ignore SIGXFSZ, so that ulimit violations work like disk full */
|
|
#ifdef SIGXFSZ
|
|
(void)gspqsignal(SIGXFSZ, SIG_IGN); /* ignored */
|
|
#endif
|
|
|
|
/* core dump injection */
|
|
bbox_initialize();
|
|
|
|
/*
|
|
* Initialize stats collection subsystem (this does NOT start the
|
|
* collector process!)
|
|
*/
|
|
pgstat_init();
|
|
|
|
/* initialize workload manager */
|
|
InitializeWorkloadManager();
|
|
|
|
/* Init proc's subxid cache context, parent is g_instance.instance_context */
|
|
ProcSubXidCacheContext = AllocSetContextCreate(g_instance.instance_context,
|
|
"ProcSubXidCacheContext",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE,
|
|
SHARED_CONTEXT);
|
|
|
|
/*
|
|
* Create StreamInfoContext for stream thread connection, parent is g_instance.instance_context.
|
|
* All stream threads will share this context.
|
|
*/
|
|
StreamInfoContext = AllocSetContextCreate(g_instance.instance_context,
|
|
"StreamInfoContext",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE,
|
|
SHARED_CONTEXT);
|
|
|
|
/* create global cache memory context */
|
|
knl_g_cachemem_create();
|
|
|
|
/* create node group cache hash table */
|
|
ngroup_info_hash_create();
|
|
/* init unique sql */
|
|
InitUniqueSQL();
|
|
/* init hypo index */
|
|
InitHypopg();
|
|
InitAsp();
|
|
/* init instr user */
|
|
InitInstrUser();
|
|
/* init Opfusion function id */
|
|
InitOpfusionFunctionId();
|
|
/* init capture view */
|
|
init_capture_view();
|
|
/* init percentile */
|
|
InitPercentile();
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
/* init compaction */
|
|
CompactionProcess::init_instance();
|
|
/*
|
|
* Set up TsStoreTagsCache
|
|
*/
|
|
if (g_instance.attr.attr_common.enable_tsdb) {
|
|
TagsCacheMgr::GetInstance().init();
|
|
PartIdMgr::GetInstance().init();
|
|
Tsdb::PartCacheMgr::GetInstance().init();
|
|
InitExtensiblePlanMethodsHashTable();
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* If enabled, start up syslogger collection subprocess
|
|
*/
|
|
g_instance.pid_cxt.SysLoggerPID = SysLogger_Start();
|
|
|
|
if (IS_PGXC_DATANODE && !dummyStandbyMode && !isRestoreMode) {
|
|
StreamObj::startUp();
|
|
StreamNodeGroup::StartUp();
|
|
pthread_mutex_init(&nodeDefCopyLock, NULL);
|
|
}
|
|
|
|
/* init the usedDnSpace hash table */
|
|
InitDnHashTable();
|
|
|
|
/*
|
|
* Load configuration files for client authentication.
|
|
* Load pg_hba.conf before communication thread.
|
|
*/
|
|
int loadhbaCount = 0;
|
|
while (!load_hba()) {
|
|
loadhbaCount++;
|
|
pg_usleep(200000L); // slepp 200ms for reload
|
|
if (loadhbaCount >= 3) {
|
|
/*
|
|
* It makes no sense to continue if we fail to load the HBA file,
|
|
* since there is no way to connect to the database in this case.
|
|
*/
|
|
ereport(FATAL, (errmsg("could not load pg_hba.conf")));
|
|
}
|
|
}
|
|
|
|
if ((!IS_SINGLE_NODE) &&
|
|
((IS_PGXC_DATANODE && !dummyStandbyMode && !isRestoreMode) ||
|
|
(IS_PGXC_COORDINATOR && g_instance.attr.attr_storage.comm_cn_dn_logic_conn && !isRestoreMode))) {
|
|
status = init_stream_comm();
|
|
if (status != STATUS_OK)
|
|
ereport(FATAL, (errmsg("Init libcomm for stream failed, maybe listen port already in use")));
|
|
}
|
|
|
|
if (g_instance.attr.attr_storage.enable_adio_function)
|
|
AioResourceInitialize();
|
|
/* start alarm checker thread. */
|
|
if (!dummyStandbyMode)
|
|
g_instance.pid_cxt.AlarmCheckerPID = startAlarmChecker();
|
|
|
|
/* start reaper backend thread which is always alive. */
|
|
g_instance.pid_cxt.ReaperBackendPID = initialize_util_thread(REAPER);
|
|
|
|
/*
|
|
* Reset whereToSendOutput from DestDebug (its starting state) to
|
|
* DestNone. This stops ereport from sending log messages to stderr unless
|
|
* t_thrd.aes_cxt.Log_destination permits. We don't do this until the postmaster is
|
|
* fully launched, since startup failures may as well be reported to
|
|
* stderr.
|
|
*/
|
|
t_thrd.postgres_cxt.whereToSendOutput = DestNone;
|
|
|
|
/*
|
|
* Initialize the autovacuum subsystem (again, no process start yet)
|
|
*/
|
|
autovac_init();
|
|
|
|
load_ident();
|
|
|
|
/*
|
|
* Remove old temporary files. At this point there can be no other
|
|
* Postgres processes running in this directory, so this should be safe.
|
|
*/
|
|
RemovePgTempFiles();
|
|
|
|
RemoveErrorCacheFiles();
|
|
/*
|
|
* Remember postmaster startup time
|
|
*/
|
|
t_thrd.time_cxt.pg_start_time = GetCurrentTimestamp();
|
|
|
|
/* node stat validation timestamp */
|
|
gs_lock_test_and_set_64(&g_instance.stat_cxt.NodeStatResetTime, GetCurrentTimestamp());
|
|
|
|
/* PostmasterRandom wants its own copy */
|
|
gettimeofday(&t_thrd.postmaster_cxt.random_start_time, NULL);
|
|
|
|
/*
|
|
* We're ready to rock and roll...
|
|
*/
|
|
g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP);
|
|
Assert(g_instance.pid_cxt.StartupPID != 0);
|
|
pmState = PM_STARTUP;
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
|
|
if (IS_PGXC_COORDINATOR) {
|
|
MemoryContext oldcontext = MemoryContextSwitchTo(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT));
|
|
|
|
/*
|
|
* Initialize the Data Node connection pool.
|
|
* pooler thread don't exist any more, StartPoolManager() is an alias of
|
|
* PoolManagerInit().
|
|
*/
|
|
StartPoolManager();
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
}
|
|
|
|
load_searchserver_library();
|
|
#endif
|
|
|
|
if (status == STATUS_OK)
|
|
status = ServerLoop();
|
|
|
|
/*
|
|
* ServerLoop probably shouldn't ever return, but if it does, close down.
|
|
*/
|
|
ExitPostmaster(status != STATUS_OK);
|
|
|
|
return 0; /* not reached */
|
|
}
|
|
|
|
/*
|
|
* Compute and check the directory paths to files that are part of the
|
|
* installation (as deduced from the postgres executable's own location)
|
|
*/
|
|
static void getInstallationPaths(const char* argv0)
|
|
{
|
|
DIR* pdir = NULL;
|
|
|
|
/* Locate the postgres executable itself */
|
|
if (find_my_exec(argv0, my_exec_path) < 0) {
|
|
ereport(FATAL, (errmsg("%s: could not locate my own executable path", argv0)));
|
|
}
|
|
|
|
#ifdef EXEC_BACKEND
|
|
|
|
/* Locate executable backend before we change working directory */
|
|
if (find_other_exec(argv0, "gaussdb", PG_BACKEND_VERSIONSTR, t_thrd.proc_cxt.postgres_exec_path) < 0) {
|
|
ereport(FATAL, (errmsg("%s: could not locate matching postgres executable", argv0)));
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* Locate the pkglib directory --- this has to be set early in case we try
|
|
* to load any modules from it in response to postgresql.conf entries.
|
|
*/
|
|
get_pkglib_path(my_exec_path, t_thrd.proc_cxt.pkglib_path);
|
|
|
|
/*
|
|
* Verify that there's a readable directory there; otherwise the Postgres
|
|
* installation is incomplete or corrupt. (A typical cause of this
|
|
* failure is that the postgres executable has been moved or hardlinked to
|
|
* some directory that's not a sibling of the installation lib/
|
|
* directory.)
|
|
*/
|
|
pdir = AllocateDir(t_thrd.proc_cxt.pkglib_path);
|
|
|
|
if (pdir == NULL) {
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not open directory \"%s\": %m", t_thrd.proc_cxt.pkglib_path),
|
|
errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been "
|
|
"moved away from its proper location.",
|
|
my_exec_path)));
|
|
}
|
|
|
|
FreeDir(pdir);
|
|
|
|
/*
|
|
* XXX is it worth similarly checking the share/ directory? If the lib/
|
|
* directory is there, then share/ probably is too.
|
|
*/
|
|
}
|
|
|
|
/*
|
|
* Validate the proposed data directory
|
|
*/
|
|
static void checkDataDir(void)
|
|
{
|
|
#define BUILD_TAG_START "build_completed.start"
|
|
char path[MAXPGPATH];
|
|
char identFile[MAXPGPATH] = {0};
|
|
FILE* fp = NULL;
|
|
struct stat stat_buf;
|
|
|
|
Assert(t_thrd.proc_cxt.DataDir);
|
|
|
|
int rc = snprintf_s(identFile, sizeof(identFile), MAXPGPATH - 1, "%s/%s", t_thrd.proc_cxt.DataDir, BUILD_TAG_START);
|
|
securec_check_intval(rc, , );
|
|
if (0 == stat(identFile, &stat_buf)) {
|
|
write_stderr(
|
|
"%s: Uncompleted build is detected, please build again and then start database after its success.\n",
|
|
progname);
|
|
ExitPostmaster(2);
|
|
}
|
|
|
|
if (stat(t_thrd.proc_cxt.DataDir, &stat_buf) != 0) {
|
|
if (errno == ENOENT)
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(), errmsg("data directory \"%s\" does not exist", t_thrd.proc_cxt.DataDir)));
|
|
else
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not read permissions of directory \"%s\": %m", t_thrd.proc_cxt.DataDir)));
|
|
}
|
|
|
|
/* eventual chdir would fail anyway, but let's test ... */
|
|
if (!S_ISDIR(stat_buf.st_mode))
|
|
ereport(FATAL,
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
errmsg("specified data directory \"%s\" is not a directory", t_thrd.proc_cxt.DataDir)));
|
|
|
|
/*
|
|
* Check that the directory belongs to my userid; if not, reject.
|
|
*
|
|
* This check is an essential part of the interlock that prevents two
|
|
* postmasters from starting in the same directory (see CreateLockFile()).
|
|
* Do not remove or weaken it.
|
|
*
|
|
* XXX can we safely enable this check on Windows?
|
|
*/
|
|
#if !defined(WIN32) && !defined(__CYGWIN__)
|
|
|
|
if (stat_buf.st_uid != geteuid())
|
|
ereport(FATAL,
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
errmsg("data directory \"%s\" has wrong ownership", t_thrd.proc_cxt.DataDir),
|
|
errhint("The server must be started by the user that owns the data directory.")));
|
|
|
|
#endif
|
|
|
|
/*
|
|
* Check if the directory has group or world access. If so, reject.
|
|
*
|
|
* It would be possible to allow weaker constraints (for example, allow
|
|
* group access) but we cannot make a general assumption that that is
|
|
* okay; for example there are platforms where nearly all users
|
|
* customarily belong to the same group. Perhaps this test should be
|
|
* configurable.
|
|
*
|
|
* XXX temporarily suppress check when on Windows, because there may not
|
|
* be proper support for Unix-y file permissions. Need to think of a
|
|
* reasonable check to apply on Windows.
|
|
*/
|
|
#if !defined(WIN32) && !defined(__CYGWIN__)
|
|
|
|
if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
|
|
ereport(FATAL,
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
errmsg("data directory \"%s\" has group or world access", t_thrd.proc_cxt.DataDir),
|
|
errdetail("Permissions should be u=rwx (0700).")));
|
|
|
|
#endif
|
|
|
|
/* Look for PG_VERSION before looking for pg_control */
|
|
ValidatePgVersion(t_thrd.proc_cxt.DataDir);
|
|
|
|
int ret = snprintf_s(path, sizeof(path), MAXPGPATH - 1, "%s/global/pg_control", t_thrd.proc_cxt.DataDir);
|
|
securec_check_intval(ret, , );
|
|
fp = AllocateFile(path, PG_BINARY_R);
|
|
|
|
if (fp == NULL) {
|
|
write_stderr("%s: could not find the database system\n"
|
|
"Expected to find it in the directory \"%s\",\n"
|
|
"but could not open file \"%s\": %s\n",
|
|
progname,
|
|
t_thrd.proc_cxt.DataDir,
|
|
path,
|
|
gs_strerror(errno));
|
|
ExitPostmaster(2);
|
|
}
|
|
|
|
FreeFile(fp);
|
|
}
|
|
|
|
static void CheckExtremeRtoGUCConflicts(void)
|
|
{
|
|
const int minReceiverBufSize = 32 * 1024;
|
|
if ((g_instance.attr.attr_storage.recovery_parse_workers > 1) && IS_DN_DUMMY_STANDYS_MODE()) {
|
|
ereport(WARNING,
|
|
(errcode(ERRCODE_SYSTEM_ERROR),
|
|
errmsg("when starting as dummy_standby mode, we couldn't support extreme rto."),
|
|
errhint("so down extreme rto")));
|
|
g_instance.attr.attr_storage.recovery_parse_workers = 1;
|
|
}
|
|
|
|
if ((g_instance.attr.attr_storage.recovery_parse_workers > 1) &&
|
|
g_instance.attr.attr_storage.WalReceiverBufSize < minReceiverBufSize) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYSTEM_ERROR),
|
|
errmsg("when starting extreme rto, wal receiver buf should not smaller than %dMB",
|
|
minReceiverBufSize / 1024),
|
|
errhint("recommend config \"wal_receiver_buffer_size=64MB\"")));
|
|
}
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
if ((g_instance.attr.attr_storage.recovery_parse_workers > 1) && g_instance.attr.attr_storage.EnableHotStandby) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYSTEM_ERROR),
|
|
errmsg("extreme rto could not support hot standby."),
|
|
errhint("Either turn off extreme rto, or turn off hot_standby.")));
|
|
}
|
|
#endif
|
|
}
|
|
static void CheckRecoveryParaConflict()
|
|
{
|
|
if (g_instance.attr.attr_storage.max_recovery_parallelism > RECOVERY_PARALLELISM_DEFAULT
|
|
&& IS_DN_DUMMY_STANDYS_MODE()) {
|
|
ereport(WARNING,
|
|
(errmsg("when starting as dummy_standby mode, we couldn't support parallel redo, down it")));
|
|
g_instance.attr.attr_storage.max_recovery_parallelism = RECOVERY_PARALLELISM_DEFAULT;
|
|
}
|
|
}
|
|
|
|
static void CheckGUCConflictsMaxConnections()
|
|
{
|
|
if (g_instance.attr.attr_network.ReservedBackends >= g_instance.attr.attr_network.MaxConnections) {
|
|
write_stderr("%s: sysadmin_reserved_connections must be less than max_connections\n", progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
if (g_instance.attr.attr_storage.max_wal_senders >= g_instance.attr.attr_network.MaxConnections) {
|
|
write_stderr("%s: max_wal_senders must be less than max_connections\n", progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Check for invalid combinations of GUC settings during starting up.
|
|
*/
|
|
static void CheckGUCConflicts(void)
|
|
{
|
|
CheckGUCConflictsMaxConnections();
|
|
if (dummyStandbyMode && STANDBY_MODE != t_thrd.xlog_cxt.server_mode) {
|
|
write_stderr("%s: dummy standby should be running under standby mode\n", progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
if (u_sess->attr.attr_common.XLogArchiveMode && g_instance.attr.attr_storage.wal_level == WAL_LEVEL_MINIMAL)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYSTEM_ERROR),
|
|
errmsg(
|
|
"WAL archival (archive_mode=on) requires wal_level \"archive\", \"hot_standby\" or \"logical\"")));
|
|
|
|
if (g_instance.attr.attr_storage.max_wal_senders > 0 && g_instance.attr.attr_storage.wal_level == WAL_LEVEL_MINIMAL)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYSTEM_ERROR),
|
|
errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"archive\", \"hot_standby\" or "
|
|
"\"logical\"")));
|
|
|
|
if (g_instance.attr.attr_storage.EnableHotStandby == true &&
|
|
g_instance.attr.attr_storage.wal_level < WAL_LEVEL_HOT_STANDBY)
|
|
ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR),
|
|
errmsg("hot standby is not possible because wal_level was not set to \"hot_standby\""),
|
|
errhint("Either set wal_level to \"hot_standby\", or turn off hot_standby.")));
|
|
|
|
if ((g_instance.attr.attr_storage.wal_level == WAL_LEVEL_MINIMAL ||
|
|
g_instance.attr.attr_storage.max_wal_senders < 1) &&
|
|
(t_thrd.xlog_cxt.server_mode == PRIMARY_MODE || t_thrd.xlog_cxt.server_mode == PENDING_MODE ||
|
|
t_thrd.xlog_cxt.server_mode == STANDBY_MODE))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYSTEM_ERROR),
|
|
errmsg("when starting as dual mode, we must ensure wal_level was not \"minimal\" and max_wal_senders "
|
|
"was set at least 1")));
|
|
|
|
if (u_sess->attr.attr_storage.enable_data_replicate && IS_DN_MULTI_STANDYS_MODE()) {
|
|
/* when init, we should force change the option */
|
|
ereport(LOG, (errmsg("when starting as multi_standby mode, we couldn't support data replicaton.")));
|
|
u_sess->attr.attr_storage.enable_data_replicate = false;
|
|
}
|
|
|
|
CheckRecoveryParaConflict();
|
|
|
|
if (g_instance.attr.attr_storage.enable_mix_replication &&
|
|
g_instance.attr.attr_storage.MaxSendSize >= g_instance.attr.attr_storage.DataQueueBufSize) {
|
|
write_stderr("%s: the data queue buffer size must be larger than the wal sender max send size for the "
|
|
"replication data synchronized by the WAL streaming.\n",
|
|
progname);
|
|
ExitPostmaster(1);
|
|
}
|
|
CheckExtremeRtoGUCConflicts();
|
|
}
|
|
|
|
static bool save_backend_variables_for_callback_thread()
|
|
{
|
|
Port port;
|
|
|
|
/* This entry point passes dummy values for the Port variables */
|
|
int ss_rc = memset_s(&port, sizeof(port), 0, sizeof(port));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
|
|
/*
|
|
* Socket 0 may be closed if we do not use it, so we
|
|
* must set socket to invalid socket instead of 0.
|
|
*/
|
|
port.sock = PGINVALID_SOCKET;
|
|
|
|
return save_backend_variables(&backend_save_para, &port);
|
|
}
|
|
|
|
/*
|
|
* Main idle loop of postmaster
|
|
*/
|
|
static int ServerLoop(void)
|
|
{
|
|
fd_set readmask;
|
|
ReplicationSlot *obs_slot = NULL;
|
|
int nSockets;
|
|
uint64 this_start_poll_time, last_touch_time, last_start_loop_time, last_start_poll_time;
|
|
/* Database Security: Support database audit */
|
|
char details[PGAUDIT_MAXLENGTH] = {0};
|
|
bool threadPoolActivated =
|
|
g_instance.attr.attr_common.enable_thread_pool && !g_instance.attr.attr_storage.comm_cn_dn_logic_conn;
|
|
|
|
/* make sure gaussdb can receive request */
|
|
DISABLE_MEMORY_PROTECT();
|
|
|
|
#ifdef HAVE_POLL
|
|
struct pollfd ufds[MAXLISTEN * 2 + 1];
|
|
#endif
|
|
|
|
FD_ZERO(&readmask);
|
|
last_start_loop_time = last_touch_time = last_start_poll_time = mc_timers_us();
|
|
|
|
#ifdef HAVE_POLL
|
|
nSockets = initPollfd(ufds);
|
|
#else
|
|
nSockets = initMasks(&readmask);
|
|
#endif
|
|
|
|
/* for rpc function call */
|
|
if (!save_backend_variables_for_callback_thread()) {
|
|
ereport(LOG, (errmsg("save_backend_variables_for_callback_thread error")));
|
|
return STATUS_ERROR;
|
|
}
|
|
ereport(LOG, (errmsg("start create thread!")));
|
|
|
|
/* Init backend thread pool */
|
|
if (threadPoolActivated) {
|
|
bool enableNumaDistribute = (g_instance.shmem_cxt.numaNodeNum > 1);
|
|
g_threadPoolControler->Init(enableNumaDistribute);
|
|
}
|
|
ereport(LOG, (errmsg("create thread end!")));
|
|
|
|
for (;;) {
|
|
fd_set rmask;
|
|
int selres;
|
|
|
|
if (t_thrd.postmaster_cxt.HaShmData->current_mode != NORMAL_MODE) {
|
|
check_and_reset_ha_listen_port();
|
|
|
|
#ifdef HAVE_POLL
|
|
nSockets = initPollfd(ufds);
|
|
#else
|
|
nSockets = initMasks(&readmask);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Wait for a connection request to arrive.
|
|
*
|
|
* We wait at most one minute, to ensure that the other background
|
|
* tasks handled below get done even when no requests are arriving.
|
|
*
|
|
* If we are in PM_WAIT_DEAD_END state, then we don't want to accept
|
|
* any new connections, so we don't call select() at all; just sleep
|
|
* for a little bit with signals unblocked.
|
|
*/
|
|
int ss_rc = memcpy_s((char*)&rmask, sizeof(fd_set), (char*)&readmask, sizeof(fd_set));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
|
(void)gs_signal_unblock_sigusr2();
|
|
|
|
this_start_poll_time = mc_timers_us();
|
|
if ((this_start_poll_time - last_start_loop_time) != 0) {
|
|
gs_set_libcomm_used_rate(
|
|
(this_start_poll_time - last_start_poll_time) * 100 / (this_start_poll_time - last_start_loop_time));
|
|
}
|
|
|
|
/*
|
|
* Check how many seconds this loop iteration took.
|
|
* Detail: If work time > PM_BUSY_ALARM_USED_US, we think the ServerLoop is busy.
|
|
*/
|
|
if (this_start_poll_time - last_start_poll_time > PM_BUSY_ALARM_USED_US) {
|
|
/* Prevent interrupts while cleaning up */
|
|
HOLD_INTERRUPTS();
|
|
ereport(WARNING, (errmsg("postmaster is busy, this cycle used %lu seconds.",
|
|
(this_start_poll_time - last_start_loop_time) / PM_BUSY_ALARM_US)));
|
|
/* Now we can allow interrupts again */
|
|
RESUME_INTERRUPTS();
|
|
}
|
|
last_start_loop_time = this_start_poll_time;
|
|
|
|
/*
|
|
* Touch the socket and lock file every 58 minutes, to ensure that
|
|
* they are not removed by overzealous /tmp-cleaning tasks. We assume
|
|
* no one runs cleaners with cutoff times of less than an hour ...
|
|
*/
|
|
if (this_start_poll_time - last_touch_time >= PM_POLL_TIMEOUT_MINUTE) {
|
|
TouchSocketFile();
|
|
TouchSocketLockFile();
|
|
last_touch_time = this_start_poll_time;
|
|
}
|
|
|
|
if (pmState == PM_WAIT_DEAD_END) {
|
|
pg_usleep(100000L); /* 100 msec seems reasonable */
|
|
selres = 0;
|
|
} else {
|
|
/* must set timeout each time; some OSes change it! */
|
|
struct timeval timeout;
|
|
|
|
timeout.tv_sec = PM_POLL_TIMEOUT_SECOND;
|
|
timeout.tv_usec = 0;
|
|
|
|
#ifdef HAVE_POLL
|
|
selres = poll(ufds, nSockets, timeout.tv_sec * 1000);
|
|
last_start_poll_time = mc_timers_us();
|
|
#else
|
|
int ss_rc = memcpy_s((char*)&rmask, sizeof(fd_set), (char*)&readmask, sizeof(fd_set));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
selres = select(nSockets, &rmask, NULL, NULL, &timeout);
|
|
last_start_poll_time = mc_timers_us();
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Block all signals until we wait again. (This makes it safe for our
|
|
* signal handlers to do nontrivial work.)
|
|
*/
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
gs_signal_block_sigusr2();
|
|
|
|
/* Now check the select() result */
|
|
if (selres < 0) {
|
|
if (errno != EINTR && errno != EWOULDBLOCK) {
|
|
ereport(LOG, (errcode_for_socket_access(), errmsg("select()/poll() failed in postmaster: %m")));
|
|
return STATUS_ERROR;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* New connection pending on any of our sockets? If so, fork a child
|
|
* process to deal with it.
|
|
*/
|
|
if (selres > 0) {
|
|
int i;
|
|
|
|
for (i = 0; i < (MAXLISTEN * 2); i++) {
|
|
if (ufds[i].fd == PGINVALID_SOCKET)
|
|
break;
|
|
|
|
#ifdef HAVE_POLL
|
|
|
|
if (ufds[i].revents & POLLIN) {
|
|
ufds[i].revents = 0;
|
|
#else
|
|
|
|
if (FD_ISSET(t_thrd.postmaster_cxt.ListenSocket[i], &rmask)) {
|
|
#endif
|
|
Port* port = NULL;
|
|
|
|
ufds[i].revents = 0;
|
|
|
|
if (IS_FD_TO_RECV_GSSOCK(ufds[i].fd)) {
|
|
port = ConnCreateToRecvGssock(ufds, i, &nSockets);
|
|
} else {
|
|
port = ConnCreate(ufds[i].fd);
|
|
}
|
|
|
|
if (port != NULL) {
|
|
int result = STATUS_OK;
|
|
bool isConnectHaPort =
|
|
(i < MAXLISTEN) && (t_thrd.postmaster_cxt.listen_sock_type[i] == HA_LISTEN_SOCKET);
|
|
/*
|
|
* Since at present, HA only uses TCP sockets, we can directly compare
|
|
* the corresponding entry in t_thrd.postmaster_cxt.listen_sock_type, even
|
|
* though ufds are not one-to-one mapped to tcp and sctp socket array.
|
|
* If HA adopts SCTP sockets later, we will need to maintain socket type
|
|
* array for ufds in initPollfd.
|
|
*/
|
|
if (threadPoolActivated && !isConnectHaPort) {
|
|
result = g_threadPoolControler->DispatchSession(port);
|
|
} else {
|
|
result = BackendStartup(port, isConnectHaPort);
|
|
}
|
|
|
|
if (result != STATUS_OK) {
|
|
if (port->is_logic_conn)
|
|
gs_close_gsocket(&port->gs_sock);
|
|
else
|
|
closesocket(port->sock);
|
|
}
|
|
|
|
/* do not free port for unix domain conn from receiver flow control */
|
|
if (!IS_FD_TO_RECV_GSSOCK(ufds[i].fd))
|
|
/*
|
|
* We no longer need the open socket or port structure
|
|
* in this process
|
|
*/
|
|
ConnFree((void*)port);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the AioCompleters have not been started start them.
|
|
* These should remain run indefinitely.
|
|
*/
|
|
ADIO_RUN()
|
|
{
|
|
if (!g_instance.pid_cxt.AioCompleterStarted && !dummyStandbyMode) {
|
|
int aioStartErr = 0;
|
|
if ((aioStartErr = AioCompltrStart()) == 0) {
|
|
g_instance.pid_cxt.AioCompleterStarted = 1;
|
|
} else {
|
|
ereport(LOG, (errmsg_internal("Cannot start AIO completer threads error=%d", aioStartErr)));
|
|
/*
|
|
* If we failed to fork a aio process, just shut down.
|
|
* Any required cleanup will happen at next restart. We
|
|
* set g_instance.fatal_error so that an "abnormal shutdown" message
|
|
* gets logged when we exit.
|
|
*/
|
|
g_instance.fatal_error = true;
|
|
HandleChildCrash(g_instance.pid_cxt.AioCompleterStarted, 1, "AIO process");
|
|
}
|
|
}
|
|
}
|
|
ADIO_END();
|
|
|
|
if (threadPoolActivated && (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
|
|
g_threadPoolControler->AddWorkerIfNecessary();
|
|
|
|
/* If we have lost the log collector, try to start a new one */
|
|
if (g_instance.pid_cxt.SysLoggerPID == 0 && g_instance.attr.attr_common.Logging_collector)
|
|
g_instance.pid_cxt.SysLoggerPID = SysLogger_Start();
|
|
|
|
/* start auditor process */
|
|
/* If we have lost the audit collector, try to start a new one */
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
if (g_instance.pid_cxt.PgAuditPID == 0 && u_sess->attr.attr_security.Audit_enabled &&
|
|
(pmState == PM_RUN || pmState == PM_HOT_STANDBY) && !dummyStandbyMode) {
|
|
g_instance.pid_cxt.PgAuditPID = pgaudit_start();
|
|
ereport(LOG, (errmsg("auditor process started, pid=%lu", g_instance.pid_cxt.PgAuditPID)));
|
|
}
|
|
#else
|
|
if (g_instance.pid_cxt.PgAuditPID == 0 && u_sess->attr.attr_security.Audit_enabled && pmState == PM_RUN &&
|
|
!dummyStandbyMode) {
|
|
g_instance.pid_cxt.PgAuditPID = pgaudit_start();
|
|
ereport(LOG, (errmsg("auditor process started, pid=%lu", g_instance.pid_cxt.PgAuditPID)));
|
|
}
|
|
#endif
|
|
/* If u_sess->attr.attr_security.Audit_enabled is set to false, terminate auditor process. */
|
|
if (g_instance.pid_cxt.PgAuditPID != 0 && !u_sess->attr.attr_security.Audit_enabled) {
|
|
signal_child(g_instance.pid_cxt.PgAuditPID, SIGQUIT);
|
|
ereport(LOG, (errmsg("parameter audit_enabled is set to false, terminate auditor process.")));
|
|
}
|
|
|
|
if (g_instance.pid_cxt.AlarmCheckerPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.AlarmCheckerPID = startAlarmChecker();
|
|
|
|
/* If we have lost the reaper backend thread, try to start a new one */
|
|
if (g_instance.pid_cxt.ReaperBackendPID == 0)
|
|
g_instance.pid_cxt.ReaperBackendPID = initialize_util_thread(REAPER);
|
|
|
|
/*
|
|
* If no background writer process is running, and we are not in a
|
|
* state that prevents it, start one. It doesn't matter if this
|
|
* fails, we'll just try again later. Likewise for the checkpointer.
|
|
*/
|
|
if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY) {
|
|
if (g_instance.pid_cxt.CheckpointerPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.CheckpointerPID = initialize_util_thread(CHECKPOINT_THREAD);
|
|
|
|
if (g_instance.pid_cxt.BgWriterPID == 0 && !dummyStandbyMode &&
|
|
!g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
g_instance.pid_cxt.BgWriterPID = initialize_util_thread(BGWRITER);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CBMWriterPID == 0 && !dummyStandbyMode &&
|
|
u_sess->attr.attr_storage.enable_cbm_tracking)
|
|
g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER);
|
|
|
|
if (!dummyStandbyMode && g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
for (int i = 0; i < g_instance.attr.attr_storage.pagewriter_thread_num; i++) {
|
|
if (g_instance.pid_cxt.PageWriterPID[i] == 0) {
|
|
g_instance.pid_cxt.PageWriterPID[i] = initialize_util_thread(PAGEWRITER_THREAD);
|
|
}
|
|
}
|
|
int thread_num = g_instance.attr.attr_storage.bgwriter_thread_num;
|
|
thread_num = thread_num > 0 ? thread_num : 1;
|
|
for (int i = 0; i < thread_num; i++) {
|
|
if (g_instance.pid_cxt.CkptBgWriterPID[i] == 0) {
|
|
g_instance.pid_cxt.CkptBgWriterPID[i] = initialize_util_thread(BGWRITER);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (g_instance.pid_cxt.RemoteServicePID == 0 && !dummyStandbyMode && IS_PGXC_DATANODE &&
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode != NORMAL_MODE && !IS_DN_WITHOUT_STANDBYS_MODE() &&
|
|
IsRemoteReadModeOn() && get_cur_repl_num())
|
|
g_instance.pid_cxt.RemoteServicePID = initialize_util_thread(RPC_SERVICE);
|
|
}
|
|
/*
|
|
* Likewise, if we have lost the walwriter process, try to start a new
|
|
* one. But this is needed only in normal operation (else we cannot
|
|
* be writing any new WAL).
|
|
*/
|
|
if (g_instance.pid_cxt.WalWriterPID == 0 && pmState == PM_RUN) {
|
|
g_instance.pid_cxt.WalWriterPID = initialize_util_thread(WALWRITER);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WalWriterAuxiliaryPID == 0 && (pmState == PM_RUN ||
|
|
((pmState == PM_HOT_STANDBY || pmState == PM_RECOVERY) && g_instance.pid_cxt.WalRcvWriterPID != 0 &&
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE))) {
|
|
g_instance.pid_cxt.WalWriterAuxiliaryPID = initialize_util_thread(WALWRITERAUXILIARY);
|
|
ereport(LOG,
|
|
(errmsg("ServerLoop create WalWriterAuxiliary(%lu) for pmState:%u, ServerMode:%u.",
|
|
g_instance.pid_cxt.WalWriterAuxiliaryPID, pmState, t_thrd.postmaster_cxt.HaShmData->current_mode)));
|
|
}
|
|
|
|
/*
|
|
* Let the CBM writer thread exit if the enable_cbm_tracking GUC is switched off.
|
|
*/
|
|
if (!u_sess->attr.attr_storage.enable_cbm_tracking && g_instance.pid_cxt.CBMWriterPID != 0 &&
|
|
pmState == PM_RUN) {
|
|
ereport(LOG,
|
|
(errmsg("stop cbm writer thread because enable_cbm_tracking is switched off, "
|
|
"cbm writer thread pid=%lu",
|
|
g_instance.pid_cxt.CBMWriterPID)));
|
|
signal_child(g_instance.pid_cxt.CBMWriterPID, SIGTERM);
|
|
}
|
|
|
|
/*
|
|
* If we have lost the autovacuum launcher, try to start a new one. We
|
|
* don't want autovacuum to run in binary upgrade mode because
|
|
* autovacuum might update relfrozenxid64 for empty tables before the
|
|
* physical files are put in place.
|
|
*/
|
|
if (!u_sess->proc_cxt.IsBinaryUpgrade && g_instance.pid_cxt.AutoVacPID == 0 &&
|
|
(AutoVacuumingActive() || t_thrd.postmaster_cxt.start_autovac_launcher) && pmState == PM_RUN &&
|
|
!dummyStandbyMode && u_sess->attr.attr_common.upgrade_mode != 1 && !InplaceUpgradePrecommit) {
|
|
g_instance.pid_cxt.AutoVacPID = initialize_util_thread(AUTOVACUUM_LAUNCHER);
|
|
|
|
if (g_instance.pid_cxt.AutoVacPID != 0)
|
|
t_thrd.postmaster_cxt.start_autovac_launcher = false; /* signal processed */
|
|
}
|
|
|
|
/*
|
|
* If we have lost the job scheduler, try to start a new one.
|
|
*
|
|
* Before GRAND VERSION NUM 81000, we do not support scheduled job.
|
|
*/
|
|
if (g_instance.pid_cxt.PgJobSchdPID == 0 && pmState == PM_RUN &&
|
|
(g_instance.attr.attr_sql.job_queue_processes || t_thrd.postmaster_cxt.start_job_scheduler) &&
|
|
u_sess->attr.attr_common.upgrade_mode != 1) {
|
|
g_instance.pid_cxt.PgJobSchdPID = initialize_util_thread(JOB_SCHEDULER);
|
|
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0) {
|
|
t_thrd.postmaster_cxt.start_job_scheduler = false; /* signal processed */
|
|
ereport(LOG, (errmsg("job scheduler started, pid=%lu", g_instance.pid_cxt.PgJobSchdPID)));
|
|
}
|
|
}
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if ((IS_PGXC_COORDINATOR) && g_instance.pid_cxt.CommPoolerCleanPID == 0 && pmState == PM_RUN &&
|
|
u_sess->attr.attr_common.upgrade_mode != 1) {
|
|
StartPoolCleaner();
|
|
}
|
|
#endif
|
|
/* If g_instance.attr.attr_sql.job_queue_processes set to 0, terminate jobscheduler thread. */
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0 &&
|
|
!g_instance.attr.attr_sql.job_queue_processes) {
|
|
signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM);
|
|
}
|
|
|
|
/* If we have lost the archiver, try to start a new one */
|
|
if (XLogArchivingActive() && g_instance.pid_cxt.PgArchPID == 0 && !dummyStandbyMode) {
|
|
if (pmState == PM_RUN || pmState == PM_HOT_STANDBY || pmState == PM_RECOVERY) {
|
|
g_instance.pid_cxt.PgArchPID = pgarch_start();
|
|
}
|
|
}
|
|
|
|
/* If we have lost the stats collector, try to start a new one */
|
|
if (g_instance.pid_cxt.PgStatPID == 0 && (pmState == PM_RUN || pmState == PM_HOT_STANDBY) && !dummyStandbyMode)
|
|
g_instance.pid_cxt.PgStatPID = pgstat_start();
|
|
|
|
/* If we have lost the snapshot thread, try to start a new one */
|
|
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && g_instance.pid_cxt.SnapshotPID == 0 &&
|
|
pmState == PM_RUN)
|
|
g_instance.pid_cxt.SnapshotPID = snapshot_start();
|
|
|
|
if (ENABLE_ASP && g_instance.pid_cxt.AshPID == 0 && pmState == PM_RUN && !dummyStandbyMode)
|
|
g_instance.pid_cxt.AshPID = initialize_util_thread(ASH_WORKER);
|
|
|
|
/* If we have lost the full sql flush thread, try to start a new one */
|
|
if (ENABLE_STATEMENT_TRACK && g_instance.pid_cxt.StatementPID == 0 && pmState == PM_RUN)
|
|
g_instance.pid_cxt.StatementPID = initialize_util_thread(TRACK_STMT_WORKER);
|
|
|
|
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && g_instance.pid_cxt.PercentilePID == 0 &&
|
|
pmState == PM_RUN)
|
|
g_instance.pid_cxt.PercentilePID = initialize_util_thread(PERCENTILE_WORKER);
|
|
|
|
/* if workload manager is off, we still use this thread to build user hash table */
|
|
if ((ENABLE_WORKLOAD_CONTROL || !WLMIsInfoInit()) && g_instance.pid_cxt.WLMCollectPID == 0 &&
|
|
pmState == PM_RUN && !dummyStandbyMode)
|
|
g_instance.pid_cxt.WLMCollectPID = initialize_util_thread(WLM_WORKER);
|
|
|
|
if (ENABLE_WORKLOAD_CONTROL && (g_instance.pid_cxt.WLMMonitorPID == 0) && (pmState == PM_RUN) &&
|
|
!dummyStandbyMode)
|
|
g_instance.pid_cxt.WLMMonitorPID = initialize_util_thread(WLM_MONITOR);
|
|
|
|
if (ENABLE_WORKLOAD_CONTROL && (g_instance.pid_cxt.WLMArbiterPID == 0) && (pmState == PM_RUN) &&
|
|
!dummyStandbyMode)
|
|
g_instance.pid_cxt.WLMArbiterPID = initialize_util_thread(WLM_ARBITER);
|
|
|
|
if (IS_PGXC_COORDINATOR && g_instance.attr.attr_sql.max_resource_package &&
|
|
(g_instance.pid_cxt.CPMonitorPID == 0) && (pmState == PM_RUN) && !dummyStandbyMode)
|
|
g_instance.pid_cxt.CPMonitorPID = initialize_util_thread(WLM_CPMONITOR);
|
|
|
|
/* If we have lost the twophase cleaner, try to start a new one */
|
|
if (
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
IS_PGXC_COORDINATOR &&
|
|
#else
|
|
(t_thrd.postmaster_cxt.HaShmData->current_mode == NORMAL_MODE ||
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode == PRIMARY_MODE) &&
|
|
#endif
|
|
u_sess->attr.attr_common.upgrade_mode != 1 &&
|
|
g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && pmState == PM_RUN)
|
|
g_instance.pid_cxt.TwoPhaseCleanerPID = initialize_util_thread(TWOPASECLEANER);
|
|
|
|
/* If we have lost the LWLock monitor, try to start a new one */
|
|
if (g_instance.pid_cxt.FaultMonitorPID == 0 && pmState == PM_RUN)
|
|
g_instance.pid_cxt.FaultMonitorPID = initialize_util_thread(FAULTMONITOR);
|
|
|
|
/* If we have lost the heartbeat service, try to start a new one */
|
|
if (NeedHeartbeat())
|
|
g_instance.pid_cxt.HeartbeatPID = initialize_util_thread(HEARTBEAT);
|
|
|
|
/* If we have lost the csnmin sync thread, try to start a new one */
|
|
if (GTM_LITE_CN && g_instance.csnminsync_cxt.is_fcn_ccn &&
|
|
g_instance.pid_cxt.CsnminSyncPID == 0 && pmState == PM_RUN) {
|
|
g_instance.pid_cxt.CsnminSyncPID = initialize_util_thread(CSNMIN_SYNC);
|
|
}
|
|
|
|
/* If we have lost the barrier creator thread, try to start a new one */
|
|
if (START_BARRIER_CREATOR && g_instance.pid_cxt.BarrierCreatorPID == 0 &&
|
|
pmState == PM_RUN && XLogArchivingActive()) {
|
|
obs_slot = getObsReplicationSlot();
|
|
if (obs_slot != NULL) {
|
|
g_instance.pid_cxt.BarrierCreatorPID = initialize_util_thread(BARRIER_CREATOR);
|
|
}
|
|
}
|
|
|
|
/* If we need to signal the autovacuum launcher, do so now */
|
|
if (t_thrd.postmaster_cxt.avlauncher_needs_signal) {
|
|
t_thrd.postmaster_cxt.avlauncher_needs_signal = false;
|
|
|
|
if (g_instance.pid_cxt.AutoVacPID != 0)
|
|
gs_signal_send(g_instance.pid_cxt.AutoVacPID, SIGUSR2);
|
|
}
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (IS_PGXC_DATANODE && g_instance.attr.attr_common.enable_tsdb &&
|
|
g_instance.pid_cxt.TsCompactionPID == 0 && pmState == PM_RUN &&
|
|
u_sess->attr.attr_common.enable_ts_compaction) {
|
|
g_instance.pid_cxt.TsCompactionPID = initialize_util_thread(TS_COMPACTION);
|
|
}
|
|
|
|
if (IS_PGXC_DATANODE && g_instance.attr.attr_common.enable_tsdb &&
|
|
u_sess->attr.attr_common.enable_ts_compaction && pmState == PM_RUN &&
|
|
g_instance.pid_cxt.TsCompactionAuxiliaryPID == 0) {
|
|
g_instance.pid_cxt.TsCompactionAuxiliaryPID = initialize_util_thread(TS_COMPACTION_AUXILIAY);
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
/* If job worker failed to run, postmaster need send signal SIGUSR2 to job scheduler thread. */
|
|
if (t_thrd.postmaster_cxt.jobscheduler_needs_signal) {
|
|
t_thrd.postmaster_cxt.jobscheduler_needs_signal = false;
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0) {
|
|
gs_signal_send(g_instance.pid_cxt.PgJobSchdPID, SIGUSR2);
|
|
}
|
|
}
|
|
|
|
/* Database Security: Support database audit */
|
|
if (pmState == PM_RUN) {
|
|
if (t_thrd.postmaster_cxt.audit_primary_failover) {
|
|
int rcs = snprintf_s(details,
|
|
sizeof(details),
|
|
sizeof(details) - 1,
|
|
"the standby do failover success,now it is primary!");
|
|
securec_check_ss(rcs, "", "");
|
|
pgaudit_system_switchover_ok(details);
|
|
t_thrd.postmaster_cxt.audit_primary_failover = false;
|
|
}
|
|
if (t_thrd.postmaster_cxt.audit_standby_switchover) {
|
|
int rcs = snprintf_s(details,
|
|
sizeof(details),
|
|
sizeof(details) - 1,
|
|
"the standby do switchover success,now it is primary!");
|
|
securec_check_ss(rcs, "", "");
|
|
pgaudit_system_switchover_ok(details);
|
|
t_thrd.postmaster_cxt.audit_standby_switchover = false;
|
|
}
|
|
}
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (PMstateIsRun()) {
|
|
(void)streaming_backend_manager(STREAMING_BACKEND_INIT);
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialise the ufds for poll() for the ports we are listening on.
|
|
* Return the number of sockets to listen on.
|
|
*/
|
|
#ifdef HAVE_POLL
|
|
static int initPollfd(struct pollfd* ufds)
|
|
{
|
|
int i, cnt = 0;
|
|
int fd;
|
|
|
|
/* set default value for all pollfds */
|
|
for (i = 0; i < MAXLISTEN * 2 + 1; i++) {
|
|
ufds[i].fd = PGINVALID_SOCKET;
|
|
ufds[cnt].events = 0;
|
|
}
|
|
|
|
for (i = 0; i < MAXLISTEN; i++) {
|
|
fd = t_thrd.postmaster_cxt.ListenSocket[i];
|
|
|
|
if (fd == PGINVALID_SOCKET)
|
|
break;
|
|
|
|
ufds[cnt].fd = fd;
|
|
ufds[cnt].events = POLLIN | POLLPRI;
|
|
cnt++;
|
|
}
|
|
if (t_thrd.postmaster_cxt.sock_for_libcomm != PGINVALID_SOCKET) {
|
|
ufds[cnt].fd = t_thrd.postmaster_cxt.sock_for_libcomm;
|
|
ufds[cnt].events = POLLIN | POLLPRI;
|
|
cnt++;
|
|
}
|
|
return cnt;
|
|
}
|
|
#else
|
|
/*
|
|
* Initialise the masks for select() for the ports we are listening on.
|
|
* Return the number of sockets to listen on.
|
|
*/
|
|
static int initMasks(fd_set* rmask)
|
|
{
|
|
int maxsock = -1;
|
|
int i;
|
|
|
|
FD_ZERO(rmask);
|
|
|
|
for (i = 0; i < MAXLISTEN; i++) {
|
|
int fd = t_thrd.postmaster_cxt.ListenSocket[i];
|
|
|
|
if (fd == PGINVALID_SOCKET) {
|
|
continue;
|
|
}
|
|
|
|
FD_SET(fd, rmask);
|
|
|
|
if (fd > maxsock) {
|
|
maxsock = fd;
|
|
}
|
|
}
|
|
|
|
return maxsock + 1;
|
|
}
|
|
#endif // end of HAVE_POLL
|
|
|
|
/*
|
|
* Read a client's startup packet and do something according to it.
|
|
*
|
|
* Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
|
|
* not return at all.
|
|
*
|
|
* (Note that ereport(FATAL) stuff is sent to the client, so only use it
|
|
* if that's what you want. Return STATUS_ERROR if you don't want to
|
|
* send anything to the client, which would typically be appropriate
|
|
* if we detect a communications failure.)
|
|
*/
|
|
/*
 * ProcessStartupPacket
 *
 * port:    connection being set up. On success its proto, database_name,
 *          user_name, cmdline_options and guc_options fields are filled in
 *          from the packet.
 * SSLdone: true when this is the recursive call made after an SSL
 *          negotiation request has already been answered; a second
 *          negotiation request is then not honoured.
 *
 * Returns STATUS_OK when a regular startup packet was accepted, and
 * STATUS_ERROR for communication failures as well as for cancel/stop
 * requests (which are fully handled here and need no backend).
 * Validation failures are raised through ereport(elevel), where elevel is
 * ERROR for thread pool workers and FATAL otherwise.
 */
int ProcessStartupPacket(Port* port, bool SSLdone)
{
    const int tvFactor = 5;
    int32 len;
    void* buf = NULL;
    ProtocolVersion proto;
    MemoryContext oldcontext;
    volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData;
    bool isMaintenanceConnection = false;
    bool clientIsGsql = false;
    bool clientIsCmAgent = false;
    bool clientIsGsClean = false;
    bool clientIsOM = false;
    bool clientIsWDRXdb = false;
    bool findProtoVer = false;
    int elevel = (IS_THREAD_POOL_WORKER ? ERROR : FATAL);
    struct timeval tv = {tvFactor * u_sess->attr.attr_network.PoolerConnectTimeout, 0};
    struct timeval oldTv = {0, 0};
    socklen_t oldTvLen = sizeof(oldTv);
    bool isTvSeted = false;

    CHECK_FOR_PROCDIEPENDING();

    /*
     * Set recv timeout on coordinator in case of connected from external application.
     * The previous value is remembered in oldTv and put back before returning
     * a connection that stays open.
     */
    if (IS_PGXC_COORDINATOR && !is_cluster_internal_IP(*(struct sockaddr*)&port->raddr.addr)) {
        if (getsockopt(port->sock, SOL_SOCKET, SO_RCVTIMEO, &oldTv, &oldTvLen)) {
            ereport(LOG, (errmsg("getsockopt(SO_RCVTIMEO) failed: %m")));
            return STATUS_ERROR;
        }

        if (setsockopt(port->sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval)) < 0) {
            ereport(LOG, (errmsg("setsockopt(SO_RCVTIMEO) failed: %m")));
            return STATUS_ERROR;
        }

        isTvSeted = true;
    }

    if (pq_getbytes((char*)&len, 4) == EOF) {
        /*
         * EOF after SSLdone probably means the client didn't like our
         * response to NEGOTIATE_SSL_CODE.  That's not an error condition, so
         * don't clutter the log with a complaint.
         */
        if (!SSLdone)
            ereport(DEBUG1, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("incomplete startup packet")));

        return STATUS_ERROR;
    }

    /* The length word counts itself; what remains is the packet body. */
    len = ntohl(len);
    len -= 4;

    if (len < (int32)sizeof(ProtocolVersion) || len > MAX_STARTUP_PACKET_LENGTH) {
        ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid length of startup packet")));
        return STATUS_ERROR;
    }

    /*
     * Allocate at least the size of an old-style startup packet, plus one
     * extra byte, and make sure all are zeroes.  This ensures we will have
     * null termination of all strings, in both fixed- and variable-length
     * packet layouts.
     */
    if (len <= (int32)sizeof(StartupPacket))
        buf = palloc0(sizeof(StartupPacket) + 1);
    else
        buf = palloc0(len + 1);

    if (pq_getbytes((char*)buf, len) == EOF) {
        ereport(COMMERROR,
            (errcode(ERRCODE_PROTOCOL_VIOLATION),
                errmsg("incomplete startup packet, remote_host[%s], remote_port[%s].",
                    u_sess->proc_cxt.MyProcPort->remote_host,
                    u_sess->proc_cxt.MyProcPort->remote_port)));
        return STATUS_ERROR;
    }

    /*
     * If we're going to reject the connection due to database state, say so
     * now instead of wasting cycles on an authentication exchange. (This also
     * allows a pg_ping utility to be written.)
     */
    switch (port->canAcceptConnections) {
        case CAC_STARTUP:
            ereport(elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("the database system is starting up")));
            break;

        case CAC_SHUTDOWN:
            ereport(elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("the database system is shutting down")));
            break;

        case CAC_RECOVERY:
            ereport(elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("the database system is in recovery mode")));
            break;

        case CAC_TOOMANY:
            ereport(elevel, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already")));
            break;

        case CAC_WAITBACKUP:
            /* OK for now, will check in InitPostgres */
            break;

        case CAC_OK:
            break;

        default:
            break;
    }

    /*
     * The first field is either a protocol version number or a special
     * request code.
     */
    port->proto = proto = ntohl(*((ProtocolVersion*)buf));

    if (proto == CANCEL_REQUEST_CODE) {
        /*
         * Mark it is a temp thread which will exit quickly itself.
         * Then PM need not wait and retry to send SIGTERM signal to it.
         * We can not mark it as temp thread in thread pool mode, because
         * the thread will be reused later.
         */
        if (!ENABLE_THREAD_POOL)
            MarkPostmasterTempBackend();

        processCancelRequest(port, buf);
        /* Not really an error, but we don't want to proceed further */
        return STATUS_ERROR;
    } else if (proto == STOP_REQUEST_CODE) {
        processStopRequest(port, buf);

        return STATUS_ERROR;
    }

    if (proto == NEGOTIATE_SSL_CODE && !SSLdone) {
        char SSLok;

#ifdef USE_SSL

        /* No SSL when disabled or on Unix sockets */
        if (!g_instance.attr.attr_security.EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
            SSLok = 'N';
        else
            SSLok = 'S'; /* Support for SSL */

#else
        SSLok = 'N'; /* No support for SSL */
#endif

    retry1:
        errno = 0;
        if (send(port->sock, &SSLok, 1, 0) <= 0) {
            if (errno == EINTR)
                goto retry1; /* if interrupted, just retry */

            ereport(COMMERROR, (errcode_for_socket_access(), errmsg("failed to send SSL negotiation response: %m")));
            return STATUS_ERROR; /* close the connection */
        }

#ifdef USE_SSL

        if (SSLok == 'S' && secure_open_server(port) == -1)
            return STATUS_ERROR;

#endif
        /* regular startup packet, cancel, etc packet should follow... */
        /* but not another SSL negotiation request */
        int sslStatus = ProcessStartupPacket(port, true);

        /*
         * The nested call saved and restored the timeout that *this* call
         * installed above, so the socket still carries the temporary value.
         * Put back the setting that was in effect before we touched it.
         */
        if (isTvSeted && (setsockopt(port->sock, SOL_SOCKET, SO_RCVTIMEO, &oldTv, oldTvLen) < 0)) {
            ereport(LOG, (errmsg("setsockopt(SO_RCVTIMEO) failed: %m")));
            return STATUS_ERROR;
        }

        return sslStatus;
    }

    /* Could add additional special packet types here */

    /*
     * Set FrontendProtocol now so that ereport() knows what format to send if
     * we fail during startup.
     */
    FrontendProtocol = proto;

    /* Check we can handle the protocol the frontend is using. */
    for (size_t i = 0; i < sizeof(protoVersionList) / sizeof(protoVersionList[0]); i++) {
        if (PG_PROTOCOL_MAJOR(proto) == protoVersionList[i][0] && PG_PROTOCOL_MINOR(proto) == protoVersionList[i][1]) {
            findProtoVer = true;
            break;
        }
    }

    if (!findProtoVer) {
        ereport(elevel,
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                errmsg("unsupported frontend protocol %u.%u.", PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto))));
    }

    /*
     * Now fetch parameters out of startup packet and save them into the Port
     * structure.  All data structures attached to the Port struct must be
     * allocated in t_thrd.top_mem_cxt so that they will remain available in a
     * running backend (even after t_thrd.mem_cxt.postmaster_mem_cxt is destroyed).  We need
     * not worry about leaking this storage on failure, since we aren't in the
     * postmaster process anymore.
     */
    oldcontext = MemoryContextSwitchTo(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR));

    if (PG_PROTOCOL_MAJOR(proto) >= 3) {
        int32 offset = sizeof(ProtocolVersion);

        /*
         * Scan packet body for name/option pairs.	We can assume any string
         * beginning within the packet body is null-terminated, thanks to
         * zeroing extra byte above.
         */
        port->guc_options = NIL;

        while (offset < len) {
            char* nameptr = ((char*)buf) + offset;
            int32 valoffset;
            char* valptr = NULL;

            if (*nameptr == '\0')
                break; /* found packet terminator */

            valoffset = offset + strlen(nameptr) + 1;

            if (valoffset >= len)
                break; /* missing value, will complain below */

            valptr = ((char*)buf) + valoffset;

            if (strcmp(nameptr, "database") == 0)
                port->database_name = pstrdup(valptr);
            else if (strcmp(nameptr, "user") == 0)
                port->user_name = pstrdup(valptr);
            else if (strcmp(nameptr, "options") == 0) {
                port->cmdline_options = pstrdup(valptr);

                if (strstr(port->cmdline_options, "xc_maintenance_mode=on") != NULL) {
                    isMaintenanceConnection = true;
#ifndef ENABLE_MULTIPLE_NODES
                } else if (strstr(port->cmdline_options, "remotetype=internaltool") != NULL) {
                    u_sess->proc_cxt.IsInnerMaintenanceTools = true;
#endif
                }
            } else if (strcmp(nameptr, "replication") == 0) {
                if (!IsHAPort(u_sess->proc_cxt.MyProcPort) && g_instance.attr.attr_common.enable_thread_pool) {
                    ereport(elevel,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                            errmsg("replication should connect HA port in thread_pool")));
                }

                /*
                 * Due to backward compatibility concerns the replication
                 * parameter is a hybrid beast which allows the value to be
                 * either boolean or the string 'database'. The latter
                 * connects to a specific database which is e.g. required for
                 * logical decoding.
                 */
                /* Add data replication */
                if (strcmp(valptr, "data") == 0) {
                    /* mark the data sender as a wal sender for some common management */
                    t_thrd.role = WAL_NORMAL_SENDER;

                    if (!g_instance.attr.attr_storage.enable_mix_replication)
                        t_thrd.datasender_cxt.am_datasender = true;
                } else if (strcmp(valptr, "database") == 0) {
                    if (!IsHAPort(u_sess->proc_cxt.MyProcPort) && g_instance.attr.attr_common.enable_thread_pool) {
                        ereport(elevel,
                            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                errmsg("logical replication should connect HA port in thread_pool")));
                    }
                    t_thrd.role = WAL_DB_SENDER;
                } else {
                    bool _am_normal_walsender = false;
                    if (!parse_bool(valptr, &_am_normal_walsender)) {
                        ereport(elevel,
                            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                errmsg("invalid value for parameter \"replication\""),
                                errhint("Valid values are: false, 0, true, 1, data, database.")));
                    } else if (_am_normal_walsender) {
                        t_thrd.role = WAL_NORMAL_SENDER;
                    }
                }

            } else if (strcmp(nameptr, "backend_version") == 0) {
                errno = 0;
                port->SessionVersionNum = (uint32)strtoul(valptr, NULL, 10);
                if (errno != 0)
                    ereport(elevel,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                            errmsg("invalid value for parameter \"backend_version\""),
                            errhint("Valid values are of uint32 type.")));

                if (port->SessionVersionNum > GRAND_VERSION_NUM)
                    ereport(elevel,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                            errmsg("requested backend version is larger than grand version.")));
            } else if (strcmp(nameptr, "enable_full_encryption") == 0) {
                bool enable_ce = false;
                if (!parse_bool(valptr, &enable_ce)) {
                    ereport(elevel,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                            errmsg("invalid value for parameter \"enable_full_encryption\""),
                            errhint("Valid values are: 0, 1.")));
                }
                u_sess->attr.attr_common.enable_full_encryption = enable_ce;
            } else if (strcmp(nameptr, "connect_timeout") == 0) {
                errno = 0;
                const uint32 poolerConnectTimeoutMaxValue = 7200; /* Max value of pooler_connect_timeout */
                uint32 poolerConnectTimeout = (uint32) strtoul(valptr, NULL, 10);
                if (errno != 0) {
                    /*
                     * Report the value the client actually sent, and raise at
                     * elevel like every other parameter check in this loop.
                     */
                    ereport(elevel,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                            errmsg("invalid value[%s] for parameter \"connect_timeout\"", valptr),
                            errhint("Valid values are of uint32 type.")));
                }
                /* Clamp the requested timeout to the allowed maximum. */
                u_sess->attr.attr_network.PoolerConnectTimeout =
                    (poolerConnectTimeout < poolerConnectTimeoutMaxValue) ?
                    poolerConnectTimeout : poolerConnectTimeoutMaxValue;
            } else {
                if (strcmp(nameptr, "application_name") == 0) {
                    /* check if remote is dummystandby */
                    if (strcmp(valptr, "gs_ctl") == 0) {
                        /* mark remote as gs_ctl build */
                        t_thrd.postmaster_cxt.senderToBuildStandby = true;
                        u_sess->proc_cxt.clientIsGsCtl = true;
                        ereport(DEBUG5, (errmsg("gs_ctl connected")));
                    } else if (strcmp(valptr, "cm_agent") == 0) {
                        /* mark remote as cm_agent */
                        clientIsCmAgent = true;
                        u_sess->libpq_cxt.IsConnFromCmAgent = true;
                        ereport(DEBUG5, (errmsg("cm_agent connected")));
                    } else if (strcmp(valptr, "gs_clean") == 0) {
                        clientIsGsClean = true;
                        ereport(DEBUG5, (errmsg("gs_clean connected")));
#ifdef ENABLE_MULTIPLE_NODES
                    } else if (strcmp(valptr, "dummystandby") == 0) {
                        /* mark remote as dummystandby */
                        t_thrd.postmaster_cxt.senderToDummyStandby = true;
                        ereport(DEBUG5, (errmsg("secondary standby connected")));
                    } else if (strcmp(valptr, "gs_roach") == 0) {
                        u_sess->proc_cxt.clientIsGsroach = true;
                        ereport(DEBUG5, (errmsg("gs_roach connected")));
                    } else if (strcmp(valptr, "gs_redis") == 0) {
                        u_sess->proc_cxt.clientIsGsredis = true;
                        ereport(DEBUG5, (errmsg("gs_redis connected")));
#endif
                    } else if (strcmp(valptr, "WDRXdb") == 0) {
                        clientIsWDRXdb = true;
                        ereport(DEBUG5, (errmsg("WDRXdb connected")));
                    } else if (strcmp(valptr, "gs_rewind") == 0) {
                        /*
                         * mark remote as gs_rewind.
                         * in single-node mode, gs_ctl need gs_rewind to do inc-build.
                         */
                        u_sess->proc_cxt.clientIsGsrewind = true;
                        ereport(DEBUG5, (errmsg("gs_rewind connected")));
                    } else if (strcmp(valptr, "gsql") == 0) {
                        /* mark remote as gsql */
                        clientIsGsql = true;
                        ereport(DEBUG5, (errmsg("gsql connected")));
                    } else if (strcmp(valptr, "OM") == 0) {
                        clientIsOM = true;
                        ereport(DEBUG5, (errmsg("OM connected")));
                    } else if (strcmp(valptr, "gs_dump") == 0) {
                        u_sess->proc_cxt.clientIsGsdump = true;
                        ereport(DEBUG5, (errmsg("gs_dump connected")));
                    } else if (strcmp(valptr, "gs_basebackup") == 0) {
                        u_sess->proc_cxt.clientIsGsBasebackup = true;
                        ereport(LOG, (errmsg("gs_basebackup connected")));
                    } else if (strcmp(valptr, "gs_restore") == 0) {
                        u_sess->proc_cxt.clientIsGsRestore = true;
                        ereport(DEBUG5, (errmsg("gs_restore connected")));
                    } else {
                        ereport(DEBUG5, (errmsg("application %s connected", valptr)));
                    }

                    errno_t ssrc = strncpy_s(u_sess->proc_cxt.applicationName, NAMEDATALEN, valptr, NAMEDATALEN - 1);
                    if (ssrc != EOK) {
                        ereport(WARNING, (errmsg("Save app name %s failed in receive startup packet", valptr)));
                    }
                }
                /* Assume it's a generic GUC option */
                port->guc_options = lappend(port->guc_options, pstrdup(nameptr));
                port->guc_options = lappend(port->guc_options, pstrdup(valptr));
            }

            offset = valoffset + strlen(valptr) + 1;
        }

        /*
         * If we didn't find a packet terminator exactly at the end of the
         * given packet length, complain.
         */
        if (offset != len - 1)
            ereport(elevel,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                    errmsg("invalid startup packet layout: expected terminator as last byte")));
    } else {
        /*
         * Get the parameters from the old-style, fixed-width-fields startup
         * packet as C strings.  The packet destination was cleared first so a
         * short packet has zeros silently added.  We have to be prepared to
         * truncate the pstrdup result for oversize fields, though.
         */
        StartupPacket* packet = (StartupPacket*)buf;

        port->database_name = pstrdup(packet->database);

        if (strlen(port->database_name) > sizeof(packet->database))
            port->database_name[sizeof(packet->database)] = '\0';

        port->user_name = pstrdup(packet->user);

        if (strlen(port->user_name) > sizeof(packet->user))
            port->user_name[sizeof(packet->user)] = '\0';

        port->cmdline_options = pstrdup(packet->options);

        if (strlen(port->cmdline_options) > sizeof(packet->options))
            port->cmdline_options[sizeof(packet->options)] = '\0';

        port->guc_options = NIL;
    }

    /* Inner tool with local sha256 will not be authenticated. */
    if (clientIsCmAgent || clientIsGsClean || clientIsOM || u_sess->proc_cxt.clientIsGsroach || clientIsWDRXdb ||
        u_sess->proc_cxt.clientIsGsCtl || u_sess->proc_cxt.clientIsGsrewind || u_sess->proc_cxt.clientIsGsredis) {
        u_sess->proc_cxt.IsInnerMaintenanceTools = true;
    }
    /* cm_agent and gs_clean should not be controlled by workload manager */
    if (clientIsCmAgent || clientIsGsClean) {
        u_sess->proc_cxt.IsWLMWhiteList = true;
    }
#ifdef ENABLE_MULTIPLE_NODES
    if (clientIsCmAgent) {
#ifdef ENABLE_DISTRIBUTE_TEST
        if (TEST_STUB(DN_CM_NEW_CONN, stub_sleep_emit)) {
            ereport(get_distribute_test_param()->elevel,
                (errmsg("sleep_emit happen during ProcessStartupPacket time:%ds, stub_name:%s",
                    get_distribute_test_param()->sleep_time,
                    get_distribute_test_param()->test_stub_name)));
        }
#endif
    }
#endif

    /* Check a user name was given. */
    if (port->user_name == NULL || port->user_name[0] == '\0')
        ereport(elevel,
            (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
                errmsg("no PostgreSQL user name specified in startup packet")));

    /* The database defaults to the user name. */
    if (port->database_name == NULL || port->database_name[0] == '\0')
        port->database_name = pstrdup(port->user_name);

    /*
     * Truncate given database and user names to length of a Postgres name.
     * This avoids lookup failures when overlength names are given.
     */
    if (strlen(port->database_name) >= NAMEDATALEN)
        port->database_name[NAMEDATALEN - 1] = '\0';

    if (strlen(port->user_name) >= NAMEDATALEN)
        port->user_name[NAMEDATALEN - 1] = '\0';

    /*
     * Normal walsender backends, e.g. for streaming replication, are not
     * connected to a particular database. But walsenders used for logical
     * replication need to connect to a specific database. We allow streaming
     * replication commands to be issued even if connected to a database as it
     * can make sense to first make a basebackup and then stream changes
     * starting from that.
     */
    if (AM_WAL_SENDER && !AM_WAL_DB_SENDER)
        port->database_name[0] = '\0';

    /* set special tcp keepalive parameters for build senders */
    if (AM_WAL_SENDER && t_thrd.postmaster_cxt.senderToBuildStandby) {
        if (!IS_AF_UNIX(port->laddr.addr.ss_family)) {
            pq_setkeepalivesidle(7200, port);
            pq_setkeepalivesinterval(75, port);
            pq_setkeepalivescount(9, port);
        }
    }

    /*
     * Done putting stuff in t_thrd.top_mem_cxt.
     */
    (void)MemoryContextSwitchTo(oldcontext);

    if (AM_WAL_SENDER) {
        int channel_adapt = 0, i = 0;

        if (!IS_PGXC_COORDINATOR) {
#ifdef ENABLE_MULTIPLE_NODES
            if (NORMAL_MODE == hashmdata->current_mode) {
                if (!u_sess->proc_cxt.clientIsGsBasebackup && !AM_WAL_DB_SENDER) {
                    ereport(elevel,
                        (errmsg("the current t_thrd.postmaster_cxt.server_mode is NORMAL, "
                                "could not accept HA connection.")));
                }
            }
#endif

            /* Count the configured replication channels this connection matches. */
            for (i = 1; i < MAX_REPLNODE_NUM; i++) {
                if (t_thrd.postmaster_cxt.ReplConnArray[i] != NULL &&
                    IsChannelAdapt(port, t_thrd.postmaster_cxt.ReplConnArray[i])) {
                    channel_adapt++;
                }
            }

            if (0 == channel_adapt) {
                /*
                 * This check has its own severity, chosen by comm_client_bind;
                 * it gets a distinct name so it does not hide the
                 * function-level elevel.
                 */
                int haElevel = comm_client_bind ? FATAL : WARNING;
                ereport(haElevel,
                    (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("the ha connection is not in the channel list")));
            }
        }
    } else {
        if (dummyStandbyMode)
            ereport(
                elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("Secondary Standby does not accept connection")));

        /*
         * clients other than inner maintenance tools and remote coordinators are only
         * allowed to connect through gsql port, gsql port unix socket and ha port unix socket
         */
        if (!u_sess->proc_cxt.IsInnerMaintenanceTools && !IsConnPortFromCoord(port) &&
            (!IsLocalAddr(port) || !IsLocalPort(port))) {
            ereport(elevel,
                (errcode(ERRCODE_CANNOT_CONNECT_NOW),
                    errmsg("the local listen ip and port is not for the gsql client")));
        }
    }

    /*
     * We need to check whether the connection can be accepted, standby and pending
     * mode can not accept connection.
     * Do not accept gsql connection when promote primary node does not write empty log.
     * 1: when the client is walsender we do not check, except sender to dummy standby.
     * 2: when the client is gsql, and use -m(it will make xc_maintenance_mode to on)
     *    to connect, we do not check.
     */
    if ((!AM_WAL_SENDER && !(isMaintenanceConnection &&
        (clientIsGsql || clientIsCmAgent || t_thrd.postmaster_cxt.senderToBuildStandby))) ||
        t_thrd.postmaster_cxt.senderToDummyStandby) {

        if (PENDING_MODE == hashmdata->current_mode && !IS_PGXC_COORDINATOR) {
            ereport(elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW),
                errmsg("can not accept connection in pending mode.")));
        } else {
#ifdef ENABLE_MULTIPLE_NODES
            if (STANDBY_MODE == hashmdata->current_mode) {
                ereport(elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW),
                    errmsg("can not accept connection in standby mode.")));
            }
#else
            if (hashmdata->current_mode == STANDBY_MODE && !g_instance.attr.attr_storage.EnableHotStandby) {
                ereport(elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW),
                    errmsg("can not accept connection if hot standby off")));
            }
#endif
        }
    }

    if ((PM_RUN != pmState) && t_thrd.postmaster_cxt.senderToDummyStandby) {
        ereport(elevel,
            (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("can not accept dummy standby connection in standby mode.")));
        return STATUS_ERROR;
    }

#ifdef USE_SSL
    if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && g_instance.attr.attr_security.EnableSSL &&
        u_sess->attr.attr_security.RequireSSL && !IS_AF_UNIX(port->laddr.addr.ss_family) && !SSLdone) {
        /*
         * only deal with connections between client and server.
         * for connections between coordinators, we should not use SSL.
         */
        if (!u_sess->proc_cxt.IsInnerMaintenanceTools && !IsConnPortFromCoord(port)) {
            ereport(elevel,
                (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("SSL connection is required by the database system")));
        }
    }
#endif

#ifdef ENABLE_MULTIPLE_NODES
    if (port->cmdline_options != NULL && strstr(port->cmdline_options, "remotetype=coordinator") != NULL) {
        u_sess->attr.attr_common.remoteConnType = REMOTE_CONN_COORD;
    } else
#endif
    {
        u_sess->attr.attr_common.remoteConnType = REMOTE_CONN_APP;
    }

    /* We need to restore the socket settings to prevent unexpected errors. */
    if (isTvSeted && (setsockopt(port->sock, SOL_SOCKET, SO_RCVTIMEO, &oldTv, oldTvLen) < 0)) {
        ereport(LOG, (errmsg("setsockopt(SO_RCVTIMEO) failed: %m")));
        return STATUS_ERROR;
    }

    return STATUS_OK;
}
|
|
|
|
/*
|
|
* The client has sent a stop query request, not a normal
|
|
* start-a-new-connection packet.
|
|
*/
|
|
/*
 * processStopRequest -- act on a "stop query" packet.
 *
 * port: connection the packet arrived on (not used here).
 * pkt:  the raw packet, interpreted as a StopRequestPacket. Its fields are
 *       in network byte order; the 64-bit query id is sent as two 32-bit
 *       halves.
 *
 * In thread pool mode the request is forwarded to the session controller.
 * Otherwise the logic thread id is mapped to a real thread id, and the
 * stream node group of that thread is asked to stop, unless the main thread
 * has already exited (thread id 0).
 */
static void processStopRequest(Port* port, void* pkt)
{
    StopRequestPacket* request = (StopRequestPacket*)pkt;
    int logicTid = (int)ntohl(request->backendPID);
    uint64 queryIdHigh = (uint64)ntohl(request->query_id_first);
    uint32 queryIdLow = (uint32)ntohl(request->query_id_end);
    uint64 queryId = (queryIdHigh << 32) + queryIdLow;

    if (!ENABLE_THREAD_POOL) {
        ThreadId target = getThreadIdFromLogicThreadId(logicTid);

        /* If the main thread already exited, there is nothing to stop. */
        if (target != 0) {
            StreamNodeGroup::stopAllThreadInNodeGroup(target, queryId);
        }
        return;
    }

    g_threadPoolControler->GetSessionCtrl()->SendProcSignal(logicTid, PROCSIG_EXECUTOR_FLAG, queryId);
}
|
|
|
|
/*
|
|
* The client has sent a cancel request packet, not a normal
|
|
* start-a-new-connection packet. Perform the necessary processing.
|
|
* Nothing is sent back to the client.
|
|
*/
|
|
/*
 * processCancelRequest -- act on a cancel request packet.
 *
 * port: connection the packet arrived on (not used here).
 * pkt:  the raw packet, interpreted as a CancelRequestPacket with fields in
 *       network byte order.
 *
 * The low bit of the cancel key selects the lookup path:
 *   - odd key:  backendPID is a backend slot; the matching Backend receives
 *               SIGINT when its cancel key equals the one supplied.
 *   - even key: backendPID is a thread pool session slot; the matching
 *               session receives SIGINT. An even key while the thread pool
 *               is disabled is only logged.
 * Nothing is sent back to the client in any case.
 */
static void processCancelRequest(Port* port, void* pkt)
{
    CancelRequestPacket* request = (CancelRequestPacket*)pkt;
    long authCode = (long)ntohl(request->cancelAuthCode);

    if (((unsigned long)authCode & 0x1) != 0) {
        /* Odd key: the id in the packet names a backend slot. */
        int slot = (int)ntohl(request->backendPID);
        Backend* target = GetBackend(slot);

        if (target == NULL || target->pid == 0 || target->pid == InvalidTid) {
            ereport(LOG,
                (errmsg(
                    "Don't found the match process, the backend slot(%d), pid(%lu)", slot, target ? target->pid : 0)));
            return;
        }

        if (target->cancel_key != authCode) {
            /* Right PID, wrong key: refuse */
            ereport(LOG, (errmsg("wrong key in cancel request for process %lu", target->pid)));
            return;
        }

        /* Found a match; signal that backend to cancel current op */
        ereport(DEBUG2, (errmsg_internal("processing cancel request: sending SIGINT to process %lu", target->pid)));
        signal_child(target->pid, SIGINT);
        return;
    }

    /* Even key: only meaningful when the thread pool is enabled. */
    if (!ENABLE_THREAD_POOL) {
        ereport(WARNING, (errmsg("Receive invalid cancel key, which suppose to be thread pool mode.")));
        return;
    }

    int sessionSlot = (int)ntohl(request->backendPID);
    ThreadPoolSessControl* sessionCtrl = g_threadPoolControler->GetSessionCtrl();
    knl_session_context* session = sessionCtrl->GetSessionByIdx(sessionSlot);

    if (session == NULL || session->cancel_key != authCode) {
        ereport(LOG,(errmsg("Don't found the match sess, the session slot(%d)", sessionSlot)));
        return;
    }

    int rc = sessionCtrl->SendSignal((int)sessionSlot, SIGINT);
    if (rc != 0) {
        ereport(WARNING,
            (errmsg("kill(session %ld, signal %d) failed: \"%s\", pmState %d, Demotion %d, Shutdown %d",
                (long)sessionSlot, SIGINT, gs_strerror(rc), pmState, g_instance.demotion, Shutdown)));
    }
}
|
|
|
|
/*
|
|
* canAcceptConnections --- check to see if database state allows connections.
|
|
*/
|
|
/*
 * canAcceptConnections -- decide whether the current database state allows
 * a new connection.
 *
 * isSession: when true, the "too many children" check at the end is skipped
 *            and only the postmaster state decides the result.
 *
 * Returns one of the CAC_state codes.
 */
CAC_state canAcceptConnections(bool isSession)
{
    CAC_state verdict = CAC_OK;

    /*
     * Backends cannot be started in startup, shutdown or inconsistent
     * recovery state.
     *
     * In PM_WAIT_BACKUP only superusers may connect (so that a superuser can
     * end online backup mode); CAC_WAITBACKUP tells the caller to check that
     * later. Neither CAC_OK nor CAC_WAITBACKUP is final until the child
     * count has been checked below.
     */
    if (pmState == PM_WAIT_BACKUP) {
        verdict = CAC_WAITBACKUP; /* allow superusers only */
    } else if (pmState != PM_RUN) {
        if (g_instance.status > NoShutdown || g_instance.demotion > NoDemote) {
            return CAC_SHUTDOWN; /* shutdown is pending */
        }

        if (g_instance.fatal_error) {
            return CAC_RECOVERY; /* crash recovery */
        }

        if (pmState == PM_STARTUP || pmState == PM_RECOVERY) {
            return CAC_STARTUP; /* normal startup */
        }

        if (pmState != PM_HOT_STANDBY) {
            return CAC_RECOVERY; /* anything else must be crash recovery */
        }

        /* hot standby: connections are fine, verdict stays CAC_OK */
    }

    if (isSession) {
        return verdict;
    }

    /*
     * Don't start too many children.
     *
     * More connections than backends are allowed here because some may still
     * be authenticating; they might fail auth, or an existing backend might
     * exit before the auth cycle completes. The exact
     * g_instance.shmem_cxt.MaxBackends limit is enforced when a new backend
     * tries to join the shared-inval backend array.
     *
     * The limit here must match the sizes of the per-child-process arrays;
     * see comments for MaxLivePostmasterChildren().
     */
    if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren()) {
        verdict = CAC_TOOMANY;
    }

    return verdict;
}
|
|
|
|
/*
|
|
* ConnCreateToRecvGssock -- For logic connection from CN, we generate gs_socket
|
|
* for logic connection in receiver flow control thread, we use this function to generate
|
|
* a port and receive gs_socket from receiver flow control thread by unix domain socket.
|
|
*
|
|
* Returns NULL on failure, Returns Port with gs_sock on succeed.
|
|
*/
|
|
static Port* ConnCreateToRecvGssock(pollfd* ufds, int idx, int* nSockets)
|
|
{
|
|
Port* port = NULL;
|
|
int error;
|
|
/*
|
|
* receiver flow ctrl receive logic connection request
|
|
* and no unix domain socket between receiver flow ctrl and server loop
|
|
* so receiver flow ctrl make connection and listening fd is polled up
|
|
*/
|
|
if (ufds[idx].fd == t_thrd.libpq_cxt.listen_fd_for_recv_flow_ctrl) {
|
|
port = ConnCreate(ufds[idx].fd);
|
|
|
|
if (port == NULL)
|
|
return port;
|
|
|
|
/* clean old socket if exist */
|
|
if (t_thrd.postmaster_cxt.sock_for_libcomm != PGINVALID_SOCKET) {
|
|
ConnFree((void*)t_thrd.postmaster_cxt.port_for_libcomm);
|
|
close(t_thrd.postmaster_cxt.sock_for_libcomm);
|
|
t_thrd.postmaster_cxt.sock_for_libcomm = PGINVALID_SOCKET;
|
|
t_thrd.postmaster_cxt.port_for_libcomm = NULL;
|
|
ufds[--(*nSockets)].fd = PGINVALID_SOCKET;
|
|
}
|
|
|
|
/* save the port and socket for connection between receiver flow ctrl and server loop */
|
|
t_thrd.postmaster_cxt.port_for_libcomm = port;
|
|
t_thrd.postmaster_cxt.sock_for_libcomm = port->sock;
|
|
/* add in ufds and it will polled up when receive a new gs_sock */
|
|
ufds[*nSockets].events = POLLIN | POLLPRI;
|
|
ufds[*nSockets].revents = 0;
|
|
ufds[(*nSockets)++].fd = t_thrd.postmaster_cxt.sock_for_libcomm;
|
|
}
|
|
/* sock_for_libcomm is polled up when recv flow ctrl send gs_sock by unix_domain sock */
|
|
else {
|
|
port = t_thrd.postmaster_cxt.port_for_libcomm;
|
|
if (port == NULL) {
|
|
return NULL;
|
|
}
|
|
port->sock = t_thrd.postmaster_cxt.sock_for_libcomm;
|
|
}
|
|
|
|
/* receive gs_sock by unix domain sock */
|
|
port->is_logic_conn = true;
|
|
error = gs_recv_msg_by_unix_domain(port->sock, &port->gs_sock);
|
|
|
|
/*
|
|
* for logic connection gs_sock is used for communication
|
|
* and port->sock is used to receive gs_sock, as we have already get gs_sock
|
|
* port->sock is unneeded for current connection, so set to -1.
|
|
* note: port->sock is an long_term socket, only one for each process
|
|
* it will be closed when receive error.
|
|
*/
|
|
if ((error > 0)) {
|
|
port->sock = PGINVALID_SOCKET;
|
|
return port;
|
|
}
|
|
|
|
ConnFree((void*)t_thrd.postmaster_cxt.port_for_libcomm);
|
|
t_thrd.postmaster_cxt.sock_for_libcomm = PGINVALID_SOCKET;
|
|
t_thrd.postmaster_cxt.port_for_libcomm = NULL;
|
|
port = NULL;
|
|
ufds[--(*nSockets)].fd = PGINVALID_SOCKET;
|
|
|
|
return port;
|
|
}
|
|
|
|
/*
|
|
* ConnCreate -- create a local connection data structure
|
|
*
|
|
* Returns NULL on failure, other than out-of-memory which is fatal.
|
|
*/
|
|
static Port* ConnCreate(int serverFd)
|
|
{
|
|
Port* port = NULL;
|
|
|
|
port =
|
|
(Port*)MemoryContextAllocZero(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR), 1 * sizeof(Port));
|
|
|
|
if (port == NULL) {
|
|
ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
port->sock = PGINVALID_SOCKET;
|
|
port->gs_sock = GS_INVALID_GSOCK;
|
|
|
|
if (StreamConnection(serverFd, port) != STATUS_OK) {
|
|
if (port->sock >= 0)
|
|
StreamClose(port->sock);
|
|
|
|
ConnFree((void*)port);
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Precompute password salt values to use for this connection. It's
|
|
* slightly annoying to do this long in advance of knowing whether we'll
|
|
* need 'em or not, but we must do the random() calls before we fork, not
|
|
* after. Else the postmaster's random sequence won't get advanced, and
|
|
* all backends would end up using the same salt...
|
|
* Use openssl RAND_priv_bytes interface to generate random salt, cast char to
|
|
* unsigned char here.
|
|
*/
|
|
int retval = RAND_priv_bytes((unsigned char*)port->md5Salt, sizeof(port->md5Salt));
|
|
if (retval != 1) {
|
|
ereport(ERROR, (errmsg("Failed to Generate the random number,errcode:%d", retval)));
|
|
}
|
|
|
|
/*
|
|
* Allocate GSSAPI specific state struct
|
|
*/
|
|
#ifndef EXEC_BACKEND
|
|
#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
|
|
port->gss = (pg_gssinfo*)MemoryContextAllocZero(
|
|
SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR), 1 * sizeof(pg_gssinfo));
|
|
|
|
if (!port->gss) {
|
|
ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
|
|
ExitPostmaster(1);
|
|
}
|
|
|
|
#endif
|
|
#endif
|
|
port->is_logic_conn = false;
|
|
port->gs_sock.type = GSOCK_INVALID;
|
|
|
|
return port;
|
|
}
|
|
|
|
/*
|
|
* ConnFree -- free a local connection data structure
|
|
*/
|
|
void ConnFree(void* conn)
|
|
{
|
|
int at_main_thread = (!IsPostmasterEnvironment || t_thrd.proc_cxt.MyProcPid == PostmasterPid) ? 1 : 0;
|
|
|
|
Port* tmp_conn = (Port*)conn;
|
|
#ifdef USE_SSL
|
|
secure_close(tmp_conn);
|
|
#endif
|
|
|
|
if (tmp_conn != NULL && tmp_conn->gss != NULL) {
|
|
if (0 != at_main_thread) {
|
|
pfree(tmp_conn->gss);
|
|
} else {
|
|
free(tmp_conn->gss);
|
|
}
|
|
}
|
|
|
|
if (0 != at_main_thread) {
|
|
pfree(tmp_conn);
|
|
} else {
|
|
free(tmp_conn);
|
|
tmp_conn = NULL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* ClosePostmasterPorts -- close all the postmaster's open sockets
|
|
*
|
|
* This is called during child process startup to release file descriptors
|
|
* that are not needed by that child process. The postmaster still has
|
|
* them open, of course.
|
|
*
|
|
* Note: we pass am_syslogger as a boolean because we don't want to set
|
|
* the global variable yet when this is called.
|
|
*/
|
|
void ClosePostmasterPorts(bool am_syslogger)
|
|
{
|
|
int i;
|
|
|
|
#ifndef WIN32
|
|
|
|
/*
|
|
* Close the write end of postmaster death watch pipe. It's important to
|
|
* do this as early as possible, so that if postmaster dies, others won't
|
|
* think that it's still running because we're holding the pipe open.
|
|
*/
|
|
if (close(t_thrd.postmaster_cxt.postmaster_alive_fds[POSTMASTER_FD_OWN]))
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(),
|
|
errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
|
|
|
|
t_thrd.postmaster_cxt.postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
|
|
#endif
|
|
|
|
/* Close the listen sockets */
|
|
for (i = 0; i < MAXLISTEN; i++) {
|
|
if (t_thrd.postmaster_cxt.ListenSocket[i] != PGINVALID_SOCKET) {
|
|
StreamClose(t_thrd.postmaster_cxt.ListenSocket[i]);
|
|
t_thrd.postmaster_cxt.ListenSocket[i] = PGINVALID_SOCKET;
|
|
}
|
|
}
|
|
/* If using syslogger, close the read side of the pipe */
|
|
if (!am_syslogger) {
|
|
#ifndef WIN32
|
|
|
|
if (t_thrd.postmaster_cxt.syslogPipe[0] >= 0)
|
|
close(t_thrd.postmaster_cxt.syslogPipe[0]);
|
|
|
|
t_thrd.postmaster_cxt.syslogPipe[0] = -1;
|
|
#else
|
|
|
|
if (t_thrd.postmaster_cxt.syslogPipe[0])
|
|
CloseHandle(t_thrd.postmaster_cxt.syslogPipe[0]);
|
|
|
|
t_thrd.postmaster_cxt.syslogPipe[0] = 0;
|
|
#endif
|
|
}
|
|
|
|
#ifdef USE_BONJOUR
|
|
|
|
/* If using Bonjour, close the connection to the mDNS daemon */
|
|
if (bonjour_sdref)
|
|
close(DNSServiceRefSockFD(bonjour_sdref));
|
|
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* on_proc_exit callback to close server's listen sockets
|
|
*/
|
|
static void CloseServerPorts(int status, Datum arg)
|
|
{
|
|
int i;
|
|
|
|
/*
|
|
* First, explicitly close all the socket FDs. We used to just let this
|
|
* happen implicitly at postmaster exit, but it's better to close them
|
|
* before we remove the postmaster.pid lockfile; otherwise there's a race
|
|
* condition if a new postmaster wants to re-use the TCP port number.
|
|
*/
|
|
for (i = 0; i < MAXLISTEN; i++) {
|
|
if (t_thrd.postmaster_cxt.ListenSocket[i] != PGINVALID_SOCKET) {
|
|
StreamClose(t_thrd.postmaster_cxt.ListenSocket[i]);
|
|
t_thrd.postmaster_cxt.ListenSocket[i] = PGINVALID_SOCKET;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Removal of the Unix socket file and socket lockfile will happen in
|
|
* later on_proc_exit callbacks.
|
|
*/
|
|
}
|
|
|
|
void socket_close_on_exec(void)
|
|
{
|
|
/* Close the listen sockets */
|
|
for (int i = 0; i < MAXLISTEN; i++) {
|
|
if (t_thrd.postmaster_cxt.ListenSocket[i] != PGINVALID_SOCKET) {
|
|
int flags = fcntl(t_thrd.postmaster_cxt.ListenSocket[i], F_GETFD);
|
|
if (flags < 0)
|
|
ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("fcntl F_GETFD failed!")));
|
|
|
|
flags |= FD_CLOEXEC;
|
|
if (fcntl(t_thrd.postmaster_cxt.ListenSocket[i], F_SETFD, flags) < 0)
|
|
ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("fcntl F_SETFD failed!")));
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* reset_shared -- reset shared memory and semaphores
|
|
*/
|
|
static void reset_shared(int port)
|
|
{
|
|
/*
|
|
* Create or re-create shared memory and semaphores.
|
|
*
|
|
* Note: in each "cycle of life" we will normally assign the same IPC keys
|
|
* (if using SysV shmem and/or semas), since the port number is used to
|
|
* determine IPC keys. This helps ensure that we will clean up dead IPC
|
|
* objects if the postmaster crashes and is restarted.
|
|
*/
|
|
CreateSharedMemoryAndSemaphores(false, port);
|
|
}
|
|
|
|
/*
|
|
* SIGHUP -- reread config files, and tell children to do same
|
|
*/
|
|
static void SIGHUP_handler(SIGNAL_ARGS)
|
|
{
|
|
int save_errno = errno;
|
|
ConfFileLock filelock = {NULL, 0};
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
|
|
if (g_instance.status <= SmartShutdown && g_instance.demotion <= SmartDemote) {
|
|
|
|
char gucconf_lock_file[MAXPGPATH] = {0};
|
|
ereport(LOG, (errmsg("received SIGHUP, reloading configuration files")));
|
|
int rc = snprintf_s(gucconf_lock_file,
|
|
sizeof(gucconf_lock_file),
|
|
MAXPGPATH - 1,
|
|
"%s/postgresql.conf.lock",
|
|
t_thrd.proc_cxt.DataDir);
|
|
securec_check_intval(rc, , );
|
|
if (get_file_lock(gucconf_lock_file, &filelock) != CODE_OK) {
|
|
ereport(WARNING, (errmsg("the last sigup signal is processing,get file lock failed.")));
|
|
(void)PG_SETMASK(&t_thrd.libpq_cxt.UnBlockSig);
|
|
errno = save_errno;
|
|
return;
|
|
}
|
|
|
|
ProcessConfigFile(PGC_SIGHUP);
|
|
release_file_lock(&filelock);
|
|
|
|
(void)SignalChildren(SIGHUP);
|
|
if (ENABLE_THREAD_POOL) {
|
|
g_threadPoolControler->GetSessionCtrl()->SigHupHandler();
|
|
g_threadPoolControler->GetScheduler()->SigHupHandler();
|
|
}
|
|
|
|
if (g_instance.pid_cxt.StartupPID != 0)
|
|
signal_child(g_instance.pid_cxt.StartupPID, SIGHUP);
|
|
|
|
#ifdef PGXC /* PGXC_COORD */
|
|
if (
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
IS_PGXC_COORDINATOR &&
|
|
#endif
|
|
g_instance.pid_cxt.TwoPhaseCleanerPID != 0)
|
|
signal_child(g_instance.pid_cxt.TwoPhaseCleanerPID, SIGHUP);
|
|
|
|
if (GTM_LITE_CN && g_instance.pid_cxt.CsnminSyncPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CsnminSyncPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.FaultMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.FaultMonitorPID, SIGHUP);
|
|
#endif
|
|
|
|
if (START_BARRIER_CREATOR && g_instance.pid_cxt.BarrierCreatorPID != 0) {
|
|
signal_child(g_instance.pid_cxt.BarrierCreatorPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.BgWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.BgWriterPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CheckpointerPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.CheckpointerPID, SIGHUP);
|
|
}
|
|
if (g_instance.pid_cxt.PageWriterPID != NULL) {
|
|
int i;
|
|
for (i = 0; i < g_instance.attr.attr_storage.pagewriter_thread_num; i++) {
|
|
if (g_instance.pid_cxt.PageWriterPID[i] != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.PageWriterPID[i], SIGHUP);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CkptBgWriterPID != NULL) {
|
|
int thread_num = g_instance.attr.attr_storage.bgwriter_thread_num;
|
|
thread_num = thread_num > 0 ? thread_num : 1;
|
|
for (int i = 0; i < thread_num; i++) {
|
|
if (g_instance.pid_cxt.CkptBgWriterPID[i] != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.CkptBgWriterPID[i], SIGHUP);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WalWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalWriterPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.WalRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalRcvWriterPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.WalReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalReceiverPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.DataRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataRcvWriterPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.DataReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataReceiverPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.AutoVacPID != 0)
|
|
signal_child(g_instance.pid_cxt.AutoVacPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.PgArchPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgArchPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.SysLoggerPID != 0)
|
|
signal_child(g_instance.pid_cxt.SysLoggerPID, SIGHUP);
|
|
/* signal the auditor process */
|
|
if (g_instance.pid_cxt.PgAuditPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.PgAuditPID, SIGHUP);
|
|
}
|
|
if (g_instance.pid_cxt.PgStatPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgStatPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.SnapshotPID != 0)
|
|
signal_child(g_instance.pid_cxt.SnapshotPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.AshPID != 0)
|
|
signal_child(g_instance.pid_cxt.AshPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.StatementPID != 0)
|
|
signal_child(g_instance.pid_cxt.StatementPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.AlarmCheckerPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.AlarmCheckerPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.ReaperBackendPID != 0) {
|
|
signal_child(g_instance.pid_cxt.ReaperBackendPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WLMCollectPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMCollectPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.WLMMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMMonitorPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.WLMArbiterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMArbiterPID, SIGHUP);
|
|
|
|
if (g_instance.pid_cxt.CBMWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.CBMWriterPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.RemoteServicePID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.RemoteServicePID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.PercentilePID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.PercentilePID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.HeartbeatPID != 0) {
|
|
signal_child(g_instance.pid_cxt.HeartbeatPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommSenderFlowPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommSenderFlowPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommReceiverFlowPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommReceiverFlowPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommAuxiliaryPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommAuxiliaryPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommPoolerCleanPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommPoolerCleanPID, SIGHUP);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommReceiverPIDS != NULL) {
|
|
int recv_loop = 0;
|
|
for (recv_loop = 0; recv_loop < g_instance.attr.attr_network.comm_max_receiver; recv_loop++) {
|
|
if (g_instance.pid_cxt.CommReceiverPIDS[recv_loop] != 0) {
|
|
signal_child(g_instance.pid_cxt.CommReceiverPIDS[recv_loop], SIGHUP);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (g_instance.pid_cxt.TsCompactionPID != 0) {
|
|
signal_child(g_instance.pid_cxt.TsCompactionPID, SIGHUP);
|
|
}
|
|
if (g_instance.pid_cxt.TsCompactionAuxiliaryPID != 0) {
|
|
signal_child(g_instance.pid_cxt.TsCompactionAuxiliaryPID, SIGHUP);
|
|
}
|
|
(void)streaming_backend_manager(STREAMING_BACKEND_SIGHUP);
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
/* Reload authentication config files too */
|
|
int loadhbaCount = 0;
|
|
|
|
(void)pthread_rwlock_wrlock(&hba_rwlock);
|
|
while (!load_hba()) {
|
|
loadhbaCount++;
|
|
pg_usleep(200000L); // slepp 200ms for reload
|
|
if (loadhbaCount >= 3) {
|
|
/*
|
|
* It makes no sense to continue if we fail to load the HBA file,
|
|
* since there is no way to connect to the database in this case.
|
|
*/
|
|
ereport(WARNING, (errmsg("pg_hba.conf not reloaded")));
|
|
break;
|
|
}
|
|
}
|
|
(void)pthread_rwlock_unlock(&hba_rwlock);
|
|
|
|
load_ident();
|
|
|
|
/* Reload license file. */
|
|
signalReloadLicenseHandler(SIGHUP);
|
|
|
|
#ifdef EXEC_BACKEND
|
|
/* Update the starting-point file for future children */
|
|
write_nondefault_variables(PGC_SIGHUP);
|
|
#endif
|
|
}
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
|
|
|
errno = save_errno;
|
|
}
|
|
/*
|
|
* SIGBUS -- When uce failure occurs in system memory, sigbus_handler will exit according to the region
|
|
of its logical address.
|
|
1. Calculate the buffer pool address range to determine whether the error address is in the buffer pool.
|
|
2. For addresses outside the buffer pool range, print the NIC log and exit
|
|
3. For addresses within the buffer pool range, calculate block_id and judge whether the page is dirty
|
|
4. If the page is not dirty, execute pmdie to exit normally and print warning message. If the page is dirty,
|
|
print the PANIC log and exit
|
|
*/
|
|
void SIGBUS_handler(SIGNAL_ARGS)
|
|
{
|
|
uint64 buffer_size;
|
|
int buf_id;
|
|
int si_code = g_instance.sigbus_cxt.sigbus_code;
|
|
unsigned long long sigbus_addr = (unsigned long long)g_instance.sigbus_cxt.sigbus_addr;
|
|
if (si_code != SIGBUS_MCEERR_AR && si_code != SIGBUS_MCEERR_AO) {
|
|
ereport(PANIC,
|
|
(errcode(ERRCODE_UE_COMMON_ERROR),
|
|
errmsg("errcode:%u, SIGBUS signal received, Gaussdb will shut down immediately",
|
|
ERRCODE_UE_COMMON_ERROR)));
|
|
}
|
|
#ifdef __aarch64__
|
|
buffer_size = g_instance.attr.attr_storage.NBuffers * (Size)BLCKSZ + PG_CACHE_LINE_SIZE;
|
|
#else
|
|
buffer_size = g_instance.attr.attr_storage.NBuffers * (Size)BLCKSZ;
|
|
#endif
|
|
unsigned long long startaddr = (unsigned long long)t_thrd.storage_cxt.BufferBlocks;
|
|
unsigned long long endaddr = startaddr + buffer_size;
|
|
/* Determine the range of address carried by sigbus, And print the log according to the page state. */
|
|
if (sigbus_addr >= startaddr && sigbus_addr <= endaddr) {
|
|
buf_id = floor((sigbus_addr - startaddr) / (Size)BLCKSZ);
|
|
BufferDesc* buf_desc = GetBufferDescriptor(buf_id);
|
|
if (buf_desc->state & BM_DIRTY || buf_desc->state & BM_JUST_DIRTIED || buf_desc->state & BM_CHECKPOINT_NEEDED ||
|
|
buf_desc->state & BM_IO_IN_PROGRESS) {
|
|
ereport(PANIC,
|
|
(errcode(ERRCODE_UE_DIRTY_PAGE),
|
|
errmsg("errcode:%u, Uncorrected Error occurred at dirty page. The error address is: 0x%llx. Gaussdb will shut "
|
|
"down immediately.",
|
|
ERRCODE_UE_DIRTY_PAGE, sigbus_addr)));
|
|
} else {
|
|
ereport(WARNING,
|
|
(errcode(ERRCODE_UE_CLEAN_PAGE),
|
|
errmsg("errcode:%u, Uncorrected Error occurred at clean/free page. The error address is: 0x%llx. GaussDB will "
|
|
"shutdown.",
|
|
ERRCODE_UE_CLEAN_PAGE, sigbus_addr)));
|
|
pmdie(SIGBUS);
|
|
}
|
|
} else if (sigbus_addr == 0) {
|
|
ereport(PANIC,
|
|
(errcode(ERRCODE_UE_COMMON_ERROR),
|
|
errmsg("errcode:%u, SIGBUS signal received, sigbus_addr is None. Gaussdb will shut down immediately",
|
|
ERRCODE_UE_COMMON_ERROR)));
|
|
} else {
|
|
ereport(PANIC,
|
|
(errcode(ERRCODE_UE_COMMON_ERROR),
|
|
errmsg("errcode:%u, SIGBUS signal received. The error address is: 0x%llx, Gaussdb will shut down immediately",
|
|
ERRCODE_UE_COMMON_ERROR, sigbus_addr)));
|
|
}
|
|
}
|
|
|
|
void KillGraceThreads(void)
|
|
{
|
|
if (g_instance.pid_cxt.PgStatPID != 0) {
|
|
signal_child(g_instance.pid_cxt.PgStatPID, SIGQUIT);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.AlarmCheckerPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.AlarmCheckerPID, SIGQUIT);
|
|
}
|
|
}
|
|
|
|
static void NotifyShutdown(void)
|
|
{
|
|
will_shutdown = true;
|
|
return;
|
|
}
|
|
|
|
static void NotifyProcessActive(void)
|
|
{
|
|
will_shutdown = false;
|
|
return;
|
|
}
|
|
/*
|
|
* pmdie -- signal handler for processing various postmaster signals.
|
|
*/
|
|
|
|
static void pmdie(SIGNAL_ARGS)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
|
|
ereport(DEBUG2, (errmsg_internal("postmaster received signal %d", postgres_signal_arg)));
|
|
NotifyShutdown();
|
|
|
|
switch (postgres_signal_arg) {
|
|
case SIGTERM:
|
|
case SIGINT:
|
|
case SIGBUS:
|
|
|
|
if (STANDBY_MODE == t_thrd.postmaster_cxt.HaShmData->current_mode && !dummyStandbyMode &&
|
|
SIGTERM == postgres_signal_arg) {
|
|
/*
|
|
* Smart g_instance.status:
|
|
*
|
|
* Wait for children to end their work, then shut down.
|
|
*/
|
|
if (g_instance.status >= SmartShutdown)
|
|
break;
|
|
|
|
g_instance.status = SmartShutdown;
|
|
ereport(LOG, (errmsg("received smart shutdown request")));
|
|
|
|
/* Audit system stop */
|
|
pgaudit_system_stop_ok(SmartShutdown);
|
|
} else {
|
|
/*
|
|
* Fast g_instance.status:
|
|
*
|
|
* Abort all children with SIGTERM (rollback active transactions
|
|
* and exit) and shut down when they are gone.
|
|
*/
|
|
if (g_instance.status >= FastShutdown)
|
|
break;
|
|
|
|
g_instance.status = FastShutdown;
|
|
ereport(LOG, (errmsg("received fast shutdown request")));
|
|
|
|
/* Audit system stop */
|
|
pgaudit_system_stop_ok(FastShutdown);
|
|
}
|
|
|
|
if (pmState == PM_STARTUP || pmState == PM_INIT) {
|
|
KillGraceThreads();
|
|
WaitGraceThreadsExit();
|
|
|
|
// threading: do not clean sema, maybe other thread is using it.
|
|
cancelSemphoreRelease();
|
|
cancelIpcMemoryDetach();
|
|
|
|
ExitPostmaster(0);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.StartupPID != 0)
|
|
signal_child(g_instance.pid_cxt.StartupPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.BgWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.BgWriterPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WalRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalRcvWriterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WalReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalReceiverPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.DataRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataRcvWriterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.DataReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataReceiverPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.FaultMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.FaultMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.TwoPhaseCleanerPID != 0)
|
|
signal_child(g_instance.pid_cxt.TwoPhaseCleanerPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WLMCollectPID != 0) {
|
|
WLMProcessThreadShutDown();
|
|
signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.SnapshotPID != 0) {
|
|
WLMProcessThreadShutDown();
|
|
signal_child(g_instance.pid_cxt.SnapshotPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.AshPID != 0) {
|
|
WLMProcessThreadShutDown();
|
|
signal_child(g_instance.pid_cxt.AshPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.StatementPID != 0) {
|
|
WLMProcessThreadShutDown();
|
|
signal_child(g_instance.pid_cxt.StatementPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.PercentilePID != 0) {
|
|
WLMProcessThreadShutDown();
|
|
signal_child(g_instance.pid_cxt.PercentilePID, SIGTERM);
|
|
}
|
|
if (g_instance.pid_cxt.WLMMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WLMArbiterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMArbiterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.CPMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.CPMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.CBMWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.CBMWriterPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.RemoteServicePID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.RemoteServicePID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommSenderFlowPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommSenderFlowPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommReceiverFlowPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommReceiverFlowPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommAuxiliaryPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommAuxiliaryPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommPoolerCleanPID != 0) {
|
|
signal_child(g_instance.pid_cxt.CommPoolerCleanPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CommReceiverPIDS != NULL) {
|
|
int recv_loop = 0;
|
|
for (recv_loop = 0; recv_loop < g_instance.attr.attr_network.comm_max_receiver; recv_loop++) {
|
|
if (g_instance.pid_cxt.CommReceiverPIDS[recv_loop] != 0) {
|
|
signal_child(g_instance.pid_cxt.CommReceiverPIDS[recv_loop], SIGTERM);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (g_instance.pid_cxt.BarrierCreatorPID != 0) {
|
|
barrier_creator_thread_shutdown();
|
|
signal_child(g_instance.pid_cxt.BarrierCreatorPID, SIGTERM);
|
|
}
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (g_instance.pid_cxt.CsnminSyncPID != 0) {
|
|
csnminsync_thread_shutdown();
|
|
signal_child(g_instance.pid_cxt.CsnminSyncPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.TsCompactionPID != 0) {
|
|
signal_child(g_instance.pid_cxt.TsCompactionPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.TsCompactionAuxiliaryPID != 0) {
|
|
signal_child(g_instance.pid_cxt.TsCompactionAuxiliaryPID, SIGTERM);
|
|
}
|
|
|
|
(void)streaming_backend_manager(STREAMING_BACKEND_SIGTERM);
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
if (ENABLE_THREAD_POOL && (pmState == PM_RECOVERY || pmState == PM_STARTUP)) {
|
|
/*
|
|
* Although there is not connections from client at PM_RECOVERY and PM_STARTUP
|
|
* state, we still have to close thread pool thread.
|
|
*/
|
|
g_threadPoolControler->ShutDownThreads();
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) {
|
|
signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM);
|
|
}
|
|
|
|
if (pmState == PM_RECOVERY) {
|
|
/*
|
|
* Only startup, bgwriter, walreceiver, and/or checkpointer
|
|
* should be active in this state; we just signaled the first
|
|
* three, and we don't want to kill checkpointer yet.
|
|
*/
|
|
pmState = PM_WAIT_BACKENDS;
|
|
} else if (pmState == PM_RUN || pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_READONLY ||
|
|
pmState == PM_WAIT_BACKENDS || pmState == PM_HOT_STANDBY) {
|
|
ereport(LOG, (errmsg("aborting any active transactions")));
|
|
|
|
if (ENABLE_THREAD_POOL) {
|
|
g_threadPoolControler->CloseAllSessions();
|
|
g_threadPoolControler->ShutDownThreads();
|
|
}
|
|
/* shut down all backends and autovac workers */
|
|
(void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC);
|
|
|
|
/* and the autovac launcher too */
|
|
if (g_instance.pid_cxt.AutoVacPID != 0)
|
|
signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM);
|
|
|
|
/* and the walwriter too */
|
|
if (g_instance.pid_cxt.WalWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM);
|
|
|
|
|
|
pmState = PM_WAIT_BACKENDS;
|
|
}
|
|
/*
|
|
* Now wait for backends to exit. If there are none,
|
|
* PostmasterStateMachine will take the next step.
|
|
*/
|
|
PostmasterStateMachine();
|
|
break;
|
|
|
|
case SIGQUIT:
|
|
|
|
/*
|
|
* Immediate g_instance.status:
|
|
*
|
|
* exit whole process, do not send SIGQUIT to child.
|
|
*/
|
|
ereport(LOG, (errmsg("received immediate shutdown request")));
|
|
/* Audit system stop */
|
|
pgaudit_system_stop_ok(ImmediateShutdown);
|
|
g_instance.status = ImmediateShutdown;
|
|
|
|
KillGraceThreads();
|
|
WaitGraceThreadsExit();
|
|
|
|
// threading: do not clean sema, maybe other thread is using it.
|
|
//
|
|
cancelSemphoreRelease();
|
|
cancelIpcMemoryDetach();
|
|
|
|
ExitPostmaster(0);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
NotifyProcessActive();
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
|
|
|
errno = save_errno;
|
|
}
|
|
|
|
/* set walsenders node state */
|
|
static void SetWalsndsNodeState(ClusterNodeState requester, ClusterNodeState others)
|
|
{
|
|
int i;
|
|
|
|
/* update the demote state */
|
|
for (i = 0; i < g_instance.attr.attr_storage.max_wal_senders; i++) {
|
|
/* use volatile pointer to prevent code rearrangement */
|
|
volatile WalSnd* walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[i];
|
|
|
|
if (0 == walsnd->pid)
|
|
continue;
|
|
|
|
if (walsnd->node_state >= NODESTATE_SMART_DEMOTE_REQUEST && walsnd->node_state <= NODESTATE_FAST_DEMOTE_REQUEST)
|
|
walsnd->node_state = requester;
|
|
else
|
|
walsnd->node_state = others;
|
|
}
|
|
}
|
|
|
|
/* prepare to response to standby for switchover */
|
|
static void PrepareDemoteResponse(void)
|
|
{
|
|
if (NoDemote == g_instance.demotion)
|
|
return;
|
|
|
|
SetWalsndsNodeState(NODESTATE_PROMOTE_APPROVE, NODESTATE_STANDBY_REDIRECT);
|
|
|
|
/*
|
|
* For standby demote to a cascade standby, it is safe
|
|
* to change servermode here when promote has been approved.
|
|
*/
|
|
if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) {
|
|
volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
|
|
/* A standby instance will be demote to a cascade standby */
|
|
SpinLockAcquire(&hashmdata->mutex);
|
|
hashmdata->current_mode = STANDBY_MODE;
|
|
hashmdata->is_cascade_standby = true;
|
|
SpinLockRelease(&hashmdata->mutex);
|
|
|
|
load_server_mode();
|
|
}
|
|
|
|
allow_immediate_pgstat_restart();
|
|
}
|
|
|
|
/* process demote request from standby */
|
|
static void ProcessDemoteRequest(void)
|
|
{
|
|
DemoteMode mode;
|
|
|
|
/* get demote request type */
|
|
mode = t_thrd.walsender_cxt.WalSndCtl->demotion;
|
|
|
|
if (NoDemote == mode)
|
|
return;
|
|
|
|
/* check the postmaster state */
|
|
if (pmState != PM_RUN && pmState != PM_HOT_STANDBY && NoDemote == g_instance.demotion) {
|
|
SetWalsndsNodeState(NODESTATE_NORMAL, NODESTATE_NORMAL);
|
|
t_thrd.walsender_cxt.WalSndCtl->demotion = NoDemote;
|
|
ereport(NOTICE, (errmsg("postmaster state not in PM_RUN or PM_HOT_STANDBY, it would not demote.")));
|
|
return;
|
|
}
|
|
|
|
PMUpdateDBState(DEMOTING_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file: db state(DEMOTING_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
|
|
switch (mode) {
|
|
case SmartDemote:
|
|
/*
|
|
* Smart Demote:
|
|
*
|
|
* Wait for children to end their work, then start up as standby.
|
|
*/
|
|
if (g_instance.demotion >= SmartDemote)
|
|
break;
|
|
g_instance.demotion = SmartDemote;
|
|
ereport(LOG, (errmsg("received smart demote request")));
|
|
|
|
if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_STARTUP) {
|
|
/* autovacuum workers are told to shut down immediately */
|
|
(void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_AUTOVAC);
|
|
/* and the autovac launcher too */
|
|
if (g_instance.pid_cxt.AutoVacPID != 0)
|
|
signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM);
|
|
|
|
if ((IS_PGXC_COORDINATOR) && g_instance.pid_cxt.CommPoolerCleanPID != 0)
|
|
signal_child(g_instance.pid_cxt.CommPoolerCleanPID, SIGTERM);
|
|
|
|
/* and the bgwriter too */
|
|
if (g_instance.pid_cxt.BgWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.BgWriterPID, SIGTERM);
|
|
}
|
|
if (g_instance.pid_cxt.TwoPhaseCleanerPID != 0)
|
|
signal_child(g_instance.pid_cxt.TwoPhaseCleanerPID, SIGTERM);
|
|
|
|
/* and the walwriter too */
|
|
if (g_instance.pid_cxt.WalWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.CBMWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.CBMWriterPID, SIGTERM);
|
|
}
|
|
|
|
// should do this ?
|
|
if (g_instance.pid_cxt.RemoteServicePID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.RemoteServicePID, SIGTERM);
|
|
}
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
(void)streaming_backend_manager(STREAMING_BACKEND_SIGTERM);
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
StopAliveBuildSender();
|
|
/*
|
|
* If we're in recovery, we can't kill the startup process
|
|
* right away, because at present doing so does not release
|
|
* its locks. We might want to change this in a future
|
|
* release. For the time being, the PM_WAIT_READONLY state
|
|
* indicates that we're waiting for the regular (read only)
|
|
* backends to die off; once they do, we'll kill the startup
|
|
* and walreceiver processes.
|
|
*/
|
|
pmState = (pmState == PM_RUN) ? PM_WAIT_BACKUP : PM_WAIT_READONLY;
|
|
}
|
|
|
|
break;
|
|
|
|
case FastDemote:
|
|
/*
|
|
* Fast Demote:
|
|
*
|
|
* Abort all children with SIGTERM (rollback active transactions
|
|
* and exit) and start up when they are gone.
|
|
*/
|
|
if (g_instance.demotion >= FastDemote)
|
|
break;
|
|
g_instance.demotion = FastDemote;
|
|
ereport(LOG, (errmsg("received fast demote request")));
|
|
|
|
if (g_instance.pid_cxt.StartupPID != 0)
|
|
signal_child(g_instance.pid_cxt.StartupPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.BgWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.BgWriterPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WalRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalRcvWriterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WalReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalReceiverPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.DataRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataRcvWriterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.DataReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataReceiverPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.HeartbeatPID != 0)
|
|
signal_child(g_instance.pid_cxt.HeartbeatPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.TwoPhaseCleanerPID != 0)
|
|
signal_child(g_instance.pid_cxt.TwoPhaseCleanerPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WLMCollectPID != 0) {
|
|
WLMProcessThreadShutDown();
|
|
signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WLMMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WLMArbiterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMArbiterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.CPMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.CPMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.FaultMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.FaultMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.CBMWriterPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.CBMWriterPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.RemoteServicePID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.RemoteServicePID, SIGTERM);
|
|
}
|
|
|
|
/* as single_node mode will start WLM & Snapshot, but doesn't terminate them during switchover, kill them now */
|
|
if (g_instance.pid_cxt.WLMCollectPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.SnapshotPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.SnapshotPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.AshPID!= 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.AshPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.StatementPID!= 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.StatementPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.PercentilePID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.PercentilePID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.BarrierCreatorPID != 0) {
|
|
barrier_creator_thread_shutdown();
|
|
signal_child(g_instance.pid_cxt.BarrierCreatorPID, SIGTERM);
|
|
}
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (g_instance.pid_cxt.CsnminSyncPID != 0) {
|
|
csnminsync_thread_shutdown();
|
|
signal_child(g_instance.pid_cxt.CsnminSyncPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.TsCompactionPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.TsCompactionPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.TsCompactionAuxiliaryPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.TsCompactionAuxiliaryPID, SIGTERM);
|
|
}
|
|
|
|
(void)streaming_backend_manager(STREAMING_BACKEND_SIGTERM);
|
|
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
if (pmState == PM_RECOVERY) {
|
|
/*
|
|
* Only startup, bgwriter, and checkpointer should be active
|
|
* in this state; we just signaled the first two, and we don't
|
|
* want to kill checkpointer yet.
|
|
*/
|
|
pmState = PM_WAIT_BACKENDS;
|
|
} else if (pmState == PM_RUN || pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_READONLY ||
|
|
pmState == PM_WAIT_BACKENDS || pmState == PM_HOT_STANDBY) {
|
|
ereport(LOG, (errmsg("aborting any active transactions")));
|
|
|
|
if (ENABLE_THREAD_POOL) {
|
|
g_threadPoolControler->CloseAllSessions();
|
|
g_threadPoolControler->ShutDownThreads();
|
|
}
|
|
/* shut down all backends and autovac workers */
|
|
(void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC);
|
|
|
|
/* and the autovac launcher too */
|
|
if (g_instance.pid_cxt.AutoVacPID != 0)
|
|
signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM);
|
|
|
|
if ((IS_PGXC_COORDINATOR) && g_instance.pid_cxt.CommPoolerCleanPID != 0)
|
|
signal_child(g_instance.pid_cxt.CommPoolerCleanPID, SIGTERM);
|
|
|
|
/* and the walwriter too */
|
|
if (g_instance.pid_cxt.WalWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM);
|
|
StopAliveBuildSender();
|
|
|
|
if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM);
|
|
|
|
pmState = PM_WAIT_BACKENDS;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
/* Reset the seqno for matview */
|
|
MatviewShmemSetInvalid();
|
|
|
|
/*
|
|
* Now wait for backends to exit. If there are none,
|
|
* PostmasterStateMachine will take the next step.
|
|
*/
|
|
PostmasterStateMachine();
|
|
}
|
|
|
|
/*
|
|
* Reaper -- signal handler to clean up after a child process dies.
|
|
*/
|
|
static void reaper(SIGNAL_ARGS)
|
|
{
|
|
int save_errno = errno;
|
|
ThreadId pid; /* process id of dead child process */
|
|
long exitstatus; /* its exit status */
|
|
int* status = NULL;
|
|
ThreadId oldpid = 0;
|
|
|
|
#define LOOPTEST() (pid = gs_thread_id(t_thrd.postmaster_cxt.CurExitThread))
|
|
#define LOOPHEADER() (exitstatus = (long)(intptr_t)status)
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
ereport(DEBUG4, (errmsg_internal("reaping dead processes")));
|
|
|
|
for (;;) {
|
|
LOOPTEST();
|
|
|
|
if (pid == oldpid) {
|
|
break;
|
|
}
|
|
|
|
oldpid = pid;
|
|
if (gs_thread_join(t_thrd.postmaster_cxt.CurExitThread, (void**)&status) != 0) {
|
|
/*
|
|
* If the thread does not exist, treat it as normal exit and we continue to
|
|
* do our clean-up work. Otherwise, we treat it as crashed because we do
|
|
* not know the current status of the thread and it's better to quit directly
|
|
* which seems safer.
|
|
*/
|
|
if (ESRCH == pthread_kill(pid, 0)) {
|
|
exitstatus = 0;
|
|
ereport(LOG, (errmsg("failed to join thread %lu, no such process", pid)));
|
|
} else {
|
|
exitstatus = 1;
|
|
HandleChildCrash(pid, exitstatus, _(GetProcName(pid)));
|
|
}
|
|
} else {
|
|
LOOPHEADER();
|
|
ereport(DEBUG1, (errmsg("have joined thread %lu, exitstatus=%ld.", pid, exitstatus)));
|
|
}
|
|
|
|
/*
|
|
* Check if this child was a startup process.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.StartupPID) {
|
|
g_instance.pid_cxt.StartupPID = 0;
|
|
|
|
/*
|
|
* Startup process exited in response to a shutdown request (or it
|
|
* completed normally regardless of the shutdown request).
|
|
*/
|
|
if (g_instance.status > NoShutdown && (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus))) {
|
|
pmState = PM_WAIT_BACKENDS;
|
|
/* PostmasterStateMachine logic does the rest */
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Startup process exited in response to a standby demote request.
|
|
*/
|
|
if (g_instance.demotion > NoDemote &&
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE &&
|
|
(EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus))) {
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Unexpected exit of startup process (including FATAL exit)
|
|
* during PM_STARTUP is treated as catastrophic. There are no
|
|
* other processes running yet, so we can just exit.
|
|
*/
|
|
if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus)) {
|
|
LogChildExit(LOG, _("startup process"), pid, exitstatus);
|
|
ereport(LOG, (errmsg("aborting startup due to startup process failure")));
|
|
if (get_real_recovery_parallelism() > 1) {
|
|
HandleChildCrash(pid, exitstatus, _("startup process"));
|
|
} else {
|
|
/* Shut down threadpool worker before exit. */
|
|
if (ENABLE_THREAD_POOL) {
|
|
g_threadPoolControler->ShutDownThreads(true);
|
|
g_threadPoolControler->ShutDownListeners(true);
|
|
g_threadPoolControler->ShutDownScheduler(true);
|
|
}
|
|
ExitPostmaster(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* After PM_STARTUP, any unexpected exit (including FATAL exit) of
|
|
* the startup process is catastrophic, so kill other children,
|
|
* and set RecoveryError so we don't try to reinitialize after
|
|
* they're gone. Exception: if g_instance.fatal_error is already set, that
|
|
* implies we previously sent the startup process a SIGQUIT, so
|
|
* that's probably the reason it died, and we do want to try to
|
|
* restart in that case.
|
|
*/
|
|
if (!EXIT_STATUS_0(exitstatus)) {
|
|
if (!g_instance.fatal_error)
|
|
g_instance.recover_error = true;
|
|
|
|
HandleChildCrash(pid, exitstatus, _("startup process"));
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Startup succeeded, commence normal operations
|
|
*/
|
|
g_instance.fatal_error = false;
|
|
g_instance.demotion = NoDemote;
|
|
t_thrd.postmaster_cxt.ReachedNormalRunning = true;
|
|
pmState = PM_RUN;
|
|
|
|
if (t_thrd.postmaster_cxt.HaShmData && (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE ||
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode == PENDING_MODE)) {
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode = PRIMARY_MODE;
|
|
if (g_instance.pid_cxt.HeartbeatPID != 0)
|
|
signal_child(g_instance.pid_cxt.HeartbeatPID, SIGTERM);
|
|
UpdateOptsFile();
|
|
if (t_thrd.walreceiverfuncs_cxt.WalRcv)
|
|
t_thrd.walreceiverfuncs_cxt.WalRcv->node_state = NODESTATE_NORMAL;
|
|
}
|
|
|
|
/*
|
|
* Kill any walsenders to force the downstream standby(s) to
|
|
* reread the timeline history file, adjust their timelines and
|
|
* establish replication connections again. This is required
|
|
* because the timeline of cascading standby is not consistent
|
|
* with that of cascaded one just after failover. We LOG this
|
|
* message since we need to leave a record to explain this
|
|
* disconnection.
|
|
*
|
|
* XXX should avoid the need for disconnection. When we do,
|
|
* am_cascading_walsender should be replaced with
|
|
* RecoveryInProgress()
|
|
*/
|
|
if (g_instance.attr.attr_storage.max_wal_senders > 0 && CountChildren(BACKEND_TYPE_WALSND) > 0) {
|
|
ereport(LOG,
|
|
(errmsg("terminating all walsender processes to force cascaded "
|
|
"standby(s) to update timeline and reconnect")));
|
|
(void)SignalSomeChildren(SIGUSR2, BACKEND_TYPE_WALSND);
|
|
}
|
|
|
|
/*
|
|
* Crank up the background tasks, if we didn't do that already
|
|
* when we entered consistent recovery state. It doesn't matter
|
|
* if this fails, we'll just try again later.
|
|
*/
|
|
if (g_instance.pid_cxt.CheckpointerPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.CheckpointerPID = initialize_util_thread(CHECKPOINT_THREAD);
|
|
|
|
if (g_instance.pid_cxt.BgWriterPID == 0 && !dummyStandbyMode &&
|
|
!g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
g_instance.pid_cxt.BgWriterPID = initialize_util_thread(BGWRITER);
|
|
}
|
|
|
|
if (!dummyStandbyMode && g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
for (int i = 0; i < g_instance.attr.attr_storage.pagewriter_thread_num; i++) {
|
|
if (g_instance.pid_cxt.PageWriterPID[i] == 0) {
|
|
g_instance.pid_cxt.PageWriterPID[i] = initialize_util_thread(PAGEWRITER_THREAD);
|
|
}
|
|
}
|
|
|
|
int thread_num = g_instance.attr.attr_storage.bgwriter_thread_num;
|
|
thread_num = thread_num > 0 ? thread_num : 1;
|
|
for (int i = 0; i < thread_num; i++) {
|
|
if (g_instance.pid_cxt.CkptBgWriterPID[i] == 0) {
|
|
g_instance.pid_cxt.CkptBgWriterPID[i] = initialize_util_thread(BGWRITER);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WalWriterPID == 0)
|
|
g_instance.pid_cxt.WalWriterPID = initialize_util_thread(WALWRITER);
|
|
|
|
if (g_instance.pid_cxt.WalWriterAuxiliaryPID == 0)
|
|
g_instance.pid_cxt.WalWriterAuxiliaryPID = initialize_util_thread(WALWRITERAUXILIARY);
|
|
|
|
if (g_instance.pid_cxt.CBMWriterPID == 0 && !dummyStandbyMode &&
|
|
u_sess->attr.attr_storage.enable_cbm_tracking)
|
|
g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER);
|
|
|
|
/*
|
|
* Likewise, start other special children as needed. In a restart
|
|
* situation, some of them may be alive already.
|
|
*/
|
|
if (!u_sess->proc_cxt.IsBinaryUpgrade && AutoVacuumingActive() && g_instance.pid_cxt.AutoVacPID == 0 &&
|
|
!dummyStandbyMode && u_sess->attr.attr_common.upgrade_mode != 1 && !InplaceUpgradePrecommit)
|
|
g_instance.pid_cxt.AutoVacPID = initialize_util_thread(AUTOVACUUM_LAUNCHER);
|
|
|
|
/* Before GRAND VERSION NUM 81000, we do not support scheduled job. */
|
|
if (g_instance.pid_cxt.PgJobSchdPID == 0 &&
|
|
g_instance.attr.attr_sql.job_queue_processes && u_sess->attr.attr_common.upgrade_mode != 1)
|
|
g_instance.pid_cxt.PgJobSchdPID = initialize_util_thread(JOB_SCHEDULER);
|
|
|
|
if ((IS_PGXC_COORDINATOR) && g_instance.pid_cxt.CommPoolerCleanPID == 0 &&
|
|
u_sess->attr.attr_common.upgrade_mode != 1) {
|
|
StartPoolCleaner();
|
|
}
|
|
|
|
if (XLogArchivingActive() && g_instance.pid_cxt.PgArchPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.PgArchPID = pgarch_start();
|
|
|
|
if (g_instance.pid_cxt.PgStatPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.PgStatPID = pgstat_start();
|
|
|
|
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && g_instance.pid_cxt.SnapshotPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.SnapshotPID = snapshot_start();
|
|
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && g_instance.pid_cxt.PercentilePID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.PercentilePID = initialize_util_thread(PERCENTILE_WORKER);
|
|
|
|
if (ENABLE_ASP && g_instance.pid_cxt.AshPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.AshPID = initialize_util_thread(ASH_WORKER);
|
|
|
|
if (ENABLE_STATEMENT_TRACK && g_instance.pid_cxt.StatementPID == 0)
|
|
g_instance.pid_cxt.StatementPID = initialize_util_thread(TRACK_STMT_WORKER);
|
|
|
|
/* Database Security: Support database audit */
|
|
/* start auditor process */
|
|
/* start the audit collector as needed. */
|
|
if (g_instance.pid_cxt.PgAuditPID == 0 && u_sess->attr.attr_security.Audit_enabled && !dummyStandbyMode)
|
|
g_instance.pid_cxt.PgAuditPID = pgaudit_start();
|
|
|
|
if (t_thrd.postmaster_cxt.audit_primary_start && !t_thrd.postmaster_cxt.audit_primary_failover &&
|
|
!t_thrd.postmaster_cxt.audit_standby_switchover) {
|
|
pgaudit_system_start_ok(g_instance.attr.attr_network.PostPortNumber);
|
|
t_thrd.postmaster_cxt.audit_primary_start = false;
|
|
}
|
|
|
|
if (
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
IS_PGXC_COORDINATOR &&
|
|
#else
|
|
(t_thrd.postmaster_cxt.HaShmData->current_mode == NORMAL_MODE ||
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode == PRIMARY_MODE) &&
|
|
#endif
|
|
u_sess->attr.attr_common.upgrade_mode != 1 &&
|
|
g_instance.pid_cxt.TwoPhaseCleanerPID == 0)
|
|
g_instance.pid_cxt.TwoPhaseCleanerPID = initialize_util_thread(TWOPASECLEANER);
|
|
|
|
if (g_instance.pid_cxt.FaultMonitorPID == 0)
|
|
g_instance.pid_cxt.FaultMonitorPID = initialize_util_thread(FAULTMONITOR);
|
|
|
|
/* if workload manager is off, we still use this thread to build user hash table */
|
|
if ((ENABLE_WORKLOAD_CONTROL || !WLMIsInfoInit()) && g_instance.pid_cxt.WLMCollectPID == 0 &&
|
|
!dummyStandbyMode) {
|
|
/* DN needs to rebuild the hash when it is upgraded to primary */
|
|
if (IS_PGXC_DATANODE)
|
|
g_instance.wlm_cxt->stat_manager.infoinit = 0;
|
|
g_instance.pid_cxt.WLMCollectPID = initialize_util_thread(WLM_WORKER);
|
|
}
|
|
|
|
if (ENABLE_WORKLOAD_CONTROL && (g_instance.pid_cxt.WLMMonitorPID == 0) && !dummyStandbyMode)
|
|
g_instance.pid_cxt.WLMMonitorPID = initialize_util_thread(WLM_MONITOR);
|
|
|
|
if (ENABLE_WORKLOAD_CONTROL && (g_instance.pid_cxt.WLMArbiterPID == 0) && !dummyStandbyMode)
|
|
g_instance.pid_cxt.WLMArbiterPID = initialize_util_thread(WLM_ARBITER);
|
|
|
|
if (IS_PGXC_COORDINATOR && g_instance.attr.attr_sql.max_resource_package &&
|
|
(g_instance.pid_cxt.CPMonitorPID == 0) && !dummyStandbyMode)
|
|
g_instance.pid_cxt.CPMonitorPID = initialize_util_thread(WLM_CPMONITOR);
|
|
|
|
if (g_instance.pid_cxt.RemoteServicePID == 0 && !dummyStandbyMode && IS_PGXC_DATANODE &&
|
|
t_thrd.postmaster_cxt.HaShmData && t_thrd.postmaster_cxt.HaShmData->current_mode != NORMAL_MODE &&
|
|
!IS_DN_WITHOUT_STANDBYS_MODE() && IsRemoteReadModeOn())
|
|
g_instance.pid_cxt.RemoteServicePID = initialize_util_thread(RPC_SERVICE);
|
|
|
|
if (NeedHeartbeat())
|
|
g_instance.pid_cxt.HeartbeatPID = initialize_util_thread(HEARTBEAT);
|
|
|
|
if (START_BARRIER_CREATOR && g_instance.pid_cxt.BarrierCreatorPID == 0 &&
|
|
XLogArchivingActive() && getObsReplicationSlot() != NULL) {
|
|
g_instance.pid_cxt.BarrierCreatorPID = initialize_util_thread(BARRIER_CREATOR);
|
|
}
|
|
|
|
if (GTM_LITE_CN && g_instance.pid_cxt.CsnminSyncPID == 0) {
|
|
g_instance.pid_cxt.CsnminSyncPID = initialize_util_thread(CSNMIN_SYNC);
|
|
}
|
|
|
|
PMUpdateDBState(NORMAL_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file: db state(NORMAL_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
|
|
/* at this point we are really open for business */
|
|
ereport(LOG, (errmsg("database system is ready to accept connections")));
|
|
|
|
continue;
|
|
}
|
|
|
|
if (g_threadPoolControler != NULL && pmState == PM_RUN &&
|
|
pid == g_threadPoolControler->GetScheduler()->GetThreadId() &&
|
|
g_threadPoolControler->GetScheduler()->HasShutDown() == true) {
|
|
g_threadPoolControler->GetScheduler()->StartUp();
|
|
g_threadPoolControler->GetScheduler()->SetShutDown(false);
|
|
continue;
|
|
}
|
|
/*
|
|
* Was it the bgwriter? Normal exit can be ignored; we'll start a new
|
|
* one at the next iteration of the postmaster's main loop, if
|
|
* necessary. Any other exit condition is treated as a crash.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.BgWriterPID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.BgWriterPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("background writer process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it one of the incremental checkpoint bgwriters?
|
|
*/
|
|
if (g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
int i;
|
|
int thread_num = g_instance.attr.attr_storage.bgwriter_thread_num;
|
|
thread_num = thread_num > 0 ? thread_num : 1;
|
|
for (i = 0; i < thread_num; i++) {
|
|
if (pid == g_instance.pid_cxt.CkptBgWriterPID[i]) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.CkptBgWriterPID[i] = 0;
|
|
if (!EXIT_STATUS_0(exitstatus)) {
|
|
HandleChildCrash(pid, exitstatus, _("incre ckpt background writer process"));
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Was it the pagewriter?
|
|
*/
|
|
if (g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
int i;
|
|
for (i = 0; i < g_instance.attr.attr_storage.pagewriter_thread_num; i++) {
|
|
if (pid == g_instance.pid_cxt.PageWriterPID[i]) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.PageWriterPID[i] = 0;
|
|
if (!EXIT_STATUS_0(exitstatus)) {
|
|
HandleChildCrash(pid, exitstatus, _("page writer process"));
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Was it the checkpointer?
|
|
*/
|
|
if (pid == g_instance.pid_cxt.CheckpointerPID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.CheckpointerPID = 0;
|
|
|
|
if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN) {
|
|
/*
|
|
* OK, we saw normal exit of the checkpointer after it's been
|
|
* told to shut down. We expect that it wrote a shutdown
|
|
* checkpoint. (If for some reason it didn't, recovery will
|
|
* occur on next postmaster start.)
|
|
*
|
|
* At this point we should have no normal backend children
|
|
* left (else we'd not be in PM_SHUTDOWN state) but we might
|
|
* have dead_end children to wait for.
|
|
*
|
|
* If we have an archiver subprocess, tell it to do a last
|
|
* archive cycle and quit. Likewise, if we have walsender
|
|
* processes, tell them to send any remaining WAL and quit.
|
|
*/
|
|
Assert(g_instance.status > NoShutdown || g_instance.demotion > NoDemote);
|
|
|
|
/* Waken archiver for the last time */
|
|
if (g_instance.pid_cxt.PgArchPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgArchPID, SIGUSR2);
|
|
|
|
PrepareDemoteResponse();
|
|
|
|
/*
|
|
* Waken all senders for the last time. No regular backends
|
|
* should be around anymore except catchup process.
|
|
*/
|
|
SignalChildren(SIGUSR2);
|
|
|
|
if (g_instance.pid_cxt.HeartbeatPID != 0)
|
|
signal_child(g_instance.pid_cxt.HeartbeatPID, SIGTERM);
|
|
|
|
pmState = PM_SHUTDOWN_2;
|
|
|
|
/*
|
|
* We can also shut down the stats collector now; there's
|
|
* nothing left for it to do.
|
|
*/
|
|
if (g_instance.pid_cxt.PgStatPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgStatPID, SIGQUIT);
|
|
|
|
if (g_instance.pid_cxt.SnapshotPID != 0)
|
|
signal_child(g_instance.pid_cxt.SnapshotPID, SIGQUIT);
|
|
|
|
if (g_instance.pid_cxt.AshPID != 0)
|
|
signal_child(g_instance.pid_cxt.AshPID, SIGQUIT);
|
|
|
|
if (g_instance.pid_cxt.StatementPID != 0)
|
|
signal_child(g_instance.pid_cxt.StatementPID, SIGQUIT);
|
|
|
|
if (g_instance.pid_cxt.PercentilePID != 0)
|
|
signal_child(g_instance.pid_cxt.PercentilePID, SIGQUIT);
|
|
|
|
/*
|
|
* We can also shut down the audit collector now; there's
|
|
* nothing left for it to do.
|
|
*/
|
|
if (g_instance.pid_cxt.PgAuditPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.PgAuditPID, SIGQUIT);
|
|
}
|
|
} else {
|
|
/*
|
|
* Any unexpected exit of the checkpointer (including FATAL
|
|
* exit) is treated as a crash.
|
|
*/
|
|
HandleChildCrash(pid, exitstatus, _("checkpointer process"));
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the wal writer? Normal exit can be ignored; we'll start a
|
|
* new one at the next iteration of the postmaster's main loop, if
|
|
* necessary. Any other exit condition is treated as a crash.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.WalWriterPID) {
|
|
g_instance.pid_cxt.WalWriterPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("WAL writer process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the wal file creator? Normal exit can be ignored; we'll start a
|
|
* new one at the next iteration of the postmaster's main loop, if
|
|
* necessary. Any other exit condition is treated as a crash.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.WalWriterAuxiliaryPID) {
|
|
g_instance.pid_cxt.WalWriterAuxiliaryPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("WAL file creator process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the wal receiver? If exit status is zero (normal) or one
|
|
* (FATAL exit), we assume everything is all right just like normal
|
|
* backends.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.WalReceiverPID) {
|
|
g_instance.pid_cxt.WalReceiverPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("WAL receiver process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the wal receive writer? If exit status is zero (normal) or one
|
|
* (FATAL exit), we assume everything is all right just like normal
|
|
* backends.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.WalRcvWriterPID) {
|
|
g_instance.pid_cxt.WalRcvWriterPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("WAL receive writer process"));
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the data receiver? If exit status is zero (normal) or one
|
|
* (FATAL exit), we assume everything is all right just like normal
|
|
* backends.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.DataReceiverPID) {
|
|
g_instance.pid_cxt.DataReceiverPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("DATA receiver process"));
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the data receive writer? If exit status is zero (normal) or one
|
|
* (FATAL exit), we assume everything is all right just like normal
|
|
* backends.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.DataRcvWriterPID) {
|
|
g_instance.pid_cxt.DataRcvWriterPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("DATA receive writer process"));
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the data catchup? If exit status is zero (normal) or one
|
|
* (FATAL exit), we assume everything is all right just like normal
|
|
* backends.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.CatchupPID) {
|
|
g_instance.pid_cxt.CatchupPID = 0;
|
|
catchup_online = false;
|
|
|
|
/*
|
|
* Catchup is like a normal backend: do standard backend child cleanup by
|
|
* 'CleanupBackend', which will handle a child crash; here, we do not need
|
|
* to handle it like other threads(for example: g_instance.pid_cxt.DataRcvWriterPID);
|
|
*/
|
|
}
|
|
|
|
/*
|
|
* Was it the autovacuum launcher? Normal exit can be ignored; we'll
|
|
* start a new one at the next iteration of the postmaster's main
|
|
* loop, if necessary. Any other exit condition is treated as a
|
|
* crash.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.AutoVacPID) {
|
|
g_instance.pid_cxt.AutoVacPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("autovacuum launcher process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.PgJobSchdPID) {
|
|
g_instance.pid_cxt.PgJobSchdPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("job scheduler process"), pid, exitstatus);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the archiver? If so, just try to start a new one; no need
|
|
* to force reset of the rest of the system. (If fail, we'll try
|
|
* again in future cycles of the main loop.). Unless we were waiting
|
|
* for it to shut down; don't restart it in that case, and
|
|
* PostmasterStateMachine() will advance to the next shutdown step.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.PgArchPID) {
|
|
g_instance.pid_cxt.PgArchPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("archiver process"), pid, exitstatus);
|
|
|
|
if (XLogArchivingActive()) {
|
|
if (pmState == PM_RUN || pmState == PM_HOT_STANDBY || pmState == PM_RECOVERY) {
|
|
g_instance.pid_cxt.PgArchPID = pgarch_start();
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Was it the statistics collector? If so, just try to start a new
|
|
* one; no need to force reset of the rest of the system. (If fail,
|
|
* we'll try again in future cycles of the main loop.)
|
|
*/
|
|
if (pid == g_instance.pid_cxt.PgStatPID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.PgStatPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("statistics collector process"), pid, exitstatus);
|
|
|
|
if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
|
|
g_instance.pid_cxt.PgStatPID = pgstat_start();
|
|
continue;
|
|
}
|
|
|
|
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && pid == g_instance.pid_cxt.SnapshotPID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.SnapshotPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("snapshot collector process"), pid, exitstatus);
|
|
|
|
if (pmState == PM_RUN)
|
|
g_instance.pid_cxt.SnapshotPID = snapshot_start();
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.AshPID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.AshPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("Active session history collector process"), pid, exitstatus);
|
|
|
|
if (pmState == PM_RUN && ENABLE_ASP)
|
|
g_instance.pid_cxt.AshPID = initialize_util_thread(ASH_WORKER);
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.StatementPID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.StatementPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("full SQL statement flush process"), pid, exitstatus);
|
|
|
|
if (pmState == PM_RUN && ENABLE_STATEMENT_TRACK)
|
|
g_instance.pid_cxt.StatementPID = initialize_util_thread(TRACK_STMT_WORKER);
|
|
continue;
|
|
}
|
|
|
|
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && pid == g_instance.pid_cxt.PercentilePID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.PercentilePID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("percentile collector process"), pid, exitstatus);
|
|
|
|
if (pmState == PM_RUN)
|
|
g_instance.pid_cxt.PercentilePID = initialize_util_thread(PERCENTILE_WORKER);
|
|
continue;
|
|
}
|
|
|
|
/* Database Security: Support database audit */
|
|
/*
|
|
* Was it the system auditor? If so, try to start a new one.
|
|
*/
|
|
if (pid == g_instance.pid_cxt.PgAuditPID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.PgAuditPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("system auditor process"), pid, exitstatus);
|
|
if (pmState == PM_RUN)
|
|
g_instance.pid_cxt.PgAuditPID = pgaudit_start();
|
|
continue;
|
|
}
|
|
|
|
/* Was it the system logger? If so, try to start a new one */
|
|
if (pid == g_instance.pid_cxt.SysLoggerPID) {
|
|
g_instance.pid_cxt.SysLoggerPID = 0;
|
|
/* for safety's sake, launch new logger *first* */
|
|
g_instance.pid_cxt.SysLoggerPID = SysLogger_Start();
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("system logger process"), pid, exitstatus);
|
|
|
|
continue;
|
|
}
|
|
|
|
/* Was it the reaper backend thread? If so, try to start a new one */
|
|
if (pid == g_instance.pid_cxt.ReaperBackendPID) {
|
|
g_instance.pid_cxt.ReaperBackendPID = 0;
|
|
/* for safety's sake, launch new reaper backend *first* */
|
|
g_instance.pid_cxt.ReaperBackendPID = initialize_util_thread(REAPER);
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("reaper backend process"), pid, exitstatus);
|
|
|
|
continue;
|
|
}
|
|
|
|
/* Was it the wlm collector? If so, clear its PID; no new one is started here */
|
|
if (pid == g_instance.pid_cxt.WLMCollectPID) {
|
|
g_instance.pid_cxt.WLMCollectPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("wlm collector process"), pid, exitstatus);
|
|
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.WLMMonitorPID) {
|
|
g_instance.pid_cxt.WLMMonitorPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("wlm monitor process"), pid, exitstatus);
|
|
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.WLMArbiterPID) {
|
|
g_instance.pid_cxt.WLMArbiterPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("wlm arbiter process"), pid, exitstatus);
|
|
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.CPMonitorPID) {
|
|
g_instance.pid_cxt.CPMonitorPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("compute pool monitor process"), pid, exitstatus);
|
|
|
|
continue;
|
|
}
|
|
|
|
/* Was it the twophasecleaner? If so, try to handle */
|
|
if (
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
IS_PGXC_COORDINATOR &&
|
|
#endif
|
|
pid == g_instance.pid_cxt.TwoPhaseCleanerPID) {
|
|
g_instance.pid_cxt.TwoPhaseCleanerPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("twophase cleaner process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
/* Was it the csnmin sync? If so, try to start a new one */
|
|
if (GTM_LITE_CN && pid == g_instance.pid_cxt.CsnminSyncPID) {
|
|
g_instance.pid_cxt.CsnminSyncPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("csnmin sync process"), pid, exitstatus);
|
|
continue;
|
|
}
|
|
|
|
/* Was it the barrier creator? If so, try to start a new one */
|
|
if (START_BARRIER_CREATOR && pid == g_instance.pid_cxt.BarrierCreatorPID) {
|
|
g_instance.pid_cxt.BarrierCreatorPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("barrier creator process"), pid, exitstatus);
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.FaultMonitorPID) {
|
|
g_instance.pid_cxt.FaultMonitorPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("fault monitor process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.CBMWriterPID) {
|
|
g_instance.pid_cxt.CBMWriterPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus)) {
|
|
LogChildExit(LOG, _("CBM writer process"), pid, exitstatus);
|
|
}
|
|
t_thrd.cbm_cxt.XlogCbmSys->needReset = true;
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.RemoteServicePID) {
|
|
Assert(!dummyStandbyMode);
|
|
g_instance.pid_cxt.RemoteServicePID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("remote service process"));
|
|
|
|
continue;
|
|
}
|
|
if (get_real_recovery_parallelism() > 1) {
|
|
PageRedoExitStatus pageredoStatus = CheckExitPageWorkers(pid);
|
|
if (pageredoStatus == PAGE_REDO_THREAD_EXIT_NORMAL) {
|
|
continue;
|
|
} else if (pageredoStatus == PAGE_REDO_THREAD_EXIT_ABNORMAL) {
|
|
ereport(LOG, (errmsg("aborting due to page redo process failure")));
|
|
HandleChildCrash(pid, exitstatus, _("page redo process"));
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.HeartbeatPID) {
|
|
g_instance.pid_cxt.HeartbeatPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("Heartbeat process"));
|
|
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.CommPoolerCleanPID) {
|
|
g_instance.pid_cxt.CommPoolerCleanPID = 0;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("pooler cleaner process"), pid, exitstatus);
|
|
continue;
|
|
}
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (pid == g_instance.pid_cxt.TsCompactionPID) {
|
|
g_instance.pid_cxt.TsCompactionPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("timeseries compaction process"));
|
|
continue;
|
|
}
|
|
|
|
if (pid == g_instance.pid_cxt.TsCompactionAuxiliaryPID) {
|
|
g_instance.pid_cxt.TsCompactionAuxiliaryPID = 0;
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
HandleChildCrash(pid, exitstatus, _("timeseries compaction auxiliary process"));
|
|
continue;
|
|
}
|
|
|
|
if (streaming_backend_manager(STREAMING_BACKEND_REAP, (void *)&pid)) {
|
|
if (!EXIT_STATUS_0(exitstatus))
|
|
LogChildExit(LOG, _("streaming backend process"), pid, exitstatus);
|
|
continue;
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
/*
|
|
* Else do standard backend child cleanup.
|
|
*/
|
|
CleanupBackend(pid, exitstatus);
|
|
} /* loop over pending child-death reports */
|
|
|
|
/*
|
|
* After cleaning out the SIGCHLD queue, see if we have any state changes
|
|
* or actions to make.
|
|
*/
|
|
PostmasterStateMachine();
|
|
|
|
/* Done with signal handler */
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
|
|
|
errno = save_errno;
|
|
}
|
|
|
|
/*
|
|
* Get proc name due to thread id.
|
|
*/
|
|
static const char* GetProcName(ThreadId pid)
|
|
{
|
|
if (pid == 0)
|
|
return "invalid process";
|
|
else if (pid == g_instance.pid_cxt.StartupPID)
|
|
return "startup process";
|
|
else if (pid == g_instance.pid_cxt.BgWriterPID)
|
|
return "background writer process";
|
|
else if (pid == g_instance.pid_cxt.CheckpointerPID)
|
|
return "checkpointer process";
|
|
else if (pid == g_instance.pid_cxt.WalWriterPID)
|
|
return "WAL writer process";
|
|
else if (pid == g_instance.pid_cxt.WalWriterAuxiliaryPID)
|
|
return "WAL file creator process";
|
|
else if (pid == g_instance.pid_cxt.WalReceiverPID)
|
|
return "WAL receiver process";
|
|
else if (pid == g_instance.pid_cxt.WalRcvWriterPID)
|
|
return "WAL receive writer process";
|
|
else if (pid == g_instance.pid_cxt.DataReceiverPID)
|
|
return "DATA receiver process";
|
|
else if (pid == g_instance.pid_cxt.DataRcvWriterPID)
|
|
return "DATA receive writer process";
|
|
else if (pid == g_instance.pid_cxt.CatchupPID)
|
|
return "catchup process";
|
|
else if (pid == g_instance.pid_cxt.AutoVacPID)
|
|
return "autovacuum launcher process";
|
|
else if (pid == g_instance.pid_cxt.PgJobSchdPID)
|
|
return "job scheduler process";
|
|
else if (pid == g_instance.pid_cxt.PgArchPID)
|
|
return "archiver process";
|
|
else if (pid == g_instance.pid_cxt.PgStatPID)
|
|
return "statistics collector process";
|
|
else if (pid == g_instance.pid_cxt.SnapshotPID)
|
|
return "snapshot collector process";
|
|
else if (pid == g_instance.pid_cxt.AshPID)
|
|
return "active session history collector process";
|
|
else if (pid == g_instance.pid_cxt.StatementPID)
|
|
return "full SQL statement flush process";
|
|
else if (pid == g_instance.pid_cxt.PercentilePID)
|
|
return "percentile collector process";
|
|
else if (pid == g_instance.pid_cxt.PgAuditPID)
|
|
return "system auditor process";
|
|
else if (pid == g_instance.pid_cxt.SysLoggerPID)
|
|
return "system logger process";
|
|
else if (pid == g_instance.pid_cxt.WLMCollectPID)
|
|
return "wlm collector process";
|
|
else if (pid == g_instance.pid_cxt.WLMMonitorPID)
|
|
return "wlm monitor process";
|
|
else if (pid == g_instance.pid_cxt.WLMArbiterPID)
|
|
return "wlm arbiter process";
|
|
else if (pid == g_instance.pid_cxt.CPMonitorPID)
|
|
return "compute pool monitor process";
|
|
else if (pid == g_instance.pid_cxt.TwoPhaseCleanerPID)
|
|
return "twophase cleaner process";
|
|
else if (pid == g_instance.pid_cxt.FaultMonitorPID)
|
|
return "fault monitor process";
|
|
else if (g_instance.pid_cxt.AlarmCheckerPID == pid)
|
|
return "alarm checker process";
|
|
else if (g_instance.pid_cxt.AioCompleterStarted == pid)
|
|
return "aio completer process";
|
|
else if (pid == g_instance.pid_cxt.CBMWriterPID)
|
|
return "CBM writer process";
|
|
else if (g_instance.pid_cxt.RemoteServicePID == pid)
|
|
return "remote service process";
|
|
else if (g_instance.pid_cxt.HeartbeatPID == pid)
|
|
return "Heartbeat process";
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
else if (g_instance.pid_cxt.TsCompactionPID == pid)
|
|
return "TsCompaction process";
|
|
else if (g_instance.pid_cxt.TsCompactionAuxiliaryPID == pid)
|
|
return "TsCompaction Auxiliary process";
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
else if (g_instance.pid_cxt.CsnminSyncPID == pid)
|
|
return "csnmin sync process";
|
|
else if (g_instance.pid_cxt.BarrierCreatorPID == pid)
|
|
return "barrier creator process";
|
|
else if (g_instance.pid_cxt.CommSenderFlowPID == pid)
|
|
return "libcomm sender flow process";
|
|
else if (g_instance.pid_cxt.CommReceiverFlowPID == pid)
|
|
return "libcomm receiver flow process";
|
|
else if (g_instance.pid_cxt.CommAuxiliaryPID == pid)
|
|
return "libcomm auxiliary process";
|
|
else if (g_instance.pid_cxt.CommPoolerCleanPID == pid)
|
|
return "pool cleaner process";
|
|
else if (g_instance.pid_cxt.CommReceiverPIDS != NULL) {
|
|
int recv_loop = 0;
|
|
for (recv_loop = 0; recv_loop < g_instance.attr.attr_network.comm_max_receiver; recv_loop++) {
|
|
if (g_instance.pid_cxt.CommReceiverPIDS[recv_loop] == pid) {
|
|
return "libcomm receiver loop process";
|
|
}
|
|
}
|
|
return "server process";
|
|
} else {
|
|
return "server process";
|
|
}
|
|
}
|
|
|
|
/*
|
|
* CleanupBackend -- cleanup after terminated backend.
|
|
*
|
|
* Remove all local state associated with backend.
|
|
*/
|
|
static void CleanupBackend(ThreadId pid, int exitstatus) /* child's exit status. */
|
|
{
|
|
Dlelem* curr = NULL;
|
|
|
|
LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
|
|
|
|
/*
|
|
* If a backend dies in an ugly way then we must signal all other backends
|
|
* to quickdie. If exit status is zero (normal) or one (FATAL exit), we
|
|
* assume everything is all right and proceed to remove the backend from
|
|
* the active backend list.
|
|
*/
|
|
#ifdef WIN32
|
|
|
|
/*
|
|
* On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
|
|
* since that sometimes happens under load when the process fails to start
|
|
* properly (long before it starts using shared memory). Microsoft reports
|
|
* it is related to mutex failure:
|
|
* http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
|
|
*/
|
|
if (exitstatus == ERROR_WAIT_NO_CHILDREN) {
|
|
LogChildExit(LOG, _("server process"), pid, exitstatus);
|
|
exitstatus = 0;
|
|
}
|
|
|
|
#endif
|
|
|
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) {
|
|
HandleChildCrash(pid, exitstatus, _("server process"));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* We always add the backend_list from head. Think about a scenario, an exit
|
|
* thread released its pid to system, which then reused by a new thread.
|
|
* After that, PM received signal to clean up the exit thread, we traversed the
|
|
* list from head and use pid to match thread Backend, we probably matched
|
|
* the new thread rather than the exit thread. To avoid this problem, we can
|
|
* traverse the backend_list from tail.
|
|
*/
|
|
for (curr = DLGetTail(g_instance.backend_list); curr; curr = DLGetPred(curr)) {
|
|
Backend* bp = (Backend*)DLE_VAL(curr);
|
|
|
|
if (bp->pid == pid && bp->dead_end) {
|
|
{
|
|
if (!ReleasePostmasterChildSlot(bp->child_slot)) {
|
|
/*
|
|
* Uh-oh, the child failed to clean itself up. Treat as a
|
|
* crash after all.
|
|
*/
|
|
HandleChildCrash(pid, exitstatus, _("server process"));
|
|
return;
|
|
}
|
|
|
|
BackendArrayRemove(bp);
|
|
}
|
|
|
|
DLRemove(curr);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
|
|
* walwriter or autovacuum.
|
|
*
|
|
* The objectives here are to clean up our local state about the child
|
|
* process, and to signal all other remaining children to quickdie.
|
|
*/
|
|
void HandleChildCrash(ThreadId pid, int exitstatus, const char* procname)
|
|
{
|
|
/*
|
|
* Make log entry unless there was a previous crash (if so, nonzero exit
|
|
* status is to be expected in SIGQUIT response; don't clutter log)
|
|
*/
|
|
if (!g_instance.fatal_error) {
|
|
LogChildExit(LOG, procname, pid, exitstatus);
|
|
ereport(LOG, (errmsg("terminating any other active server processes")));
|
|
}
|
|
|
|
ereport(LOG, (errmsg("%s (ThreadId %lu) exited with exit code %d", procname, pid, WEXITSTATUS(exitstatus))));
|
|
|
|
// Threading: do not handle child crash,
|
|
// &g_instance.proc_aux_base or autovacuum elog(FATAL) could reach here,
|
|
// if we handle it, we will send SIGQUIT to backend process,
|
|
// then backend may handle the signal when doing malloc, cause memory exception.
|
|
// So exit directly.
|
|
//
|
|
ereport(LOG, (errmsg("the server process exits")));
|
|
|
|
cancelIpcMemoryDetach();
|
|
|
|
fflush(stdout);
|
|
fflush(stderr);
|
|
_exit(1);
|
|
}
|
|
|
|
/*
|
|
* Log the death of a child process.
|
|
*/
|
|
static void LogChildExit(int lev, const char* procname, ThreadId pid, int exitstatus)
|
|
{
|
|
/*
|
|
* size of activity_buffer is arbitrary, but set equal to default
|
|
* track_activity_query_size
|
|
*/
|
|
char activity_buffer[1024];
|
|
const char* activity = NULL;
|
|
|
|
if (!EXIT_STATUS_0(exitstatus)) {
|
|
activity = pgstat_get_crashed_backend_activity(pid, activity_buffer, sizeof(activity_buffer));
|
|
}
|
|
if (WIFEXITED(exitstatus)) {
|
|
ereport(lev,
|
|
|
|
/* ------
|
|
translator: %s is a noun phrase describing a child process, such as
|
|
"server process" */
|
|
(errmsg("%s (ThreadId %lu) exited with exit code %d", procname, pid, WEXITSTATUS(exitstatus)),
|
|
activity ? errdetail("Failed process was running: %s", activity) : 0));
|
|
} else if (WIFSIGNALED(exitstatus)) {
|
|
#if defined(WIN32)
|
|
ereport(lev,
|
|
|
|
/* ------
|
|
translator: %s is a noun phrase describing a child process, such as
|
|
"server process" */
|
|
(errmsg("%s (ThreadId %lu) was terminated by exception 0x%X", procname, pid, WTERMSIG(exitstatus)),
|
|
errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
|
|
activity ? errdetail("Failed process was running: %s", activity) : 0));
|
|
|
|
#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
|
|
ereport(lev,
|
|
|
|
/* ------
|
|
translator: %s is a noun phrase describing a child process, such as
|
|
"server process" */
|
|
(errmsg("%s (ThreadId %lu) was terminated by signal %d: %s",
|
|
procname,
|
|
pid,
|
|
WTERMSIG(exitstatus),
|
|
WTERMSIG(exitstatus) < NSIG ? sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
|
|
activity ? errdetail("Failed process was running: %s", activity) : 0));
|
|
#else
|
|
ereport(lev,
|
|
|
|
/* ------
|
|
translator: %s is a noun phrase describing a child process, such as
|
|
"server process" */
|
|
(errmsg("%s (ThreadId %lu) was terminated by signal %d", procname, pid, WTERMSIG(exitstatus)),
|
|
activity ? errdetail("Failed process was running: %s", activity) : 0));
|
|
#endif
|
|
} else {
|
|
ereport(lev,
|
|
|
|
/* ------
|
|
translator: %s is a noun phrase describing a child process, such as
|
|
"server process" */
|
|
(errmsg("%s (ThreadId %lu) exited with unrecognized status %d", procname, pid, exitstatus),
|
|
activity ? errdetail("Failed process was running: %s", activity) : 0));
|
|
}
|
|
}
|
|
|
|
static bool ckpt_all_flush_buffer_thread_exit()
|
|
{
|
|
if (g_instance.pid_cxt.PageWriterPID[0] == 0 &&
|
|
(g_instance.pid_cxt.CkptBgWriterPID == NULL || g_instance.pid_cxt.CkptBgWriterPID[0] == 0)){
|
|
return true;
|
|
}else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static void PostmasterStateMachineReadOnly(void)
|
|
{
|
|
if (pmState == PM_WAIT_READONLY) {
|
|
/*
|
|
* PM_WAIT_READONLY state ends when we have no regular backends that
|
|
* have been started during recovery. We kill the startup and
|
|
* walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
|
|
* we might like to kill these processes first and then wait for
|
|
* backends to die off, but that doesn't work at present because
|
|
* killing the startup process doesn't release its locks.
|
|
*/
|
|
if (CountChildren(BACKEND_TYPE_NORMAL) == 0) {
|
|
if (g_instance.pid_cxt.StartupPID != 0)
|
|
signal_child(g_instance.pid_cxt.StartupPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WalReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalReceiverPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WalRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalRcvWriterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.DataReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataReceiverPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.DataRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataRcvWriterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WLMCollectPID != 0) {
|
|
WLMProcessThreadShutDown();
|
|
signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM);
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WLMMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.WLMArbiterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WLMArbiterPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.CPMonitorPID != 0)
|
|
signal_child(g_instance.pid_cxt.CPMonitorPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.RemoteServicePID != 0)
|
|
signal_child(g_instance.pid_cxt.RemoteServicePID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.HeartbeatPID != 0)
|
|
signal_child(g_instance.pid_cxt.HeartbeatPID, SIGTERM);
|
|
|
|
if (g_instance.pid_cxt.BarrierCreatorPID != 0) {
|
|
barrier_creator_thread_shutdown();
|
|
signal_child(g_instance.pid_cxt.BarrierCreatorPID, SIGTERM);
|
|
}
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (g_instance.pid_cxt.CsnminSyncPID != 0) {
|
|
csnminsync_thread_shutdown();
|
|
signal_child(g_instance.pid_cxt.CsnminSyncPID, SIGTERM);
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
pmState = PM_WAIT_BACKENDS;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Advance the postmaster's state machine and take actions as appropriate
|
|
*
|
|
* This is common code for pmdie(), reaper() and sigusr1_handler(), which
|
|
* receive the signals that might mean we need to change state.
|
|
*/
|
|
static void PostmasterStateMachine(void)
|
|
{
|
|
if (pmState == PM_WAIT_BACKUP) {
|
|
/*
|
|
* PM_WAIT_BACKUP state ends when online backup mode is not active.
|
|
*/
|
|
if (!BackupInProgress())
|
|
pmState = PM_WAIT_BACKENDS;
|
|
}
|
|
|
|
PostmasterStateMachineReadOnly();
|
|
/*
|
|
* If we are in a state-machine state that implies waiting for backends to
|
|
* exit, see if they're all gone, and change state if so.
|
|
*/
|
|
if (pmState == PM_WAIT_BACKENDS) {
|
|
/*
|
|
* PM_WAIT_BACKENDS state ends when we have no regular backends
|
|
* (including autovac workers) and no walwriter, autovac launcher or
|
|
* bgwriter. If we are doing crash recovery then we expect the
|
|
* checkpointer to exit as well, otherwise not. The archiver, stats,
|
|
* and syslogger processes are disregarded since they are not
|
|
* connected to shared memory; we also disregard dead_end children
|
|
* here. Walsenders are also disregarded, they will be terminated
|
|
* later after writing the checkpoint record, like the archiver
|
|
* process.
|
|
*/
|
|
if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && g_instance.pid_cxt.StartupPID == 0 &&
|
|
g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && g_instance.pid_cxt.FaultMonitorPID == 0 &&
|
|
g_instance.pid_cxt.WalReceiverPID == 0 && g_instance.pid_cxt.WalRcvWriterPID == 0 &&
|
|
g_instance.pid_cxt.DataReceiverPID == 0 && g_instance.pid_cxt.DataRcvWriterPID == 0 &&
|
|
g_instance.pid_cxt.BgWriterPID == 0 && g_instance.pid_cxt.StatementPID == 0 &&
|
|
(g_instance.pid_cxt.CheckpointerPID == 0 || !g_instance.fatal_error) &&
|
|
g_instance.pid_cxt.WalWriterPID == 0 && g_instance.pid_cxt.WalWriterAuxiliaryPID == 0 &&
|
|
g_instance.pid_cxt.AutoVacPID == 0 &&
|
|
g_instance.pid_cxt.WLMCollectPID == 0 && g_instance.pid_cxt.WLMMonitorPID == 0 &&
|
|
g_instance.pid_cxt.WLMArbiterPID == 0 && g_instance.pid_cxt.CPMonitorPID == 0 &&
|
|
g_instance.pid_cxt.PgJobSchdPID == 0 && g_instance.pid_cxt.CBMWriterPID == 0 &&
|
|
g_instance.pid_cxt.SnapshotPID == 0 && g_instance.pid_cxt.PercentilePID == 0 &&
|
|
g_instance.pid_cxt.RemoteServicePID == 0 && g_instance.pid_cxt.AshPID == 0 &&
|
|
g_instance.pid_cxt.CsnminSyncPID == 0 && g_instance.pid_cxt.BarrierCreatorPID == 0 &&
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
g_instance.pid_cxt.CommPoolerCleanPID == 0 &&
|
|
streaming_backend_manager(STREAMING_BACKEND_SHUTDOWN) &&
|
|
g_instance.pid_cxt.TsCompactionPID == 0 && g_instance.pid_cxt.TsCompactionAuxiliaryPID == 0
|
|
&& g_instance.pid_cxt.CommPoolerCleanPID == 0 &&
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
IsAllPageWorkerExit() && IsAllBuildSenderExit()) {
|
|
if (g_instance.fatal_error) {
|
|
/*
|
|
* Start waiting for dead_end children to die. This state
|
|
* change causes ServerLoop to stop creating new ones.
|
|
*/
|
|
pmState = PM_WAIT_DEAD_END;
|
|
|
|
/*
|
|
* We already SIGQUIT'd the archiver and stats processes, if
|
|
* any, when we entered g_instance.fatal_error state.
|
|
*/
|
|
} else {
|
|
/*
|
|
* If we get here, we are proceeding with normal shutdown. All
|
|
* the regular children are gone, and it's time to tell the
|
|
* checkpointer to do a shutdown checkpoint.
|
|
*/
|
|
Assert(g_instance.status > NoShutdown || g_instance.demotion > NoDemote);
|
|
|
|
/* Start the checkpointer if not running */
|
|
if (g_instance.pid_cxt.CheckpointerPID == 0 && !dummyStandbyMode)
|
|
g_instance.pid_cxt.CheckpointerPID = initialize_util_thread(CHECKPOINT_THREAD);
|
|
|
|
if (g_instance.pid_cxt.PageWriterPID != NULL) {
|
|
g_instance.ckpt_cxt_ctl->page_writer_can_exit = false;
|
|
|
|
for (int i = 0; i < g_instance.attr.attr_storage.pagewriter_thread_num; i++) {
|
|
if (g_instance.pid_cxt.PageWriterPID[i] != 0) {
|
|
signal_child(g_instance.pid_cxt.PageWriterPID[i], SIGTERM);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (g_instance.pid_cxt.CkptBgWriterPID != NULL) {
|
|
int thread_num = g_instance.attr.attr_storage.bgwriter_thread_num;
|
|
thread_num = thread_num > 0 ? thread_num : 1;
|
|
for (int i = 0; i < thread_num; i++) {
|
|
if (g_instance.pid_cxt.CkptBgWriterPID[i] != 0) {
|
|
signal_child(g_instance.pid_cxt.CkptBgWriterPID[i], SIGTERM);
|
|
}
|
|
}
|
|
}
|
|
/* And tell it to shut down */
|
|
if (g_instance.pid_cxt.CheckpointerPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.CheckpointerPID, SIGUSR2);
|
|
pmState = PM_SHUTDOWN;
|
|
} else {
|
|
/*
|
|
* If we failed to fork a checkpointer, just shut down.
|
|
* Any required cleanup will happen at next restart. We
|
|
* set g_instance.fatal_error so that an "abnormal shutdown" message
|
|
* gets logged when we exit.
|
|
*/
|
|
g_instance.ckpt_cxt_ctl->page_writer_can_exit = true;
|
|
g_instance.fatal_error = true;
|
|
pmState = PM_WAIT_DEAD_END;
|
|
|
|
/* Kill the senders, archiver and stats collector too */
|
|
SignalChildren(SIGQUIT);
|
|
|
|
if (g_instance.pid_cxt.PgArchPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgArchPID, SIGQUIT);
|
|
|
|
if (g_instance.pid_cxt.PgStatPID != 0)
|
|
signal_child(g_instance.pid_cxt.PgStatPID, SIGQUIT);
|
|
|
|
/* signal the auditor process */
|
|
if (g_instance.pid_cxt.PgAuditPID != 0) {
|
|
Assert(!dummyStandbyMode);
|
|
signal_child(g_instance.pid_cxt.PgAuditPID, SIGQUIT);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (pmState == PM_SHUTDOWN_2) {
|
|
/*
|
|
* PM_SHUTDOWN_2 state ends when there's no other children than
|
|
* dead_end children left. There shouldn't be any regular backends
|
|
* left by now anyway; what we're really waiting for is walsenders and
|
|
* archiver.
|
|
*
|
|
* Walreceiver should normally be dead by now, but not when a fast
|
|
* shutdown is performed during recovery.
|
|
*/
|
|
if (g_instance.pid_cxt.PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
|
|
g_instance.pid_cxt.WalReceiverPID == 0 && g_instance.pid_cxt.WalRcvWriterPID == 0 &&
|
|
g_instance.pid_cxt.DataReceiverPID == 0 && g_instance.pid_cxt.DataRcvWriterPID == 0 &&
|
|
g_instance.pid_cxt.HeartbeatPID == 0) {
|
|
pmState = PM_WAIT_DEAD_END;
|
|
}
|
|
}
|
|
|
|
if (pmState == PM_WAIT_DEAD_END) {
|
|
/*
|
|
* PM_WAIT_DEAD_END state ends when the g_instance.backend_list is entirely empty
|
|
* (ie, no dead_end children remain), and the archiver and stats
|
|
* collector are gone too.
|
|
*
|
|
* The reason we wait for those two is to protect them against a new
|
|
* postmaster starting conflicting subprocesses; this isn't an
|
|
* ironclad protection, but it at least helps in the
|
|
* shutdown-and-immediately-restart scenario. Note that they have
|
|
* already been sent appropriate shutdown signals, either during a
|
|
* normal state transition leading up to PM_WAIT_DEAD_END, or during
|
|
* g_instance.fatal_error processing.
|
|
*/
|
|
if (DLGetHead(g_instance.backend_list) == NULL && g_instance.pid_cxt.PgArchPID == 0 &&
|
|
g_instance.pid_cxt.PgStatPID == 0 && g_instance.pid_cxt.PgAuditPID == 0 &&
|
|
ckpt_all_flush_buffer_thread_exit()) {
|
|
/* These other guys should be dead already */
|
|
Assert(g_instance.pid_cxt.TwoPhaseCleanerPID == 0);
|
|
Assert(g_instance.pid_cxt.FaultMonitorPID == 0);
|
|
Assert(g_instance.pid_cxt.StartupPID == 0);
|
|
Assert(g_instance.pid_cxt.WalReceiverPID == 0);
|
|
Assert(g_instance.pid_cxt.WalRcvWriterPID == 0);
|
|
Assert(g_instance.pid_cxt.DataReceiverPID == 0);
|
|
Assert(g_instance.pid_cxt.DataRcvWriterPID == 0);
|
|
Assert(g_instance.pid_cxt.BgWriterPID == 0);
|
|
Assert(g_instance.pid_cxt.CheckpointerPID == 0);
|
|
Assert(g_instance.pid_cxt.WalWriterPID == 0);
|
|
Assert(g_instance.pid_cxt.WalWriterAuxiliaryPID == 0);
|
|
Assert(g_instance.pid_cxt.AutoVacPID == 0);
|
|
Assert(g_instance.pid_cxt.PgJobSchdPID == 0);
|
|
Assert(g_instance.pid_cxt.CBMWriterPID == 0);
|
|
Assert(g_instance.pid_cxt.RemoteServicePID == 0);
|
|
Assert(g_instance.pid_cxt.SnapshotPID == 0);
|
|
Assert(g_instance.pid_cxt.AshPID == 0);
|
|
Assert(g_instance.pid_cxt.StatementPID == 0);
|
|
Assert(g_instance.pid_cxt.PercentilePID == 0);
|
|
Assert(g_instance.pid_cxt.HeartbeatPID == 0);
|
|
Assert(g_instance.pid_cxt.CsnminSyncPID == 0);
|
|
Assert(g_instance.pid_cxt.BarrierCreatorPID == 0);
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
Assert(g_instance.pid_cxt.TsCompactionPID == 0);
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
Assert(g_instance.pid_cxt.CommPoolerCleanPID == 0);
|
|
Assert(IsAllPageWorkerExit() == true);
|
|
Assert(IsAllBuildSenderExit() == true);
|
|
/* syslogger is not considered here */
|
|
pmState = PM_NO_CHILDREN;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If we've been told to shut down, we exit as soon as there are no
|
|
* remaining children. If there was a crash, cleanup will occur at the
|
|
* next startup. (Before PostgreSQL 8.3, we tried to recover from the
|
|
* crash before exiting, but that seems unwise if we are quitting because
|
|
* we got SIGTERM from init --- there may well not be time for recovery
|
|
* before init decides to SIGKILL us.)
|
|
*
|
|
* Note that the syslogger continues to run. It will exit when it sees
|
|
* EOF on its input pipe, which happens when there are no more upstream
|
|
* processes.
|
|
*/
|
|
if (g_instance.status > NoShutdown && pmState == PM_NO_CHILDREN) {
|
|
if (g_instance.fatal_error) {
|
|
ereport(LOG, (errmsg("abnormal database system shutdown")));
|
|
ExitPostmaster(1);
|
|
} else {
|
|
/*
|
|
* Terminate exclusive backup mode to avoid recovery after a clean
|
|
* fast shutdown. Since an exclusive backup can only be taken
|
|
* during normal running (and not, for example, while running
|
|
* under Hot Standby) it only makes sense to do this if we reached
|
|
* normal running. If we're still in recovery, the backup file is
|
|
* one we're recovering *from*, and we must keep it around so that
|
|
* recovery restarts from the right place.
|
|
*/
|
|
if (t_thrd.postmaster_cxt.ReachedNormalRunning)
|
|
CancelBackup();
|
|
|
|
/* Normal exit from the postmaster is here */
|
|
ExitPostmaster(0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If recovery failed, or the user does not want an automatic restart
|
|
* after backend crashes, wait for all non-syslogger children to exit, and
|
|
* then exit postmaster. We don't try to reinitialize when recovery fails,
|
|
* because more than likely it will just fail again and we will keep
|
|
* trying forever.
|
|
*/
|
|
if (pmState == PM_NO_CHILDREN && (g_instance.recover_error || !u_sess->attr.attr_sql.restart_after_crash))
|
|
ExitPostmaster(1);
|
|
|
|
/*
|
|
* If we need to recover from a crash, wait for all non-syslogger children
|
|
* to exit, then reset shmem and StartupDataBase.
|
|
*/
|
|
if (g_instance.fatal_error && pmState == PM_NO_CHILDREN) {
|
|
volatile HaShmemData* hashmdata = NULL;
|
|
ServerMode cur_mode = NORMAL_MODE;
|
|
ereport(LOG, (errmsg("all server processes terminated; reinitializing")));
|
|
hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
cur_mode = hashmdata->current_mode;
|
|
/* cause gpc scheduler use lwlock, so before reset shared memory(still has lwlock),
|
|
get gpc_reset_lock and reset gpc */
|
|
if (ENABLE_GPC) {
|
|
GPCResetAll();
|
|
if (g_threadPoolControler && g_threadPoolControler->GetScheduler()->HasShutDown() == false)
|
|
g_threadPoolControler->ShutDownScheduler(true);
|
|
}
|
|
shmem_exit(1);
|
|
reset_shared(g_instance.attr.attr_network.PostPortNumber);
|
|
|
|
/* after reseting shared memory, we shall reset col-space cache.
|
|
* all the data of this cache will be out of date after switchover.
|
|
* clean up in order to corrupted data writing.
|
|
*/
|
|
CStoreAllocator::ResetColSpaceCache();
|
|
DfsInsert::ResetDfsSpaceCache();
|
|
|
|
hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
hashmdata->current_mode = cur_mode;
|
|
g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP);
|
|
Assert(g_instance.pid_cxt.StartupPID != 0);
|
|
pmState = PM_STARTUP;
|
|
}
|
|
|
|
/*
|
|
* If we need to recover from primary demote, wait for all non-syslogger children
|
|
* to exit, then reset shmem and StartupDataBase.
|
|
*/
|
|
if (g_instance.demotion > NoDemote && pmState == PM_NO_CHILDREN) {
|
|
ereport(LOG, (errmsg("all server processes terminated; reinitializing")));
|
|
/* cause gpc scheduler use lwlock, so before reset shared memory(still has lwlock),
|
|
get gpc_reset_lock and reset gpc */
|
|
if (ENABLE_GPC) {
|
|
GPCResetAll();
|
|
if (g_threadPoolControler && g_threadPoolControler->GetScheduler()->HasShutDown() == false)
|
|
g_threadPoolControler->ShutDownScheduler(true);
|
|
}
|
|
shmem_exit(1);
|
|
reset_shared(g_instance.attr.attr_network.PostPortNumber);
|
|
|
|
/* after reseting shared memory, we shall reset col-space cache.
|
|
* all the data of this cache will be out of date after switchover.
|
|
* clean up in order to corrupted data writing.
|
|
*/
|
|
CStoreAllocator::ResetColSpaceCache();
|
|
DfsInsert::ResetDfsSpaceCache();
|
|
|
|
/* if failed to enter archive-recovery state, then reboot as primary. */
|
|
{
|
|
volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
hashmdata->current_mode = STANDBY_MODE;
|
|
UpdateOptsFile();
|
|
ereport(LOG, (errmsg("archive recovery started")));
|
|
}
|
|
|
|
g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP);
|
|
Assert(g_instance.pid_cxt.StartupPID != 0);
|
|
pmState = PM_STARTUP;
|
|
}
|
|
|
|
if (pmState == PM_STARTUP &&
|
|
t_thrd.xlog_cxt.server_mode == STANDBY_MODE &&
|
|
t_thrd.xlog_cxt.is_cascade_standby) {
|
|
SetHaShmemData();
|
|
}
|
|
}
|
|
|
|
/*
 * signalBackend -- deliver a signal to one backend thread.
 *
 * bn      - backend entry whose pid is signaled
 * signal  - signal number to send
 * be_mode - backend type, only used in the failure message
 *
 * A LOG line is written when the entry already carries THRD_EXIT.  For
 * SIGTERM the THRD_SIGTERM bit is recorded in bn->flag and the send is made
 * with the nowait argument set.  A failed send is reported as a WARNING.
 */
void signalBackend(Backend* bn, int signal, int be_mode)
{
    const bool is_terminate = (signal == SIGTERM);
    int nowait = 0;

    if ((uint32)bn->flag & THRD_EXIT) {
        ereport(LOG, (errmsg("Thread pid(%lu), flag(%d) may be exited repeatedly", bn->pid, bn->flag)));
    }

    if (is_terminate) {
        nowait = 1;
        bn->flag = ((uint32)(bn->flag)) | THRD_SIGTERM;
    }

    if (gs_signal_send(bn->pid, signal, nowait) == 0) {
        return;
    }

    ereport(WARNING,
        (errmsg("kill(pid %ld, signal %d) failed,"
                " thread name \"%s\", pmState %d, Demotion %d, Shutdown %d, backend type %d",
            (long)bn->pid,
            signal,
            GetProcName(bn->pid),
            pmState,
            g_instance.demotion,
            Shutdown,
            be_mode)));
}
|
|
|
|
/*
|
|
* Send a signal to a postmaster child process
|
|
*
|
|
* On systems that have setsid(), each child process sets itself up as a
|
|
* process group leader. For signals that are generally interpreted in the
|
|
* appropriate fashion, we signal the entire process group not just the
|
|
* direct child process. This allows us to, for example, SIGQUIT a blocked
|
|
* archive_recovery script, or SIGINT a script being run by a backend via
|
|
* system().
|
|
*
|
|
* There is a race condition for recently-forked children: they might not
|
|
* have executed setsid() yet. So we signal the child directly as well as
|
|
* the group. We assume such a child will handle the signal before trying
|
|
* to spawn any grandchild processes. We also assume that signaling the
|
|
* child twice will not cause any problems.
|
|
*
|
|
* *be_mode* is meaning fully when this is a backend pid.
|
|
*/
|
|
void signal_child(ThreadId pid, int signal, int be_mode)
|
|
{
|
|
int err = 0;
|
|
if (0 != (err = gs_signal_send(pid, signal))) {
|
|
ereport(WARNING,
|
|
(errmsg("kill(pid %ld, signal %d) failed: \"%s\","
|
|
" thread name \"%s\", pmState %d, Demotion %d, Shutdown %d, backend type %d",
|
|
(long)pid,
|
|
signal,
|
|
gs_strerror(err),
|
|
GetProcName(pid),
|
|
pmState,
|
|
g_instance.demotion,
|
|
Shutdown,
|
|
be_mode)));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Send a signal to the targeted children (but NOT special children;
|
|
* dead_end children are never signaled, either).
|
|
*/
|
|
static bool SignalSomeChildren(int signal, int target)
|
|
{
|
|
Dlelem* curr = NULL;
|
|
bool signaled = false;
|
|
|
|
for (curr = DLGetHead(g_instance.backend_list); curr; curr = DLGetSucc(curr)) {
|
|
Backend* bp = (Backend*)DLE_VAL(curr);
|
|
int child = BACKEND_TYPE_ALL;
|
|
|
|
/*
|
|
* we want to know the type of this backend thread, so
|
|
* in debug mode IF judgement is skipped. but in release
|
|
* mode, turn it on.
|
|
*/
|
|
#ifndef USE_ASSERT_CHECKING
|
|
/*
|
|
* Since target == BACKEND_TYPE_ALL is the most common case, we test
|
|
* it first and avoid touching shared memory for every child.
|
|
*/
|
|
if (target != BACKEND_TYPE_ALL)
|
|
#endif /* USE_ASSERT_CHECKING */
|
|
{
|
|
if (bp->is_autovacuum)
|
|
child = BACKEND_TYPE_AUTOVAC;
|
|
else if (IsPostmasterChildWalSender(bp->child_slot))
|
|
child = BACKEND_TYPE_WALSND;
|
|
else if (IsPostmasterChildDataSender(bp->child_slot))
|
|
child = BACKEND_TYPE_DATASND;
|
|
else if (IsPostmasterChildTempBackend(bp->child_slot))
|
|
child = BACKEND_TYPE_TEMPBACKEND;
|
|
else
|
|
child = BACKEND_TYPE_NORMAL;
|
|
|
|
if (!(target & child))
|
|
continue;
|
|
}
|
|
|
|
ereport(DEBUG4, (errmsg_internal("sending signal %d to process %lu", signal, bp->pid)));
|
|
|
|
signalBackend(bp, signal, child);
|
|
signaled = true;
|
|
}
|
|
|
|
return signaled;
|
|
}
|
|
|
|
bool SignalCancelAllBackEnd()
|
|
{
|
|
return SignalSomeChildren(SIGINT, BACKEND_TYPE_NORMAL);
|
|
}
|
|
|
|
static int IsHaWhiteListIp(const Port* port)
|
|
{
|
|
const int maxIpAddressLen = 64;
|
|
char ipstr[maxIpAddressLen] = {'\0'};
|
|
sockaddr_in* pSin = (sockaddr_in*)&port->raddr.addr;
|
|
|
|
inet_ntop(AF_INET, &pSin->sin_addr, ipstr, maxIpAddressLen - 1);
|
|
if (IS_PGXC_COORDINATOR &&
|
|
!is_cluster_internal_IP(*(struct sockaddr*)&port->raddr.addr) &&
|
|
!(strcmp(ipstr, "0.0.0.0") == 0)) {
|
|
ereport(WARNING, (errmsg("the ha listen ip and port is not for remote client from [%s].", ipstr)));
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
return STATUS_OK;
|
|
}
|
|
|
|
/*
|
|
* BackendStartup -- start backend process
|
|
*
|
|
* returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
|
|
*
|
|
* Note: if you change this code, also consider StartAutovacuumWorker.
|
|
*/
|
|
static int BackendStartup(Port* port, bool isConnectHaPort)
|
|
{
|
|
Backend* bn = NULL; /* for backend cleanup */
|
|
ThreadId pid;
|
|
|
|
if (isConnectHaPort && IsHaWhiteListIp(port) == STATUS_ERROR) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
int childSlot = AssignPostmasterChildSlot();
|
|
|
|
if (childSlot == -1)
|
|
return STATUS_ERROR;
|
|
|
|
bn = AssignFreeBackEnd(childSlot);
|
|
|
|
GenerateCancelKey(false);
|
|
/* Pass down canAcceptConnections state */
|
|
port->canAcceptConnections = canAcceptConnections(false);
|
|
if (port->canAcceptConnections != CAC_OK && port->canAcceptConnections != CAC_WAITBACKUP) {
|
|
(void)ReleasePostmasterChildSlot(childSlot);
|
|
if (port->canAcceptConnections == CAC_TOOMANY)
|
|
ereport(WARNING, (errmsg("could not fork new process for connection due to too many connections")));
|
|
else
|
|
ereport(
|
|
WARNING, (errmsg("could not fork new process for connection due to PMstate %s", GetPMState(pmState))));
|
|
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
// We can't fork thread when PM is shutting down to avoid PM deal with SIGCHLD in busy
|
|
// One scenario is switchover DN and many client connect to server
|
|
//
|
|
if (CAC_SHUTDOWN == port->canAcceptConnections) {
|
|
(void)ReleasePostmasterChildSlot(childSlot);
|
|
ereport(WARNING, (errmsg("could not fork new process for connection:postmaster is shutting down")));
|
|
report_fork_failure_to_client(port, 0, "could not fork new process for connection:postmaster is shutting down");
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
/*
|
|
* Unless it's a dead_end child, assign it a child slot number
|
|
*/
|
|
bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = childSlot;
|
|
|
|
pid = initialize_worker_thread(WORKER, port);
|
|
t_thrd.proc_cxt.MyPMChildSlot = 0;
|
|
if (pid == (ThreadId)-1) {
|
|
/* in parent, fork failed */
|
|
int save_errno = errno;
|
|
(void)ReleasePostmasterChildSlot(childSlot);
|
|
|
|
errno = save_errno;
|
|
ereport(WARNING, (errmsg("could not fork new process for connection socket %d : %m", (int)port->sock)));
|
|
report_fork_failure_to_client(port, save_errno);
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
/* in parent, successful fork */
|
|
ereport(DEBUG2, (errmsg_internal("forked new backend, pid=%lu socket=%d", pid, (int)port->sock)));
|
|
|
|
/*
|
|
* Everything's been successful, it's safe to add this backend to our list
|
|
* of backends.
|
|
*/
|
|
bn->pid = pid;
|
|
bn->is_autovacuum = false;
|
|
bn->cancel_key = t_thrd.proc_cxt.MyCancelKey;
|
|
DLInitElem(&bn->elem, bn);
|
|
DLAddHead(g_instance.backend_list, &bn->elem);
|
|
|
|
return STATUS_OK;
|
|
}
|
|
|
|
/*
|
|
* Try to report backend fork() failure to client before we close the
|
|
* connection. Since we do not care to risk blocking the postmaster on
|
|
* this connection, we set the connection to non-blocking and try only once.
|
|
*
|
|
* This is grungy special-purpose code; we cannot use backend libpq since
|
|
* it's not up and running.
|
|
*/
|
|
static void report_fork_failure_to_client(Port* port, int errnum, const char* specialErrorInfo)
|
|
{
|
|
char buffer[1000] = {0};
|
|
int rc;
|
|
errno_t ret;
|
|
|
|
if (specialErrorInfo == NULL) {
|
|
/* Format the error message packet (always V2 protocol) */
|
|
ret = sprintf_s(
|
|
buffer, sizeof(buffer), "E%s%s\n", _("could not fork new process for connection: "), gs_strerror(errnum));
|
|
securec_check_ss(ret, "\0", "\0");
|
|
} else {
|
|
size_t len = strlen(specialErrorInfo);
|
|
if (len >= sizeof(buffer)) {
|
|
rc = snprintf_truncated_s(buffer, sizeof(buffer), "%s", specialErrorInfo);
|
|
securec_check_ss(rc, "\0", "\0");
|
|
} else {
|
|
rc = snprintf_truncated_s(buffer, sizeof(buffer), "%s", specialErrorInfo);
|
|
securec_check_ss(rc, "\0", "\0");
|
|
buffer[len] = 0;
|
|
}
|
|
}
|
|
if (port->is_logic_conn) {
|
|
rc = gs_send(&port->gs_sock, buffer, strlen(buffer) + 1, -1, TRUE);
|
|
} else {
|
|
/* Set port to non-blocking. Don't do send() if this fails */
|
|
if (!pg_set_noblock(port->sock))
|
|
return;
|
|
|
|
/* We'll retry after EINTR, but ignore all other failures */
|
|
do {
|
|
rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
|
|
} while (rc < 0 && errno == EINTR);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* BackendInitialize -- initialize an interactive (postmaster-child)
|
|
* backend process, and collect the client's startup packet.
|
|
*
|
|
* returns: nothing. Will not return at all if there's any failure.
|
|
*
|
|
* Note: this code does not depend on having any access to shared memory.
|
|
* In the EXEC_BACKEND case, we are physically attached to shared memory
|
|
* but have not yet set up most of our local pointers to shmem structures.
|
|
*/
|
|
static void BackendInitialize(Port* port)
|
|
{
|
|
PreClientAuthorize();
|
|
|
|
/* save thread start time */
|
|
t_thrd.proc_cxt.MyStartTime = timestamptz_to_time_t(GetCurrentTimestamp());
|
|
|
|
/*
|
|
* Initialize libpq to talk to client and enable reporting of ereport errors
|
|
* to the client. Must do this now because authentication uses libpq to
|
|
* send messages.
|
|
*/
|
|
pq_init();
|
|
|
|
/* now safe to ereport to client */
|
|
t_thrd.postgres_cxt.whereToSendOutput = DestRemote;
|
|
|
|
/*
|
|
* We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
|
|
* timeout while trying to collect the startup packet. Otherwise the
|
|
* postmaster cannot shutdown the database FAST or IMMED cleanly if a
|
|
* buggy client fails to send the packet promptly.
|
|
*/
|
|
(void)gspqsignal(SIGTERM, startup_die);
|
|
(void)gspqsignal(SIGQUIT, startup_die);
|
|
(void)gspqsignal(SIGALRM, startup_alarm);
|
|
|
|
/* Do the next initialization when we get a real connetion. */
|
|
if (IS_THREAD_POOL_WORKER)
|
|
return;
|
|
|
|
int status = ClientConnInitilize(port);
|
|
|
|
if (status == STATUS_EOF)
|
|
return;
|
|
else if (status == STATUS_ERROR)
|
|
proc_exit(0);
|
|
}
|
|
|
|
void PreClientAuthorize()
|
|
{
|
|
/*
|
|
* PreAuthDelay is a debugging aid for investigating problems in the
|
|
* authentication cycle: it can be set in postgresql.conf to allow time to
|
|
* attach to the newly-forked backend with a debugger. (See also
|
|
* PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
|
|
* is not honored until after authentication.)
|
|
*/
|
|
if (u_sess->attr.attr_security.PreAuthDelay > 0)
|
|
pg_usleep(u_sess->attr.attr_security.PreAuthDelay * 1000000L);
|
|
|
|
/* This flag will remain set until InitPostgres finishes authentication */
|
|
u_sess->ClientAuthInProgress = true; /* limit visibility of log messages */
|
|
}
|
|
|
|
/*
 * ClientConnInitilize -- per-connection setup up to and including the
 * startup packet.
 *
 * Installs the startup signal mask, stamps the session start time, resolves
 * the remote host, processes the startup packet and sets the ps display.
 *
 * returns:
 *   STATUS_EOF  when u_sess->stream_cxt.stop_mythread is set after the
 *               startup packet was processed (normal signal mask restored);
 *   the status of StartupPacketInitialize() when it is not STATUS_OK;
 *   STATUS_OK   otherwise, with the startup timer disabled and the normal
 *               signal mask restored.
 */
int ClientConnInitilize(Port* port)
{
    gs_signal_setmask(&t_thrd.libpq_cxt.StartupBlockSig, NULL);

    /* Save session start time. */
    port->SessionStartTime = GetCurrentTimestamp();

    RemoteHostInitilize(port);

    /*
     * Ready to begin client interaction.  A timer bounds how long we wait so
     * that a broken client can't hog a connection indefinitely.
     * PreAuthDelay and the host lookup above don't count against the limit.
     */
#ifdef ENABLE_MULTIPLE_NODES
    if (!enable_sig_alarm((u_sess->attr.attr_network.PoolerConnectTimeout - 1) * 1000, false)) {
        ereport(FATAL, (errmsg("could not set timer for startup packet timeout")));
    }
#endif

    const int status = StartupPacketInitialize(port);

    /*
     * Stop here if the thread was asked to stop; any error reporting is
     * expected to have been done while the startup packet was processed.
     */
    if (u_sess->stream_cxt.stop_mythread) {
        gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
        return STATUS_EOF;
    }

    if (status != STATUS_OK) {
        return status;
    }

    PsDisplayInitialize(port);

    /* Disable the timeout, and prevent SIGTERM/SIGQUIT again. */
    if (!disable_sig_alarm(false)) {
        ereport(FATAL, (errmsg("could not disable timer for startup packet timeout")));
    }

    gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);

    return STATUS_OK;
}
|
|
|
|
/*
 * RemoteHostInitilize -- resolve the peer's host and port and store them in
 * the Port structure for logging and status display.
 *
 * The first lookup honors log_hostname (name resolution allowed when it is
 * on).  If that lookup fails, a purely numeric lookup is tried, and a WARNING
 * is logged if this one fails too.  When Log_connections is on, a
 * "connection received" line is written.  The strings are copied into the
 * session's executor memory context group; remote_hostname is set only when
 * log_hostname is on.
 */
static void RemoteHostInitilize(Port* port)
{
    char remote_host[NI_MAXHOST];
    char remote_port[NI_MAXSERV];
    const int firstLookupFlags = (u_sess->attr.attr_common.log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV;

    /* set these to empty in case they are needed before we set them up */
    port->remote_host = "";
    port->remote_port = "";

    /* Start from empty strings so a failed lookup leaves well-defined data. */
    remote_host[0] = '\0';
    remote_port[0] = '\0';

    if (pg_getnameinfo_all(&port->raddr.addr,
            port->raddr.salen,
            remote_host,
            sizeof(remote_host),
            remote_port,
            sizeof(remote_port),
            firstLookupFlags) != 0) {
        /* Fall back to a numeric-only lookup. */
        int ret = pg_getnameinfo_all(&port->raddr.addr,
            port->raddr.salen,
            remote_host,
            sizeof(remote_host),
            remote_port,
            sizeof(remote_port),
            NI_NUMERICHOST | NI_NUMERICSERV);

        if (ret != 0) {
            ereport(WARNING, (errmsg_internal("pg_getnameinfo_all() failed: %s", gai_strerror(ret))));
        }
    }

    if (u_sess->attr.attr_storage.Log_connections) {
        if (remote_port[0]) {
            ereport(LOG, (errmsg("connection received: host=%s port=%s", remote_host, remote_port)));
        } else {
            ereport(LOG, (errmsg("connection received: host=%s", remote_host)));
        }
    }

    /* Keep durable copies of both strings in the port structure. */
    port->remote_host = MemoryContextStrdup(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR), remote_host);
    port->remote_port = MemoryContextStrdup(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR), remote_port);

    if (u_sess->attr.attr_common.log_hostname) {
        port->remote_hostname = port->remote_host;
    }
}
|
|
|
|
/*
 * StartupPacketInitialize -- receive and process the client's startup packet
 * (which might turn out to be a cancel request packet).
 *
 * SIGUSR2 is unblocked for the duration so that SIGALRM can be triggered if
 * ProcessStartupPacket runs into the timeout; the previous mask is restored
 * before returning.  On a datanode with CN-DN logic connections enabled,
 * ProcessStartupPacketForLogicConn is raised around the call so the signal
 * handlers know interruption is not allowed there.
 *
 * returns: the status of ProcessStartupPacket().  CHECK_FOR_PROCDIEPENDING()
 * is evaluated after the packet has been processed.
 */
static int StartupPacketInitialize(Port* port)
{
    sigset_t savedMask = gs_signal_unblock_sigusr2();
    int status;

    if (IS_PGXC_DATANODE && g_instance.attr.attr_storage.comm_cn_dn_logic_conn) {
        t_thrd.postmaster_cxt.ProcessStartupPacketForLogicConn = true;
    }

    status = ProcessStartupPacket(port, false);

    if (IS_PGXC_DATANODE && g_instance.attr.attr_storage.comm_cn_dn_logic_conn) {
        t_thrd.postmaster_cxt.ProcessStartupPacketForLogicConn = false;
    }

    CHECK_FOR_PROCDIEPENDING();

    /* recover the signal mask */
    gs_signal_recover_mask(savedMask);

    return status;
}
|
|
|
|
/*
 * PsDisplayInitialize -- set the process title shown by ps for this backend.
 *
 * The remote end is rendered as "host" or, when a port string is present,
 * "host(port)".  The activity field is "authentication" when
 * update_process_title is on, and empty otherwise.
 *
 * For a walsender the display has the form
 *
 *     postgres: wal sender process <user> <host> <activity>
 *
 * which is achieved by passing "wal sender process" as the user name and the
 * user name as the database name to init_ps_display().
 */
static void PsDisplayInitialize(Port* port)
{
    char remote_ps_data[NI_MAXHOST + NI_MAXSERV + 2];
    errno_t rc;

    if (port->remote_port[0] != '\0') {
        rc = snprintf_s(remote_ps_data,
            sizeof(remote_ps_data),
            sizeof(remote_ps_data) - 1,
            "%s(%s)",
            port->remote_host,
            port->remote_port);
    } else {
        rc = snprintf_s(remote_ps_data, sizeof(remote_ps_data), NI_MAXHOST, "%s", port->remote_host);
    }
    securec_check_ss_c(rc, "\0", "\0");

    /* User and database name are known by now; set the title as early as possible. */
    const char* activity = u_sess->attr.attr_common.update_process_title ? "authentication" : "";

    if (AM_WAL_SENDER) {
        init_ps_display("wal sender process", port->user_name, remote_ps_data, activity);
    } else {
        init_ps_display(port->user_name, port->database_name, remote_ps_data, activity);
    }
}
|
|
|
|
/*
 * PortInitialize -- prepare a Port structure for a newly started thread.
 *
 * port: structure to initialize.
 * arg:  thread start argument; when not NULL (initialization done from
 *       GaussDBThreadMain) the port is zeroed, its sockets are marked
 *       invalid, it is registered as the session's MyProcPort, and the saved
 *       backend variables are read into it.
 *
 * When GSS or SSPI support is compiled in, the GSS information area is
 * allocated regardless of 'arg'.
 */
void PortInitialize(Port* port, knl_thread_arg* arg)
{
    if (arg != NULL) {
        /* Start from an all-zero structure. */
        int rc = memset_s(port, sizeof(Port), 0, sizeof(Port));
        securec_check(rc, "\0", "\0");

        /*
         * Socket 0 may be closed if we do not use it, so the socket fields
         * must hold the invalid-socket markers rather than 0.
         */
        port->sock = PGINVALID_SOCKET;
        port->gs_sock = GS_INVALID_GSOCK;

        /* Save port etc. for ps status */
        u_sess->proc_cxt.MyProcPort = port;

        /* RPC workers read from the shared saved parameters, others from arg. */
        if (arg->role == RPC_WORKER) {
            read_backend_variables((char*)&backend_save_para, port);
        } else {
            read_backend_variables(arg->save_para, port);
        }

        /*
         * Stamp the start time here so that creation_time in pg_os_threads
         * is correct for thread pool workers and some background threads.
         */
        u_sess->proc_cxt.MyProcPort->SessionStartTime = GetCurrentTimestamp();
    }

    /*
     * Set up memory area for GSS information.  Mirrors the code in ConnCreate
     * for the non-exec case.
     */
#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
    port->gss = (pg_gssinfo*)MemoryContextAllocZero(
        SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR), 1 * sizeof(pg_gssinfo));

    if (!port->gss) {
        ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
    }
#endif
}
|
|
|
|
/*
 * CheckClientIp -- reject the connection if check_ip_whitelist() does not
 * accept the client address.
 *
 * On rejection libpq is initialized and output is redirected to the client
 * so that the FATAL "no pg_hba.conf entry" error reaches it.
 */
void CheckClientIp(Port* port)
{
    char ip[IP_LEN] = {'\0'};

    /* Nothing to do when the client address passes the check. */
    if (check_ip_whitelist(port, ip, IP_LEN)) {
        return;
    }

    pq_init();                                          /* initialize libpq to talk to client */
    t_thrd.postgres_cxt.whereToSendOutput = DestRemote; /* now safe to ereport to client */
    ereport(FATAL,
        (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
            errmsg("no pg_hba.conf entry for host \"%s\".", ip)));
}
|
|
|
|
/*
 * initRandomState -- give this thread its own random number sequence.
 *
 * The thread's copy of the postmaster random seed state is cleared, and the
 * random() sequence is restarted with a seed built from the thread's pid
 * XORed with the microsecond part of (stop_time - start_time).  This keeps
 * a backend from seeing the postmaster's random number generator state.
 */
void initRandomState(TimestampTz start_time, TimestampTz stop_time)
{
    long elapsedSecs;
    int elapsedUsecs;

    t_thrd.postmaster_cxt.random_seed = 0;
    t_thrd.postmaster_cxt.random_start_time.tv_usec = 0;

    /* slightly hacky way to get integer microseconds part of timestamptz */
    TimestampDifference(start_time, stop_time, &elapsedSecs, &elapsedUsecs);

    gs_srandom((unsigned int)(t_thrd.proc_cxt.MyProcPid ^ (unsigned int)elapsedUsecs));
}
|
|
|
|
/*
|
|
* BackendRun -- set up the backend's argument list and invoke PostgresMain()
|
|
*
|
|
* returns:
|
|
* Shouldn't return at all.
|
|
* If PostgresMain() fails, return status.
|
|
*/
|
|
static int BackendRun(Port* port)
|
|
{
|
|
char** av;
|
|
int maxac;
|
|
int ac;
|
|
long secs;
|
|
int usecs;
|
|
int i;
|
|
|
|
/* add process definer mode */
|
|
Reset_Pseudo_CurrentUserId();
|
|
|
|
/*
|
|
* Don't want backend to be able to see the postmaster random number
|
|
* generator state. We have to clobber the static random_seed *and* start
|
|
* a new random sequence in the random() library function.
|
|
*/
|
|
t_thrd.postmaster_cxt.random_seed = 0;
|
|
t_thrd.postmaster_cxt.random_start_time.tv_usec = 0;
|
|
/* slightly hacky way to get integer microseconds part of timestamptz */
|
|
TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
|
|
gs_srandom((unsigned int)(t_thrd.proc_cxt.MyProcPid ^ (unsigned int)usecs));
|
|
|
|
/*
|
|
* Now, build the argv vector that will be given to PostgresMain.
|
|
*
|
|
* The maximum possible number of commandline arguments that could come
|
|
* from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
|
|
* pg_split_opts().
|
|
*/
|
|
maxac = 2; /* for fixed args supplied below */
|
|
maxac += (strlen(g_instance.ExtraOptions) + 1) / 2;
|
|
|
|
av = (char**)MemoryContextAlloc(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR),
|
|
maxac * sizeof(char*));
|
|
ac = 0;
|
|
|
|
av[ac++] = "gaussdb";
|
|
|
|
/*
|
|
* Pass any backend switches specified with -o on the postmaster's own
|
|
* command line. We assume these are secure. (It's OK to mangle
|
|
* ExtraOptions now, since we're safely inside a subprocess.)
|
|
*/
|
|
pg_split_opts(av, &ac, g_instance.ExtraOptions);
|
|
|
|
av[ac] = NULL;
|
|
|
|
Assert(ac < maxac);
|
|
|
|
/*
|
|
* Debug: print arguments being passed to backend
|
|
*/
|
|
ereport(DEBUG3, (errmsg_internal("%s child[%d]: starting with (", progname, (int)gs_thread_self())));
|
|
|
|
for (i = 0; i < ac; ++i)
|
|
ereport(DEBUG3, (errmsg_internal("\t%s", av[i])));
|
|
|
|
ereport(DEBUG3, (errmsg_internal(")")));
|
|
|
|
/*
|
|
* Make sure we aren't in t_thrd.mem_cxt.postmaster_mem_cxt anymore. (We can't delete it
|
|
* just yet, though, because InitPostgres will need the HBA data.)
|
|
*/
|
|
MemoryContextSwitchTo(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT));
|
|
|
|
return PostgresMain(ac, av, port->database_name, port->user_name);
|
|
}
|
|
|
|
#ifdef ENABLE_LLT
/* Coverage-data dump routine with C linkage; called from ExitPostmaster(). */
extern "C" void HLLT_Coverage_SaveCoverageData();
#endif
|
|
|
|
/*
|
|
* ExitPostmaster -- cleanup
|
|
*
|
|
* Do NOT call exit() directly --- always go through here!
|
|
*/
|
|
void ExitPostmaster(int status)
|
|
{
|
|
/* should cleanup shared memory and kill all backends */
|
|
|
|
/*
|
|
* Not sure of the semantics here. When the Postmaster dies, should the
|
|
* backends all be killed? probably not.
|
|
*
|
|
* MUST -- vadim 05-10-1999
|
|
*/
|
|
|
|
CloseGaussPidDir();
|
|
|
|
obs_deinitialize();
|
|
|
|
/* when exiting the postmaster process, destroy the hash table */
|
|
if (g_instance.comm_cxt.usedDnSpace != NULL) {
|
|
hash_destroy(g_instance.comm_cxt.usedDnSpace);
|
|
}
|
|
|
|
if (g_instance.policy_cxt.account_table != NULL) {
|
|
hash_destroy(g_instance.policy_cxt.account_table);
|
|
}
|
|
|
|
#ifdef ENABLE_MOT
|
|
TermMOT(); /* shutdown memory engine before codegen is destroyed */
|
|
#endif
|
|
|
|
#ifdef ENABLE_LLVM_COMPILE
|
|
CodeGenProcessTearDown();
|
|
#endif
|
|
|
|
/* Save llt data to disk before postmaster exit */
|
|
#ifdef ENABLE_LLT
|
|
HLLT_Coverage_SaveCoverageData();
|
|
#endif
|
|
|
|
// flush stdout buffer before _exit
|
|
//
|
|
fflush(stdout);
|
|
|
|
LogCtlLastFlushBeforePMExit();
|
|
|
|
proc_exit(status);
|
|
}
|
|
|
|
static void handle_recovery_started()
|
|
{
|
|
if (pmState == PM_STARTUP && g_instance.status == NoShutdown && !dummyStandbyMode) {
|
|
/* WAL redo has started. We're out of reinitialization. */
|
|
g_instance.fatal_error = false;
|
|
g_instance.demotion = NoDemote;
|
|
|
|
/*
|
|
* Crank up the background tasks. It doesn't matter if this fails,
|
|
* we'll just try again later.
|
|
*/
|
|
Assert(g_instance.pid_cxt.CheckpointerPID == 0);
|
|
g_instance.pid_cxt.CheckpointerPID = initialize_util_thread(CHECKPOINT_THREAD);
|
|
Assert(g_instance.pid_cxt.BgWriterPID == 0);
|
|
if (!g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
g_instance.pid_cxt.BgWriterPID = initialize_util_thread(BGWRITER);
|
|
}
|
|
|
|
if (g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
for (int i = 0; i < g_instance.attr.attr_storage.pagewriter_thread_num; i++) {
|
|
Assert(g_instance.pid_cxt.PageWriterPID[i] == 0);
|
|
g_instance.pid_cxt.PageWriterPID[i] = initialize_util_thread(PAGEWRITER_THREAD);
|
|
}
|
|
int thread_num = g_instance.attr.attr_storage.bgwriter_thread_num;
|
|
thread_num = thread_num > 0 ? thread_num : 1;
|
|
for (int i = 0; i < thread_num; i++) {
|
|
Assert(g_instance.pid_cxt.CkptBgWriterPID[i] == 0);
|
|
g_instance.pid_cxt.CkptBgWriterPID[i] = initialize_util_thread(BGWRITER);
|
|
}
|
|
}
|
|
Assert(g_instance.pid_cxt.CBMWriterPID == 0);
|
|
if (u_sess->attr.attr_storage.enable_cbm_tracking) {
|
|
g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER);
|
|
}
|
|
Assert(g_instance.pid_cxt.RemoteServicePID == 0);
|
|
if (IS_PGXC_DATANODE && t_thrd.postmaster_cxt.HaShmData->current_mode != NORMAL_MODE &&
|
|
!IS_DN_WITHOUT_STANDBYS_MODE() && IsRemoteReadModeOn())
|
|
g_instance.pid_cxt.RemoteServicePID = initialize_util_thread(RPC_SERVICE);
|
|
pmState = PM_RECOVERY;
|
|
}
|
|
}
|
|
|
|
static void handle_begin_hot_standby()
|
|
{
|
|
if ((dummyStandbyMode || pmState == PM_RECOVERY) && g_instance.status == NoShutdown) {
|
|
/*
|
|
* Likewise, start other special children as needed.
|
|
*/
|
|
Assert(g_instance.pid_cxt.PgStatPID == 0);
|
|
if (!dummyStandbyMode)
|
|
g_instance.pid_cxt.PgStatPID = pgstat_start();
|
|
PMUpdateDBState(NORMAL_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file: db state(NORMAL_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
|
|
ereport(LOG, (errmsg("database system is ready to accept read only connections")));
|
|
|
|
pmState = PM_HOT_STANDBY;
|
|
}
|
|
}
|
|
|
|
static void handle_promote_signal()
|
|
{
|
|
if (g_instance.pid_cxt.StartupPID != 0 &&
|
|
(pmState == PM_STARTUP ||
|
|
pmState == PM_RECOVERY ||
|
|
pmState == PM_HOT_STANDBY ||
|
|
pmState == PM_WAIT_READONLY)) {
|
|
gs_lock_test_and_set_64(&g_instance.stat_cxt.NodeStatResetTime, GetCurrentTimestamp());
|
|
if (GetHaShmemMode() != STANDBY_MODE) {
|
|
ereport(LOG, (errmsg("Instance can't be promoted in none standby mode.")));
|
|
} else {
|
|
/* Database Security: Support database audit */
|
|
if (t_thrd.walreceiverfuncs_cxt.WalRcv &&
|
|
NODESTATE_STANDBY_PROMOTING == t_thrd.walreceiverfuncs_cxt.WalRcv->node_state) {
|
|
t_thrd.postmaster_cxt.audit_standby_switchover = true;
|
|
/* Tell startup process to finish recovery */
|
|
SendNotifySignal(NOTIFY_SWITCHOVER, g_instance.pid_cxt.StartupPID);
|
|
} else {
|
|
if (t_thrd.walreceiverfuncs_cxt.WalRcv)
|
|
t_thrd.walreceiverfuncs_cxt.WalRcv->node_state = NODESTATE_STANDBY_FAILOVER_PROMOTING;
|
|
t_thrd.postmaster_cxt.audit_primary_failover = true;
|
|
/* Tell startup process to finish recovery */
|
|
ereport(LOG, (errmsg("Instance to do failover.")));
|
|
SendNotifySignal(NOTIFY_FAILOVER, g_instance.pid_cxt.StartupPID);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void handle_primary_signal(volatile HaShmemData* hashmdata)
|
|
{
|
|
if (g_instance.pid_cxt.StartupPID != 0 &&
|
|
hashmdata->current_mode == PENDING_MODE &&
|
|
(pmState == PM_STARTUP ||
|
|
pmState == PM_RECOVERY ||
|
|
pmState == PM_HOT_STANDBY ||
|
|
pmState == PM_WAIT_READONLY)) {
|
|
/* just notify the startup process, does not set HAshmemory here. */
|
|
SendNotifySignal(NOTIFY_PRIMARY, g_instance.pid_cxt.StartupPID);
|
|
}
|
|
|
|
if (hashmdata->current_mode == NORMAL_MODE) {
|
|
if (g_instance.attr.attr_storage.max_wal_senders < 2 ||
|
|
g_instance.attr.attr_storage.wal_level != WAL_LEVEL_HOT_STANDBY ||
|
|
g_instance.attr.attr_storage.EnableHotStandby == false)
|
|
ereport(WARNING, (errmsg("when notifying normal mode to primary mode, \
|
|
wal_level requires \"hot_standby\", and hot_standby requires \"on\", \
|
|
and max_wal_senders requires at least 2.")));
|
|
else {
|
|
hashmdata->current_mode = PRIMARY_MODE;
|
|
UpdateOptsFile();
|
|
}
|
|
}
|
|
|
|
PMUpdateDBState(NORMAL_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file: db state(NORMAL_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
}
|
|
|
|
static void handle_standby_signal(volatile HaShmemData* hashmdata)
|
|
{
|
|
if (g_instance.pid_cxt.StartupPID != 0 &&
|
|
(pmState == PM_STARTUP ||
|
|
pmState == PM_RECOVERY ||
|
|
pmState == PM_HOT_STANDBY ||
|
|
pmState == PM_WAIT_READONLY)) {
|
|
hashmdata->current_mode = STANDBY_MODE;
|
|
PMUpdateDBState(NEEDREPAIR_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file: db state(NEEDREPAIR_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
/*
|
|
* wakeup startup process from sleep by signal, cause we are
|
|
* in standby mode, the signal has no specific affect.
|
|
*/
|
|
SendNotifySignal(NOTIFY_STANDBY, g_instance.pid_cxt.StartupPID);
|
|
UpdateOptsFile();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* sigusr1_handler - handle signal conditions from child processes
|
|
*/
|
|
static void sigusr1_handler(SIGNAL_ARGS)
|
|
{
|
|
int mode = 0;
|
|
int save_errno = errno;
|
|
volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
|
|
/*
|
|
* RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
|
|
* unexpected states. If the startup process quickly starts up, completes
|
|
* recovery, exits, we might process the death of the startup process
|
|
* first. We don't want to go back to recovery in that case.
|
|
*/
|
|
if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED)) {
|
|
handle_recovery_started();
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY)) {
|
|
handle_begin_hot_standby();
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_LOCAL_RECOVERY_DONE) &&
|
|
(pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
|
|
g_instance.status == NoShutdown) {
|
|
PMUpdateDBStateLSN();
|
|
ereport(LOG, (errmsg("set lsn after recovery done in gaussdb state file")));
|
|
}
|
|
|
|
if (g_instance.pid_cxt.WalWriterAuxiliaryPID == 0 && t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE &&
|
|
(pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)) {
|
|
g_instance.pid_cxt.WalWriterAuxiliaryPID = initialize_util_thread(WALWRITERAUXILIARY);
|
|
ereport(LOG,
|
|
(errmsg("sigusr1_handler create WalWriterAuxiliary(%lu) after local recovery is done. pmState:%u, ServerMode:%u",
|
|
g_instance.pid_cxt.WalWriterAuxiliaryPID, pmState, t_thrd.postmaster_cxt.HaShmData->current_mode)));
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_UPDATE_WAITING)) {
|
|
PMUpdateDBState(WAITING_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("set gaussdb state file: db state(WAITING_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
}
|
|
if (CheckPostmasterSignal(PMSIGNAL_UPDATE_PROMOTING)) {
|
|
PMUpdateDBState(PROMOTING_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("set gaussdb state file: db state(PROMOTING_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
|
|
/* promote cascade standby */
|
|
if (IsCascadeStandby()) {
|
|
t_thrd.xlog_cxt.is_cascade_standby = false;
|
|
SetHaShmemData();
|
|
}
|
|
}
|
|
if (CheckPostmasterSignal(PMSIGNAL_UPDATE_HAREBUILD_REASON) &&
|
|
(pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
|
|
g_instance.status == NoShutdown) {
|
|
PMUpdateDBStateHaRebuildReason();
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) && g_instance.pid_cxt.PgArchPID != 0) {
|
|
/*
|
|
* Send SIGUSR1 to archiver process, to wake it up and begin archiving
|
|
* next transaction log file.
|
|
*/
|
|
signal_child(g_instance.pid_cxt.PgArchPID, SIGUSR1);
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE) && g_instance.pid_cxt.SysLoggerPID != 0) {
|
|
/* Tell syslogger to rotate logfile */
|
|
signal_child(g_instance.pid_cxt.SysLoggerPID, SIGUSR1);
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) && g_instance.status == NoShutdown) {
|
|
/*
|
|
* Start one iteration of the autovacuum daemon, even if autovacuuming
|
|
* is nominally not enabled. This is so we can have an active defense
|
|
* excessive clog. We set a flag for the main loop to do it rather than
|
|
* trying to do it here --- this is because the autovac process itself
|
|
* may send the signal, and we want to handle that by launching
|
|
* another iteration as soon as the current one completes.
|
|
*/
|
|
t_thrd.postmaster_cxt.start_autovac_launcher = true;
|
|
}
|
|
|
|
/* should not start a worker in shutdown or demotion procedure */
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) && g_instance.status == NoShutdown &&
|
|
g_instance.demotion == NoDemote) {
|
|
/* The autovacuum launcher wants us to start a worker process. */
|
|
StartAutovacuumWorker();
|
|
}
|
|
|
|
/* should not start a worker in shutdown or demotion procedure */
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_CLEAN_STATEMENT) && g_instance.status == NoShutdown &&
|
|
g_instance.demotion == NoDemote) {
|
|
/* The statement flush thread wants us to start a clean statement worker process. */
|
|
StartCleanStatement();
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_JOB_SCHEDULER)) {
|
|
t_thrd.postmaster_cxt.start_job_scheduler = true;
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_JOB_WORKER) &&
|
|
g_instance.status == NoShutdown && g_instance.demotion == NoDemote) {
|
|
/* the parent process, return 0 if the fork failed, return the PID if fork succeed. */
|
|
StartPgjobWorker();
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER) && g_instance.pid_cxt.WalReceiverPID == 0 &&
|
|
(pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
|
|
g_instance.status == NoShutdown) {
|
|
if (g_instance.pid_cxt.WalRcvWriterPID == 0) {
|
|
g_instance.pid_cxt.WalRcvWriterPID = initialize_util_thread(WALRECWRITE);
|
|
SetWalRcvWriterPID(g_instance.pid_cxt.WalRcvWriterPID);
|
|
}
|
|
|
|
/* Startup Process wants us to start the walreceiver process. */
|
|
g_instance.pid_cxt.WalReceiverPID = initialize_util_thread(WALRECEIVER);
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_DATARECEIVER) && g_instance.pid_cxt.DataReceiverPID == 0 &&
|
|
(pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
|
|
g_instance.status == NoShutdown) {
|
|
if (g_instance.pid_cxt.DataRcvWriterPID == 0) {
|
|
g_instance.pid_cxt.DataRcvWriterPID = initialize_util_thread(DATARECWRITER);
|
|
SetDataRcvWriterPID(g_instance.pid_cxt.DataRcvWriterPID);
|
|
}
|
|
|
|
/* Startup Process wants us to start the datareceiver process. */
|
|
g_instance.pid_cxt.DataReceiverPID = initialize_util_thread(DATARECIVER);
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_CATCHUP) && g_instance.pid_cxt.CatchupPID == 0 &&
|
|
g_instance.status == NoShutdown) {
|
|
/* The datasender wants us to start the catch-up process. */
|
|
g_instance.pid_cxt.CatchupPID = StartCatchupWorker();
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE) &&
|
|
(pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_BACKENDS)) {
|
|
/* Advance postmaster's state machine */
|
|
PostmasterStateMachine();
|
|
}
|
|
|
|
if ((mode = CheckSwitchoverSignal()) != 0 && WalRcvIsOnline() && DataRcvIsOnline() &&
|
|
(pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY)) {
|
|
ereport(LOG, (errmsg("to do switchover")));
|
|
/* Tell walreceiver process to start switchover */
|
|
t_thrd.walreceiverfuncs_cxt.WalRcv->node_state = (ClusterNodeState)mode;
|
|
signal_child(g_instance.pid_cxt.WalReceiverPID, SIGUSR1);
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_DEMOTE_PRIMARY)) {
|
|
gs_lock_test_and_set_64(&g_instance.stat_cxt.NodeStatResetTime, GetCurrentTimestamp());
|
|
ProcessDemoteRequest();
|
|
}
|
|
|
|
if (CheckFinishRedoSignal() && g_instance.comm_cxt.localinfo_cxt.is_finish_redo != 1) {
|
|
pg_atomic_write_u32(&(g_instance.comm_cxt.localinfo_cxt.is_finish_redo), 1);
|
|
}
|
|
if (CheckPromoteSignal()) {
|
|
handle_promote_signal();
|
|
}
|
|
|
|
/* If it is primary signal, then set HaShmData and send sigusr2 to startup process */
|
|
if (CheckPrimarySignal()) {
|
|
handle_primary_signal(hashmdata);
|
|
}
|
|
|
|
if (CheckStandbySignal()) {
|
|
handle_standby_signal(hashmdata);
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_UPDATE_NORMAL)) {
|
|
PMUpdateDBState(NORMAL_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file: db state(NORMAL_STATE), server mode(%s)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_ROLLBACK_STANDBY_PROMOTE)) {
|
|
PMUpdateDBState(NORMAL_STATE, get_cur_mode(), get_cur_repl_num());
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file:"
|
|
"db state(NORMAL_STATE), server mode(%s), reason(switchover failure)",
|
|
wal_get_role_string(get_cur_mode()))));
|
|
}
|
|
|
|
if (CheckPostmasterSignal(PMSIGNAL_START_THREADPOOL_WORKER) &&
|
|
g_instance.status == NoShutdown) {
|
|
if (g_threadPoolControler != NULL) {
|
|
g_threadPoolControler->AddWorkerIfNecessary();
|
|
}
|
|
}
|
|
|
|
#if defined (ENABLE_MULTIPLE_NODES) || defined (ENABLE_PRIVATEGAUSS)
|
|
check_and_process_hotpatch();
|
|
#endif
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
|
|
|
errno = save_errno;
|
|
}
|
|
|
|
/*
|
|
* Timeout or shutdown signal from postmaster while processing startup packet.
|
|
* Cleanup and exit(1).
|
|
*
|
|
* XXX: possible future improvement: try to send a message indicating
|
|
* why we are disconnecting. Problem is to be sure we don't block while
|
|
* doing so, nor mess up SSL initialization. In practice, if the client
|
|
* has wedged here, it probably couldn't do anything with the message anyway.
|
|
*/
|
|
void startup_die(SIGNAL_ARGS)
|
|
{
|
|
/*
|
|
* when process startup packet for logic conn
|
|
* gs_wait_poll will hold lock, so proc_exit here
|
|
* will occur dead lock. gs_r_cancel will signal gs_wait_poll
|
|
* without lock and then proc_exit when
|
|
* ProcessStartupPacketForLogicConn is false
|
|
*/
|
|
if (t_thrd.postmaster_cxt.ProcessStartupPacketForLogicConn) {
|
|
t_thrd.int_cxt.ProcDiePending = true;
|
|
gs_r_cancel();
|
|
} else {
|
|
t_thrd.int_cxt.ProcDiePending = true;
|
|
}
|
|
}
|
|
|
|
/* copy from startup_die, and set cancel_from_timeout flag */
|
|
static void
|
|
startup_alarm(SIGNAL_ARGS)
|
|
{
|
|
/*
|
|
* when process startup packet for logic conn
|
|
* gs_wait_poll will hold lock, so proc_exit here
|
|
* will occur dead lock. gs_r_cancel will signal gs_wait_poll
|
|
* without lock and then proc_exit when
|
|
* ProcessStartupPacketForLogicConn is false
|
|
*/
|
|
t_thrd.storage_cxt.cancel_from_timeout = true;
|
|
force_backtrace_messages = true;
|
|
if (t_thrd.postmaster_cxt.ProcessStartupPacketForLogicConn) {
|
|
t_thrd.int_cxt.ProcDiePending = true;
|
|
gs_r_cancel();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Dummy signal handler
|
|
*
|
|
* We use this for signals that we don't actually use in the postmaster,
|
|
* but we do use in backends. If we were to SIG_IGN such signals in the
|
|
* postmaster, then a newly started backend might drop a signal that arrives
|
|
* before it's able to reconfigure its signal processing. (See notes in
|
|
* tcop/postgres.c.)
|
|
*/
|
|
static void dummy_handler(SIGNAL_ARGS)
|
|
{}
|
|
|
|
/*
|
|
* PostmasterRandom
|
|
*/
|
|
long PostmasterRandom(void)
|
|
{
|
|
/*
|
|
* Select a random seed at the time of first receiving a request.
|
|
*/
|
|
if (t_thrd.postmaster_cxt.random_seed == 0) {
|
|
do {
|
|
struct timeval random_stop_time;
|
|
|
|
gettimeofday(&random_stop_time, NULL);
|
|
|
|
/*
|
|
* We are not sure how much precision is in tv_usec, so we swap
|
|
* the high and low 16 bits of 'random_stop_time' and XOR them
|
|
* with 'random_start_time'. On the off chance that the result is
|
|
* 0, we loop until it isn't.
|
|
*/
|
|
t_thrd.postmaster_cxt.random_seed =
|
|
(unsigned long)t_thrd.postmaster_cxt.random_start_time.tv_usec ^
|
|
(((unsigned long)random_stop_time.tv_usec << 16) |
|
|
(((unsigned long)random_stop_time.tv_usec >> 16) & 0xffff));
|
|
} while (t_thrd.postmaster_cxt.random_seed == 0);
|
|
|
|
gs_srandom(t_thrd.postmaster_cxt.random_seed);
|
|
}
|
|
|
|
return gs_random();
|
|
}
|
|
|
|
/*
|
|
* Count up number of child processes of specified types (dead_end chidren
|
|
* are always excluded).
|
|
*/
|
|
static int CountChildren(int target)
|
|
{
|
|
Dlelem* curr = NULL;
|
|
int cnt = 0;
|
|
|
|
for (curr = DLGetHead(g_instance.backend_list); curr; curr = DLGetSucc(curr)) {
|
|
Backend* bp = (Backend*)DLE_VAL(curr);
|
|
|
|
/*
|
|
* Since target == BACKEND_TYPE_ALL is the most common case, we test
|
|
* it first and avoid touching shared memory for every child.
|
|
*/
|
|
if (target != BACKEND_TYPE_ALL) {
|
|
int child;
|
|
|
|
if (bp->is_autovacuum)
|
|
child = BACKEND_TYPE_AUTOVAC;
|
|
else if (IsPostmasterChildWalSender(bp->child_slot))
|
|
child = BACKEND_TYPE_WALSND;
|
|
else if (IsPostmasterChildDataSender(bp->child_slot))
|
|
child = BACKEND_TYPE_DATASND;
|
|
else
|
|
child = BACKEND_TYPE_NORMAL;
|
|
|
|
if (!((unsigned int)target & (unsigned int)child))
|
|
continue;
|
|
}
|
|
|
|
cnt++;
|
|
}
|
|
|
|
return cnt;
|
|
}
|
|
|
|
/*
|
|
* StartAutovacuumWorker
|
|
* Start an autovac worker process.
|
|
*
|
|
* This function is here because it enters the resulting PID into the
|
|
* postmaster's private backends list.
|
|
*
|
|
* NB -- this code very roughly matches BackendStartup.
|
|
*/
|
|
static void StartAutovacuumWorker(void)
|
|
{
|
|
Backend* bn = NULL;
|
|
|
|
/*
|
|
* If not in condition to run a process, don't try, but handle it like a
|
|
* fork failure. This does not normally happen, since the signal is only
|
|
* supposed to be sent by autovacuum launcher when it's OK to do it, but
|
|
* we have to check to avoid race-condition problems during DB state
|
|
* changes.
|
|
*/
|
|
if (canAcceptConnections(false) == CAC_OK) {
|
|
int slot = AssignPostmasterChildSlot();
|
|
|
|
if (slot == -1) {
|
|
return;
|
|
}
|
|
|
|
bn = AssignFreeBackEnd(slot);
|
|
|
|
if (bn != NULL) {
|
|
/*
|
|
* Compute the cancel key that will be assigned to this session.
|
|
* We probably don't need cancel keys for autovac workers, but
|
|
* we'd better have something random in the field to prevent
|
|
* unfriendly people from sending cancels to them.
|
|
*/
|
|
GenerateCancelKey(false);
|
|
bn->cancel_key = t_thrd.proc_cxt.MyCancelKey;
|
|
|
|
/* Autovac workers are not dead_end and need a child slot */
|
|
bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot;
|
|
bn->pid = initialize_util_thread(AUTOVACUUM_WORKER, bn);
|
|
t_thrd.proc_cxt.MyPMChildSlot = 0;
|
|
if (bn->pid > 0) {
|
|
bn->is_autovacuum = true;
|
|
DLInitElem(&bn->elem, bn);
|
|
DLAddHead(g_instance.backend_list, &bn->elem);
|
|
/* all OK */
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* fork failed, fall through to report -- actual error message was
|
|
* logged by initialize_util_thread
|
|
*/
|
|
(void)ReleasePostmasterChildSlot(bn->child_slot);
|
|
bn->pid = 0;
|
|
} else
|
|
ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
|
|
}
|
|
|
|
/*
|
|
* Report the failure to the launcher, if it's running. (If it's not, we
|
|
* might not even be connected to shared memory, so don't try to call
|
|
* AutoVacWorkerFailed.) Note that we also need to signal it so that it
|
|
* responds to the condition, but we don't do that here, instead waiting
|
|
* for ServerLoop to do it. This way we avoid a ping-pong signalling in
|
|
* quick succession between the autovac launcher and postmaster in case
|
|
* things get ugly.
|
|
*/
|
|
if (g_instance.pid_cxt.AutoVacPID != 0) {
|
|
AutoVacWorkerFailed();
|
|
t_thrd.postmaster_cxt.avlauncher_needs_signal = true;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Reaper -- signal handler to cleanup after a child process dies.
|
|
*/
|
|
static void reaper_backend(SIGNAL_ARGS)
|
|
{
|
|
int save_errno = errno;
|
|
ThreadId pid; /* process id of dead child process */
|
|
long exitstatus; /* its exit status */
|
|
int* status = NULL;
|
|
ThreadId oldpid = 0;
|
|
|
|
#define LOOPTEST() (pid = gs_thread_id(t_thrd.postmaster_cxt.CurExitThread))
|
|
#define LOOPHEADER() (exitstatus = (long)(intptr_t)status)
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
|
|
ereport(DEBUG4, (errmsg_internal("reaping dead processes")));
|
|
|
|
for (;;) {
|
|
LOOPTEST();
|
|
|
|
if (pid == oldpid) {
|
|
break;
|
|
}
|
|
|
|
oldpid = pid;
|
|
if (gs_thread_join(t_thrd.postmaster_cxt.CurExitThread, (void**)&status) != 0) {
|
|
/*
|
|
* If the thread does not exist, treat it as normal exit and we continue to
|
|
* do our clean-up work. Otherwise, we treate it as crashed 'cause we do
|
|
* not know the current status of the thread and it's better to quit directly
|
|
* which sames more safely.
|
|
*/
|
|
if (ESRCH == pthread_kill(pid, 0)) {
|
|
exitstatus = 0;
|
|
ereport(LOG, (errmsg("failed to join thread %lu, no such process", pid)));
|
|
} else {
|
|
exitstatus = 1;
|
|
HandleChildCrash(pid, exitstatus, "Backend process");
|
|
}
|
|
} else {
|
|
LOOPHEADER();
|
|
ereport(DEBUG1, (errmsg("have joined thread %lu, exitstatus=%ld.", pid, exitstatus)));
|
|
}
|
|
|
|
// copy from CleanupBackend(),but remove traversing g_instance.backend_list scope.
|
|
LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
|
|
#ifdef WIN32
|
|
if (exitstatus == ERROR_WAIT_NO_CHILDREN) {
|
|
LogChildExit(LOG, _("server process"), pid, exitstatus);
|
|
exitstatus = 0;
|
|
}
|
|
|
|
#endif
|
|
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) {
|
|
HandleChildCrash(pid, exitstatus, _("server process"));
|
|
}
|
|
}
|
|
|
|
/* Done with signal handler */
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
|
errno = save_errno;
|
|
}
|
|
|
|
/*
|
|
* @@GaussDB@@
|
|
* Brief : the implement of the reaper backend thread
|
|
* Description :
|
|
* Notes :
|
|
*/
|
|
void ReaperBackendMain()
|
|
{
|
|
|
|
/* we are a postmaster subprocess now */
|
|
IsUnderPostmaster = true;
|
|
|
|
/* reset t_thrd.proc_cxt.MyProcPid */
|
|
t_thrd.proc_cxt.MyProcPid = gs_thread_self();
|
|
|
|
/* record Start Time for logging */
|
|
t_thrd.proc_cxt.MyStartTime = time(NULL);
|
|
|
|
/* reord my name */
|
|
t_thrd.proc_cxt.MyProgName = "ReaperBackend";
|
|
|
|
/* Identify myself via ps */
|
|
init_ps_display("ReaperBackend", "", "", "");
|
|
|
|
ereport(LOG, (errmsg("reaper backend started.")));
|
|
|
|
InitializeLatchSupport(); /* needed for latch waits */
|
|
|
|
/* Initialize private latch for use by signal handlers */
|
|
InitLatch(&t_thrd.postmaster_cxt.ReaperBackendLatch);
|
|
|
|
/*
|
|
* Properly accept or ignore signals the postmaster might send us
|
|
*
|
|
* Note: we deliberately ignore SIGTERM, because during a standard Unix
|
|
* system shutdown cycle, init will SIGTERM all processes at once. We
|
|
* want to wait for the backends to exit, whereupon the postmaster will
|
|
* tell us it's okay to shut down (via SIGUSR2).
|
|
*/
|
|
(void)gspqsignal(SIGHUP, SIG_IGN);
|
|
(void)gspqsignal(SIGINT, SIG_IGN);
|
|
(void)gspqsignal(SIGTERM, SIG_IGN);
|
|
(void)gspqsignal(SIGQUIT, SIG_IGN);
|
|
(void)gspqsignal(SIGALRM, SIG_IGN);
|
|
(void)gspqsignal(SIGPIPE, SIG_IGN);
|
|
(void)gspqsignal(SIGUSR1, SIG_IGN);
|
|
(void)gspqsignal(SIGUSR2, SIG_IGN);
|
|
|
|
(void)gspqsignal(SIGCHLD, reaper_backend);
|
|
/*
|
|
* Reset some signals that are accepted by postmaster but not here
|
|
*/
|
|
(void)gspqsignal(SIGTTIN, SIG_DFL);
|
|
(void)gspqsignal(SIGTTOU, SIG_DFL);
|
|
(void)gspqsignal(SIGCONT, SIG_DFL);
|
|
(void)gspqsignal(SIGWINCH, SIG_DFL);
|
|
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
|
|
(void)gs_signal_unblock_sigusr2();
|
|
|
|
/* all is done info top memory context. */
|
|
(void)MemoryContextSwitchTo(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT));
|
|
|
|
for (;;) {
|
|
/* Clear any already-pending wakeups */
|
|
ResetLatch(&t_thrd.postmaster_cxt.ReaperBackendLatch);
|
|
|
|
/*
|
|
* Sleep until there's something to do
|
|
*/
|
|
(void)WaitLatch(&t_thrd.postmaster_cxt.ReaperBackendLatch, WL_LATCH_SET | WL_TIMEOUT, 10 * 1000);
|
|
}
|
|
|
|
ereport(LOG, (errmsg("Reaper backend thread shutting down...")));
|
|
|
|
proc_exit(0);
|
|
}
|
|
|
|
/*
|
|
* StartPgjobWorker
|
|
* Start an job worker process.
|
|
*
|
|
* This function is here because it enters the resulting PID into the
|
|
* postmaster's private backends list.
|
|
*
|
|
*/
|
|
static void StartPgjobWorker(void)
|
|
{
|
|
Backend* bn = NULL;
|
|
|
|
/*
|
|
* If not in condition to run a process, don't try, but handle it like a
|
|
* fork failure. This does not normally happen, since the signal is only
|
|
* supposed to be sent by autovacuum launcher when it's OK to do it, but
|
|
* we have to check to avoid race-condition problems during DB state
|
|
* changes.
|
|
*/
|
|
if (canAcceptConnections(false) == CAC_OK) {
|
|
|
|
int slot = AssignPostmasterChildSlot();
|
|
|
|
if (slot == -1) {
|
|
return;
|
|
}
|
|
bn = AssignFreeBackEnd(slot);
|
|
if (bn != NULL) {
|
|
/*
|
|
* Compute the cancel key that will be assigned to this session.
|
|
* We probably don't need cancel keys for autovac workers, but
|
|
* we'd better have something random in the field to prevent
|
|
* unfriendly people from sending cancels to them.
|
|
*/
|
|
GenerateCancelKey(false);
|
|
bn->cancel_key = t_thrd.proc_cxt.MyCancelKey;
|
|
bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot;
|
|
bn->pid = initialize_util_thread(JOB_WORKER);
|
|
t_thrd.proc_cxt.MyPMChildSlot = 0;
|
|
if (bn->pid > 0) {
|
|
bn->is_autovacuum = false;
|
|
DLInitElem(&bn->elem, bn);
|
|
DLAddHead(g_instance.backend_list, &bn->elem);
|
|
/* all OK */
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* fork failed, fall through to report -- actual error message was
|
|
* logged by initialize_util_thread
|
|
*/
|
|
(void)ReleasePostmasterChildSlot(bn->child_slot);
|
|
bn->pid = 0;
|
|
bn = NULL;
|
|
} else
|
|
ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
|
|
}
|
|
|
|
/*
|
|
* Report the failure to the launcher, if it's running. (If it's not, we
|
|
* might not even be connected to shared memory, so don't try to call
|
|
* RecordForkJobWorkerFailed.) Note that we also need to signal it so that it
|
|
* responds to the condition, but we don't do that here, instead waiting
|
|
* for ServerLoop to do it. This way we avoid a ping-pong signalling in
|
|
* quick succession between the job scheduler and postmaster in case
|
|
* things get ugly.
|
|
*/
|
|
if (g_instance.pid_cxt.PgJobSchdPID != 0) {
|
|
RecordForkJobWorkerFailed();
|
|
t_thrd.postmaster_cxt.jobscheduler_needs_signal = true;
|
|
}
|
|
}
|
|
|
|
static void StartPoolCleaner(void)
|
|
{
|
|
Backend* bn = NULL;
|
|
|
|
/*
|
|
* If not in condition to run a process, don't try, but handle it like a
|
|
* fork failure. This does not normally happen, since the signal is only
|
|
* supposed to be sent by autovacuum launcher when it's OK to do it, but
|
|
* we have to check to avoid race-condition problems during DB state
|
|
* changes.
|
|
*/
|
|
if (canAcceptConnections(false) == CAC_OK) {
|
|
|
|
int slot = AssignPostmasterChildSlot();
|
|
if (slot == -1) {
|
|
return;
|
|
}
|
|
bn = AssignFreeBackEnd(slot);
|
|
if (bn != NULL) {
|
|
/*
|
|
* Compute the cancel key that will be assigned to this session.
|
|
* We probably don't need cancel keys for autovac workers, but
|
|
* we'd better have something random in the field to prevent
|
|
* unfriendly people from sending cancels to them.
|
|
*/
|
|
GenerateCancelKey(false);
|
|
bn->cancel_key = t_thrd.proc_cxt.MyCancelKey;
|
|
bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot;
|
|
bn->pid = initialize_util_thread(COMM_POOLER_CLEAN);
|
|
g_instance.pid_cxt.CommPoolerCleanPID = bn->pid;
|
|
t_thrd.proc_cxt.MyPMChildSlot = 0;
|
|
if (bn->pid > 0) {
|
|
bn->is_autovacuum = false;
|
|
DLInitElem(&bn->elem, bn);
|
|
DLAddHead(g_instance.backend_list, &bn->elem);
|
|
/* all OK */
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* fork failed, fall through to report -- actual error message was
|
|
* logged by initialize_util_thread
|
|
*/
|
|
(void)ReleasePostmasterChildSlot(bn->child_slot);
|
|
bn->pid = 0;
|
|
bn = NULL;
|
|
} else
|
|
ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
|
|
}
|
|
}
|
|
|
|
static void StartCleanStatement(void)
|
|
{
|
|
Backend* bn = NULL;
|
|
|
|
/*
|
|
* If not in condition to run a process, don't try, but handle it like a
|
|
* fork failure. This does not normally happen, since the signal is only
|
|
* supposed to be sent by autovacuum launcher when it's OK to do it, but
|
|
* we have to check to avoid race-condition problems during DB state
|
|
* changes.
|
|
*/
|
|
if (canAcceptConnections(false) == CAC_OK) {
|
|
int slot = AssignPostmasterChildSlot();
|
|
|
|
if (slot == -1) {
|
|
return;
|
|
}
|
|
|
|
bn = AssignFreeBackEnd(slot);
|
|
|
|
if (bn != NULL) {
|
|
GenerateCancelKey(false);
|
|
bn->cancel_key = t_thrd.proc_cxt.MyCancelKey;
|
|
|
|
/* Autovac workers are not dead_end and need a child slot */
|
|
bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot;
|
|
bn->pid = initialize_util_thread(TRACK_STMT_CLEANER, bn);
|
|
t_thrd.proc_cxt.MyPMChildSlot = 0;
|
|
if (bn->pid > 0) {
|
|
bn->is_autovacuum = false;
|
|
DLInitElem(&bn->elem, bn);
|
|
DLAddHead(g_instance.backend_list, &bn->elem);
|
|
/* all OK */
|
|
return;
|
|
}
|
|
|
|
(void)ReleasePostmasterChildSlot(bn->child_slot);
|
|
bn->pid = 0;
|
|
} else {
|
|
ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Create the opts file
|
|
*/
|
|
static bool CreateOptsFile(int argc, const char* argv[], const char* fullprogname)
|
|
{
|
|
FILE* fp = NULL;
|
|
int i;
|
|
|
|
#define OPTS_FILE "postmaster.opts"
|
|
|
|
if ((fp = fopen(OPTS_FILE, "w")) == NULL) {
|
|
ereport(LOG, (errmsg("could not create file \"%s\": %m", OPTS_FILE)));
|
|
return false;
|
|
}
|
|
|
|
fprintf(fp, "%s", fullprogname);
|
|
|
|
for (i = 1; i < argc; i++)
|
|
fprintf(fp, " \"%s\"", argv[i]);
|
|
|
|
fputs("\n", fp);
|
|
|
|
if (fclose(fp)) {
|
|
ereport(LOG, (errmsg("could not close file \"%s\": %m", OPTS_FILE)));
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Update the opts file */
|
|
static void UpdateOptsFile(void)
|
|
{
|
|
FILE* fp = NULL;
|
|
char* dest = NULL;
|
|
char buffer[MAXPGPATH] = {0};
|
|
const char* modeopt = " \"-M\" \"";
|
|
const char* modestr = NULL;
|
|
|
|
#define OPTS_FILE "postmaster.opts"
|
|
#define OPTS_TEMP_FILE "postmaster.opts.tmp"
|
|
|
|
if (NULL == t_thrd.postmaster_cxt.HaShmData)
|
|
return;
|
|
|
|
switch (t_thrd.postmaster_cxt.HaShmData->current_mode) {
|
|
case PRIMARY_MODE:
|
|
modestr = "primary";
|
|
break;
|
|
case STANDBY_MODE:
|
|
modestr = "standby";
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
|
|
if ((fp = fopen(OPTS_FILE, "r")) == NULL) {
|
|
ereport(LOG, (errmsg("could not open file \"%s\": %m", OPTS_FILE)));
|
|
return;
|
|
}
|
|
|
|
(void)fgets(buffer, MAXPGPATH, fp);
|
|
if (fclose(fp)) {
|
|
ereport(LOG, (errmsg("could not close file \"%s\": %m", OPTS_FILE)));
|
|
return;
|
|
}
|
|
|
|
if ((fp = fopen(OPTS_TEMP_FILE, "w")) == NULL) {
|
|
ereport(LOG, (errmsg("could not create file \"%s\": %m", OPTS_TEMP_FILE)));
|
|
return;
|
|
}
|
|
|
|
if ((dest = strchr(buffer, '\n')) != NULL)
|
|
*dest = 0;
|
|
|
|
dest = strstr(buffer, modeopt);
|
|
if (dest != NULL) {
|
|
dest += strlen(modeopt);
|
|
*dest = 0;
|
|
(void)fprintf(fp, "%s%s\"", buffer, modestr);
|
|
|
|
dest++;
|
|
dest = strstr(dest, "\" \"");
|
|
if (dest != NULL)
|
|
(void)fprintf(fp, "%s\n", ++dest);
|
|
else
|
|
(void)fputs("\n", fp);
|
|
} else {
|
|
(void)fprintf(fp, "%s%s%s\"\n", buffer, modeopt, modestr);
|
|
}
|
|
|
|
if (fclose(fp)) {
|
|
ereport(LOG, (errmsg("could not close file \"%s\": %m", OPTS_TEMP_FILE)));
|
|
return;
|
|
}
|
|
|
|
(void)unlink(OPTS_FILE);
|
|
if (rename(OPTS_TEMP_FILE, OPTS_FILE) != 0)
|
|
ereport(LOG,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not rename file \"%s\" to \"%s\": %m", OPTS_TEMP_FILE, OPTS_FILE)));
|
|
}
|
|
|
|
/*
|
|
* StartCatchupWorker
|
|
* Start catch-up process.
|
|
*
|
|
* This function is here because it enters the resulting PID into the
|
|
* postmaster's private backends list.
|
|
*
|
|
* NB -- this code very roughly matches BackendStartup.
|
|
*/
|
|
static ThreadId StartCatchupWorker(void)
|
|
{
|
|
Backend* bn = NULL;
|
|
|
|
/*
|
|
* If not in condition to run a process, don't try, but handle it like a
|
|
* fork failure. This does not normally happen, since the signal is only
|
|
* supposed to be sent by autovacuum launcher when it's OK to do it, but
|
|
* we have to check to avoid race-condition problems during DB state
|
|
* changes.
|
|
*/
|
|
if (canAcceptConnections(false) == CAC_OK) {
|
|
|
|
int slot = AssignPostmasterChildSlot();
|
|
if (slot == -1) {
|
|
return 0;
|
|
}
|
|
bn = AssignFreeBackEnd(slot);
|
|
if (bn != NULL) {
|
|
/*
|
|
* Compute the cancel key that will be assigned to this session.
|
|
* We probably don't need cancel keys for autovac workers, but
|
|
* we'd better have something random in the field to prevent
|
|
* unfriendly people from sending cancels to them.
|
|
*/
|
|
GenerateCancelKey(false);
|
|
bn->cancel_key = t_thrd.proc_cxt.MyCancelKey;
|
|
|
|
/* Data catch-up are not dead_end and need a child slot */
|
|
bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot;
|
|
bn->pid = initialize_util_thread(CATCHUP);
|
|
t_thrd.proc_cxt.MyPMChildSlot = 0;
|
|
if (bn->pid > 0) {
|
|
bn->is_autovacuum = false;
|
|
DLInitElem(&bn->elem, bn);
|
|
DLAddHead(g_instance.backend_list, &bn->elem);
|
|
/* all OK */
|
|
return bn->pid;
|
|
}
|
|
|
|
/*
|
|
* fork failed, fall through to report -- actual error message was
|
|
* logged by StartDataCatchup
|
|
*/
|
|
(void)ReleasePostmasterChildSlot(bn->child_slot);
|
|
bn->pid = 0;
|
|
bn = NULL;
|
|
} else
|
|
ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* MaxLivePostmasterChildren
|
|
*
|
|
* This reports the number of entries needed in per-child-process arrays
|
|
* (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
|
|
* These arrays include regular backends, autovac workers and walsenders,
|
|
* but not special children nor dead_end children. This allows the arrays
|
|
* to have a fixed maximum size, to wit the same too-many-children limit
|
|
* enforced by canAcceptConnections(). The exact value isn't too critical
|
|
* as long as it's more than g_instance.shmem_cxt.MaxBackends.
|
|
*/
|
|
int MaxLivePostmasterChildren(void)
|
|
{
|
|
return 6 * g_instance.shmem_cxt.MaxBackends;
|
|
}
|
|
|
|
#ifdef EXEC_BACKEND
|
|
#ifndef WIN32
/* Outside Windows a socket is handed over by plain assignment. */
#define write_inheritable_socket(dest, src) (*(dest) = (src))
#define read_inheritable_socket(dest, src) (*(dest) = *(src))
#else
/* On Windows handles and sockets are duplicated by the functions below. */
static bool write_duplicated_handle(HANDLE* dest, HANDLE src, HANDLE child);
static bool write_inheritable_socket(InheritableSocket* dest, SOCKET src, pid_t childPid);
static void read_inheritable_socket(SOCKET* dest, InheritableSocket* src);
#endif
|
|
|
|
/* Save critical backend variables into the BackendParameters struct */
|
|
#ifndef WIN32
|
|
static bool save_backend_variables(BackendParameters* param, Port* port)
|
|
#else
|
|
static bool save_backend_variables(BackendParameters* param, Port* port, HANDLE childProcess, pid_t childPid)
|
|
#endif
|
|
{
|
|
int ss_rc = memcpy_s(¶m->port, sizeof(param->port), port, sizeof(Port));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
write_inheritable_socket(¶m->portsocket, port->sock);
|
|
|
|
strlcpy(param->DataDir, t_thrd.proc_cxt.DataDir, MAXPGPATH);
|
|
|
|
ss_rc = memcpy_s(¶m->ListenSocket,
|
|
sizeof(param->ListenSocket),
|
|
&t_thrd.postmaster_cxt.ListenSocket,
|
|
sizeof(t_thrd.postmaster_cxt.ListenSocket));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
|
|
param->MyCancelKey = t_thrd.proc_cxt.MyCancelKey;
|
|
param->MyPMChildSlot = t_thrd.proc_cxt.MyPMChildSlot;
|
|
|
|
param->UsedShmemSegID = UsedShmemSegID;
|
|
param->UsedShmemSegAddr = UsedShmemSegAddr;
|
|
|
|
param->ShmemLock = t_thrd.shemem_ptr_cxt.ShmemLock;
|
|
param->ShmemVariableCache = t_thrd.xact_cxt.ShmemVariableCache;
|
|
|
|
param->mainLWLockArray = (LWLock*)t_thrd.shemem_ptr_cxt.mainLWLockArray;
|
|
param->PMSignalState = t_thrd.shemem_ptr_cxt.PMSignalState;
|
|
|
|
param->LocalIpNum = t_thrd.postmaster_cxt.LocalIpNum;
|
|
int rc = memcpy_s(param->LocalAddrList, (MAXLISTEN * IP_LEN), t_thrd.postmaster_cxt.LocalAddrList, (MAXLISTEN * IP_LEN));
|
|
securec_check(rc, "", "");
|
|
param->HaShmData = t_thrd.postmaster_cxt.HaShmData;
|
|
|
|
param->PgStartTime = t_thrd.time_cxt.pg_start_time;
|
|
param->PgReloadTime = t_thrd.time_cxt.pg_reload_time;
|
|
param->first_syslogger_file_time = t_thrd.logger.first_syslogger_file_time;
|
|
|
|
param->redirection_done = t_thrd.postmaster_cxt.redirection_done;
|
|
param->IsBinaryUpgrade = u_sess->proc_cxt.IsBinaryUpgrade;
|
|
param->max_safe_fds = t_thrd.storage_cxt.max_safe_fds;
|
|
param->max_files_per_process = g_instance.attr.attr_common.max_files_per_process;
|
|
param->max_userdatafiles = t_thrd.storage_cxt.max_userdatafiles;
|
|
|
|
#ifdef WIN32
|
|
param->PostmasterHandle = PostmasterHandle;
|
|
|
|
if (!write_duplicated_handle(¶m->initial_signal_pipe, pgwin32_create_signal_listener(childPid), childProcess))
|
|
return false;
|
|
|
|
#else
|
|
ss_rc = memcpy_s(¶m->postmaster_alive_fds,
|
|
sizeof(t_thrd.postmaster_cxt.postmaster_alive_fds),
|
|
&t_thrd.postmaster_cxt.postmaster_alive_fds,
|
|
sizeof(t_thrd.postmaster_cxt.postmaster_alive_fds));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
#endif
|
|
|
|
ss_rc = memcpy_s(¶m->syslogPipe,
|
|
sizeof(param->syslogPipe),
|
|
&t_thrd.postmaster_cxt.syslogPipe,
|
|
sizeof(t_thrd.postmaster_cxt.syslogPipe));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);
|
|
|
|
strlcpy(param->pkglib_path, t_thrd.proc_cxt.pkglib_path, MAXPGPATH);
|
|
|
|
param->myTempNamespace = u_sess->catalog_cxt.myTempNamespace;
|
|
param->myTempToastNamespace = u_sess->catalog_cxt.myTempToastNamespace;
|
|
if (module_logging_is_on(MOD_COMM_IPC) && (t_thrd.proc && t_thrd.proc->workingVersionNum >= 92060)) {
|
|
param->comm_ipc_log = true;
|
|
} else {
|
|
param->comm_ipc_log = false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
#ifdef WIN32
|
|
/*
|
|
* Duplicate a handle for usage in a child process, and write the child
|
|
* process instance of the handle to the parameter file.
|
|
*/
|
|
static bool write_duplicated_handle(HANDLE* dest, HANDLE src, HANDLE childProcess)
|
|
{
|
|
HANDLE hChild = INVALID_HANDLE_VALUE;
|
|
|
|
if (!DuplicateHandle(
|
|
GetCurrentProcess(), src, childProcess, &hChild, 0, TRUE, DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS)) {
|
|
ereport(LOG,
|
|
(errmsg_internal(
|
|
"could not duplicate handle to be written to backend parameter file: error code %lu", GetLastError())));
|
|
return false;
|
|
}
|
|
|
|
*dest = hChild;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Duplicate a socket for usage in a child process, and write the resulting
|
|
* structure to the parameter file.
|
|
* This is required because a number of LSPs (Layered Service Providers) very
|
|
* common on Windows (antivirus, firewalls, download managers etc) break
|
|
* straight socket inheritance.
|
|
*/
|
|
static bool write_inheritable_socket(InheritableSocket* dest, SOCKET src, pid_t childpid)
|
|
{
|
|
dest->origsocket = src;
|
|
|
|
if (src != 0 && src != PGINVALID_SOCKET) {
|
|
/* Actual socket */
|
|
if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0) {
|
|
ereport(LOG,
|
|
(errmsg(
|
|
"could not duplicate socket %d for use in backend: error code %d", (int)src, WSAGetLastError())));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Read a duplicate socket structure back, and get the socket descriptor.
|
|
*/
|
|
static void read_inheritable_socket(SOCKET* dest, InheritableSocket* src)
|
|
{
|
|
SOCKET s;
|
|
|
|
if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0) {
|
|
/* Not a real socket! */
|
|
*dest = src->origsocket;
|
|
} else {
|
|
/* Actual socket, so create from structure */
|
|
s = WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, &src->wsainfo, 0, 0);
|
|
|
|
if (s == INVALID_SOCKET) {
|
|
write_stderr("could not create inherited socket: error code %d\n", WSAGetLastError());
|
|
exit(1);
|
|
}
|
|
|
|
*dest = s;
|
|
|
|
/*
|
|
* To make sure we don't get two references to the same socket, close
|
|
* the original one. (This would happen when inheritance actually
|
|
* works..
|
|
*/
|
|
closesocket(src->origsocket);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
static void read_backend_variables(char* arg, Port* port)
|
|
{
|
|
BackendParameters* pParam = (BackendParameters*)arg;
|
|
|
|
restore_backend_variables(pParam, port);
|
|
}
|
|
|
|
/* Restore critical backend variables from the BackendParameters struct */
|
|
static void restore_backend_variables(BackendParameters* param, Port* port)
|
|
{
|
|
int rc = memcpy_s(port, sizeof(Port), ¶m->port, sizeof(Port));
|
|
securec_check(rc, "", "");
|
|
|
|
read_inheritable_socket(&port->sock, ¶m->portsocket);
|
|
|
|
SetDataDir(param->DataDir);
|
|
|
|
int ss_rc = memcpy_s(&t_thrd.postmaster_cxt.ListenSocket,
|
|
sizeof(t_thrd.postmaster_cxt.ListenSocket),
|
|
¶m->ListenSocket,
|
|
sizeof(t_thrd.postmaster_cxt.ListenSocket));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
|
|
t_thrd.proc_cxt.MyCancelKey = param->MyCancelKey;
|
|
t_thrd.proc_cxt.MyPMChildSlot = param->MyPMChildSlot;
|
|
|
|
UsedShmemSegID = param->UsedShmemSegID;
|
|
UsedShmemSegAddr = param->UsedShmemSegAddr;
|
|
|
|
t_thrd.shemem_ptr_cxt.ShmemLock = param->ShmemLock;
|
|
t_thrd.xact_cxt.ShmemVariableCache = param->ShmemVariableCache;
|
|
|
|
t_thrd.shemem_ptr_cxt.mainLWLockArray = (LWLockPadded*)param->mainLWLockArray;
|
|
t_thrd.shemem_ptr_cxt.PMSignalState = param->PMSignalState;
|
|
|
|
t_thrd.postmaster_cxt.LocalIpNum = param->LocalIpNum;
|
|
rc = memcpy_s(t_thrd.postmaster_cxt.LocalAddrList, (MAXLISTEN * IP_LEN), param->LocalAddrList, (MAXLISTEN * IP_LEN));
|
|
securec_check(rc, "", "");
|
|
t_thrd.postmaster_cxt.HaShmData = param->HaShmData;
|
|
t_thrd.time_cxt.pg_start_time = param->PgStartTime;
|
|
t_thrd.time_cxt.pg_reload_time = param->PgReloadTime;
|
|
t_thrd.logger.first_syslogger_file_time = param->first_syslogger_file_time;
|
|
|
|
t_thrd.postmaster_cxt.redirection_done = param->redirection_done;
|
|
u_sess->proc_cxt.IsBinaryUpgrade = param->IsBinaryUpgrade;
|
|
t_thrd.storage_cxt.max_safe_fds = param->max_safe_fds;
|
|
g_instance.attr.attr_common.max_files_per_process = param->max_files_per_process;
|
|
t_thrd.storage_cxt.max_userdatafiles = param->max_userdatafiles;
|
|
|
|
#ifdef WIN32
|
|
PostmasterHandle = param->PostmasterHandle;
|
|
pgwin32_initial_signal_pipe = param->initial_signal_pipe;
|
|
#endif
|
|
ss_rc = memcpy_s(&t_thrd.postmaster_cxt.syslogPipe,
|
|
sizeof(t_thrd.postmaster_cxt.syslogPipe),
|
|
¶m->syslogPipe,
|
|
sizeof(t_thrd.postmaster_cxt.syslogPipe));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
|
|
|
|
strlcpy(t_thrd.proc_cxt.pkglib_path, param->pkglib_path, MAXPGPATH);
|
|
|
|
if (StreamThreadAmI()) {
|
|
u_sess->catalog_cxt.myTempNamespace = param->myTempNamespace;
|
|
u_sess->catalog_cxt.myTempToastNamespace = param->myTempToastNamespace;
|
|
}
|
|
|
|
if (param->comm_ipc_log == true) {
|
|
module_logging_enable_comm(MOD_COMM_IPC);
|
|
}
|
|
}
|
|
|
|
#endif /* EXEC_BACKEND */
|
|
|
|
static void BackendArrayAllocation(void)
|
|
{
|
|
// should be the same size as PostmasterChildSlot.
|
|
Size size = mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
|
|
g_instance.backend_array = (Backend*)palloc0(size);
|
|
}
|
|
|
|
/*
 * Return the backend_array entry for a 1-based child slot, or NULL when the
 * slot is outside 1..MaxLivePostmasterChildren().
 */
Backend* GetBackend(int slot)
{
    if (slot <= 0 || slot > MaxLivePostmasterChildren()) {
        return NULL;
    }

    return &g_instance.backend_array[slot - 1];
}
|
|
|
|
/*
 * Hand out the backend_array entry that belongs to a 1-based child slot,
 * with flag, pid, cancel_key and dead_end reset.  The entry is expected to
 * be unused (pid == 0); the slot itself is not range-checked here.
 */
Backend* AssignFreeBackEnd(int slot)
{
    Backend* entry = &g_instance.backend_array[slot - 1];

    Assert(entry->pid == 0);

    entry->flag = 0;
    entry->pid = 0;
    entry->cancel_key = 0;
    entry->dead_end = false;

    return entry;
}
|
|
|
|
/*
 * Mark the backend_array entry addressed by bn->child_slot as empty by
 * clearing its pid, flag, cancel_key and dead_end fields.
 */
static void BackendArrayRemove(Backend* bn)
{
    Backend* entry = &g_instance.backend_array[bn->child_slot - 1];

    Assert(entry->pid == bn->pid);

    /* Mark the slot as empty */
    entry->pid = 0;
    entry->flag = 0;
    entry->cancel_key = 0;
    entry->dead_end = false;
}
|
|
|
|
#ifdef WIN32
/*
 * Poll win32ChildQueue, without waiting, for a dead child.
 *
 * If an entry is available, its completion key is stored in *exitstatus and
 * its byte count is returned as the child's id.  Otherwise -1 is returned
 * and *exitstatus is left untouched.
 */
static ThreadId win32_waitpid(int* exitstatus)
{
    DWORD child_id;
    ULONG_PTR exit_key;
    OVERLAPPED* ovl = NULL;

    /* A zero timeout makes this a pure poll. */
    if (!GetQueuedCompletionStatus(win32ChildQueue, &child_id, &exit_key, &ovl, 0)) {
        return -1;
    }

    *exitstatus = (int)exit_key;
    return child_id;
}

#endif /* WIN32 */
|
|
|
|
/*
|
|
* Initialize one and only handle for monitoring postmaster death.
|
|
*
|
|
* Called once in the postmaster, so that child processes can subsequently
|
|
* monitor if their parent is dead.
|
|
*/
|
|
static void InitPostmasterDeathWatchHandle(void)
|
|
{
|
|
#ifndef WIN32
|
|
|
|
/*
|
|
* Create a pipe. Postmaster holds the write end of the pipe open
|
|
* (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
|
|
* the read file descriptor to select() to wake up in case postmaster
|
|
* dies, or check for postmaster death with a (read() == 0). Children must
|
|
* close the write end as soon as possible after forking, because EOF
|
|
* won't be signaled in the read end until all processes have closed the
|
|
* write fd. That is taken care of in ClosePostmasterPorts().
|
|
*/
|
|
Assert(t_thrd.proc_cxt.MyProcPid == PostmasterPid);
|
|
|
|
if (pipe(t_thrd.postmaster_cxt.postmaster_alive_fds))
|
|
ereport(FATAL,
|
|
(errcode_for_file_access(), errmsg_internal("could not create pipe to monitor postmaster death: %m")));
|
|
|
|
/*
|
|
* Set O_NONBLOCK to allow testing for the fd's presence with a read()
|
|
* call.
|
|
*/
|
|
if (fcntl(t_thrd.postmaster_cxt.postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK))
|
|
ereport(FATAL,
|
|
(errcode_for_socket_access(),
|
|
errmsg_internal("could not set postmaster death monitoring pipe to non-blocking mode: %m")));
|
|
|
|
#else
|
|
|
|
/*
|
|
* On Windows, we use a process handle for the same purpose.
|
|
*/
|
|
if (DuplicateHandle(GetCurrentProcess(),
|
|
GetCurrentProcess(),
|
|
GetCurrentProcess(),
|
|
&PostmasterHandle,
|
|
0,
|
|
TRUE,
|
|
DUPLICATE_SAME_ACCESS) == 0)
|
|
ereport(FATAL, (errmsg_internal("could not duplicate postmaster handle: error code %lu", GetLastError())));
|
|
|
|
#endif /* WIN32 */
|
|
}
|
|
|
|
/* Shared-memory footprint of the CBM state: a single XlogBitmap. */
Size CBMShmemSize(void)
{
    return add_size(0, sizeof(XlogBitmap));
}
|
|
|
|
void CBMShmemInit(void)
|
|
{
|
|
bool found = false;
|
|
|
|
t_thrd.cbm_cxt.XlogCbmSys = (XlogBitmap*)ShmemInitStruct("CBM Shmem Data", CBMShmemSize(), &found);
|
|
|
|
if (!found)
|
|
InitXlogCbmSys();
|
|
}
|
|
|
|
/* Shared-memory footprint of the HA state: a single HaShmemData. */
Size HaShmemSize(void)
{
    return add_size(0, sizeof(HaShmemData));
}
|
|
|
|
void HaShmemInit(void)
|
|
{
|
|
bool found = false;
|
|
|
|
t_thrd.postmaster_cxt.HaShmData = (HaShmemData*)ShmemInitStruct("HA Shmem Data ", HaShmemSize(), &found);
|
|
|
|
if (!found) {
|
|
int i = 0;
|
|
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode = NORMAL_MODE;
|
|
for (i = 1; i < MAX_REPLNODE_NUM; i++) {
|
|
t_thrd.postmaster_cxt.HaShmData->disconnect_count[i] = 0;
|
|
t_thrd.postmaster_cxt.HaShmData->repl_reason[i] = NONE_REBUILD;
|
|
}
|
|
SpinLockInit(&t_thrd.postmaster_cxt.HaShmData->mutex);
|
|
}
|
|
if (!IsUnderPostmaster) {
|
|
gs_set_hs_shm_data(t_thrd.postmaster_cxt.HaShmData);
|
|
}
|
|
}
|
|
|
|
/* Check whether the connect info of the port is equal to the replconninfo */
|
|
static bool IsChannelAdapt(Port* port, ReplConnInfo* repl)
|
|
{
|
|
struct sockaddr* laddr = (struct sockaddr*)&(port->laddr.addr);
|
|
struct sockaddr* raddr = (struct sockaddr*)&(port->raddr.addr);
|
|
char local_ip[IP_LEN] = {0};
|
|
char remote_ip[IP_LEN] = {0};
|
|
char* result = NULL;
|
|
|
|
Assert(repl != NULL);
|
|
|
|
if (AF_INET6 == laddr->sa_family) {
|
|
result = inet_net_ntop(AF_INET6, &((struct sockaddr_in*)laddr)->sin_addr, 128, local_ip, IP_LEN);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
} else if (AF_INET == laddr->sa_family) {
|
|
result = inet_net_ntop(AF_INET, &((struct sockaddr_in*)laddr)->sin_addr, 32, local_ip, IP_LEN);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
}
|
|
|
|
if (AF_INET6 == raddr->sa_family) {
|
|
result = inet_net_ntop(AF_INET6, &((struct sockaddr_in*)raddr)->sin_addr, 128, remote_ip, IP_LEN);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
} else if (AF_INET == raddr->sa_family) {
|
|
result = inet_net_ntop(AF_INET, &((struct sockaddr_in*)raddr)->sin_addr, 32, remote_ip, IP_LEN);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
}
|
|
|
|
if (0 == strcmp(local_ip, repl->localhost) && 0 == strcmp(remote_ip, repl->remotehost)) {
|
|
return true;
|
|
} else {
|
|
ereport(DEBUG1, (errmsg("connect local ip %s, connect remote ip %s", local_ip, remote_ip)));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* check whether the login connection is from local machine.
|
|
*/
|
|
bool IsFromLocalAddr(Port* port)
|
|
{
|
|
struct sockaddr* local_addr = (struct sockaddr*)&(port->laddr.addr);
|
|
struct sockaddr* remote_addr = (struct sockaddr*)&(port->raddr.addr);
|
|
char local_ip[IP_LEN] = {0};
|
|
char remote_ip[IP_LEN] = {0};
|
|
char* result = NULL;
|
|
|
|
/* parse the local ip address */
|
|
if (AF_INET6 == local_addr->sa_family) {
|
|
result = inet_net_ntop(AF_INET6, &((struct sockaddr_in*)local_addr)->sin_addr, 128, local_ip, IP_LEN);
|
|
} else if (AF_INET == local_addr->sa_family) {
|
|
result = inet_net_ntop(AF_INET, &((struct sockaddr_in*)local_addr)->sin_addr, 32, local_ip, IP_LEN);
|
|
}
|
|
|
|
if (result == NULL) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop local failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
|
|
/* parse the remote ip address */
|
|
if (AF_INET6 == remote_addr->sa_family) {
|
|
result = inet_net_ntop(AF_INET6, &((struct sockaddr_in*)remote_addr)->sin_addr, 128, remote_ip, IP_LEN);
|
|
} else if (AF_INET == remote_addr->sa_family) {
|
|
result = inet_net_ntop(AF_INET, &((struct sockaddr_in*)remote_addr)->sin_addr, 32, remote_ip, IP_LEN);
|
|
}
|
|
|
|
if (result == NULL) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop remote failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
|
|
/* only the remote ip equal to local ip, return true */
|
|
if (strlen(local_ip) != 0 && strlen(remote_ip) != 0 && strcmp(local_ip, remote_ip) == 0)
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* check whether the localaddr of the ip is in the LocalAddrList
|
|
*/
|
|
bool IsLocalAddr(Port* port)
|
|
{
|
|
int i;
|
|
#ifndef WIN32
|
|
struct sockaddr* laddr = (struct sockaddr*)&(port->laddr.addr);
|
|
char local_ip[IP_LEN] = {0};
|
|
char* result = NULL;
|
|
|
|
if (AF_INET6 == laddr->sa_family) {
|
|
result = inet_net_ntop(AF_INET6, &((struct sockaddr_in*)laddr)->sin_addr, 128, local_ip, IP_LEN);
|
|
} else if (AF_INET == laddr->sa_family) {
|
|
result = inet_net_ntop(AF_INET, &((struct sockaddr_in*)laddr)->sin_addr, 32, local_ip, IP_LEN);
|
|
}
|
|
|
|
if (AF_UNIX == laddr->sa_family) {
|
|
return true;
|
|
}
|
|
for (i = 0; i != t_thrd.postmaster_cxt.LocalIpNum; ++i) {
|
|
if (0 == strcmp(local_ip, t_thrd.postmaster_cxt.LocalAddrList[i]) ||
|
|
(AF_INET == laddr->sa_family && 0 == strcmp("0.0.0.0", t_thrd.postmaster_cxt.LocalAddrList[i])) ||
|
|
(AF_INET6 == laddr->sa_family && 0 == strcmp("::", t_thrd.postmaster_cxt.LocalAddrList[i]))) {
|
|
|
|
return true;
|
|
}
|
|
}
|
|
if (NULL == result && laddr->sa_family != AF_UNSPEC) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
return false;
|
|
#else
|
|
for (i = 0; i != t_thrd.postmaster_cxt.LocalIpNum; ++i) {
|
|
ereport(DEBUG1, (errmsg("LocalAddrIP %s\n", t_thrd.postmaster_cxt.LocalAddrList[i])));
|
|
}
|
|
return true;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* check whether the input address string is equivalent to local host
|
|
*/
|
|
static bool IsLocalIp(const char* address)
|
|
{
|
|
return (
|
|
strcmp(address, LOCAL_HOST) == 0 || strcmp(address, LOOP_IP_STRING) == 0 || strcmp(address, LOOP_IPV6_IP) == 0);
|
|
}
|
|
|
|
/*
|
|
* check whether the input address string is inplicit, including local host and *
|
|
*/
|
|
static bool IsInplicitIp(const char* address)
|
|
{
|
|
return (IsLocalIp(address) || strcmp(address, "*") == 0);
|
|
}
|
|
|
|
/*
|
|
* if certain replconn channel is equivalent to pooler port,
|
|
* we use that channel for internal tool connections for dn.
|
|
* return value: equivalent replconn array index, i.e. no need
|
|
* for pooler port, or -1 if none equivalent, i.e. pooler port needed.
|
|
* At present, this is determined only at postmaster startup.
|
|
* If we are to support dynamic modification of replconninfo,
|
|
* we would also need to do the judge dynamically.
|
|
*/
|
|
static int NeedPoolerPort(const char* hostName)
|
|
{
|
|
int index = -1;
|
|
|
|
for (int i = 1; i < MAX_REPLNODE_NUM; i++) {
|
|
if (t_thrd.postmaster_cxt.ReplConnArray[i] != NULL &&
|
|
t_thrd.postmaster_cxt.ReplConnArray[i]->localport == g_instance.attr.attr_network.PoolerPort &&
|
|
(strcmp(t_thrd.postmaster_cxt.ReplConnArray[i]->localhost, "*") == 0 ||
|
|
strcmp(t_thrd.postmaster_cxt.ReplConnArray[i]->localhost, hostName) == 0)) {
|
|
index = i;
|
|
break;
|
|
}
|
|
}
|
|
return index;
|
|
}
|
|
|
|
/*
|
|
* check whether the port of the socket address is the PoolerPort
|
|
*/
|
|
bool IsHASocketAddr(struct sockaddr* sock_addr)
|
|
{
|
|
int portNumber = 0;
|
|
struct sockaddr_in* ipv4addr = NULL;
|
|
struct sockaddr_in6* ipv6addr = NULL;
|
|
#ifdef HAVE_UNIX_SOCKETS
|
|
struct sockaddr_un* unixaddr = NULL;
|
|
char unixStr[MAX_UNIX_PATH_LEN] = {'\0'};
|
|
#endif
|
|
bool result = false;
|
|
|
|
if (sock_addr == NULL) {
|
|
return false;
|
|
}
|
|
|
|
switch (sock_addr->sa_family) {
|
|
case AF_INET:
|
|
ipv4addr = (struct sockaddr_in*)sock_addr;
|
|
portNumber = ntohs(ipv4addr->sin_port);
|
|
result = (portNumber == g_instance.attr.attr_network.PoolerPort);
|
|
break;
|
|
|
|
case AF_INET6:
|
|
ipv6addr = (struct sockaddr_in6*)sock_addr;
|
|
portNumber = ntohs(ipv6addr->sin6_port);
|
|
result = (portNumber == g_instance.attr.attr_network.PoolerPort);
|
|
break;
|
|
|
|
#ifdef HAVE_UNIX_SOCKETS
|
|
case AF_UNIX:
|
|
unixaddr = (struct sockaddr_un*)sock_addr;
|
|
UNIXSOCK_PATH(unixStr, g_instance.attr.attr_network.PoolerPort, g_instance.attr.attr_network.UnixSocketDir);
|
|
|
|
if (0 == (strncmp(unixaddr->sun_path, unixStr, MAX_UNIX_PATH_LEN))) {
|
|
result = true;
|
|
}
|
|
break;
|
|
#endif
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* check whether the port of the session Port structure is the PoolerPort
|
|
*/
|
|
bool IsHAPort(Port* port)
|
|
{
|
|
if (port == NULL) {
|
|
return false;
|
|
}
|
|
|
|
struct sockaddr* laddr = (struct sockaddr*)&(port->laddr.addr);
|
|
return IsHASocketAddr(laddr);
|
|
}
|
|
|
|
/*
|
|
* check whether the localaddr of the port is the PostPortNumber
|
|
*/
|
|
static bool IsLocalPort(Port* port)
|
|
{
|
|
struct sockaddr* laddr = (struct sockaddr*)&(port->laddr.addr);
|
|
int sockport = 0;
|
|
|
|
if (AF_UNIX == laddr->sa_family) {
|
|
return true;
|
|
} else {
|
|
sockport = ntohs(((struct sockaddr_in*)laddr)->sin_port);
|
|
}
|
|
|
|
if (sockport == g_instance.attr.attr_network.PostPortNumber) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static void SetHaShmemData()
|
|
{
|
|
volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
int i = 0, repl_list_num = 0;
|
|
|
|
switch (t_thrd.xlog_cxt.server_mode) {
|
|
case STANDBY_MODE: {
|
|
hashmdata->current_mode = STANDBY_MODE;
|
|
hashmdata->is_cascade_standby = t_thrd.xlog_cxt.is_cascade_standby;
|
|
break;
|
|
}
|
|
case PRIMARY_MODE: {
|
|
hashmdata->current_mode = PRIMARY_MODE;
|
|
break;
|
|
}
|
|
case PENDING_MODE: {
|
|
hashmdata->current_mode = PENDING_MODE;
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
for (i = 1; i < MAX_REPLNODE_NUM; i++) {
|
|
if (t_thrd.postmaster_cxt.ReplConnArray[i] != NULL)
|
|
repl_list_num++;
|
|
}
|
|
|
|
hashmdata->repl_list_num = repl_list_num;
|
|
}
|
|
/*
|
|
* check whether the ip and port is already listened
|
|
*/
|
|
static bool IsAlreadyListen(const char* ip, int port)
|
|
{
|
|
int listen_index = 0;
|
|
char sock_ip[IP_LEN] = {0};
|
|
errno_t rc = 0;
|
|
|
|
if (ip == NULL || port <= 0) {
|
|
return false;
|
|
}
|
|
|
|
for (listen_index = 0; listen_index != MAXLISTEN; ++listen_index) {
|
|
if (t_thrd.postmaster_cxt.ListenSocket[listen_index] != PGINVALID_SOCKET) {
|
|
struct sockaddr_in saddr;
|
|
socklen_t slen;
|
|
char* result = NULL;
|
|
rc = memset_s(&saddr, sizeof(saddr), 0, sizeof(saddr));
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
slen = sizeof(saddr);
|
|
if (getsockname(t_thrd.postmaster_cxt.ListenSocket[listen_index],
|
|
(struct sockaddr*)&saddr,
|
|
(socklen_t*)&slen) < 0) {
|
|
ereport(LOG, (errmsg("Error in getsockname int IsAlreadyListen()")));
|
|
continue;
|
|
}
|
|
|
|
if (AF_INET6 == saddr.sin_family) {
|
|
result = inet_net_ntop(AF_INET6, &saddr.sin_addr, 128, sock_ip, IP_LEN);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
} else if (AF_INET == saddr.sin_family) {
|
|
result = inet_net_ntop(AF_INET, &saddr.sin_addr, 32, sock_ip, IP_LEN);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
|
|
} else if (AF_UNIX == saddr.sin_family) {
|
|
continue;
|
|
}
|
|
if ((0 == strcmp(ip, sock_ip)) && (port == ntohs(saddr.sin_port))) {
|
|
return true;
|
|
}
|
|
|
|
// check if all IP addresss of local host has been listened already, which using ”*“ for listen address
|
|
if((AF_INET6 == saddr.sin_family ) && ((0 == strcmp("::", sock_ip)) && (port == ntohs(saddr.sin_port)))) {
|
|
return true;
|
|
} else if ((AF_INET == saddr.sin_family ) && ((0 == strcmp("0.0.0.0", sock_ip)) && (port == ntohs(saddr.sin_port)))) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* whether the sockect is corresponding with IP-Port pair
|
|
*/
|
|
bool CheckSockAddr(struct sockaddr* sock_addr, const char* szIP, int port)
|
|
{
|
|
struct sockaddr_in* ipv4addr = NULL;
|
|
struct sockaddr_in6* ipv6addr = NULL;
|
|
#ifdef HAVE_UNIX_SOCKETS
|
|
struct sockaddr_un* unixaddr = NULL;
|
|
char unixStr[MAX_UNIX_PATH_LEN] = {'\0'};
|
|
#endif
|
|
char IPstr[MAX_IP_STR_LEN] = {'\0'};
|
|
int portNumber = 0;
|
|
bool cmpResult = false;
|
|
char* result = NULL;
|
|
|
|
if ((NULL == sock_addr) || (NULL == szIP) || (0 == port)) {
|
|
ereport(LOG, (errmsg("invalid socket information or IP string or port")));
|
|
return false;
|
|
}
|
|
|
|
switch (sock_addr->sa_family) {
|
|
/* if AF_UNIX, just compare the the name of the socket file */
|
|
#ifdef HAVE_UNIX_SOCKETS
|
|
case AF_UNIX:
|
|
unixaddr = (struct sockaddr_un*)sock_addr;
|
|
UNIXSOCK_PATH(unixStr, port, g_instance.attr.attr_network.UnixSocketDir);
|
|
|
|
if (0 == (strncmp(unixaddr->sun_path, unixStr, MAX_UNIX_PATH_LEN))) {
|
|
cmpResult = true;
|
|
}
|
|
break;
|
|
#endif
|
|
|
|
case AF_INET:
|
|
ipv4addr = (struct sockaddr_in*)sock_addr;
|
|
result = inet_net_ntop(AF_INET, &(ipv4addr->sin_addr), 32, IPstr, MAX_IP_STR_LEN - 1);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
portNumber = ntohs(ipv4addr->sin_port);
|
|
if ((((0 == strncmp(szIP, LOCAL_HOST, MAX_IP_STR_LEN)) &&
|
|
(0 == strncmp(IPstr, LOOP_IP_STRING, MAX_IP_STR_LEN))) ||
|
|
(0 == strncmp(IPstr, szIP, MAX_IP_STR_LEN))) &&
|
|
(port == portNumber)) {
|
|
cmpResult = true;
|
|
}
|
|
break;
|
|
|
|
case AF_INET6:
|
|
ipv6addr = (struct sockaddr_in6*)sock_addr;
|
|
result = inet_net_ntop(AF_INET6, &(ipv6addr->sin6_addr), 128, IPstr, MAX_IP_STR_LEN - 1);
|
|
if (NULL == result) {
|
|
ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
|
|
}
|
|
portNumber = ntohs(ipv6addr->sin6_port);
|
|
if ((((0 == strncmp(szIP, LOCAL_HOST, MAX_IP_STR_LEN)) &&
|
|
(0 == strncmp(IPstr, LOOP_IPV6_IP, MAX_IP_STR_LEN))) ||
|
|
(0 == strncmp(IPstr, szIP, MAX_IP_STR_LEN))) &&
|
|
(port == portNumber)) {
|
|
cmpResult = true;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
ereport(LOG, (errmsg("unkown socket address family")));
|
|
break;
|
|
}
|
|
|
|
return cmpResult;
|
|
}
|
|
|
|
/*
|
|
* According to createmode, create the listen socket
|
|
*/
|
|
void CreateServerSocket(
|
|
char* ipaddr, int portNumber, int enCreatemode, int* success, bool add_localaddr_flag, bool is_create_psql_sock)
|
|
{
|
|
int status = 0;
|
|
int successCount = 0;
|
|
|
|
Assert(ipaddr != NULL);
|
|
Assert(success != NULL);
|
|
|
|
successCount = *success;
|
|
|
|
/* if the createmode is NEED_CREATE_TCPIP or NEED_CREATE_UN_TCPIP and
|
|
the socket has not been created, then create it */
|
|
if (((unsigned int)enCreatemode & NEED_CREATE_TCPIP) && (!IsAlreadyListen(ipaddr, portNumber))) {
|
|
ereport(LOG, (errmsg("Create TCP/IP socket [%s:%d]", ipaddr, portNumber)));
|
|
|
|
if (strcmp(ipaddr, "*") == 0) {
|
|
status = StreamServerPort(AF_UNSPEC,
|
|
NULL,
|
|
(unsigned short)portNumber,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
add_localaddr_flag,
|
|
is_create_psql_sock,
|
|
false);
|
|
} else {
|
|
status = StreamServerPort(AF_UNSPEC,
|
|
ipaddr,
|
|
(unsigned short)portNumber,
|
|
g_instance.attr.attr_network.UnixSocketDir,
|
|
t_thrd.postmaster_cxt.ListenSocket,
|
|
MAXLISTEN,
|
|
add_localaddr_flag,
|
|
is_create_psql_sock,
|
|
false);
|
|
}
|
|
if (status == STATUS_OK) {
|
|
successCount++;
|
|
} else {
|
|
ereport(WARNING,
|
|
(errmsg("could not create listen socket for \"ipaddr:%s,portNumber:%d\"", ipaddr, portNumber)));
|
|
}
|
|
} else {
|
|
successCount++;
|
|
ereport(
|
|
LOG, (errmsg("TCP/IP socket [%s:%d] has been created already, no need recreate it", ipaddr, portNumber)));
|
|
}
|
|
|
|
*success = successCount;
|
|
}
|
|
|
|
/*
|
|
* parse ReplConnArray1 and ReplConnArray2 into ip-port pair and save it in pListenList
|
|
*/
|
|
bool ParseHaListenAddr(LISTEN_ADDRS* pListenList)
|
|
{
|
|
int count = 0, i = 0;
|
|
errno_t rc = 0;
|
|
|
|
Assert(NULL != pListenList);
|
|
if (NULL == pListenList) {
|
|
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("parameter error in ParseHaListenAddr()")));
|
|
return false;
|
|
}
|
|
count = pListenList->usedNum;
|
|
|
|
for (i = 1; i < MAX_REPLNODE_NUM; i++) {
|
|
if (t_thrd.postmaster_cxt.ReplConnArray[i] != NULL) {
|
|
pListenList->lsnArray[count].portnum = t_thrd.postmaster_cxt.ReplConnArray[i]->localport;
|
|
pListenList->lsnArray[count].createmodel = NEED_CREATE_TCPIP;
|
|
rc = strncpy_s(pListenList->lsnArray[count].ipaddr,
|
|
sizeof(pListenList->lsnArray[count].ipaddr),
|
|
t_thrd.postmaster_cxt.ReplConnArray[i]->localhost,
|
|
MAX_IPADDR_LEN - 1);
|
|
securec_check(rc, "\0", "\0");
|
|
pListenList->lsnArray[count].ipaddr[MAX_IPADDR_LEN - 1] = '\0';
|
|
count++;
|
|
}
|
|
}
|
|
pListenList->usedNum = count;
|
|
|
|
return ((count == 0) ? false : true);
|
|
}
|
|
|
|
/*
|
|
* according to ReplConnArray, close the socket not in ReplConnArray and create new socket
|
|
* if the socket in ReplConnArray is already created, just ignore and step over
|
|
*/
|
|
static void CreateHaListenSocket(void)
|
|
{
|
|
LISTEN_ADDRS newListenAddrs;
|
|
struct sockaddr_storage sock_addr;
|
|
int i = 0, j = 0, success = 0;
|
|
socklen_t s_len;
|
|
|
|
int ss_rc = memset_s(&newListenAddrs, sizeof(newListenAddrs), 0, sizeof(newListenAddrs));
|
|
securec_check(ss_rc, "\0", "\0");
|
|
|
|
/* parse the ReplConnArray into IP-Port pair */
|
|
if (!ParseHaListenAddr(&newListenAddrs)) {
|
|
ereport(WARNING, (errmsg("\"replconninfo\" has been set null or invalid ")));
|
|
}
|
|
|
|
/*
|
|
* if the IP-Port in newListenAddrs has not been created, then create the socket
|
|
* if already has been created, just set the createmodel and continue.
|
|
*/
|
|
for (i = 0; i < MAXLISTEN; i++) {
|
|
if (PGINVALID_SOCKET == t_thrd.postmaster_cxt.ListenSocket[i] ||
|
|
t_thrd.postmaster_cxt.listen_sock_type[i] != HA_LISTEN_SOCKET) {
|
|
continue;
|
|
}
|
|
|
|
s_len = sizeof(sock_addr);
|
|
if (getsockname(t_thrd.postmaster_cxt.ListenSocket[i], (struct sockaddr*)&sock_addr, (socklen_t*)&s_len) < 0) {
|
|
ereport(LOG, (errmsg("Error in getsockname int IsAlreadyListen()")));
|
|
}
|
|
success = 0;
|
|
|
|
/* if the socket in newListenAddrs is already created, just set the createmodel */
|
|
for (j = 0; j < newListenAddrs.usedNum; j++) {
|
|
if (CheckSockAddr((struct sockaddr*)&sock_addr,
|
|
newListenAddrs.lsnArray[j].ipaddr,
|
|
newListenAddrs.lsnArray[j].portnum)) {
|
|
if (AF_UNIX == ((struct sockaddr*)&sock_addr)->sa_family) {
|
|
if (newListenAddrs.lsnArray[j].portnum != g_instance.attr.attr_network.PoolerPort)
|
|
ereport(WARNING,
|
|
(errmsg("replication socket could not be Unix Domain Socket, "
|
|
"something must be wrong of the Ha listen socket")));
|
|
} else {
|
|
newListenAddrs.lsnArray[j].createmodel = (uint32)newListenAddrs.lsnArray[j].createmodel
|
|
& ~NEED_CREATE_TCPIP;
|
|
}
|
|
success++;
|
|
}
|
|
}
|
|
|
|
/* ha pooler port and ha pooler socket also belongs to HA_LISTEN_SOCKET, no need to close them */
|
|
if (!success && IsHASocketAddr((struct sockaddr*)&sock_addr)) {
|
|
success++;
|
|
}
|
|
|
|
/* if the socket is not match all the IP-Port pair in newListenAddrs, close it and set listen_sock_type */
|
|
if (!success) {
|
|
(void)closesocket(t_thrd.postmaster_cxt.ListenSocket[i]);
|
|
t_thrd.postmaster_cxt.ListenSocket[i] = PGINVALID_SOCKET;
|
|
t_thrd.postmaster_cxt.listen_sock_type[i] = UNUSED_LISTEN_SOCKET;
|
|
}
|
|
}
|
|
|
|
/* according to createmodel, create the socket of IP-Port in newListenAddrs */
|
|
success = 0;
|
|
for (i = 0; i < newListenAddrs.usedNum; i++) {
|
|
CreateServerSocket(newListenAddrs.lsnArray[i].ipaddr,
|
|
newListenAddrs.lsnArray[i].portnum,
|
|
(int)newListenAddrs.lsnArray[i].createmodel,
|
|
&success,
|
|
false,
|
|
false);
|
|
}
|
|
|
|
if (0 == success) {
|
|
ereport(LOG, (errmsg("could not create any TCP/IP sockets for HA listen addresses")));
|
|
} else {
|
|
ereport(LOG, (errmsg("number of valid HA TCP/IP sockets is \"%d\"", success)));
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* move all the non-default value at the head of the array
|
|
*/
|
|
static void IntArrayRegulation(int array[], int len, int def)
|
|
{
|
|
int i = 0, j = 0;
|
|
int* tmp = NULL;
|
|
if (NULL == array || len < 0) {
|
|
ereport(WARNING, (errmsg("The parameter error in array regulation")));
|
|
return;
|
|
}
|
|
|
|
tmp = (int*)palloc(sizeof(int) * len);
|
|
if (NULL == tmp) {
|
|
ereport(WARNING, (errmsg("Error palloc in array regulation")));
|
|
return;
|
|
}
|
|
for (i = 0; i != len; ++i) {
|
|
tmp[i] = array[i];
|
|
array[i] = def;
|
|
}
|
|
|
|
for (i = 0; i != len; ++i) {
|
|
if (tmp[i] != def) {
|
|
array[j++] = tmp[i];
|
|
}
|
|
}
|
|
pfree(tmp);
|
|
}
|
|
|
|
/*
|
|
* adjust the array ListenSocket and listen_sock_type, so that the readmask
|
|
* in ServerLoop can be available
|
|
*/
|
|
static void ListenSocketRegulation(void)
|
|
{
|
|
IntArrayRegulation((int*)t_thrd.postmaster_cxt.ListenSocket, MAXLISTEN, (int)PGINVALID_SOCKET);
|
|
|
|
IntArrayRegulation((int*)t_thrd.postmaster_cxt.listen_sock_type, MAXLISTEN, UNUSED_LISTEN_SOCKET);
|
|
}
|
|
|
|
/*
 * Derive the database state from the server mode and the WAL receiver.
 *
 * NORMAL_MODE / PRIMARY_MODE  -> NORMAL_STATE
 * PENDING_MODE / STANDBY_MODE -> NEEDREPAIR_STATE when a build reason is
 *                                pending, otherwise CATCHUP_STATE,
 *                                STARTING_STATE or NORMAL_STATE
 * anything else               -> UNKNOWN_STATE
 */
DbState get_local_dbstate_sub(WalRcvData* walrcv, ServerMode mode)
{
    /* no rebuild recorded for the current channel and the receiver runs against its expected source */
    const bool receiverInSync =
        t_thrd.postmaster_cxt.HaShmData->repl_reason[t_thrd.postmaster_cxt.HaShmData->current_repl] ==
            NONE_REBUILD &&
        walrcv != NULL && walrcv->isRuning && (walrcv->conn_target == REPCONNTARGET_PRIMARY || IsCascadeStandby());
    const bool has_build_reason = !(receiverInSync || dummyStandbyMode || IS_DISASTER_RECOVER_MODE);

    if (mode == NORMAL_MODE || mode == PRIMARY_MODE) {
        return NORMAL_STATE;
    }

    if (mode != PENDING_MODE && mode != STANDBY_MODE) {
        return UNKNOWN_STATE;
    }

    if (has_build_reason) {
        return NEEDREPAIR_STATE;
    }

    /*
     * use keeplive msg from walsender to check if db state is catchup or not.
     * Also use local info from walreceiver as a supplement.
     */
    if (wal_catchup || data_catchup) {
        return CATCHUP_STATE;
    }

    if (!WalRcvIsOnline() && !IS_DISASTER_RECOVER_MODE) {
        return STARTING_STATE;
    }

    return NORMAL_STATE;
}
|
|
|
|
/*
 * Report the local database state. Demotion, waiting and promotion are
 * checked first, in that order; everything else is derived by
 * get_local_dbstate_sub() from the current HA mode.
 */
DbState get_local_dbstate(void)
{
    volatile WalRcvData* walrcv = t_thrd.walreceiverfuncs_cxt.WalRcv;
    ServerMode mode = t_thrd.postmaster_cxt.HaShmData->current_mode;

    if (t_thrd.walsender_cxt.WalSndCtl && t_thrd.walsender_cxt.WalSndCtl->demotion > NoDemote) {
        return DEMOTING_STATE;
    }

    if (walrcv) {
        if (NODESTATE_STANDBY_WAITING == walrcv->node_state) {
            return WAITING_STATE;
        }
        if (NODESTATE_STANDBY_PROMOTING == walrcv->node_state ||
            NODESTATE_STANDBY_FAILOVER_PROMOTING == walrcv->node_state) {
            return PROMOTING_STATE;
        }
    }

    return get_local_dbstate_sub((WalRcvData*)walrcv, mode);
}
|
|
|
|
/*
 * Map a DbState to its display string. UNKNOWN_STATE and every value
 * without a dedicated name are rendered as "Unknown".
 */
const char* wal_get_db_state_string(DbState db_state)
{
    const char* text = "Unknown";

    switch (db_state) {
        case NORMAL_STATE:
            text = "Normal";
            break;
        case NEEDREPAIR_STATE:
            text = "Need repair";
            break;
        case STARTING_STATE:
            text = "Starting";
            break;
        case WAITING_STATE:
            text = "Wait promoting";
            break;
        case DEMOTING_STATE:
            text = "Demoting";
            break;
        case PROMOTING_STATE:
            text = "Promoting";
            break;
        case BUILDING_STATE:
            text = "Building";
            break;
        case CATCHUP_STATE:
            text = "Catchup";
            break;
        case COREDUMP_STATE:
            text = "Coredump";
            break;
        case UNKNOWN_STATE:
        default:
            break;
    }

    return text;
}
|
|
|
|
/* Current server mode as recorded in the HA shared-memory structure. */
static ServerMode get_cur_mode(void)
{
    const ServerMode recordedMode = t_thrd.postmaster_cxt.HaShmData->current_mode;

    return recordedMode;
}
|
|
|
|
static int get_cur_repl_num(void)
|
|
{
|
|
return t_thrd.postmaster_cxt.HaShmData->repl_list_num;
|
|
}
|
|
|
|
/*
 * Load the raw GaussState record from the gaussdb state file into *state.
 *
 * Failures (NULL argument, file cannot be opened, nothing could be read)
 * are only logged; in those cases *state is left as the caller passed it.
 */
static void PMReadDBStateFile(GaussState* state)
{
    if (state == NULL) {
        ereport(LOG, (errmsg("%s: parameter state is null in PMReadDBStateFile()", progname)));
        return;
    }

    FILE* stateFile = fopen(gaussdb_state_file, "r");
    if (stateFile == NULL) {
        ereport(LOG,
            (errmsg("%s: open gaussdb state file \"%s\" failed, could "
                    "not read the build infomation: %m",
                progname,
                gaussdb_state_file)));
        return;
    }

    const size_t bytesRead = fread(state, 1, sizeof(GaussState), stateFile);
    if (bytesRead == 0) {
        ereport(LOG,
            (errmsg(
                "%s: read gaussdb state infomation from the file \"%s\" failed: %m", progname, gaussdb_state_file)));
    }

    fclose(stateFile);
}
|
|
|
|
/*
|
|
* update the hashmemdata infomation in gaussdb state file
|
|
*/
|
|
static void PMSetDBStateFile(GaussState* state)
|
|
{
|
|
FILE* statef = NULL;
|
|
char temppath[MAXPGPATH] = {0};
|
|
|
|
/*
|
|
* skip update gaussdb state file in bootstrap and dummy mode.
|
|
*/
|
|
if (IsInitdb || dummyStandbyMode)
|
|
return;
|
|
|
|
Assert(t_thrd.proc_cxt.MyProcPid == PostmasterPid);
|
|
|
|
if (NULL == state) {
|
|
ereport(LOG, (errmsg("%s: parameter state is null in PMSetDBStateFile()", progname)));
|
|
return;
|
|
}
|
|
|
|
int rc = snprintf_s(temppath, MAXPGPATH, MAXPGPATH - 1, "%s.temp", gaussdb_state_file);
|
|
securec_check_intval(rc, , );
|
|
|
|
statef = fopen(temppath, "w");
|
|
if (NULL == statef) {
|
|
ereport(LOG,
|
|
(errmsg("%s: open gaussdb state file \"%s\" failed, could not "
|
|
"update the build infomation: %m",
|
|
progname,
|
|
temppath)));
|
|
return;
|
|
}
|
|
if (0 == (fwrite(state, 1, sizeof(GaussState), statef))) {
|
|
ereport(
|
|
LOG, (errmsg("%s: update gaussdb state infomation from the file \"%s\" failed: %m", progname, temppath)));
|
|
}
|
|
|
|
if (fsync(fileno(statef)) == 0) {
|
|
ereport(LOG, (errmsg("%s: fsync file \"%s\" success", progname, temppath)));
|
|
} else {
|
|
ereport(LOG, (errmsg("%s: fsync file \"%s\" fail", progname, temppath)));
|
|
}
|
|
fclose(statef);
|
|
|
|
if (rename(temppath, gaussdb_state_file) != 0)
|
|
ereport(LOG, (errmsg("can't rename \"%s\" to \"%s\": %m", temppath, gaussdb_state_file)));
|
|
}
|
|
|
|
/*
|
|
* according to input parameters, update the gaussdb state file
|
|
*/
|
|
static void PMUpdateDBState(DbState db_state, ServerMode mode, int conn_num)
|
|
{
|
|
GaussState state;
|
|
|
|
PMReadDBStateFile(&state);
|
|
state.state = db_state;
|
|
if (mode == STANDBY_MODE && t_thrd.postmaster_cxt.HaShmData->is_cascade_standby) {
|
|
state.mode = CASCADE_STANDBY_MODE;
|
|
} else {
|
|
state.mode = mode;
|
|
}
|
|
state.conn_num = conn_num;
|
|
PMSetDBStateFile(&state);
|
|
}
|
|
|
|
static void PMUpdateDBStateLSN(void)
|
|
{
|
|
GaussState state;
|
|
XLogRecPtr recptr = GetXLogReplayRecPtrInPending();
|
|
|
|
PMReadDBStateFile(&state);
|
|
state.lsn = recptr;
|
|
state.term = Max(g_instance.comm_cxt.localinfo_cxt.term_from_file,
|
|
g_instance.comm_cxt.localinfo_cxt.term_from_xlog);
|
|
PMSetDBStateFile(&state);
|
|
}
|
|
|
|
/*
|
|
* Update ha build reason to gaussdb.state file, if gs_ctl can not query
|
|
* Ha status(e.g: recovery thread hold a system table lock), we can read
|
|
* the gaussdb.state file to get it.
|
|
*/
|
|
static void PMUpdateDBStateHaRebuildReason(void)
|
|
{
|
|
GaussState state;
|
|
volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
HaRebuildReason reason = NONE_REBUILD;
|
|
|
|
SpinLockAcquire(&hashmdata->mutex);
|
|
reason = hashmdata->repl_reason[hashmdata->current_repl];
|
|
SpinLockRelease(&hashmdata->mutex);
|
|
|
|
PMReadDBStateFile(&state);
|
|
state.ha_rebuild_reason = reason;
|
|
if (reason != NONE_REBUILD) {
|
|
state.state = NEEDREPAIR_STATE;
|
|
} else {
|
|
state.state = NORMAL_STATE;
|
|
}
|
|
PMSetDBStateFile(&state);
|
|
ereport(LOG,
|
|
(errmsg("update gaussdb state file: build reason(%s), "
|
|
"db state(%s), server mode(%s)",
|
|
wal_get_rebuild_reason_string(reason),
|
|
wal_get_db_state_string(state.state),
|
|
wal_get_role_string(get_cur_mode()))));
|
|
}
|
|
|
|
static int init_stream_comm()
|
|
{
|
|
int error, rc;
|
|
char local_ip[IP_LEN] = {0};
|
|
|
|
char* rawstring = NULL;
|
|
List* elemlist = NULL;
|
|
ListCell* l = NULL;
|
|
MemoryContext oldctx = NULL;
|
|
|
|
g_instance.comm_cxt.comm_global_mem_cxt = AllocSetContextCreate(g_instance.instance_context,
|
|
"CommunnicatorGlobalMemoryContext",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE,
|
|
SHARED_CONTEXT);
|
|
|
|
oldctx = MemoryContextSwitchTo(g_instance.comm_cxt.comm_global_mem_cxt);
|
|
|
|
/* Need a modifiable copy of local_bind_address */
|
|
rawstring = pstrdup(tcp_link_addr);
|
|
|
|
/* Parse string into list of identifiers */
|
|
if (!SplitIdentifierString(rawstring, ',', &elemlist)) {
|
|
/* syntax error in list */
|
|
ereport(
|
|
FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid list syntax for \"listen_addresses\"")));
|
|
}
|
|
|
|
foreach (l, elemlist) {
|
|
char* curhost = (char*)lfirst(l);
|
|
|
|
if (strcmp(curhost, "*") != 0 && strcmp(curhost, "localhost") != 0 && strcmp(curhost, "0.0.0.0") != 0) {
|
|
/* Just use the first ip addr,no support multi-ip now! */
|
|
rc = strncpy_s(local_ip, IP_LEN, curhost, strlen(curhost));
|
|
securec_check(rc, "", "");
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (*local_ip == '\0') {
|
|
errno_t ss_rc = strncpy_s(local_ip, IP_LEN, "127.0.0.1", IP_LEN - 1);
|
|
securec_check(ss_rc, "", "");
|
|
}
|
|
|
|
list_free(elemlist);
|
|
pfree(rawstring);
|
|
|
|
gs_init_hash_table();
|
|
gs_set_hs_shm_data(t_thrd.postmaster_cxt.HaShmData);
|
|
gs_connect_regist_callback(&StreamConsumer::wakeUpConsumerCallBack);
|
|
|
|
error = gs_set_basic_info(local_ip,
|
|
g_instance.attr.attr_common.PGXCNodeName,
|
|
u_sess->attr.attr_network.comm_max_datanode + g_instance.attr.attr_network.MaxCoords,
|
|
t_thrd.libpq_cxt.sock_path);
|
|
|
|
if (error != 0) {
|
|
ereport(FATAL, (errmsg("set basic info of stream failed!")));
|
|
}
|
|
|
|
MemoryContextSwitchTo(oldctx);
|
|
|
|
return error;
|
|
}
|
|
|
|
// pg_logging_comm_status
|
|
// Logging status of internal structures of stream communication layer.
|
|
//
|
|
// This function is here so we don't have to export the
|
|
// GlobalTransactionData struct definition.
|
|
//
|
|
Datum pg_log_comm_status(PG_FUNCTION_ARGS)
|
|
{
|
|
// If this is a datanode and in stream communication mode, do log status
|
|
//
|
|
if (IS_PGXC_DATANODE)
|
|
gs_log_comm_status();
|
|
PG_RETURN_DATUM(0);
|
|
}
|
|
|
|
/*
|
|
* Finish inplace or online upgrade and enable new features of the new gaussdb
|
|
* binary by switching the grand version num to the newer version num.
|
|
*
|
|
* Before calling this function, all system catalog changes and other upgrade
|
|
* procedures should have been completed. After calling this function, the upgrade
|
|
* process are nominally succeeded and usually can not be rollbacked.
|
|
*
|
|
* In effect, this function only changes the grand version of postmaster thread.
|
|
* Any newly started backend will inherit this version. However, for pre-existing
|
|
* backends, including those newly started backends caused by pre-existing backends,
|
|
* e.g. newly started DN backends to process queries or planes passed down by
|
|
* pre-exisitng CN backends, their backend versions are not changed and they still
|
|
* adopt pre-upgrade behavior of the old-version gaussdb binary.
|
|
*/
|
|
Datum set_working_grand_version_num_manually(PG_FUNCTION_ARGS)
|
|
{
|
|
uint32 tmp_version = 0;
|
|
|
|
if (u_sess->upg_cxt.InplaceUpgradeSwitch)
|
|
tmp_version = PG_GETARG_UINT32(0);
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OPERATION),
|
|
errmsg("could not set WorkingGrandVersionNum manually while not performing upgrade")));
|
|
|
|
if (!tmp_version)
|
|
tmp_version = GRAND_VERSION_NUM;
|
|
|
|
pg_atomic_write_u32(&WorkingGrandVersionNum, tmp_version);
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* @Description: check the value from environment variablethe to prevent command injection.
|
|
* @in input_env_value : the input value need be checked.
|
|
*/
|
|
bool backend_env_valid(const char* input_env_value, const char* stamp)
|
|
{
|
|
#define MAXENVLEN 1024
|
|
|
|
const char* danger_character_list[] = {";", "`", "\\", "'", "\"", ">", "<", "$", "&", "|", "!", "\n", "../", NULL};
|
|
int i = 0;
|
|
|
|
if (input_env_value == NULL || strlen(input_env_value) >= MAXENVLEN) {
|
|
ereport(LOG, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("environment variable $%s is NULL or size is out of %d\n",
|
|
stamp, MAXENVLEN)));
|
|
return false;
|
|
}
|
|
|
|
for (i = 0; danger_character_list[i] != NULL; i++) {
|
|
if (strstr((const char*)input_env_value, danger_character_list[i])) {
|
|
ereport(LOG, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("Error: environment variable $%s(%s) contain invaild symbol \"%s\".\n",
|
|
stamp, input_env_value, danger_character_list[i])));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* @Description: check the value from environment variablethe to prevent command injection.
|
|
* @in input_env_value : the input value need be checked.
|
|
*/
|
|
void check_backend_env(const char* input_env_value)
|
|
{
|
|
#define MAXENVLEN 1024
|
|
|
|
const char* danger_character_list[] = {";", "`", "\\", "'", "\"", ">", "<", "$", "&", "|", "!", "\n", NULL};
|
|
int i = 0;
|
|
|
|
if (input_env_value == NULL || strlen(input_env_value) >= MAXENVLEN) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("wrong environment variable \"%s\"", input_env_value)));
|
|
}
|
|
|
|
for (i = 0; danger_character_list[i] != NULL; i++) {
|
|
if (strstr((const char*)input_env_value, danger_character_list[i])) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("Error: environment variable \"%s\" contain invaild symbol \"%s\".\n",
|
|
input_env_value,
|
|
danger_character_list[i])));
|
|
}
|
|
}
|
|
}
|
|
|
|
void CleanSystemCaches(bool is_in_read_command)
|
|
{
|
|
int64 usedSize = 0;
|
|
|
|
usedSize = ((AllocSet)u_sess->cache_mem_cxt)->totalSpace - ((AllocSet)u_sess->cache_mem_cxt)->freeSpace;
|
|
|
|
/* Over threshold, need to clean cache. */
|
|
if (usedSize > g_instance.attr.attr_memory.local_syscache_threshold*1024) {
|
|
ereport(DEBUG1,
|
|
(errmsg("CleanSystemCaches due to "
|
|
"SystemCache(%ld) greater than (%d),in_read_command(%d).",
|
|
usedSize,
|
|
g_instance.attr.attr_memory.local_syscache_threshold*1024,
|
|
(is_in_read_command ? 1 : 0))));
|
|
|
|
if (IsTransactionOrTransactionBlock() || !is_in_read_command) {
|
|
InvalidateSystemCaches();
|
|
}
|
|
}
|
|
}
|
|
|
|
static void check_and_reset_ha_listen_port(void)
|
|
{
|
|
int j;
|
|
|
|
/*
|
|
* when Ha replconninfo have changed and current_mode is not NORMAL,
|
|
* dynamically modify the ha socket.
|
|
*/
|
|
for (j = 1; j < MAX_REPLNODE_NUM; j++) {
|
|
if (t_thrd.postmaster_cxt.ReplConnChanged[j])
|
|
break;
|
|
}
|
|
|
|
if (j < MAX_REPLNODE_NUM) {
|
|
int i, repl_list_num = 0;
|
|
|
|
CreateHaListenSocket();
|
|
|
|
repl_list_num = 0;
|
|
for (i = 1; i < MAX_REPLNODE_NUM; i++) {
|
|
t_thrd.postmaster_cxt.ReplConnChanged[i] = false;
|
|
if (t_thrd.postmaster_cxt.ReplConnArray[i] != NULL)
|
|
repl_list_num++;
|
|
}
|
|
|
|
/* send SIGTERM to end process senders and receiver */
|
|
t_thrd.postmaster_cxt.HaShmData->repl_list_num = repl_list_num;
|
|
|
|
(void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_WALSND);
|
|
(void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_DATASND);
|
|
if (g_instance.pid_cxt.WalRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalRcvWriterPID, SIGTERM);
|
|
if (g_instance.pid_cxt.WalReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.WalReceiverPID, SIGTERM);
|
|
if (g_instance.pid_cxt.DataRcvWriterPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataRcvWriterPID, SIGTERM);
|
|
if (g_instance.pid_cxt.DataReceiverPID != 0)
|
|
signal_child(g_instance.pid_cxt.DataReceiverPID, SIGTERM);
|
|
if (g_instance.pid_cxt.RemoteServicePID != 0)
|
|
signal_child(g_instance.pid_cxt.RemoteServicePID, SIGTERM);
|
|
|
|
ListenSocketRegulation();
|
|
}
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
if (t_thrd.postmaster_cxt.HaShmData != NULL &&
|
|
t_thrd.postmaster_cxt.HaShmData->repl_list_num == 0 &&
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode == PRIMARY_MODE) {
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode = NORMAL_MODE;
|
|
SetServerMode(NORMAL_MODE);
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
bool IsCBMWriterRunning(void)
|
|
{
|
|
if (g_instance.pid_cxt.CBMWriterPID != 0)
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
bool PMstateIsRun(void)
|
|
{
|
|
return PM_RUN == pmState;
|
|
}
|
|
|
|
/* malloc api of cJSON at backend side */
|
|
static void* cJSON_internal_malloc(size_t size)
|
|
{
|
|
return palloc(size);
|
|
}
|
|
|
|
/* free api of cJSON at backend side */
|
|
void cJSON_internal_free(void* pointer)
|
|
{
|
|
pfree(pointer);
|
|
}
|
|
|
|
/*
|
|
* Begin shutdown of an auxiliary process. This is approximately the equivalent
|
|
* of ShutdownPostgres() in postinit.c. We can't run transactions in an
|
|
* auxiliary process, so most of the work of AbortTransaction() is not needed,
|
|
* but we do need to make sure we've released any LWLocks we are holding.
|
|
* (This is only critical during an error exit.)
|
|
*/
|
|
static void ShutdownAuxiliaryProcess(int code, Datum arg)
|
|
{
|
|
LWLockReleaseAll();
|
|
/* Clear wait information */
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
}
|
|
|
|
template <knl_thread_role thread_role>
|
|
static void SetAuxType()
|
|
{
|
|
switch (thread_role) {
|
|
case PAGEREDO:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = PageRedoProcess;
|
|
break;
|
|
case TWOPASECLEANER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = TwoPhaseCleanerProcess;
|
|
break;
|
|
case FAULTMONITOR:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = FaultMonitorProcess;
|
|
break;
|
|
case BGWRITER:
|
|
if (g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = MultiBgWriterProcess;
|
|
} else {
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = BgWriterProcess;
|
|
}
|
|
break;
|
|
case CHECKPOINT_THREAD:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = CheckpointerProcess;
|
|
break;
|
|
case WALWRITER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = WalWriterProcess;
|
|
break;
|
|
case WALWRITERAUXILIARY:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = WalWriterAuxiliaryProcess;
|
|
break;
|
|
case WALRECEIVER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = WalReceiverProcess;
|
|
break;
|
|
case WALRECWRITE:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = WalRcvWriterProcess;
|
|
break;
|
|
case DATARECIVER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = DataReceiverProcess;
|
|
break;
|
|
case DATARECWRITER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = DataRcvWriterProcess;
|
|
break;
|
|
case CBMWRITER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = CBMWriterProcess;
|
|
break;
|
|
case RPC_SERVICE:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = RemoteServiceProcess;
|
|
break;
|
|
case STARTUP:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = StartupProcess;
|
|
break;
|
|
case PAGEWRITER_THREAD:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = PageWriterProcess;
|
|
break;
|
|
case THREADPOOL_LISTENER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = TpoolListenerProcess;
|
|
break;
|
|
case THREADPOOL_SCHEDULER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = TpoolSchdulerProcess;
|
|
break;
|
|
case HEARTBEAT:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = HeartbeatProcess;
|
|
break;
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
case TS_COMPACTION:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = TsCompactionProcess;
|
|
break;
|
|
case TS_COMPACTION_CONSUMER:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = TsCompactionConsumerProcess;
|
|
break;
|
|
case TS_COMPACTION_AUXILIAY:
|
|
t_thrd.bootstrap_cxt.MyAuxProcType = TsCompactionAuxiliaryProcess;
|
|
break;
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
default:
|
|
ereport(ERROR, (errmsg("unrecorgnized proc type %d", thread_role)));
|
|
}
|
|
}
|
|
|
|
template <knl_thread_role role>
|
|
void SetExtraThreadInfo(knl_thread_arg* arg)
|
|
{
|
|
if (arg->payload == NULL)
|
|
return;
|
|
|
|
switch (role) {
|
|
case THREADPOOL_WORKER: {
|
|
t_thrd.threadpool_cxt.worker = (ThreadPoolWorker*)arg->payload;
|
|
t_thrd.threadpool_cxt.group = t_thrd.threadpool_cxt.worker->GetGroup();
|
|
break;
|
|
}
|
|
case STREAM_WORKER: {
|
|
StreamProducer* proObj = (StreamProducer*)arg->payload;
|
|
SetStreamWorkerInfo(proObj);
|
|
break;
|
|
}
|
|
case THREADPOOL_STREAM: {
|
|
t_thrd.threadpool_cxt.stream = (ThreadPoolStream*)arg->payload;
|
|
t_thrd.threadpool_cxt.group = t_thrd.threadpool_cxt.stream->GetGroup();
|
|
StreamProducer* proObj = (StreamProducer*)t_thrd.threadpool_cxt.stream->GetProducer();
|
|
SetStreamWorkerInfo(proObj);
|
|
break;
|
|
}
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
case TS_COMPACTION_CONSUMER: {
|
|
CompactionWorkerProcess::SetConsumerThreadLocal(arg);
|
|
break;
|
|
}
|
|
#endif
|
|
case THREADPOOL_LISTENER: {
|
|
t_thrd.threadpool_cxt.listener = (ThreadPoolListener*)arg->payload;
|
|
break;
|
|
}
|
|
case THREADPOOL_SCHEDULER: {
|
|
t_thrd.threadpool_cxt.scheduler = (ThreadPoolScheduler*)arg->payload;
|
|
break;
|
|
}
|
|
case PAGEREDO: {
|
|
SetMyPageRedoWorker(arg);
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
void InitProcessAndShareMemory()
|
|
{
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
|
|
InitProcess();
|
|
|
|
CHECK_FOR_PROCDIEPENDING();
|
|
|
|
/*
|
|
* Attach process to shared data structures. If testing EXEC_BACKEND
|
|
* on Linux, you must run this as root before starting the postmaster:
|
|
*
|
|
* echo 0 >/proc/sys/kernel/randomize_va_space
|
|
*
|
|
* This prevents a randomized stack base address that causes child
|
|
* shared memory to be at a different address than the parent, making
|
|
* it impossible to attached to shared memory. Return the value to
|
|
* '1' when finished.
|
|
*/
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
}
|
|
|
|
template <knl_thread_role thread_role>
|
|
int GaussDbAuxiliaryThreadMain(knl_thread_arg* arg)
|
|
{
|
|
t_thrd.role = arg->role;
|
|
Assert(thread_role == t_thrd.role);
|
|
Assert(thread_role == arg->role);
|
|
|
|
/*
|
|
* initialize globals
|
|
*/
|
|
t_thrd.proc_cxt.MyProcPid = gs_thread_self();
|
|
t_thrd.proc_cxt.MyStartTime = time(NULL);
|
|
/*
|
|
* Initialize random() for the first time, like PostmasterMain() would.
|
|
* In a regular IsUnderPostmaster backend, BackendRun() computes a
|
|
* high-entropy seed before any user query. Fewer distinct initial seeds
|
|
* can occur here.
|
|
*/
|
|
srandom((unsigned int)(t_thrd.proc_cxt.MyProcPid ^ (unsigned int)t_thrd.proc_cxt.MyStartTime));
|
|
t_thrd.proc_cxt.MyProgName = "Auxiliary";
|
|
|
|
/* register thread information to PGPROC structure */
|
|
init_ps_display("Auxiliary", "", "", "");
|
|
|
|
/* Validate we have been given a reasonable-looking t_thrd.proc_cxt.DataDir */
|
|
Assert(t_thrd.proc_cxt.DataDir);
|
|
ValidatePgVersion(t_thrd.proc_cxt.DataDir);
|
|
|
|
SetProcessingMode(BootstrapProcessing);
|
|
u_sess->attr.attr_common.IgnoreSystemIndexes = true;
|
|
BaseInit();
|
|
|
|
/*
|
|
* When we are an auxiliary process, we aren't going to do the full
|
|
* InitPostgres pushups, but there are a couple of things that need to get
|
|
* lit up even in an auxiliary process.
|
|
*/
|
|
if (IsUnderPostmaster) {
|
|
/*
|
|
* Create a PGPROC so we can use LWLocks. In the EXEC_BACKEND case,
|
|
* this was already done by SubPostmasterMain().
|
|
*/
|
|
#ifndef EXEC_BACKEND
|
|
InitAuxiliaryProcess();
|
|
#endif
|
|
|
|
/*
|
|
* Assign the ProcSignalSlot for an auxiliary process. Since it
|
|
* doesn't have a BackendId, the slot is statically allocated based on
|
|
* the auxiliary process type (MyAuxProcType). Backends use slots
|
|
* indexed in the range from 1 to g_instance.shmem_cxt.MaxBackends (inclusive), so we use
|
|
* g_instance.shmem_cxt.MaxBackends + 1 as the base index of the slot for an
|
|
* auxiliary process.
|
|
*/
|
|
int index = GetAuxProcEntryIndex(g_instance.shmem_cxt.MaxBackends + 1);
|
|
|
|
ProcSignalInit(index);
|
|
|
|
/* finish setting up bufmgr.c */
|
|
InitBufferPoolBackend();
|
|
|
|
/* register a shutdown callback for LWLock cleanup */
|
|
on_shmem_exit(ShutdownAuxiliaryProcess, 0);
|
|
}
|
|
|
|
pgstat_initialize();
|
|
pgstat_bestart();
|
|
/*
|
|
* XLOG operations
|
|
*/
|
|
SetProcessingMode(NormalProcessing);
|
|
|
|
switch (thread_role) {
|
|
case TWOPASECLEANER:
|
|
TwoPhaseCleanerMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case FAULTMONITOR:
|
|
FaultMonitorMain();
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case STARTUP:
|
|
/* don't set signals, startup process has its own agenda */
|
|
StartupProcessMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case PAGEREDO:
|
|
/* don't set signals, pageredo process has its own agenda */
|
|
MultiRedoMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case BGWRITER:
|
|
if (g_instance.attr.attr_storage.enableIncrementalCheckpoint) {
|
|
incre_ckpt_background_writer_main();
|
|
proc_exit(1);
|
|
break;
|
|
} else {
|
|
/* don't set signals, bgwriter has its own agenda */
|
|
BackgroundWriterMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
}
|
|
|
|
case CHECKPOINT_THREAD:
|
|
/* don't set signals, checkpointer has its own agenda */
|
|
CheckpointerMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case WALWRITER:
|
|
/* don't set signals, walwriter has its own agenda */
|
|
InitXLOGAccess();
|
|
WalWriterMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case WALWRITERAUXILIARY:
|
|
/* don't set signals, walwriterauxiliary has its own agenda */
|
|
WalWriterAuxiliaryMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case WALRECEIVER:
|
|
/* don't set signals, walreceiver has its own agenda */
|
|
WalReceiverMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case WALRECWRITE:
|
|
/* don't set signals, walrcvwriter has its own agenda */
|
|
walrcvWriterMain(); /* should never return */
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case DATARECIVER:
|
|
/* don't set signals, datareceiver has its own agenda */
|
|
DataReceiverMain(); /* should never return */
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case DATARECWRITER:
|
|
/* don't set signals, datarcvwriter has its own agenda */
|
|
DataRcvWriterMain(); /* should never return */
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case CBMWRITER:
|
|
/* don't set signals, cbmwriter has its own agenda */
|
|
CBMWriterMain(); /* should never return */
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case RPC_SERVICE:
|
|
RemoteServiceMain();
|
|
proc_exit(1); /* should never return */
|
|
break;
|
|
|
|
case PAGEWRITER_THREAD:
|
|
ckpt_pagewriter_main();
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case THREADPOOL_LISTENER:
|
|
TpoolListenerMain(t_thrd.threadpool_cxt.listener);
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case THREADPOOL_SCHEDULER:
|
|
TpoolSchedulerMain(t_thrd.threadpool_cxt.scheduler);
|
|
proc_exit(1);
|
|
break;
|
|
|
|
case HEARTBEAT:
|
|
heartbeat_main();
|
|
proc_exit(1);
|
|
break;
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
case TS_COMPACTION:
|
|
CompactionProcess::compaction_main();
|
|
proc_exit(1);
|
|
break;
|
|
case TS_COMPACTION_CONSUMER:
|
|
CompactionWorkerProcess::compaction_consumer_main();
|
|
proc_exit(1);
|
|
break;
|
|
case TS_COMPACTION_AUXILIAY:
|
|
CompactionAuxiliaryProcess::compaction_auxiliary_main();
|
|
proc_exit(1);
|
|
break;
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
default:
|
|
ereport(PANIC, (errmsg("unrecognized process type: %d", (int)t_thrd.bootstrap_cxt.MyAuxProcType)));
|
|
proc_exit(1);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void is_memory_backend_reserved(const knl_thread_arg* arg)
|
|
{
|
|
if (arg->role == WORKER) {
|
|
Port port = ((BackendParameters*)(arg->save_para))->port;
|
|
if (processMemInChunks >= maxChunksPerProcess * 0.8 &&
|
|
IsHAPort(&port)) {
|
|
t_thrd.utils_cxt.backend_reserved = true;
|
|
} else {
|
|
t_thrd.utils_cxt.backend_reserved = false;
|
|
}
|
|
return;
|
|
}
|
|
|
|
switch (arg->role) {
|
|
case WALWRITER:
|
|
case WALRECEIVER:
|
|
case WALRECWRITE:
|
|
case DATARECIVER:
|
|
case DATARECWRITER:
|
|
t_thrd.utils_cxt.backend_reserved = true;
|
|
break;
|
|
default:
|
|
t_thrd.utils_cxt.backend_reserved = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
template <knl_thread_role thread_role>
|
|
int GaussDbThreadMain(knl_thread_arg* arg)
|
|
{
|
|
Port port;
|
|
char* p_name_thread = NULL;
|
|
/* Do this sooner rather than later... */
|
|
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
|
|
Assert(thread_role == arg->role);
|
|
/* Check this thread will use reserved memory or not */
|
|
is_memory_backend_reserved(arg);
|
|
/* Initialize the Memory Protection at the thread level */
|
|
gs_memprot_thread_init();
|
|
MemoryContextInit();
|
|
knl_thread_init(thread_role);
|
|
|
|
MemoryContextSwitchTo(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT));
|
|
t_thrd.fake_session = create_session_context(t_thrd.top_mem_cxt, 0);
|
|
t_thrd.fake_session->status = KNL_SESS_FAKE;
|
|
u_sess = t_thrd.fake_session;
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
/* tsdb compaction need to switch database, so control the session lifecycle */
|
|
if (thread_role == TS_COMPACTION || thread_role == TS_COMPACTION_CONSUMER
|
|
|| thread_role == TS_COMPACTION_AUXILIAY) {
|
|
u_sess = SessionControl::create_compaction_session_context(t_thrd.top_mem_cxt);
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
t_thrd.proc_cxt.MyProcPid = gs_thread_self(); /* reset t_thrd.proc_cxt.MyProcPid */
|
|
t_thrd.proc_cxt.MyStartTime = time(NULL);
|
|
t_thrd.role = arg->role;
|
|
|
|
(void)ShowThreadName(GetThreadName(arg->role));
|
|
init_ps_display(GetThreadName(arg->role), "", "", "");
|
|
|
|
SetExtraThreadInfo<thread_role>(arg);
|
|
|
|
/* Lose the postmaster's on-exit routines (really a no-op) */
|
|
on_exit_reset();
|
|
|
|
/* In EXEC_BACKEND case we will not have inherited these settings */
|
|
IsPostmasterEnvironment = true;
|
|
t_thrd.postgres_cxt.whereToSendOutput = DestNone;
|
|
|
|
init_plog_global_mem();
|
|
|
|
SelfMemoryContext = THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT);
|
|
|
|
/* create timer with thread safe */
|
|
if (gs_signal_createtimer() < 0) {
|
|
ereport(FATAL, (errmsg("create timer fail at thread : %lu", t_thrd.proc_cxt.MyProcPid)));
|
|
}
|
|
|
|
InitializeGUCOptions();
|
|
/*
|
|
* Set reference point for stack-depth checking
|
|
*/
|
|
set_stack_base();
|
|
|
|
gs_signal_block_sigusr2();
|
|
gs_signal_startup_siginfo(p_name_thread);
|
|
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
|
|
|
|
PortInitialize(&port, arg);
|
|
|
|
t_thrd.bn = GetBackend(t_thrd.proc_cxt.MyPMChildSlot);
|
|
|
|
/* We don't need read GUC variables */
|
|
if (!FencedUDFMasterMode) {
|
|
/* Read in remaining GUC variables */
|
|
read_nondefault_variables();
|
|
}
|
|
|
|
if ((thread_role != WORKER && thread_role != THREADPOOL_WORKER && thread_role != STREAM_WORKER) &&
|
|
u_sess->attr.attr_resource.use_workload_manager && g_instance.attr.attr_resource.enable_backend_control &&
|
|
g_instance.wlm_cxt->gscgroup_init_done) {
|
|
if (thread_role == AUTOVACUUM_WORKER)
|
|
(void)gscgroup_attach_backend_task(GSCGROUP_VACUUM, false);
|
|
else
|
|
(void)gscgroup_attach_backend_task(GSCGROUP_DEFAULT_BACKEND, false);
|
|
}
|
|
|
|
t_thrd.wlm_cxt.query_resource_track_mcxt = AllocSetContextCreate(t_thrd.top_mem_cxt,
|
|
"QueryResourceTrackContext",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
|
|
|
/*
|
|
* Reload any libraries that were preloaded by the postmaster. Since we
|
|
* exec'd this process, those libraries didn't come along with us; but we
|
|
* should load them into all child processes to be consistent with the
|
|
* non-EXEC_BACKEND behavior.
|
|
*/
|
|
process_shared_preload_libraries();
|
|
|
|
switch (thread_role) {
|
|
case STREAM_WORKER:
|
|
case THREADPOOL_STREAM: {
|
|
/* restore child slot */
|
|
if (thread_role == STREAM_WORKER) {
|
|
t_thrd.proc_cxt.MyPMChildSlot = u_sess->stream_cxt.producer_obj->getChildSlot();
|
|
} else {
|
|
t_thrd.proc_cxt.MyPMChildSlot = t_thrd.threadpool_cxt.stream->GetProducer()->getChildSlot();
|
|
}
|
|
|
|
InitProcessAndShareMemory();
|
|
|
|
/* And run the backend */
|
|
proc_exit(StreamMain());
|
|
} break;
|
|
case WORKER:
|
|
CheckClientIp(&port); /* For THREADPOOL_WORKER check in InitPort */
|
|
/* fall through */
|
|
case THREADPOOL_WORKER: {
|
|
/* Module load callback */
|
|
pgaudit_agent_init();
|
|
auto_explain_init();
|
|
|
|
/* unique sql hooks */
|
|
instr_unique_sql_register_hook();
|
|
|
|
/* hypopg index hooks */
|
|
hypopg_register_hook();
|
|
|
|
/*
|
|
* Perform additional initialization and collect startup packet.
|
|
*
|
|
* We want to do this before InitProcess() for a couple of reasons: 1.
|
|
* so that we aren't eating up a PGPROC slot while waiting on the
|
|
* client. 2. so that if InitProcess() fails due to being out of
|
|
* PGPROC slots, we have already initialized libpq and are able to
|
|
* report the error to the client.
|
|
*/
|
|
BackendInitialize(&port);
|
|
|
|
InitProcessAndShareMemory();
|
|
|
|
/* CN has sent a stop query request. For non-stream node,
|
|
* we can send sigusr1 to tell the backend thread. sigusr1 can not be used until ProcSignalSlots
|
|
* be initialized in CreateSharedMemoryAndSemaphores.
|
|
*/
|
|
if (u_sess->stream_cxt.stop_mythread) {
|
|
t_thrd.sig_cxt.gs_sigale_check_type = SIGNAL_CHECK_EXECUTOR_STOP;
|
|
SendProcSignal(u_sess->stream_cxt.stop_pid, PROCSIG_EXECUTOR_FLAG, InvalidBackendId);
|
|
u_sess->stream_cxt.stop_mythread = false;
|
|
u_sess->stream_cxt.stop_pid = 0;
|
|
u_sess->stream_cxt.stop_query_id = 0;
|
|
proc_exit(0);
|
|
}
|
|
|
|
CHECK_FOR_PROCDIEPENDING();
|
|
|
|
/* And run the backend */
|
|
proc_exit(BackendRun(&port));
|
|
} break;
|
|
|
|
case PAGEREDO:
|
|
case TWOPASECLEANER:
|
|
case FAULTMONITOR:
|
|
case BGWRITER:
|
|
case CHECKPOINT_THREAD:
|
|
case WALWRITER:
|
|
case WALWRITERAUXILIARY:
|
|
case WALRECEIVER:
|
|
case WALRECWRITE:
|
|
case DATARECIVER:
|
|
case DATARECWRITER:
|
|
case CBMWRITER:
|
|
case RPC_SERVICE:
|
|
case STARTUP:
|
|
case PAGEWRITER_THREAD:
|
|
case HEARTBEAT:
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
case TS_COMPACTION:
|
|
case TS_COMPACTION_CONSUMER:
|
|
case TS_COMPACTION_AUXILIAY:
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
case THREADPOOL_LISTENER:
|
|
case THREADPOOL_SCHEDULER: {
|
|
SetAuxType<thread_role>();
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
|
|
InitAuxiliaryProcess();
|
|
/* Attach process to shared data structures */
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
GaussDbAuxiliaryThreadMain<thread_role>(arg);
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case AUTOVACUUM_LAUNCHER: {
|
|
InitProcessAndShareMemory();
|
|
AutoVacLauncherMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case AUTOVACUUM_WORKER: {
|
|
InitProcessAndShareMemory();
|
|
AutoVacWorkerMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case JOB_SCHEDULER: {
|
|
InitProcessAndShareMemory();
|
|
JobScheduleMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case JOB_WORKER: {
|
|
InitProcessAndShareMemory();
|
|
JobExecuteWorkerMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case TRACK_STMT_CLEANER: {
|
|
InitProcessAndShareMemory();
|
|
CleanStatementMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case CATCHUP: {
|
|
InitProcessAndShareMemory();
|
|
CatchupMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case WLM_WORKER: {
|
|
t_thrd.role = WLM_WORKER;
|
|
|
|
// restore child slot;
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return -1;
|
|
}
|
|
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
InitProcess();
|
|
|
|
WLMWorkerInitialize(&port);
|
|
/* Attach process to shared data structures */
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
WLMProcessThreadMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case WLM_MONITOR: {
|
|
t_thrd.role = WLM_MONITOR;
|
|
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
|
|
InitProcess();
|
|
WLMWorkerInitialize(&port);
|
|
/* Attach process to shared data structures */
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
WLMmonitorMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case WLM_ARBITER: {
|
|
t_thrd.role = WLM_ARBITER;
|
|
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
|
|
InitProcess();
|
|
|
|
WLMWorkerInitialize(&port);
|
|
|
|
/* Attach process to shared data structures */
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
|
|
WLMarbiterMain();
|
|
|
|
proc_exit(0);
|
|
|
|
} break;
|
|
|
|
case WLM_CPMONITOR: {
|
|
t_thrd.role = WLM_CPMONITOR;
|
|
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
|
|
InitProcess();
|
|
|
|
WLMWorkerInitialize(&port);
|
|
|
|
/* Attach process to shared data structures */
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
|
|
CPmonitorMain();
|
|
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case ARCH: {
|
|
t_thrd.role = ARCH;
|
|
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
|
|
InitProcess();
|
|
|
|
/* Attach process to shared data structures */
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
|
|
PgArchiverMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case PGSTAT: {
|
|
/* Restore basic shared memory pointers */
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
|
|
InitAuxiliaryProcess();
|
|
|
|
/* Attach process to shared data structures */
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
|
|
/* Do not want to attach to shared memory */
|
|
PgstatCollectorMain();
|
|
proc_exit(0);
|
|
|
|
} break;
|
|
|
|
case SYSLOGGER: {
|
|
/* Do not want to attach to shared memory */
|
|
SysLoggerMain(arg->extra_payload.log_thread.syslog_handle);
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case ALARMCHECK: {
|
|
AlarmCheckerMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case REAPER: {
|
|
/* Do not want to attach to shared memory */
|
|
ReaperBackendMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case AUDITOR: {
|
|
t_thrd.role = AUDITOR;
|
|
/* Do not want to attach to shared memory */
|
|
PgAuditorMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case RPC_WORKER: {
|
|
/* Do not init this global variable to prevent illegal access. */
|
|
if (u_sess->proc_cxt.MyProcPort != NULL)
|
|
u_sess->proc_cxt.MyProcPort = NULL;
|
|
|
|
t_thrd.postmaster_cxt.IsRPCWorkerThread = true;
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
InitProcessAndShareMemory();
|
|
|
|
/* Early initialization */
|
|
BaseInit();
|
|
return 0;
|
|
} break;
|
|
|
|
case SNAPSHOT_WORKER: {
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
InitProcess();
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
SnapshotMain();
|
|
} break;
|
|
|
|
case ASH_WORKER: {
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
InitProcess();
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
ActiveSessionCollectMain();
|
|
} break;
|
|
|
|
case TRACK_STMT_WORKER: {
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
InitProcess();
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
StatementFlushMain();
|
|
} break;
|
|
|
|
case PERCENTILE_WORKER: {
|
|
InitShmemAccess(UsedShmemSegAddr);
|
|
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
InitProcess();
|
|
CreateSharedMemoryAndSemaphores(false, 0);
|
|
PercentileMain();
|
|
} break;
|
|
|
|
case COMM_RECEIVER: {
|
|
commReceiverMain(arg->payload);
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case COMM_SENDERFLOWER: {
|
|
commSenderFlowMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case COMM_RECEIVERFLOWER: {
|
|
commReceiverFlowMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case COMM_AUXILIARY: {
|
|
commAuxiliaryMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case BARRIER_CREATOR: {
|
|
if (START_BARRIER_CREATOR) {
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
InitProcessAndShareMemory();
|
|
barrier_creator_main();
|
|
proc_exit(0);
|
|
}
|
|
} break;
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
case COMM_POOLER_CLEAN: {
|
|
InitProcessAndShareMemory();
|
|
commPoolCleanerMain();
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case STREAMING_BACKEND: {
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
InitProcessAndShareMemory();
|
|
streaming_backend_main(arg);
|
|
proc_exit(0);
|
|
} break;
|
|
|
|
case CSNMIN_SYNC: {
|
|
if (GTM_LITE_CN) {
|
|
t_thrd.proc_cxt.MyPMChildSlot = AssignPostmasterChildSlot();
|
|
if (t_thrd.proc_cxt.MyPMChildSlot == -1) {
|
|
return STATUS_ERROR;
|
|
}
|
|
|
|
InitProcessAndShareMemory();
|
|
csnminsync_main();
|
|
proc_exit(0);
|
|
}
|
|
} break;
|
|
#endif
|
|
default:
|
|
ereport(PANIC, (errmsg("unsupport thread role type %d", arg->role)));
|
|
break;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* the order of role must be the same with enum knl_thread_role */
|
|
static ThreadMetaData GaussdbThreadGate[] = {
|
|
{ GaussDbThreadMain<MASTER_THREAD>, MASTER_THREAD, "main", "main thread" },
|
|
{ GaussDbThreadMain<WORKER>, WORKER, "worker", "woker thread" },
|
|
{ GaussDbThreadMain<THREADPOOL_WORKER>, THREADPOOL_WORKER, "TPLworker", "thread pool worker" },
|
|
{ GaussDbThreadMain<THREADPOOL_LISTENER>, THREADPOOL_LISTENER, "TPLlistener", "thread pool listner" },
|
|
{ GaussDbThreadMain<THREADPOOL_SCHEDULER>, THREADPOOL_SCHEDULER, "TPLscheduler", "thread pool scheduler" },
|
|
{ GaussDbThreadMain<THREADPOOL_STREAM>, THREADPOOL_STREAM, "TPLstream", "thread pool stream" },
|
|
{ GaussDbThreadMain<STREAM_WORKER>, STREAM_WORKER, "streamworker", "stream worker" },
|
|
{ GaussDbThreadMain<AUTOVACUUM_LAUNCHER>, AUTOVACUUM_LAUNCHER, "AVClauncher", "autovacuum launcher" },
|
|
{ GaussDbThreadMain<AUTOVACUUM_WORKER>, AUTOVACUUM_WORKER, "AVCworker", "autovacuum worker" },
|
|
{ GaussDbThreadMain<JOB_SCHEDULER>, JOB_SCHEDULER, "Jobscheduler", "job scheduler" },
|
|
{ GaussDbThreadMain<JOB_WORKER>, JOB_WORKER, "Jobworker", "job execute worker" },
|
|
{ GaussDbThreadMain<WLM_WORKER>, WLM_WORKER, "WLMworker", "wlm statistics collector" },
|
|
{ GaussDbThreadMain<WLM_MONITOR>, WLM_MONITOR, "WLMmonitor", "wlm monitor launcher" },
|
|
{ GaussDbThreadMain<WLM_ARBITER>, WLM_ARBITER, "WLMarbiter", "wlm arbiter launcher" },
|
|
{ GaussDbThreadMain<WLM_CPMONITOR>, WLM_CPMONITOR, "CPmonitor", "CPmonitor launcher" },
|
|
{ GaussDbThreadMain<AUDITOR>, AUDITOR, "auditor", "system auditor" },
|
|
{ GaussDbThreadMain<PGSTAT>, PGSTAT, "statscollector", "statistics collector" },
|
|
{ GaussDbThreadMain<SYSLOGGER>, SYSLOGGER, "syslogger", "system logger" },
|
|
{ GaussDbThreadMain<RPC_WORKER>, RPC_WORKER, "rpcworker", "remote service" },
|
|
{ GaussDbThreadMain<RPC_SERVICE>, RPC_SERVICE, "rpcservice", "remote service" },
|
|
{ GaussDbThreadMain<CATCHUP>, CATCHUP, "catchup", "catchup" },
|
|
{ GaussDbThreadMain<ARCH>, ARCH, "archiver", "archiver" },
|
|
{ GaussDbThreadMain<ALARMCHECK>, ALARMCHECK, "alarm", "alarm" },
|
|
{ GaussDbThreadMain<REAPER>, REAPER, "reaper", "reaper backend" },
|
|
{ GaussDbThreadMain<PAGEREDO>, PAGEREDO, "pageredo", "page redo" },
|
|
{ GaussDbThreadMain<TWOPASECLEANER>, TWOPASECLEANER, "2pccleaner", "twophase cleaner" },
|
|
{ GaussDbThreadMain<STARTUP>, STARTUP, "startup", "startup" },
|
|
{ GaussDbThreadMain<FAULTMONITOR>, FAULTMONITOR, "faultmonitor", "fault monitor" },
|
|
{ GaussDbThreadMain<BGWRITER>, BGWRITER, "bgwriter", "background writer" },
|
|
{ GaussDbThreadMain<PERCENTILE_WORKER>, PERCENTILE_WORKER, "percentworker", "statistics collector" },
|
|
{ GaussDbThreadMain<SNAPSHOT_WORKER>, SNAPSHOT_WORKER, "snapshotworker", "snapshot" },
|
|
{ GaussDbThreadMain<ASH_WORKER>, ASH_WORKER, "ashworker", "ash worker" },
|
|
{ GaussDbThreadMain<TRACK_STMT_WORKER>, TRACK_STMT_WORKER, "TrackStmtWorker", "track stmt worker" },
|
|
{ GaussDbThreadMain<TRACK_STMT_CLEANER>, TRACK_STMT_CLEANER, "TrackStmtClean", "track stmt clean worker" },
|
|
{ GaussDbThreadMain<CHECKPOINT_THREAD>, CHECKPOINT_THREAD, "checkpointer", "checkpointer" },
|
|
{ GaussDbThreadMain<WALWRITER>, WALWRITER, "WALwriter", "WAL writer" },
|
|
{ GaussDbThreadMain<WALWRITERAUXILIARY>, WALWRITERAUXILIARY, "WALwriteraux", "WAL writer auxiliary" },
|
|
{ GaussDbThreadMain<WALRECEIVER>, WALRECEIVER, "WALreceiver", "WAL receiver" },
|
|
{ GaussDbThreadMain<WALRECWRITE>, WALRECWRITE, "WALrecwriter", "WAL receive writer" },
|
|
{ GaussDbThreadMain<DATARECIVER>, DATARECIVER, "datareceiver", "data receiver" },
|
|
{ GaussDbThreadMain<DATARECWRITER>, DATARECWRITER, "datarecwriter", "data receive writer" },
|
|
{ GaussDbThreadMain<CBMWRITER>, CBMWRITER, "CBMwriter", "CBM writer" },
|
|
{ GaussDbThreadMain<PAGEWRITER_THREAD>, PAGEWRITER_THREAD, "pagewriter", "page writer" },
|
|
{ GaussDbThreadMain<HEARTBEAT>, HEARTBEAT, "heartbeat", "heart beat" },
|
|
{ GaussDbThreadMain<COMM_SENDERFLOWER>, COMM_SENDERFLOWER, "COMMsendflow", "communicator sender flower" },
|
|
{ GaussDbThreadMain<COMM_RECEIVERFLOWER>, COMM_RECEIVERFLOWER, "COMMrecvflow", "communicator receiver flower" },
|
|
{ GaussDbThreadMain<COMM_RECEIVER>, COMM_RECEIVER, "COMMrecloop", "communicator receiver loop" },
|
|
{ GaussDbThreadMain<COMM_AUXILIARY>, COMM_AUXILIARY, "COMMaux", "communicator auxiliary" },
|
|
{ GaussDbThreadMain<COMM_POOLER_CLEAN>, COMM_POOLER_CLEAN, "COMMpoolcleaner", "communicator pooler auto cleaner" },
|
|
{ GaussDbThreadMain<CSNMIN_SYNC>, CSNMIN_SYNC, "csnminsync", "csnmin sync" },
|
|
{ GaussDbThreadMain<BARRIER_CREATOR>, BARRIER_CREATOR, "barriercreator", "barrier creator" },
|
|
|
|
/* Keep the block in the end if it may be absent !!! */
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
{ GaussDbThreadMain<TS_COMPACTION>, TS_COMPACTION, "TScompaction",
|
|
"timeseries compaction" },
|
|
{ GaussDbThreadMain<TS_COMPACTION_CONSUMER>, TS_COMPACTION_CONSUMER, "TScompconsumer",
|
|
"timeseries consumer compaction" },
|
|
{ GaussDbThreadMain<TS_COMPACTION_AUXILIAY>, TS_COMPACTION_AUXILIAY, "TScompaux",
|
|
"compaction auxiliary" },
|
|
{ GaussDbThreadMain<STREAMING_BACKEND>, STREAMING_BACKEND, "streambackend",
|
|
"streaming backend" },
|
|
{ GaussDbThreadMain<STREAMING_ROUTER_BACKEND>, STREAMING_ROUTER_BACKEND, "streamrouter",
|
|
"streaming router backend" },
|
|
{ GaussDbThreadMain<STREAMING_WORKER_BACKEND>, STREAMING_WORKER_BACKEND, "streamworker",
|
|
"streaming worker backend" },
|
|
{ GaussDbThreadMain<STREAMING_COLLECTOR_BACKEND>, STREAMING_COLLECTOR_BACKEND, "streamcollector",
|
|
"streaming collector backend" },
|
|
{ GaussDbThreadMain<STREAMING_QUEUE_BACKEND>, STREAMING_QUEUE_BACKEND, "streamqueue",
|
|
"streaming queue backend" },
|
|
{ GaussDbThreadMain<STREAMING_REAPER_BACKEND>, STREAMING_REAPER_BACKEND, "streamreaper",
|
|
"streaming reaper backend" }
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
};
|
|
|
|
/*
 * GetThreadEntry
 *      Return the entry function stored in GaussdbThreadGate for `role`.
 *
 * role: thread role used directly as the table index; asserted to lie
 *       strictly between MASTER_THREAD and THREAD_ENTRY_BOUND.
 *
 * The range and table-order checks are Assert()s only; no other validation
 * of `role` is performed here.
 */
GaussdbThreadEntry GetThreadEntry(knl_thread_role role)
{
    const int slot = static_cast<int>(role);

    Assert(role > MASTER_THREAD && role < THREAD_ENTRY_BOUND);
    /* the row found at this index must describe the very role we asked for */
    Assert(GaussdbThreadGate[slot].role == role);

    return GaussdbThreadGate[slot].func;
}
|
|
|
|
/*
 * GetThreadName
 *      Return the short thread name stored in GaussdbThreadGate for `role`.
 *
 * role: thread role used directly as the table index; asserted to lie
 *       strictly between MASTER_THREAD and THREAD_ENTRY_BOUND.
 *
 * The returned pointer refers to the string held by the table row; the
 * caller must not free it.
 */
const char* GetThreadName(knl_thread_role role)
{
    const int slot = static_cast<int>(role);

    Assert(role > MASTER_THREAD && role < THREAD_ENTRY_BOUND);
    Assert(GaussdbThreadGate[slot].role == role);
    /* pthread_setname_np requires thread name is no longer than 16 including the ending '\0' */
    Assert(strlen(GaussdbThreadGate[slot].thr_name) < 16);

    return GaussdbThreadGate[slot].thr_name;
}
|
|
|
|
static void* InternalThreadFunc(void* args)
|
|
{
|
|
knl_thread_arg* thr_argv = (knl_thread_arg*)args;
|
|
|
|
gs_thread_exit((GetThreadEntry(thr_argv->role))(thr_argv));
|
|
return (void*)NULL;
|
|
}
|
|
|
|
/*
 * initialize_thread
 *      Create a thread that runs InternalThreadFunc with `thr_argv`.
 *
 * thr_argv: argument slot already filled in by the caller.
 *
 * Returns the id of the new thread. If gs_thread_create() reports an error,
 * the failure is logged, the argument slot is released and InvalidTid is
 * returned.
 */
ThreadId initialize_thread(ThreadArg* thr_argv)
{
    gs_thread_t newThread;
    int createRc = gs_thread_create(&newThread, InternalThreadFunc, 1, (void*)thr_argv);

    if (createRc == 0) {
        return gs_thread_id(newThread);
    }

    /* read the role for the log message before the slot is given back */
    ereport(LOG,
        (errmsg("can not fork thread[%s], errcode:%d, %m", GetThreadName(thr_argv->m_thd_arg.role), createRc)));
    gs_thread_release_args_slot(thr_argv);

    return InvalidTid;
}
|
|
|
|
/*
 * initialize_util_thread
 *      Start a thread for `role` that has no client connection behind it.
 *
 * role:    role of the thread to start.
 * payload: opaque pointer stored in the thread argument.
 *
 * Returns the new thread id, or 0 on any failure (no free argument slot,
 * save_backend_variables() failure, or thread creation failure). A creation
 * failure for the STARTUP role calls ExitPostmaster(1) instead of returning.
 */
ThreadId initialize_util_thread(knl_thread_role role, void* payload)
{
    ThreadArg* slot = gs_thread_get_args_slot();
    if (slot == NULL) {
        return 0;
    }

    slot->m_thd_arg.role = role;
    slot->m_thd_arg.payload = payload;

    /* This entry point passes dummy values for the Port variables */
    Port dummyPort;
    errno_t rc = memset_s(&dummyPort, sizeof(dummyPort), 0, sizeof(dummyPort));
    securec_check(rc, "", "");
    dummyPort.sock = PGINVALID_SOCKET;
    dummyPort.SessionVersionNum = pg_atomic_read_u32(&WorkingGrandVersionNum);

    if (!save_backend_variables((BackendParameters*)(slot->m_thd_arg.save_para), &dummyPort)) {
        gs_thread_release_args_slot(slot);
        return 0; /* log made by save_backend_variables */
    }

    /* the syslogger gets the descriptor of the currently open log file, or -1 if there is none */
    if (role == SYSLOGGER) {
        slot->m_thd_arg.extra_payload.log_thread.syslog_handle =
            (t_thrd.logger.syslogFile != NULL) ? fileno(t_thrd.logger.syslogFile) : -1;
    }

    ThreadId tid = initialize_thread(slot);
    if (tid != InvalidTid) {
        return tid;
    }

    /*
     * fork failure is fatal during startup, but there's no need to choke
     * immediately if starting other child types fails.
     */
    if (role == STARTUP) {
        ExitPostmaster(1);
    }
    return 0;
}
|
|
|
|
/*
 * initialize_worker_thread
 *      Start a thread for `role` that serves the connection described by `port`.
 *
 * role:    role of the thread to start.
 * port:    connection state; its SessionVersionNum is overwritten here.
 * payload: opaque pointer stored in the thread argument.
 *
 * Returns the new thread id, or InvalidTid when no argument slot is free,
 * save_backend_variables() fails, or the thread cannot be created.
 */
ThreadId initialize_worker_thread(knl_thread_role role, Port* port, void* payload)
{
    ThreadArg* slot = gs_thread_get_args_slot();
    if (slot == NULL) {
        return InvalidTid;
    }

    slot->m_thd_arg.role = role;
    slot->m_thd_arg.payload = payload;

    /*
     * We initialize the backend version to be the same as
     * postmaster, which should be the case at most of the time.
     */
    port->SessionVersionNum = pg_atomic_read_u32(&WorkingGrandVersionNum);

    if (save_backend_variables((BackendParameters*)(slot->m_thd_arg.save_para), port)) {
        return initialize_thread(slot);
    }

    gs_thread_release_args_slot(slot);
    return InvalidTid; /* log made by save_backend_variables */
}
|
|
|
|
/*
 * isVaildIp
 *      Check whether `ip` is a dotted-decimal IPv4 address.
 *
 * ip: NUL-terminated candidate string; NULL is rejected.
 *
 * Returns true only when the string consists of exactly four non-empty groups
 * of decimal digits separated by single dots, each with a value in 0..255.
 * Leading zeros inside a group are accepted.
 *
 * Compared with the previous implementation this additionally rejects empty
 * groups ("1..1.1", "1.1.1.", "1...") and stops as soon as a group exceeds
 * 255, so an arbitrarily long run of digits can no longer overflow the
 * accumulator.
 *
 * (The misspelled name is kept because callers depend on it.)
 */
bool isVaildIp(const char* ip)
{
    const int maxOctetValue = 255;
    const int requiredDots = 3;
    int dots = 0;
    int octet = 0;
    bool octetHasDigit = false;

    if (ip == NULL) {
        return false;
    }

    for (; *ip != '\0'; ip++) {
        if (*ip == '.') {
            /* a dot must close a non-empty group; this also rejects a leading dot */
            if (!octetHasDigit) {
                return false;
            }
            dots++;
            if (dots > requiredDots) {
                return false;
            }
            octet = 0;
            octetHasDigit = false;
        } else if (*ip >= '0' && *ip <= '9') {
            octet = octet * 10 + (*ip - '0');
            /* bail out immediately so that `octet` can never grow past 2559 */
            if (octet > maxOctetValue) {
                return false;
            }
            octetHasDigit = true;
        } else {
            return false;
        }
    }

    /* the final group must be non-empty too, and there must be exactly four groups */
    return octetHasDigit && dots == requiredDots;
}
|
|
|
|
/*
|
|
* set disable_conn_primary, deny connection to this node.
|
|
*/
|
|
Datum disable_conn(PG_FUNCTION_ARGS)
|
|
{
|
|
knl_g_disconn_node_context_data disconn_node;
|
|
text* arg0 = (text*)PG_GETARG_DATUM(0);
|
|
text* arg1 = (text*)PG_GETARG_DATUM(1);
|
|
bool redoDone = false;
|
|
int checkTimes = CHECK_TIMES;
|
|
if (arg0 == NULL) {
|
|
ereport(
|
|
ERROR, (errcode(ERRCODE_INVALID_ATTRIBUTE), errmsg("Invalid null pointer attribute for disable_conn()")));
|
|
}
|
|
char* host = NULL;
|
|
|
|
if (getObsReplicationSlot() && IsServerModeStandby() && !XLogArchivingActive()) {
|
|
ereport(ERROR, (errcode(ERRCODE_INVALID_OPERATION),
|
|
errmsg("can not execute in obs recovery mode")));
|
|
}
|
|
|
|
const char* disconn_mode = TextDatumGetCString(arg0);
|
|
if (!superuser()) {
|
|
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
|
errmsg("must be superuser/sysadmin account to perform disable_conn()")));
|
|
}
|
|
ValidateName(disconn_mode);
|
|
|
|
if (0 == strcmp(disconn_mode, POLLING_CONNECTION_STR)) {
|
|
disconn_node.conn_mode = POLLING_CONNECTION;
|
|
} else if (0 == strcmp(disconn_mode, SPECIFY_CONNECTION_STR)) {
|
|
disconn_node.conn_mode = SPECIFY_CONNECTION;
|
|
} else if (0 == strcmp(disconn_mode, PROHIBIT_CONNECTION_STR)) {
|
|
disconn_node.conn_mode = PROHIBIT_CONNECTION;
|
|
} else {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OPERATION),
|
|
errmsg("Connection mode should be polling_connection or specify_connection or prohibit_connection")));
|
|
}
|
|
|
|
/*
|
|
* Make sure that all xlog has been redo before locking.
|
|
* Sleep 0.5s is an auxiliary way to check whether all xlog has been redone.
|
|
*/
|
|
if (disconn_node.conn_mode == PROHIBIT_CONNECTION) {
|
|
while (checkTimes--) {
|
|
if (knl_g_get_redo_finish_status()) {
|
|
redoDone = true;
|
|
break;
|
|
}
|
|
ereport(LOG, (errmsg("%d redo_done", redoDone)));
|
|
sleep(0.01);
|
|
}
|
|
ereport(LOG, (errmsg("%d redo_done", redoDone)));
|
|
if (!redoDone) {
|
|
g_instance.comm_cxt.localinfo_cxt.need_disable_connection_node = true;
|
|
ereport(ERROR, (errcode_for_file_access(),
|
|
errmsg("could not add lock when DN is not redo all xlog, redo done flag is false")));
|
|
}
|
|
|
|
XLogRecPtr replay1 = GetXLogReplayRecPtrInPending();
|
|
sleep(0.5);
|
|
XLogRecPtr replay2 = GetXLogReplayRecPtrInPending();
|
|
if (replay1 != replay2) {
|
|
ereport(ERROR, (errcode_for_file_access(), errmsg("could not add lock when DN is not redo all xlog.")));
|
|
}
|
|
} else {
|
|
g_instance.comm_cxt.localinfo_cxt.need_disable_connection_node = false;
|
|
}
|
|
|
|
if (disconn_node.conn_mode != SPECIFY_CONNECTION) {
|
|
disconn_node.disable_conn_node_host[0] = '\0';
|
|
} else {
|
|
if (arg1 == NULL) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_ATTRIBUTE), errmsg("Invalid null pointer attribute for disable_conn()")));
|
|
}
|
|
host = TextDatumGetCString(arg1);
|
|
ValidateName(host);
|
|
if (!isVaildIp(host)) {
|
|
ereport(ERROR, (errcode(ERRCODE_INVALID_OPERATION), errmsg("host is invalid")));
|
|
}
|
|
errno_t rc = memcpy_s(disconn_node.disable_conn_node_host, NAMEDATALEN, host, strlen(host) + 1);
|
|
securec_check(rc, "\0", "\0");
|
|
}
|
|
if (disconn_node.conn_mode != SPECIFY_CONNECTION) {
|
|
disconn_node.disable_conn_node_port = 0;
|
|
} else {
|
|
disconn_node.disable_conn_node_port = PG_GETARG_INT32(2);
|
|
}
|
|
int fd;
|
|
fd = BasicOpenFile(disable_conn_file, O_CREAT | O_WRONLY | PG_BINARY, S_IRUSR | S_IWUSR);
|
|
if (fd < 0) {
|
|
ereport(ERROR, (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", disable_conn_file)));
|
|
}
|
|
|
|
/*
|
|
* save primary node info on disk.
|
|
*/
|
|
pgstat_report_waitevent(WAIT_EVENT_DISABLE_CONNECT_FILE_WRITE);
|
|
if (write(fd, &disconn_node, sizeof(disconn_node)) != sizeof(disconn_node)) {
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
close(fd);
|
|
ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", disable_conn_file)));
|
|
}
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
|
|
pgstat_report_waitevent(WAIT_EVENT_DISABLE_CONNECT_FILE_SYNC);
|
|
if (pg_fsync(fd) != 0) {
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
close(fd);
|
|
ereport(ERROR, (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", disable_conn_file)));
|
|
}
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
close(fd);
|
|
|
|
SpinLockAcquire(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data = disconn_node;
|
|
SpinLockRelease(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
ereport(LOG, (errcode(ERRCODE_LOG), errmsg("disable_conn set mode to %s", disconn_mode)));
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* return disable_conn_primary info.
|
|
*/
|
|
Datum read_disable_conn_file(PG_FUNCTION_ARGS)
|
|
{
|
|
TupleDesc tupdesc;
|
|
Datum values[6];
|
|
bool nulls[6];
|
|
HeapTuple tuple;
|
|
errno_t rc;
|
|
Datum result;
|
|
char disconn_node_port_str[NAMEDATALEN];
|
|
char* next_key = NULL;
|
|
char* key_position = NULL;
|
|
char local_host[NAMEDATALEN];
|
|
char local_port[NAMEDATALEN];
|
|
char local_info[NAMEDATALEN];
|
|
const int MAX_LOCAL_ADDRESS_LENGTH = 50;
|
|
rc = memset_s(local_info, NAMEDATALEN, 0, NAMEDATALEN);
|
|
securec_check(rc, "\0", "\0");
|
|
if (t_thrd.postmaster_cxt.ReplConnChanged[1] == false || u_sess->attr.attr_storage.ReplConnInfoArr[1] == NULL) {
|
|
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("Can't get local connection address.")));
|
|
}
|
|
rc = memcpy_s(local_info, NAMEDATALEN - 1, u_sess->attr.attr_storage.ReplConnInfoArr[1], MAX_LOCAL_ADDRESS_LENGTH);
|
|
securec_check(rc, "\0", "\0");
|
|
key_position = strtok_s(local_info, " ", &next_key);
|
|
if (key_position == NULL || sscanf_s(key_position, "localhost=%s", local_host, NAMEDATALEN - 1) != 1) {
|
|
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("get local host failed!")));
|
|
}
|
|
|
|
key_position = strtok_s(NULL, " ", &next_key);
|
|
if (key_position == NULL || sscanf_s(key_position, "localport=%s", local_port, NAMEDATALEN - 1) != 1) {
|
|
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("get local port failed!")));
|
|
}
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
|
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("return type must be a row type")));
|
|
|
|
SpinLockAcquire(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
switch (g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.conn_mode) {
|
|
case POLLING_CONNECTION:
|
|
values[0] = CStringGetTextDatum(POLLING_CONNECTION_STR);
|
|
break;
|
|
case SPECIFY_CONNECTION:
|
|
values[0] = CStringGetTextDatum(SPECIFY_CONNECTION_STR);
|
|
break;
|
|
case PROHIBIT_CONNECTION:
|
|
values[0] = CStringGetTextDatum(PROHIBIT_CONNECTION_STR);
|
|
break;
|
|
}
|
|
rc = snprintf_s(disconn_node_port_str,
|
|
sizeof(disconn_node_port_str),
|
|
sizeof(disconn_node_port_str) - 1,
|
|
"%d",
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.disable_conn_node_port);
|
|
securec_check_ss_c(rc, "\0", "\0");
|
|
values[1] = CStringGetTextDatum(
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.disable_conn_node_host);
|
|
values[2] = CStringGetTextDatum(disconn_node_port_str);
|
|
values[3] = CStringGetTextDatum(local_host);
|
|
values[4] = CStringGetTextDatum(local_port);
|
|
if (knl_g_get_redo_finish_status()) {
|
|
values[5] = CStringGetTextDatum("true");
|
|
} else {
|
|
values[5] = CStringGetTextDatum("false");
|
|
}
|
|
SpinLockRelease(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
rc = memset_s(nulls, sizeof(nulls), 0, sizeof(nulls));
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
tuple = heap_form_tuple(tupdesc, values, nulls);
|
|
result = HeapTupleGetDatum(tuple);
|
|
PG_RETURN_DATUM(result);
|
|
}
|
|
|
|
void set_disable_conn_mode()
|
|
{
|
|
int fd = 0;
|
|
size_t cnt = 0;
|
|
knl_g_disconn_node_context_data* disconn_node =
|
|
(knl_g_disconn_node_context_data*)palloc(sizeof(knl_g_disconn_node_context_data));
|
|
|
|
fd = BasicOpenFile(disable_conn_file, O_RDONLY | PG_BINARY, 0);
|
|
|
|
/*
|
|
* We do not need to handle this as we are rename()ing the directory into
|
|
* place only after we fsync()ed the state file.
|
|
*/
|
|
if (fd >= 0) {
|
|
pgstat_report_waitevent(WAIT_EVENT_DISABLE_CONNECT_FILE_SYNC);
|
|
if (pg_fsync(fd) != 0) {
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
close(fd);
|
|
ereport(ERROR, (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", disable_conn_file)));
|
|
}
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
|
|
pgstat_report_waitevent(WAIT_EVENT_DISABLE_CONNECT_FILE_READ);
|
|
|
|
cnt = read(fd, (void*)disconn_node, sizeof(knl_g_disconn_node_context_data));
|
|
|
|
close(fd);
|
|
fd = 0;
|
|
if (cnt != sizeof(knl_g_disconn_node_context_data)) {
|
|
ereport(ERROR, (0, errmsg("cannot read disable connection file: \"%s\" \n", disable_conn_file)));
|
|
}
|
|
|
|
pgstat_report_waitevent(WAIT_EVENT_END);
|
|
|
|
SpinLockAcquire(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.conn_mode = disconn_node->conn_mode;
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.disable_conn_node_port =
|
|
disconn_node->disable_conn_node_port;
|
|
|
|
errno_t rc = memcpy_s(
|
|
(void*)g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.disable_conn_node_host,
|
|
NAMEDATALEN,
|
|
(void*)disconn_node->disable_conn_node_host,
|
|
NAMEDATALEN);
|
|
securec_check(rc, "\0", "\0");
|
|
SpinLockRelease(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
} else {
|
|
SpinLockAcquire(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.conn_mode = POLLING_CONNECTION;
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.disable_conn_node_host[0] = 0;
|
|
g_instance.comm_cxt.localinfo_cxt.disable_conn_node.disable_conn_node_data.disable_conn_node_port = 0;
|
|
SpinLockRelease(&g_instance.comm_cxt.localinfo_cxt.disable_conn_node.info_lck);
|
|
}
|
|
pfree_ext(disconn_node);
|
|
return;
|
|
}
|
|
|
|
static bool NeedHeartbeat()
|
|
{
|
|
if (!(IS_PGXC_DATANODE && g_instance.pid_cxt.HeartbeatPID == 0 &&
|
|
(pmState == PM_RUN || pmState == PM_HOT_STANDBY) &&
|
|
(t_thrd.postmaster_cxt.HaShmData->current_mode == PRIMARY_MODE ||
|
|
t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE)) ||
|
|
dummyStandbyMode) {
|
|
return false;
|
|
}
|
|
|
|
struct replconninfo* replconn = NULL;
|
|
/* at least one replconninfo configures heartbeat port */
|
|
for (int i = 1; i < MAX_REPLNODE_NUM; i++) {
|
|
replconn = t_thrd.postmaster_cxt.ReplConnArray[i];
|
|
if (replconn != NULL && replconn->localheartbeatport != 0 && replconn->remoteheartbeatport != 0) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Current mode must be get in this function */
|
|
ServerMode GetHaShmemMode(void)
|
|
{
|
|
volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData;
|
|
ServerMode tmpMode;
|
|
Assert(t_thrd.postmaster_cxt.HaShmData != NULL);
|
|
SpinLockAcquire(&hashmdata->mutex);
|
|
tmpMode = hashmdata->current_mode;
|
|
SpinLockRelease(&hashmdata->mutex);
|
|
return tmpMode;
|
|
}
|
|
|
|
void GenerateCancelKey(bool isThreadPoolSession)
|
|
{
|
|
if (isThreadPoolSession) {
|
|
u_sess->cancel_key = gs_random();
|
|
u_sess->cancel_key = (unsigned long)u_sess->cancel_key & ~0x1;
|
|
} else {
|
|
/*
|
|
* Compute the cancel key that will be assigned to this backend. The
|
|
* backend will have its own copy in the forked-off process' value of
|
|
* t_thrd.proc_cxt.MyCancelKey, so that it can transmit the key to the frontend.
|
|
*/
|
|
t_thrd.proc_cxt.MyCancelKey = PostmasterRandom();
|
|
t_thrd.proc_cxt.MyCancelKey = (unsigned long)t_thrd.proc_cxt.MyCancelKey | 0x1;
|
|
}
|
|
}
|