/* ------------------------------------------------------------------------- * * main.c * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 2010-2012 Postgres-XC Development Group * * * IDENTIFICATION * $PostgreSQL$ * * ------------------------------------------------------------------------- */ #include #include #include #include #include #include #include #include #include #include #include #include "gtm/gtm_c.h" #include "gtm/path.h" #include "gtm/gtm.h" #include "gtm/elog.h" #include "gtm/memutils.h" #include "gtm/gtm_list.h" #include "gtm/gtm_seq.h" #include "gtm/standby_utils.h" #include "gtm/gtm_standby.h" #include "gtm/libpq.h" #include "gtm/libpq-fe.h" #include "gtm/libpq-be.h" #include "gtm/pqsignal.h" #include "gtm/pqformat.h" #include "gtm/assert.h" #include "gtm/register.h" #include "gtm/replication.h" #include "gtm/gtm_txn.h" #include "gtm/gtm_seq.h" #include "gtm/gtm_msg.h" #include "gtm/gtm_opt.h" #include "gtm/gtm_utils.h" #include "gtm/gtm_backup.h" extern int optind; extern char* optarg; #define GTM_MAX_PATH 1024 #define GTM_DEFAULT_HOSTNAME "*" #define GTM_DEFAULT_PORT 6666 #define GTM_CONTROL_FILE "gtm.control" #define GTM_PID_FILE "gtm.pid" #define GTM_LOG_FILE "gtm.log" static char* progname = "gtm"; char* ListenAddresses; int GTMPortNumber; char GTMControlFile[GTM_MAX_PATH]; char* GTMDataDir; char* NodeName; bool Backup_synchronously = false; char* active_addr; int active_port; int tcp_keepalives_idle; int tcp_keepalives_interval; int tcp_keepalives_count; char* error_reporter; char* status_reader; bool isStartUp; /* If this is GTM or not */ /* * Used to determine if given Port is in GTM or in GT_Proxy. * If it is in GTM, we should consider to flush GTM_Conn before * writing anything to Port. */ bool isGTM = true; GTM_ThreadID TopMostThreadID; /* The socket(s) we're listening to. */ #define MAXLISTEN 64 static int ListenSocket[MAXLISTEN]; pthread_key_t threadinfo_key; static bool GTMAbortPending = false; static Port* ConnCreate(int serverFd); static int ServerLoop(void); static int initMasks(fd_set* rmask); void* GTM_ThreadMain(void* argp); static int GTMAddConnection(Port* port, GTM_Conn* standby); static int ReadCommand(Port* myport, StringInfo inBuf); static void ProcessCommand(Port* myport, StringInfo input_message); static void ProcessPGXCNodeCommand(Port* myport, GTM_MessageType mtype, StringInfo message); static void ProcessTransactionCommand(Port* myport, GTM_MessageType mtype, StringInfo message); static void ProcessSnapshotCommand(Port* myport, GTM_MessageType mtype, StringInfo message); static void ProcessSequenceCommand(Port* myport, GTM_MessageType mtype, StringInfo message); static void ProcessQueryCommand(Port* myport, GTM_MessageType mtype, StringInfo message); static void GTM_RegisterPGXCNode(Port* myport, char* PGXCNodeName); static bool CreateOptsFile(int argc, char* argv[]); static void CreateDataDirLockFile(void); static void CreateLockFile(const char* filename, const char* refName); static void SetDataDir(void); static void ChangeToDataDir(void); static void checkDataDir(void); static void DeleteLockFile(const char* filename); static void PromoteToActive(void); static void ProcessSyncStandbyCommand(Port* myport, GTM_MessageType mtype, StringInfo message); static void ProcessBarrierCommand(Port* myport, GTM_MessageType mtype, StringInfo message); /* * One-time initialization. It's called immediately after the main process * starts */ static GTM_ThreadInfo* MainThreadInit() { GTM_ThreadInfo* thrinfo; pthread_key_create(&threadinfo_key, NULL); /* * Initialize the lock protecting the global threads info and backup lock info. */ GTM_RWLockInit(>MThreads->gt_lock); GTM_RWLockInit(>m_bkup_lock); /* * We are called even before memory context management is setup. We must * use malloc */ thrinfo = (GTM_ThreadInfo*)malloc(sizeof(GTM_ThreadInfo)); if (thrinfo == NULL) { fprintf(stderr, "malloc failed: %d", errno); fflush(stdout); fflush(stderr); exit(1); } errno_t rc; rc = memset_s(thrinfo, sizeof(GTM_ThreadInfo), 0, sizeof(GTM_ThreadInfo)); securec_check(rc, "\0", "\0"); thrinfo->is_main_thread = true; GTM_RWLockInit(&thrinfo->thr_lock); GTM_RWLockAcquire(&thrinfo->thr_lock, GTM_LOCKMODE_WRITE); if (SetMyThreadInfo(thrinfo)) { fprintf(stderr, "SetMyThreadInfo failed: %d", errno); fflush(stdout); fflush(stderr); exit(1); } TopMostThreadID = pthread_self(); return thrinfo; } static void BaseInit() { GTM_ThreadInfo* thrinfo; int rc; thrinfo = MainThreadInit(); MyThreadID = pthread_self(); MemoryContextInit(); checkDataDir(); SetDataDir(); ChangeToDataDir(); CreateDataDirLockFile(); rc = sprintf_s(GTMControlFile, GTM_MAX_PATH, "%s/%s", GTMDataDir, GTM_CONTROL_FILE); securec_check_ss(rc, "\0", "\0"); if (GTMLogFile == NULL) { GTMLogFile = (char*)malloc(GTM_MAX_PATH); rc = sprintf_s(GTMLogFile, GTM_MAX_PATH, "%s/%s", GTMDataDir, GTM_LOG_FILE); securec_check_ss(rc, "\0", "\0"); } /* Save Node Register File in register.c */ Recovery_SaveRegisterFileName(GTMDataDir); DebugFileOpen(); GTM_InitTxnManager(); GTM_InitSeqManager(); /* * The memory context is now set up. * Add the thrinfo structure in the global array */ if (GTM_ThreadAdd(thrinfo) == -1) { fprintf(stderr, "GTM_ThreadAdd for main thread failed: %d", errno); fflush(stdout); fflush(stderr); } } static void GTM_SigleHandler(int signal) { fprintf(stderr, "Received signal %d\n", signal); switch (signal) { case SIGKILL: case SIGTERM: case SIGQUIT: case SIGINT: case SIGHUP: break; case SIGUSR1: if (Recovery_IsStandby()) PromoteToActive(); return; default: fprintf(stderr, "Unknown signal %d\n", signal); return; } /* * XXX We should do a clean shutdown here. */ /* Rewrite Register Information (clean up unregister records) */ Recovery_SaveRegisterInfo(); /* Delete pid file before shutting down */ DeleteLockFile(GTM_PID_FILE); PG_SETMASK(&BlockSig); GTMAbortPending = true; return; } /* * Help display should match */ static void help(const char* progname) { printf(_("This is the GTM server.\n\n")); printf(_("Usage:\n %s [OPTION]...\n\n"), progname); printf(_("Options:\n")); printf(_(" -h hostname GTM server hostname/IP to listen.\n")); printf(_(" -p port GTM server port number to listen.\n")); printf(_(" -n nodename Node name for GTM server.\n")); printf(_(" -x xid Starting GXID \n")); printf(_(" -D directory GTM working directory\n")); printf(_(" -l filename GTM server log file name \n")); printf(_(" -c show server status, then exit\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" --help show this help, then exit\n")); printf(_("\n")); printf(_("Options for Standby mode:\n")); printf(_(" -s Start as a GTM standby server.\n")); printf(_(" -i hostname Active GTM server hostname/IP to connect.\n")); printf(_(" -q port Active GTM server port number to connect.\n")); printf(_("\n")); } static void gtm_status() { fprintf(stderr, "gtm_status(): must be implemented to scan the shmem.\n"); exit(0); } int main(int argc, char* argv[]) { int opt; int status; int i; GlobalTransactionId next_gxid = InvalidGlobalTransactionId; FILE* ctlf; /* * Local variable to hold command line options. * * if -c option is specified, then only -D option will be taken to locate * GTM data directory. All the other options are ignored. GTM status * will be printed out based on the specified data directory and GTM will * simply exit. If -D option is not specified in this case, current directory * will be used. * * In other case, all the command line options are analyzed. * * They're first analyzed and then -D option are used to locate configuration file. * If -D option is not specified, then the default value will be used. * * Please note that configuration specified in the configuration file (gtm.conf) * will be analyzed first and then will be overridden by the value specified * in command line options. -D and -C options are handled separately and used * to determine configuration file location. * * Please also note that -x option (startup GXID) will be handled in this section. * It has no corresponding configuration from the configuration file. */ bool is_gtm_status = false; /* "false" means no -c option was found */ char* listen_addresses = NULL; char* node_name = NULL; char* port_number = NULL; char* data_dir = NULL; char* log_file = NULL; char* is_standby_mode = NULL; char* dest_addr = NULL; char* dest_port = NULL; isStartUp = true; /* * At first, initialize options. Also moved something from BaseInit() here. */ InitializeGTMOptions(); /* * Catch standard options before doing much else */ if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(argv[0]); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { puts("gtm (Postgres-XC) " PGXC_VERSION); exit(0); } } ListenAddresses = strdup(GTM_DEFAULT_HOSTNAME); if (ListenAddresses == NULL) { write_stderr("out of memory\n"); exit(1); } GTMPortNumber = GTM_DEFAULT_PORT; /* * Parse the command like options and set variables */ while ((opt = getopt(argc, argv, "ch:n:p:x:D:l:si:q:")) != -1) { switch (opt) { case 'c': gtm_status(); break; /* never reach here. */ case 'h': if (listen_addresses) free(listen_addresses); listen_addresses = strdup(optarg); if (listen_addresses == NULL) { write_stderr("out of memory\n"); exit(1); } break; case 'n': if (NodeName) free(NodeName); NodeName = strdup(optarg); if (NodeName == NULL) { write_stderr("out of memory\n"); exit(1); } break; case 'p': if (port_number) free(port_number); port_number = strdup(optarg); if (port_number == NULL) { write_stderr("out of memory\n"); exit(1); } break; case 'x': next_gxid = (GlobalTransactionId)atoll(optarg); break; case 'D': if (data_dir) free(data_dir); data_dir = strdup(optarg); if (data_dir == NULL) { write_stderr("out of memory\n"); exit(1); } canonicalize_path(data_dir); break; case 'l': if (log_file) free(log_file); log_file = strdup(optarg); if (log_file == NULL) { write_stderr("out of memory\n"); exit(1); } break; case 's': if (is_standby_mode) free(is_standby_mode); is_standby_mode = strdup("standby"); if (is_standby_mode == NULL) { write_stderr("out of memory\n"); exit(1); } break; case 'i': if (dest_addr) free(dest_addr); dest_addr = strdup(optarg); if (dest_addr == NULL) { write_stderr("out of memory\n"); exit(1); } break; case 'q': if (dest_port) free(dest_port); dest_port = strdup(optarg); if (dest_port == NULL) { write_stderr("out of memory\n"); exit(1); } break; default: write_stderr("Try \"%s --help\" for more information.\n", progname); } } /* * Handle status, if any */ if (is_gtm_status) gtm_status(); /* Never reach beyond here */ /* * Setup work directory */ if (data_dir) SetConfigOption(GTM_OPTNAME_DATA_DIR, data_dir, GTMC_STARTUP, GTMC_S_OVERRIDE); /* * Setup configuration file */ if (!SelectConfigFiles(data_dir, progname)) exit(1); /* * Parse config file */ ProcessConfigFile(GTMC_STARTUP); /* * Override with command line options */ if (log_file) { SetConfigOption(GTM_OPTNAME_LOG_FILE, log_file, GTMC_STARTUP, GTMC_S_OVERRIDE); free(log_file); log_file = NULL; } if (listen_addresses) { SetConfigOption(GTM_OPTNAME_LISTEN_ADDRESSES, listen_addresses, GTMC_STARTUP, GTMC_S_OVERRIDE); free(listen_addresses); listen_addresses = NULL; } if (node_name) { SetConfigOption(GTM_OPTNAME_NODENAME, node_name, GTMC_STARTUP, GTMC_S_OVERRIDE); free(node_name); node_name = NULL; } if (port_number) { SetConfigOption(GTM_OPTNAME_PORT, port_number, GTMC_STARTUP, GTMC_S_OVERRIDE); free(port_number); port_number = NULL; } if (is_standby_mode) { SetConfigOption(GTM_OPTNAME_STARTUP, is_standby_mode, GTMC_STARTUP, GTMC_S_OVERRIDE); free(is_standby_mode); is_standby_mode = NULL; } if (dest_addr) { SetConfigOption(GTM_OPTNAME_ACTIVE_HOST, dest_addr, GTMC_STARTUP, GTMC_S_OVERRIDE); free(dest_addr); dest_addr = NULL; } if (dest_port) { SetConfigOption(GTM_OPTNAME_ACTIVE_PORT, dest_port, GTMC_STARTUP, GTMC_S_OVERRIDE); free(dest_port); dest_port = NULL; } /* * Check options for the standby mode. */ if (Recovery_IsStandby()) { if (active_addr == NULL || active_port < 1) { help(argv[0]); exit(1); } Recovery_StandbySetConnInfo(active_addr, active_port); } if (GTMDataDir == NULL) { write_stderr("GTM data directory must be specified\n"); write_stderr("Try \"%s --help\" for more information.\n", progname); exit(1); } if (NodeName == NULL || NodeName[0] == 0) { write_stderr("Nodename must be specified\n"); write_stderr("Try \"%s --help\" for more information.\n", progname); exit(1); } if (ListenAddresses == NULL || ListenAddresses[0] == 0) { write_stderr("Listen_addresses must be specified\n"); write_stderr("Try \"%s --help\" for more information.\n", progname); exit(1); } /* * GTM accepts no non-option switch arguments. */ if (optind < argc) { write_stderr("%s: invalid argument: \"%s\"\n", progname, argv[optind]); write_stderr("Try \"%s --help\" for more information.\n", progname); exit(1); } /* * Some basic initialization must happen before we do anything * useful */ BaseInit(); /* * Establish a connection between the active and standby. */ if (Recovery_IsStandby()) { if (!gtm_standby_start_startup()) { elog(ERROR, "Failed to establish a connection to active-GTM."); exit(1); } elog(LOG, "Startup connection established with active-GTM."); } elog(DEBUG1, "Starting GTM server at (%s:%d) -- control file %s", ListenAddresses, GTMPortNumber, GTMControlFile); /* * Read the last GXID and start from there */ if (Recovery_IsStandby()) { if (!gtm_standby_begin_backup()) { elog(ERROR, "Failed to set begin_backup satatus to the active-GTM."); exit(1); } if (!gtm_standby_restore_next_gxid()) { elog(ERROR, "Failed to restore next/last gxid from the active-GTM."); exit(1); } elog(DEBUG1, "Restoring next/last gxid from the active-GTM succeeded."); if (!gtm_standby_restore_gxid()) { elog(ERROR, "Failed to restore all of gxid(s) from the active-GTM."); exit(1); } elog(DEBUG1, "Restoring all of gxid(s) from the active-GTM succeeded."); if (!gtm_standby_restore_sequence()) { elog(ERROR, "Failed to restore sequences from the active-GTM."); exit(1); } elog(DEBUG1, "Restoring sequences from the active-GTM succeeded."); } else { ctlf = fopen(GTMControlFile, "r"); GTM_RestoreTxnInfo(ctlf, next_gxid); GTM_RestoreSeqInfo(ctlf); if (ctlf) fclose(ctlf); } /* Backup the restore point */ GTM_SetNeedBackup(); GTM_WriteRestorePoint(); if (Recovery_IsStandby()) { if (!gtm_standby_register_self(NodeName, GTMPortNumber, GTMDataDir)) { elog(ERROR, "Failed to register myself on the active-GTM as a GTM node."); exit(1); } elog(DEBUG1, "Registering myself to the active-GTM as a GTM node succeeded."); } /* Recover Data of Registered nodes. */ if (Recovery_IsStandby()) { if (!gtm_standby_restore_node()) { elog(ERROR, "Failed to restore node information from the active-GTM."); exit(1); } elog(DEBUG1, "Restoring node information from the active-GTM succeeded."); if (!gtm_standby_end_backup()) { elog(ERROR, "Failed to setup normal standby mode to the active-GTM."); exit(1); } elog(LOG, "Started to run as GTM-Standby."); } else { /* Recover Data of Registered nodes. */ Recovery_RestoreRegisterInfo(); elog(LOG, "Started to run as GTM-Active."); } /* * Establish input sockets. */ for (i = 0; i < MAXLISTEN; i++) ListenSocket[i] = -1; if (ListenAddresses) { int success = 0; if (strcmp(ListenAddresses, "*") == 0) status = StreamServerPort(AF_UNSPEC, NULL, (unsigned short)GTMPortNumber, ListenSocket, MAXLISTEN); else status = StreamServerPort(AF_UNSPEC, ListenAddresses, (unsigned short)GTMPortNumber, ListenSocket, MAXLISTEN); if (status == STATUS_OK) success++; else ereport(FATAL, (errmsg("could not create listen socket for \"%s\"", ListenAddresses))); } /* * check that we have some socket to listen on */ if (ListenSocket[0] == -1) ereport(FATAL, (errmsg("no socket created for listening"))); /* * Record gtm options. We delay this till now to avoid recording * bogus options */ if (!CreateOptsFile(argc, argv)) exit(1); pqsignal(SIGHUP, GTM_SigleHandler); pqsignal(SIGKILL, GTM_SigleHandler); pqsignal(SIGQUIT, GTM_SigleHandler); pqsignal(SIGTERM, GTM_SigleHandler); pqsignal(SIGINT, GTM_SigleHandler); pqsignal(SIGUSR1, GTM_SigleHandler); pqinitmask(); /* * Now, activating a standby GTM... */ if (Recovery_IsStandby()) { if (!gtm_standby_activate_self()) { elog(ERROR, "Failed to update the standby-GTM status as \"CONNECTED\"."); exit(1); } elog(DEBUG1, "Updating the standby-GTM status as \"CONNECTED\" succeeded."); if (!gtm_standby_finish_startup()) { elog(ERROR, "Failed to close the initial connection to the active-GTM."); exit(1); } elog(LOG, "Startup connection with the active-GTM closed."); } /* * Accept any new connections. Fork a new thread for each incoming * connection */ status = ServerLoop(); /* * ServerLoop probably shouldn't ever return, but if it does, close down. */ exit(status != STATUS_OK); return 0; /* not reached */ } /* * ConnCreate -- create a local connection data structure */ static Port* ConnCreate(int serverFd) { Port* port; if (!(port = (Port*)calloc(1, sizeof(Port)))) { ereport(LOG, (ENOMEM, errmsg("out of memory"))); exit(1); } if (StreamConnection(serverFd, port) != STATUS_OK) { if (port->sock >= 0) StreamClose(port->sock); ConnFree(port); port = NULL; } port->conn_id = InvalidGTMProxyConnID; return port; } /* * ConnFree -- free a local connection data structure */ void ConnFree(Port* conn) { free(conn); } /* * Main idle loop of postmaster */ static int ServerLoop(void) { fd_set readmask; int nSockets; errno_t rc; nSockets = initMasks(&readmask); for (;;) { fd_set rmask; int selres; // MemoryContextStats(TopMostMemoryContext); /* * Wait for a connection request to arrive. * * We wait at most one minute, to ensure that the other background * tasks handled below get done even when no requests are arriving. */ rc = memcpy_s((char*)&rmask, sizeof(fd_set), (char*)&readmask, sizeof(fd_set)); securec_check(rc, "\0", "\0"); PG_SETMASK(&UnBlockSig); if (GTMAbortPending) { FILE* ctlf; /* * XXX We should do a clean shutdown here. For the time being, just * write the next GXID to be issued in the control file and exit * gracefully */ elog(LOG, "GTM shutting down."); /* * Tell GTM that we are shutting down so that no new GXIDs are * issued this point onwards */ GTM_SetShuttingDown(); ctlf = fopen(GTMControlFile, "w"); if (ctlf == NULL) { fprintf(stderr, "Failed to create/open the control file\n"); exit(2); } GTM_SaveTxnInfo(ctlf); GTM_SaveSeqInfo(ctlf); fclose(ctlf); exit(1); } { /* must set timeout each time; some OSes change it! */ struct timeval timeout; GTM_ThreadInfo* my_threadinfo = GetMyThreadInfo; timeout.tv_sec = 60; timeout.tv_usec = 0; /* * Now GTM-Standby can backup current status during this region */ GTM_RWLockRelease(&my_threadinfo->thr_lock); selres = select(nSockets, &rmask, NULL, NULL, &timeout); /* * Prohibit GTM-Standby backup from here. */ GTM_RWLockAcquire(&my_threadinfo->thr_lock, GTM_LOCKMODE_WRITE); } /* * Block all signals until we wait again. (This makes it safe for our * signal handlers to do nontrivial work.) */ PG_SETMASK(&BlockSig); /* Now check the select() result */ if (selres < 0) { if (errno != EINTR && errno != EWOULDBLOCK) { ereport(LOG, (EACCES, errmsg("select() failed in postmaster: %m"))); return STATUS_ERROR; } } /* * New connection pending on any of our sockets? If so, fork a child * process to deal with it. */ if (selres > 0) { int i; for (i = 0; i < MAXLISTEN; i++) { if (ListenSocket[i] == -1) break; if (FD_ISSET(ListenSocket[i], &rmask)) { Port* port; port = ConnCreate(ListenSocket[i]); if (port) { GTM_Conn* standby = NULL; standby = gtm_standby_connect_to_standby(); if (GTMAddConnection(port, standby) != STATUS_OK) { elog(ERROR, "Too many connections"); gtm_standby_disconnect_from_standby(standby); StreamClose(port->sock); ConnFree(port); } } } } } } } /* * Initialise the masks for select() for the ports we are listening on. * Return the number of sockets to listen on. */ static int initMasks(fd_set* rmask) { int maxsock = -1; int i; FD_ZERO(rmask); for (i = 0; i < MAXLISTEN; i++) { int fd = ListenSocket[i]; if (fd == -1) break; FD_SET(fd, rmask); if (fd > maxsock) maxsock = fd; } return maxsock + 1; } void* GTM_ThreadMain(void* argp) { GTM_ThreadInfo* thrinfo = (GTM_ThreadInfo*)argp; int qtype; StringInfoData input_message; sigjmp_buf local_sigjmp_buf; elog(DEBUG3, "Starting the connection helper thread"); /* * Create the memory context we will use in the main loop. * * MessageContext is reset once per iteration of the main loop, ie, upon * completion of processing of each command message from the client. * * This context is thread-specific */ MessageContext = AllocSetContextCreate(TopMemoryContext, "MessageContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, false); /* * Acquire the thread lock to prevent connection from GTM-Standby to update * GTM-Standby registration. */ GTM_RWLockAcquire(&thrinfo->thr_lock, GTM_LOCKMODE_WRITE); { /* * We expect a startup message at the very start. The message type is * REGISTER_COORD, followed by the 4 byte Coordinator ID */ char startup_type; GTM_StartupPacket sp; StringInfoData inBuf; errno_t rc; startup_type = pq_getbyte(thrinfo->thr_conn->con_port); if (startup_type != 'A') { ereport(ERROR, (EPROTO, errmsg("Expecting a startup message, but received %c", startup_type))); } initStringInfo(&inBuf); /* * All frontend messages have a length word next * after the type code; we can read the message contents independently of * the type. */ if (pq_getmessage(thrinfo->thr_conn->con_port, &inBuf, 0)) { ereport(ERROR, (EPROTO, errmsg("Expecting coordinator ID, but received EOF"))); } rc = memcpy_s(&sp, sizeof(GTM_StartupPacket), pq_getmsgbytes(&inBuf, sizeof(GTM_StartupPacket)), sizeof(GTM_StartupPacket)); securec_check(rc, "\0", "\0"); pq_getmsgend(&inBuf); GTM_RegisterPGXCNode(thrinfo->thr_conn->con_port, sp.sp_node_name); thrinfo->thr_conn->con_port->remote_type = sp.sp_remotetype; thrinfo->thr_conn->con_port->is_postmaster = sp.sp_ispostmaster; } { /* * Send a dummy authentication request message 'R' as the client * expects that in the current protocol */ StringInfoData buf; pq_beginmessage(&buf, 'R'); pq_endmessage(thrinfo->thr_conn->con_port, &buf); pq_flush(thrinfo->thr_conn->con_port); elog(DEBUG3, "Sent connection authentication message to the client"); } /* * Get the input_message in the TopMemoryContext so that we don't need to * free/palloc it for every incoming message. Unlike Postgres, we don't * expect the incoming messages to be of arbitrary sizes */ initStringInfo(&input_message); /* * POSTGRES main processing loop begins here * * If an exception is encountered, processing resumes here so we abort the * current transaction and start a new one. * * You might wonder why this isn't coded as an infinite loop around a * PG_TRY construct. The reason is that this is the bottom of the * exception stack, and so with PG_TRY there would be no exception handler * in force at all during the CATCH part. By leaving the outermost setjmp * always active, we have at least some chance of recovering from an error * during error recovery. (If we get into an infinite loop thereby, it * will soon be stopped by overflow of elog.c's internal state stack.) */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* * NOTE: if you are tempted to add more code in this if-block, * consider the high probability that it should be in * AbortTransaction() instead. The only stuff done directly here * should be stuff that is guaranteed to apply *only* for outer-level * error recovery, such as adjusting the FE/BE protocol status. */ /* Report the error to the client and/or server log */ if (thrinfo->thr_conn) EmitErrorReport(thrinfo->thr_conn->con_port); else EmitErrorReport(NULL); /* * Now return to normal top-level context and clear ErrorContext for * next time. */ MemoryContextSwitchTo(TopMemoryContext); FlushErrorState(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; for (;;) { /* * Release storage left over from prior query cycle, and create a new * query input buffer in the cleared MessageContext. */ MemoryContextSwitchTo(MessageContext); MemoryContextResetAndDeleteChildren(MessageContext); /* * Just reset the input buffer to avoid repeated palloc/pfrees * * XXX We should consider resetting the MessageContext periodically to * handle any memory leaks */ resetStringInfo(&input_message); /* * GTM-Standby registration information can be updated during ReadCommand * operation. */ GTM_RWLockRelease(&thrinfo->thr_lock); /* * (3) read a command (loop blocks here) */ qtype = ReadCommand(thrinfo->thr_conn->con_port, &input_message); GTM_RWLockAcquire(&thrinfo->thr_lock, GTM_LOCKMODE_WRITE); /* * Check if GTM Standby info is upadted * Maybe the following lines can be a separate function. At present, this is done only here so * I'll leave them here. K.Suzuki, Nov.29, 2011 * Please note that we don't check if it is not in the standby mode to allow cascased standby. */ if (GTMThreads->gt_standby_ready && thrinfo->thr_conn->standby == NULL) { /* Connect to GTM-Standby */ thrinfo->thr_conn->standby = gtm_standby_connect_to_standby(); if (thrinfo->thr_conn->standby == NULL) GTMThreads->gt_standby_ready = false; /* This will make other threads to disconnect from * the standby, if needed.*/ } else if (GTMThreads->gt_standby_ready == false && thrinfo->thr_conn->standby) { /* Disconnect from GTM-Standby */ gtm_standby_disconnect_from_standby(thrinfo->thr_conn->standby); thrinfo->thr_conn->standby = NULL; } switch (qtype) { case 'C': ProcessCommand(thrinfo->thr_conn->con_port, &input_message); break; case 'X': case EOF: /* * Connection termination request * Remove all transactions opened within the thread */ GTM_RemoveAllTransInfos(-1); /* Disconnect node if necessary */ Recovery_PGXCNodeDisconnect(thrinfo->thr_conn->con_port); GTM_RWLockRelease(&thrinfo->thr_lock); pthread_exit(thrinfo); break; case 'F': /* * Flush all the outgoing data on the wire. Consume the message * type field for sanity */ /* Sync with standby first */ if (thrinfo->thr_conn->standby) { if (Backup_synchronously) gtm_sync_standby(thrinfo->thr_conn->standby); else gtmpqFlush(thrinfo->thr_conn->standby); } pq_getmsgint(&input_message, sizeof(GTM_MessageType)); pq_getmsgend(&input_message); pq_flush(thrinfo->thr_conn->con_port); break; default: /* * Remove all transactions opened within the thread */ GTM_RemoveAllTransInfos(-1); /* Disconnect node if necessary */ Recovery_PGXCNodeDisconnect(thrinfo->thr_conn->con_port); ereport(FATAL, (EPROTO, errmsg("invalid frontend message type %d", qtype))); break; } } /* can't get here because the above loop never exits */ Assert(false); return thrinfo; } void ProcessCommand(Port* myport, StringInfo input_message) { GTM_MessageType mtype; GTM_ProxyMsgHeader proxyhdr; if (myport->remote_type == GTM_NODE_GTM_PROXY) pq_copymsgbytes(input_message, (char*)&proxyhdr, sizeof(GTM_ProxyMsgHeader)); else proxyhdr.ph_conid = InvalidGTMProxyConnID; myport->conn_id = proxyhdr.ph_conid; mtype = pq_getmsgint(input_message, sizeof(GTM_MessageType)); /* * The next line will have some overhead. Better to be in * compile option. */ #ifdef GTM_DEBUG elog(DEBUG3, "mtype = %s (%d).", gtm_util_message_name(mtype), (int)mtype); #endif switch (mtype) { case MSG_SYNC_STANDBY: ProcessSyncStandbyCommand(myport, mtype, input_message); break; case MSG_NODE_REGISTER: case MSG_BKUP_NODE_REGISTER: case MSG_NODE_UNREGISTER: case MSG_BKUP_NODE_UNREGISTER: case MSG_NODE_LIST: ProcessPGXCNodeCommand(myport, mtype, input_message); break; case MSG_BEGIN_BACKUP: ProcessGTMBeginBackup(myport, input_message); break; case MSG_END_BACKUP: ProcessGTMEndBackup(myport, input_message); break; case MSG_NODE_BEGIN_REPLICATION_INIT: case MSG_NODE_END_REPLICATION_INIT: case MSG_TXN_BEGIN: case MSG_BKUP_TXN_BEGIN: case MSG_TXN_BEGIN_GETGXID: case MSG_BKUP_TXN_BEGIN_GETGXID: case MSG_TXN_BEGIN_GETGXID_AUTOVACUUM: case MSG_BKUP_TXN_BEGIN_GETGXID_AUTOVACUUM: case MSG_TXN_PREPARE: case MSG_BKUP_TXN_PREPARE: case MSG_TXN_START_PREPARED: case MSG_BKUP_TXN_START_PREPARED: case MSG_TXN_COMMIT: case MSG_BKUP_TXN_COMMIT: case MSG_TXN_COMMIT_PREPARED: case MSG_BKUP_TXN_COMMIT_PREPARED: case MSG_TXN_ROLLBACK: case MSG_BKUP_TXN_ROLLBACK: case MSG_TXN_GET_GXID: case MSG_BKUP_TXN_GET_GXID: case MSG_TXN_BEGIN_GETGXID_MULTI: case MSG_BKUP_TXN_BEGIN_GETGXID_MULTI: case MSG_TXN_COMMIT_MULTI: case MSG_BKUP_TXN_COMMIT_MULTI: case MSG_TXN_ROLLBACK_MULTI: case MSG_BKUP_TXN_ROLLBACK_MULTI: case MSG_TXN_GET_GID_DATA: case MSG_TXN_GET_NEXT_GXID: case MSG_TXN_GXID_LIST: ProcessTransactionCommand(myport, mtype, input_message); break; case MSG_SNAPSHOT_GET: case MSG_SNAPSHOT_GXID_GET: case MSG_SNAPSHOT_GET_MULTI: ProcessSnapshotCommand(myport, mtype, input_message); break; case MSG_SEQUENCE_INIT: case MSG_BKUP_SEQUENCE_INIT: case MSG_SEQUENCE_GET_NEXT: case MSG_BKUP_SEQUENCE_GET_NEXT: case MSG_SEQUENCE_GET_LAST: case MSG_SEQUENCE_SET_VAL: case MSG_BKUP_SEQUENCE_SET_VAL: case MSG_SEQUENCE_RESET: case MSG_BKUP_SEQUENCE_RESET: case MSG_SEQUENCE_CLOSE: case MSG_BKUP_SEQUENCE_CLOSE: case MSG_SEQUENCE_RENAME: case MSG_BKUP_SEQUENCE_RENAME: case MSG_SEQUENCE_ALTER: case MSG_BKUP_SEQUENCE_ALTER: case MSG_SEQUENCE_LIST: ProcessSequenceCommand(myport, mtype, input_message); break; case MSG_TXN_GET_STATUS: case MSG_TXN_GET_ALL_PREPARED: ProcessQueryCommand(myport, mtype, input_message); break; case MSG_BARRIER: case MSG_BKUP_BARRIER: ProcessBarrierCommand(myport, mtype, input_message); break; case MSG_BACKEND_DISCONNECT: GTM_RemoveAllTransInfos(proxyhdr.ph_conid); /* Mark PGXC Node as disconnected if backend disconnected is postmaster */ ProcessPGXCNodeBackendDisconnect(myport, input_message); break; default: ereport(FATAL, (EPROTO, errmsg("invalid frontend message type %d", mtype))); } if (GTM_NeedBackup()) GTM_WriteRestorePoint(); } static int GTMAddConnection(Port* port, GTM_Conn* standby) { GTM_ConnectionInfo* conninfo = NULL; errno_t rc; conninfo = (GTM_ConnectionInfo*)palloc(sizeof(GTM_ConnectionInfo)); rc = memset_s(conninfo, sizeof(GTM_ConnectionInfo), 0, sizeof(GTM_ConnectionInfo)); securec_check(rc, "\0", "\0"); if (conninfo == NULL) { ereport(ERROR, (ENOMEM, errmsg("Out of memory"))); return STATUS_ERROR; } elog(DEBUG3, "Started new connection"); conninfo->con_port = port; /* * Add a connection to the standby. */ if (standby != NULL) conninfo->standby = standby; /* * XXX Start the thread */ if (GTM_ThreadCreate(conninfo, GTM_ThreadMain) == NULL) { elog(ERROR, "failed to create a new thread"); return STATUS_ERROR; } return STATUS_OK; } /* ---------------- * ReadCommand reads a command from either the frontend or * standard input, places it in inBuf, and returns the * message type code (first byte of the message). * EOF is returned if end of file. * ---------------- */ static int ReadCommand(Port* myport, StringInfo inBuf) { int qtype; /* * Get message type code from the frontend. */ qtype = pq_getbyte(myport); /* frontend disconnected */ if (qtype == EOF) { ereport(COMMERROR, (EPROTO, errmsg("unexpected EOF on client connection"))); return EOF; } /* * Validate message type code before trying to read body; if we have lost * sync, better to say "command unknown" than to run out of memory because * we used garbage as a length word. * * This also gives us a place to set the doing_extended_query_message flag * as soon as possible. */ switch (qtype) { case 'C': break; case 'X': break; case 'F': break; default: /* * Otherwise we got garbage from the frontend. We treat this as * fatal because we have probably lost message boundary sync, and * there's no good way to recover. */ ereport(ERROR, (EPROTO, errmsg("invalid frontend message type %d", qtype))); break; } /* * In protocol version 3, all frontend messages have a length word next * after the type code; we can read the message contents independently of * the type. */ if (pq_getmessage(myport, inBuf, 0)) return EOF; /* suitable message already logged */ return qtype; } /* * Process MSG_SYNC_STANDBY message */ static void ProcessSyncStandbyCommand(Port* myport, GTM_MessageType mtype, StringInfo message) { StringInfoData buf; pq_getmsgend(message); pq_beginmessage(&buf, 'S'); pq_sendint(&buf, SYNC_STANDBY_RESULT, 4); pq_endmessage(myport, &buf); /* Sync standby first */ if (GetMyThreadInfo->thr_conn->standby) gtm_sync_standby(GetMyThreadInfo->thr_conn->standby); pq_flush(myport); } static void ProcessPGXCNodeCommand(Port* myport, GTM_MessageType mtype, StringInfo message) { switch (mtype) { case MSG_NODE_REGISTER: ProcessPGXCNodeRegister(myport, message, false); break; case MSG_BKUP_NODE_REGISTER: ProcessPGXCNodeRegister(myport, message, true); break; case MSG_NODE_UNREGISTER: ProcessPGXCNodeUnregister(myport, message, false); break; case MSG_BKUP_NODE_UNREGISTER: ProcessPGXCNodeUnregister(myport, message, true); break; case MSG_NODE_LIST: ProcessPGXCNodeList(myport, message); break; default: Assert(0); /* Shouldn't come here.. keep compiler quite */ } } static void ProcessTransactionCommand(Port* myport, GTM_MessageType mtype, StringInfo message) { elog(DEBUG1, "ProcessTransactionCommand: mtype:%d", mtype); switch (mtype) { case MSG_NODE_BEGIN_REPLICATION_INIT: ProcessBeginReplicationInitialSyncRequest(myport, message); break; case MSG_NODE_END_REPLICATION_INIT: ProcessEndReplicationInitialSyncRequest(myport, message); break; case MSG_TXN_BEGIN: ProcessBeginTransactionCommand(myport, message); break; case MSG_BKUP_TXN_BEGIN: ProcessBkupBeginTransactionCommand(myport, message); break; case MSG_TXN_BEGIN_GETGXID: ProcessBeginTransactionGetGXIDCommand(myport, message); break; case MSG_BKUP_TXN_BEGIN_GETGXID: ProcessBkupBeginTransactionGetGXIDCommand(myport, message); break; case MSG_TXN_BEGIN_GETGXID_AUTOVACUUM: ProcessBeginTransactionGetGXIDAutovacuumCommand(myport, message); break; case MSG_BKUP_TXN_BEGIN_GETGXID_AUTOVACUUM: ProcessBkupBeginTransactionGetGXIDAutovacuumCommand(myport, message); break; case MSG_TXN_BEGIN_GETGXID_MULTI: ProcessBeginTransactionGetGXIDCommandMulti(myport, message); break; case MSG_BKUP_TXN_BEGIN_GETGXID_MULTI: ProcessBkupBeginTransactionGetGXIDCommandMulti(myport, message); break; case MSG_TXN_START_PREPARED: ProcessStartPreparedTransactionCommand(myport, message, false); break; case MSG_BKUP_TXN_START_PREPARED: ProcessStartPreparedTransactionCommand(myport, message, true); break; case MSG_TXN_PREPARE: ProcessPrepareTransactionCommand(myport, message, false); break; case MSG_BKUP_TXN_PREPARE: ProcessPrepareTransactionCommand(myport, message, true); break; case MSG_TXN_COMMIT: ProcessCommitTransactionCommand(myport, message, false); break; case MSG_BKUP_TXN_COMMIT: ProcessCommitTransactionCommand(myport, message, true); break; case MSG_TXN_COMMIT_PREPARED: ProcessCommitPreparedTransactionCommand(myport, message, false); break; case MSG_BKUP_TXN_COMMIT_PREPARED: ProcessCommitPreparedTransactionCommand(myport, message, true); break; case MSG_TXN_ROLLBACK: ProcessRollbackTransactionCommand(myport, message, false); break; case MSG_BKUP_TXN_ROLLBACK: ProcessRollbackTransactionCommand(myport, message, true); break; case MSG_TXN_COMMIT_MULTI: ProcessCommitTransactionCommandMulti(myport, message, false); break; case MSG_BKUP_TXN_COMMIT_MULTI: ProcessCommitTransactionCommandMulti(myport, message, true); break; case MSG_TXN_ROLLBACK_MULTI: ProcessRollbackTransactionCommandMulti(myport, message, false); break; case MSG_BKUP_TXN_ROLLBACK_MULTI: ProcessRollbackTransactionCommandMulti(myport, message, true); break; case MSG_TXN_GET_GXID: /* * Notice: we don't have corresponding functions in gtm_client.c * * Because this function is not used, GTM-standby extension is not * included in this function. */ ProcessGetGXIDTransactionCommand(myport, message); break; case MSG_TXN_GET_GID_DATA: ProcessGetGIDDataTransactionCommand(myport, message); break; case MSG_TXN_GET_NEXT_GXID: ProcessGetNextGXIDTransactionCommand(myport, message); break; case MSG_TXN_GXID_LIST: ProcessGXIDListCommand(myport, message); break; default: Assert(0); /* Shouldn't come here.. keep compiler quite */ } } static void ProcessSnapshotCommand(Port* myport, GTM_MessageType mtype, StringInfo message) { switch (mtype) { case MSG_SNAPSHOT_GET: ProcessGetSnapshotCommand(myport, message, false); break; case MSG_SNAPSHOT_GET_MULTI: ProcessGetSnapshotCommandMulti(myport, message); break; case MSG_SNAPSHOT_GXID_GET: ProcessGetSnapshotCommand(myport, message, true); break; default: Assert(0); /* Shouldn't come here.. keep compiler quite */ } } static void ProcessSequenceCommand(Port* myport, GTM_MessageType mtype, StringInfo message) { switch (mtype) { case MSG_SEQUENCE_INIT: ProcessSequenceInitCommand(myport, message, false); break; case MSG_BKUP_SEQUENCE_INIT: ProcessSequenceInitCommand(myport, message, true); break; case MSG_SEQUENCE_ALTER: ProcessSequenceAlterCommand(myport, message, false); break; case MSG_BKUP_SEQUENCE_ALTER: ProcessSequenceAlterCommand(myport, message, true); break; case MSG_SEQUENCE_GET_NEXT: ProcessSequenceGetNextCommand(myport, message, false); break; case MSG_BKUP_SEQUENCE_GET_NEXT: ProcessSequenceGetNextCommand(myport, message, true); break; case MSG_SEQUENCE_SET_VAL: ProcessSequenceSetValCommand(myport, message, false); break; case MSG_BKUP_SEQUENCE_SET_VAL: ProcessSequenceSetValCommand(myport, message, true); break; case MSG_SEQUENCE_RESET: ProcessSequenceResetCommand(myport, message, false); break; case MSG_BKUP_SEQUENCE_RESET: ProcessSequenceResetCommand(myport, message, true); break; case MSG_SEQUENCE_CLOSE: ProcessSequenceCloseCommand(myport, message, false); break; case MSG_BKUP_SEQUENCE_CLOSE: ProcessSequenceCloseCommand(myport, message, true); break; case MSG_SEQUENCE_RENAME: ProcessSequenceRenameCommand(myport, message, false); break; case MSG_BKUP_SEQUENCE_RENAME: ProcessSequenceRenameCommand(myport, message, true); break; case MSG_SEQUENCE_LIST: ProcessSequenceListCommand(myport, message); break; default: Assert(0); /* Shouldn't come here.. keep compiler quite */ } } static void ProcessQueryCommand(Port* myport, GTM_MessageType mtype, StringInfo message) { switch (mtype) { case MSG_TXN_GET_STATUS: case MSG_TXN_GET_ALL_PREPARED: break; default: Assert(0); /* Shouldn't come here.. keep compiler quite */ } } static void GTM_RegisterPGXCNode(Port* myport, char* PGXCNodeName) { elog(DEBUG3, "Registering coordinator with name %s", PGXCNodeName); myport->node_name = strdup(PGXCNodeName); } /* * Validate the proposed data directory */ static void checkDataDir(void) { struct stat stat_buf; Assert(GTMDataDir); retry: if (stat(GTMDataDir, &stat_buf) != 0) { if (errno == ENOENT) { if (mkdir(GTMDataDir, 0700) != 0) { ereport(FATAL, (errno, errmsg("failed to create the directory \"%s\"", GTMDataDir))); } goto retry; } else { ereport(FATAL, (EPERM, errmsg("could not read permissions of directory \"%s\": %m", GTMDataDir))); } } /* eventual chdir would fail anyway, but let's test ... */ if (!S_ISDIR(stat_buf.st_mode)) { ereport(FATAL, (EINVAL, errmsg("specified data directory \"%s\" is not a directory", GTMDataDir))); } /* * Check that the directory belongs to my userid; if not, reject. * * This check is an essential part of the interlock that prevents two * postmasters from starting in the same directory (see CreateLockFile()). * Do not remove or weaken it. * * XXX can we safely enable this check on Windows? */ #if !defined(WIN32) && !defined(__CYGWIN__) if (stat_buf.st_uid != geteuid()) { ereport(FATAL, (EINVAL, errmsg("data directory \"%s\" has wrong ownership", GTMDataDir), errhint("The server must be started by the user that owns the data directory."))); } #endif } /* * Set data directory, but make sure it's an absolute path. Use this, * never set DataDir directly. */ void SetDataDir() { char* new; /* If presented path is relative, convert to absolute */ new = make_absolute_path(GTMDataDir); if (!new) { ereport(FATAL, (errno, errmsg("failed to set the data directory \"%s\"", GTMDataDir))); } if (GTMDataDir) free(GTMDataDir); GTMDataDir = new; } /* * Change working directory to DataDir. Most of the postmaster and backend * code assumes that we are in DataDir so it can use relative paths to access * stuff in and under the data directory. For convenience during path * setup, however, we don't force the chdir to occur during SetDataDir. */ static void ChangeToDataDir(void) { if (chdir(GTMDataDir) < 0) ereport(FATAL, (EINVAL, errmsg("could not change directory to \"%s\": %m", GTMDataDir))); } /* * Create the data directory lockfile. * * When this is called, we must have already switched the working * directory to DataDir, so we can just use a relative path. This * helps ensure that we are locking the directory we should be. */ static void CreateDataDirLockFile() { CreateLockFile(GTM_PID_FILE, GTMDataDir); } /* * Create a lockfile. * * filename is the name of the lockfile to create. * amPostmaster is used to determine how to encode the output PID. * isDDLock and refName are used to determine what error message to produce. */ static void CreateLockFile(const char* filename, const char* refName) { int fd; char buffer[MAXPGPATH + 100]; int ntries; int len; int encoded_pid; pid_t other_pid; pid_t my_pid = getpid(); /* * We need a loop here because of race conditions. But don't loop forever * (for example, a non-writable $PGDATA directory might cause a failure * that won't go away). 100 tries seems like plenty. */ for (ntries = 0;; ntries++) { /* * Try to create the lock file --- O_EXCL makes this atomic. * * Think not to make the file protection weaker than 0600. See * comments below. */ fd = open(filename, O_RDWR | O_CREAT | O_EXCL, 0600); if (fd >= 0) break; /* Success; exit the retry loop */ /* * Couldn't create the pid file. Probably it already exists. */ if ((errno != EEXIST && errno != EACCES) || ntries > 100) { ereport(FATAL, (EINVAL, errmsg("could not create lock file \"%s\": %m", filename))); } /* * Read the file to get the old owner's PID. Note race condition * here: file might have been deleted since we tried to create it. */ fd = open(filename, O_RDONLY, 0600); if (fd < 0) { if (errno == ENOENT) continue; /* race condition; try again */ ereport(FATAL, (EINVAL, errmsg("could not open lock file \"%s\": %m", filename))); } if ((len = read(fd, buffer, sizeof(buffer) - 1)) < 0) { ereport(FATAL, (EINVAL, errmsg("could not read lock file \"%s\": %m", filename))); } close(fd); buffer[len] = '\0'; encoded_pid = atoi(buffer); other_pid = (pid_t)encoded_pid; if (other_pid <= 0) { elog(FATAL, "bogus data in lock file \"%s\": \"%s\"", filename, buffer); } /* * Check to see if the other process still exists * * If the PID in the lockfile is our own PID or our parent's PID, then * the file must be stale (probably left over from a previous system * boot cycle). We need this test because of the likelihood that a * reboot will assign exactly the same PID as we had in the previous * reboot. Also, if there is just one more process launch in this * reboot than in the previous one, the lockfile might mention our * parent's PID. We can reject that since we'd never be launched * directly by a competing postmaster. We can't detect grandparent * processes unfortunately, but if the init script is written * carefully then all but the immediate parent shell will be * root-owned processes and so the kill test will fail with EPERM. * * We can treat the EPERM-error case as okay because that error * implies that the existing process has a different userid than we * do, which means it cannot be a competing postmaster. A postmaster * cannot successfully attach to a data directory owned by a userid * other than its own. (This is now checked directly in * checkDataDir(), but has been true for a long time because of the * restriction that the data directory isn't group- or * world-accessible.) Also, since we create the lockfiles mode 600, * we'd have failed above if the lockfile belonged to another userid * --- which means that whatever process kill() is reporting about * isn't the one that made the lockfile. (NOTE: this last * consideration is the only one that keeps us from blowing away a * Unix socket file belonging to an instance of Postgres being run by * someone else, at least on machines where /tmp hasn't got a * stickybit.) * * Windows hasn't got getppid(), but doesn't need it since it's not * using real kill() either... * * Normally kill() will fail with ESRCH if the given PID doesn't * exist. */ if (other_pid != my_pid #ifndef WIN32 && other_pid != getppid() #endif ) { if (kill(other_pid, 0) == 0 || (errno != ESRCH && errno != EPERM)) { /* lockfile belongs to a live process */ ereport(FATAL, (EINVAL, errmsg("lock file \"%s\" already exists", filename), errhint("Is another GTM (PID %d) running in data directory \"%s\"?", (int)other_pid, refName))); } } /* * Looks like nobody's home. Unlink the file and try again to create * it. Need a loop because of possible race condition against other * would-be creators. */ if (unlink(filename) < 0) { ereport(FATAL, (EACCES, errmsg("could not remove old lock file \"%s\": %m", filename), errhint("The file seems accidentally left over, but " "it could not be removed. Please remove the file " "by hand and try again."))); } } /* * Successfully created the file, now fill it. */ int rc; rc = snprintf_s(buffer, sizeof(buffer), sizeof(buffer) - 1, "%d\n%s\n%d\n", (int)my_pid, GTMDataDir, (Recovery_IsStandby() ? 0 : 1)); securec_check_ss(rc, "\0", "\0"); errno = 0; if (write(fd, buffer, strlen(buffer)) != strlen(buffer)) { int save_errno = errno; close(fd); unlink(filename); /* if write didn't set errno, assume problem is no disk space */ errno = save_errno ? save_errno : ENOSPC; ereport(FATAL, (EACCES, errmsg("could not write lock file \"%s\": %m", filename))); } if (close(fd)) { int save_errno = errno; unlink(filename); errno = save_errno; ereport(FATAL, (EACCES, errmsg("could not write lock file \"%s\": %m", filename))); } } /* * Create the opts file */ static bool CreateOptsFile(int argc, char* argv[]) { FILE* fp; int i; #define OPTS_FILE "gtm.opts" if ((fp = fopen(OPTS_FILE, "w")) == NULL) { elog(LOG, "could not create file \"%s\": %m", OPTS_FILE); return false; } for (i = 1; i < argc; i++) fprintf(fp, " \"%s\"", argv[i]); fputs("\n", fp); if (fclose(fp)) { elog(LOG, "could not write file \"%s\": %m", OPTS_FILE); return false; } return true; } /* delete pid file */ static void DeleteLockFile(const char* filename) { if (unlink(filename) < 0) { ereport(FATAL, (EACCES, errmsg("could not remove old lock file \"%s\": %m", filename), errhint("The file seems accidentally left over, but " "it could not be removed. Please remove the file " "by hand and try again."))); } } static void PromoteToActive(void) { const char* conf_file; FILE* fp; elog(LOG, "Promote signal received. Becoming an active..."); /* * Do promoting things here. */ Recovery_StandbySetStandby(false); CreateDataDirLockFile(); /* * Update the GTM config file for the next restart.. */ conf_file = GetConfigOption("config_file", true); elog(DEBUG1, "Config file is %s...", conf_file); if ((fp = fopen(conf_file, PG_BINARY_A)) == NULL) { ereport(FATAL, (EINVAL, errmsg("could not open GTM configuration file \"%s\": %m", conf_file))); } else { time_t stamp_time = (time_t)time(NULL); char strfbuf[128]; strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", localtime(&stamp_time)); fprintf(fp, "#===================================================\n" "# Updated due to GTM promote request\n" "# %s\nstartup = ACT\n" "#===================================================\n", strfbuf); if (fclose(fp)) { ereport(FATAL, (EINVAL, errmsg("could not close GTM configuration file \"%s\": %m", conf_file))); } } GTM_SetNeedBackup(); GTM_WriteRestorePoint(); return; } static void ProcessBarrierCommand(Port* myport, GTM_MessageType mtype, StringInfo message) { int barrier_id_len; char* barrier_id; int count = 0; GTM_Conn* oldconn = GetMyThreadInfo->thr_conn->standby; StringInfoData buf; barrier_id_len = pq_getmsgint(message, sizeof(int)); barrier_id = (char*)pq_getmsgbytes(message, barrier_id_len); pq_getmsgend(message); elog(INFO, "Processing BARRIER %s", barrier_id); if ((mtype == MSG_BARRIER) && GetMyThreadInfo->thr_conn->standby) { retry: bkup_report_barrier(GetMyThreadInfo->thr_conn->standby, barrier_id); if (gtm_standby_check_communication_error(&count, oldconn)) goto retry; if (Backup_synchronously && (myport->remote_type != GTM_NODE_GTM_PROXY)) gtm_sync_standby(GetMyThreadInfo->thr_conn->standby); } GTM_WriteBarrierBackup(barrier_id); if (mtype == MSG_BARRIER) { /* * Send a SUCCESS message back to the client */ pq_beginmessage(&buf, 'S'); pq_sendint(&buf, BARRIER_RESULT, 4); if (myport->remote_type == GTM_NODE_GTM_PROXY) { GTM_ProxyMsgHeader proxyhdr; proxyhdr.ph_conid = myport->conn_id; pq_sendbytes(&buf, (char*)&proxyhdr, sizeof(GTM_ProxyMsgHeader)); } pq_endmessage(myport, &buf); if (myport->remote_type != GTM_NODE_GTM_PROXY) { /* Flush standby first */ if (GetMyThreadInfo->thr_conn->standby) gtmpqFlush(GetMyThreadInfo->thr_conn->standby); pq_flush(myport); } } }