/* * pgbench.c * * A simple benchmark program for openGauss * Originally written by Tatsuo Ishii and enhanced by many contributors. * * contrib/pgbench/pgbench.c * Copyright (c) 2000-2012, PostgreSQL Global Development Group * ALL RIGHTS RESERVED; * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose, without fee, and without a written agreement * is hereby granted, provided that the above copyright notice and this * paragraph and the following two paragraphs appear in all copies. * * IN NO EVENT SHALL THE AUTHOR OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * THE AUTHOR AND DISTRIBUTORS SPECIFICALLY DISCLAIMS ANY WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. * */ #ifdef WIN32 #define FD_SETSIZE 1024 /* set before winsock2.h is included */ #endif /* ! WIN32 */ #include "postgres_fe.h" #include "getopt_long.h" #include "libpq/libpq-fe.h" #include "libpq/pqsignal.h" #include "portability/instr_time.h" #include "utils/elog.h" #include #include #ifndef WIN32 #include #include #endif /* ! 
WIN32 */ #ifdef HAVE_SYS_SELECT_H #include #endif #ifdef HAVE_SYS_RESOURCE_H #include /* for getrlimit */ #endif #ifdef HAVE_POLL_H #include #endif #ifdef HAVE_SYS_POLL_H #include #endif #ifndef INT64_MAX #define INT64_MAX INT64CONST(0x7FFFFFFFFFFFFFFF) #endif /* * Multi-platform pthread implementations */ #ifdef WIN32 /* Use native win32 threads on Windows */ typedef struct win32_pthread* pthread_t; typedef int pthread_attr_t; static int pthread_create(pthread_t* thread, pthread_attr_t* attr, void* (*start_routine)(void*), void* arg); static int pthread_join(pthread_t th, void** thread_return); #elif defined(ENABLE_THREAD_SAFETY) /* Use platform-dependent pthread capability */ #include #else /* Use emulation with fork. Rename pthread identifiers to avoid conflicts */ #define PTHREAD_FORK_EMULATION #include #define pthread_t pg_pthread_t #define pthread_attr_t pg_pthread_attr_t #define pthread_create pg_pthread_create #define pthread_join pg_pthread_join typedef struct fork_pthread* pthread_t; typedef int pthread_attr_t; static int pthread_create(pthread_t* thread, pthread_attr_t* attr, void* (*start_routine)(void*), void* arg); static int pthread_join(pthread_t th, void** thread_return); #endif extern char* optarg; extern int optind; /******************************************************************** * some configurable parameters */ /* max number of clients allowed */ #ifdef FD_SETSIZE #define MAXCLIENTS (FD_SETSIZE - 10) #else #define MAXCLIENTS 1024 #endif #define DEFAULT_NXACTS 10 /* default nxacts */ int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ /* * scaling factor. for example, scale = 10 will make 1000000 tuples in * pgbench_accounts table. */ int scale = 1; /* * fillfactor. for example, fillfactor = 90 will use only 90 percent * space during inserts and leave 10 percent free. */ int fillfactor = 100; /* * use unlogged tables? 
*/
int unlogged_tables = 0;

/*
 * When threads are throttled to a given rate limit, this is the target delay
 * to reach that rate in usec.  0 is the default and means no throttling.
 */
int64 throttle_delay = 0;

/*
 * tablespace selection
 */
char* tablespace = NULL;
char* index_tablespace = NULL;

/*
 * end of configurable parameters
 *********************************************************************/

/*
 * Per-scale-unit row counts.  The built-in scripts multiply each of these by
 * :scale to obtain the id ranges they draw from.
 */
#define nbranches \
    1 /* Makes little sense to change this.  Change \
       * -s instead */
#define ntellers 10
#define naccounts 100000

#ifdef PGXC
bool use_branch = false; /* use branch id in DDL and DML */
#endif

bool use_log;              /* log transaction latencies to a file */
int progress = 0;          /* thread progress report every this seconds */
int progress_nclients = 0; /* number of clients for progress report */
int progress_nthreads = 0; /* number of threads for progress report */
bool is_connect;           /* establish connection for each transaction */
bool is_mot = false;       /* use memory tables */
bool is_latencies;         /* report per-command latencies */
int main_pid;              /* main process id used in log filename */

/*
 * Connection parameters.  doConnect() hands pghost, pgport, pgoptions, pgtty,
 * dbName and login to PQsetdbLogin(); a non-NULL secrete is used as the
 * password.
 */
char* pghost = "";
char* pgport = "";
char* pgoptions = NULL;
char* pgtty = NULL;
char* login = NULL;
char* secrete = NULL;
char* dbName;
char* orient = NULL;
char* with_options = NULL;

volatile bool timer_exceeded = false; /* flag from signal handler */

/*
 * Last value handed out by the \setseq meta command; -1 until first use.
 * NOTE(review): this is a single global updated with a plain increment in
 * doCustom() -- confirm whether sharing it between threads is intended.
 */
volatile int seq_curr_index = -1;

/* variable definitions */
typedef struct {
    char* name;  /* variable name */
    char* value; /* its value */
} Variable;

#define MAX_FILES 128          /* max number of SQL script files allowed */
#define SHELL_COMMAND_SIZE 256 /* maximum size allowed for shell command */

/*
 * structures used in custom query mode
 *
 * One CState exists per simulated client.
 */
typedef struct {
    PGconn* con;           /* connection handle to DB */
    int id;                /* client No. */
    int state;             /* state No. */
    int cnt;               /* xacts count */
    int ecnt;              /* error count */
    int listen;            /* 0 indicates that an async query has been
                            * sent */
    int sleeping;          /* 1 indicates that the client is napping */
    bool throttling;       /* whether nap is for throttling */
    uint64 until;          /* napping until (usec) */
    Variable* variables;   /* array of variable definitions */
    int nvariables;        /* number of entries in variables[] */
    instr_time txn_begin;  /* used for measuring transaction latencies */
    instr_time stmt_begin; /* used for measuring statement latencies */
    int64 txn_latencies;   /* cumulated latencies */
    int64 txn_sqlats;      /* cumulated square latencies */
    bool is_throttled;     /* whether transaction throttling is done */
    int use_file;          /* index in sql_files for this client */
    bool prepared[MAX_FILES]; /* per script file: statements already prepared
                               * on this client's connection */
} CState;

/*
 * Thread state and result
 */
typedef struct {
    int tid;                        /* thread id */
    pthread_t thread;               /* thread handle */
    CState* state;                  /* array of CState */
    int nstate;                     /* length of state[] */
    instr_time start_time;          /* thread start time */
    instr_time* exec_elapsed;       /* time spent executing cmds (per Command) */
    int* exec_count;                /* number of cmd executions (per Command) */
    unsigned short random_state[3]; /* separate randomness for each thread */
    int64 throttle_trigger;         /* previous/next throttling (us) */
    int64 throttle_lag;             /* total transaction lag behind throttling */
    int64 throttle_lag_max;         /* max transaction lag */
} TState;

#define INVALID_THREAD ((pthread_t)0)

/*
 * Result record for one thread.
 * NOTE(review): it is filled in by code outside this part of the file; the
 * field names mirror the accumulators kept in CState and TState.
 */
typedef struct {
    instr_time conn_time;
    int xacts;
    int64 latencies;
    int64 sqlats;
    int64 throttle_lag;
    int64 throttle_lag_max;
} TResult;

/*
 * queries read from files
 */
#define SQL_COMMAND 1
#define META_COMMAND 2
#define MAX_ARGS 32

typedef enum QueryMode {
    QUERY_SIMPLE,   /* simple query */
    QUERY_EXTENDED, /* extended query */
    QUERY_PREPARED, /* extended query with prepared statements */
    NUM_QUERYMODE
} QueryMode;

static QueryMode querymode = QUERY_SIMPLE;
/* Printable names, in the same order as the QueryMode enumerators. */
static const char* QUERYMODE[] = {"simple", "extended", "prepared"};

/* One parsed script line: either an SQL statement or a backslash command. */
typedef struct {
    char* line;           /* full text of command line */
    int command_num;      /* unique index of this Command struct */
    int type;             /* command type (SQL_COMMAND or META_COMMAND) */
    int argc;             /* number of command words */
    char* argv[MAX_ARGS]; /* command word list */
} Command;

static Command** sql_files[MAX_FILES]; /* SQL script files */
static int num_files;                  /* number of script files */
static int num_commands = 0;           /* total number of Command structs */
static int debug = 0;                  /* debug flag */

/* default scenario */
static char* tpc_b = {"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
                      "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
                      "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
                      "\\setrandom aid 1 :naccounts\n"
                      "\\setrandom bid 1 :nbranches\n"
                      "\\setrandom ttid 1 :ntellers\n"
                      "\\setrandom delta -5000 5000\n"
                      "START TRANSACTION;\n"
                      "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = "
                      ":aid;\n"
                      "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
                      "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE ttid = "
                      ":ttid;\n"
                      "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = "
                      ":bid;\n"
                      "INSERT INTO pgbench_history (ttid, bid, aid, delta, mtime) VALUES (:ttid, "
                      ":bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
                      "END;\n"};

#ifdef PGXC
/*
 * Default scenario with the branch id added to the WHERE clauses.
 * NOTE(review): the SELECT line below has no trailing ';', unlike the same
 * line in the other built-in scripts -- confirm whether that is intentional.
 */
static char* tpc_b_bid = {"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
                          "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
                          "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
                          "\\setrandom aid 1 :naccounts\n"
                          "\\setrandom bid 1 :nbranches\n"
                          "\\setrandom ttid 1 :ntellers\n"
                          "\\setrandom delta -5000 5000\n"
                          "START TRANSACTION;\n"
                          "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid "
                          "AND bid = :bid;\n"
                          "SELECT abalance FROM pgbench_accounts WHERE aid = :aid AND bid = :bid\n"
                          "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE ttid = "
                          ":ttid AND bid = :bid;\n"
                          "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = "
                          ":bid;\n"
                          "INSERT INTO pgbench_history (ttid, bid, aid, delta, mtime) VALUES (:ttid, "
                          ":bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
                          "END;\n"};
#endif

/* -N case */
static char* simple_update = {"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
                              "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
                              "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
                              "\\setrandom aid 1 :naccounts\n"
                              "\\setrandom bid 1 :nbranches\n"
                              "\\setrandom ttid 1 :ntellers\n"
                              "\\setrandom delta -5000 5000\n"
                              "START TRANSACTION;\n"
                              "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = "
                              ":aid;\n"
                              "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
                              "INSERT INTO pgbench_history (ttid, bid, aid, delta, mtime) VALUES (:ttid, "
                              ":bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
                              "END;\n"};

#ifdef PGXC
/* -N case with the branch id added to the WHERE clauses */
static char* simple_update_bid = {"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
                                  "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
                                  "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
                                  "\\setrandom aid 1 :naccounts\n"
                                  "\\setrandom bid 1 :nbranches\n"
                                  "\\setrandom ttid 1 :ntellers\n"
                                  "\\setrandom delta -5000 5000\n"
                                  "START TRANSACTION;\n"
                                  "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid "
                                  "AND bid = :bid;\n"
                                  "SELECT abalance FROM pgbench_accounts WHERE aid = :aid AND bid = :bid;\n"
                                  "INSERT INTO pgbench_history (ttid, bid, aid, delta, mtime) VALUES (:ttid, "
                                  ":bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
                                  "END;\n"};
#endif

/* -S case */
static char* select_only = {
    "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
    "\\setrandom aid 1 :naccounts\n"
    "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"};

/* Function prototypes */
static void setalarm(int seconds);
static void* threadRun(void* arg);

/*
 * routines to check mem allocations and fail noisily.
*/ static void* xmalloc(size_t size) { void* result = NULL; /* Avoid unportable behavior of malloc(0) */ if (size == 0) { size = 1; } result = malloc(size); if (!result) { fprintf(stderr, "out of memory\n"); exit(1); } return result; } static void* xrealloc(void* ptr, size_t size) { void* result = NULL; /* Avoid unportable behavior of realloc(NULL, 0) */ if (ptr == NULL && size == 0) { size = 1; } result = realloc(ptr, size); if (!result) { fprintf(stderr, "out of memory\n"); exit(1); } return result; } static char* xstrdup(const char* s) { char* result = NULL; result = strdup(s); if (!result) { fprintf(stderr, "out of memory\n"); exit(1); } return result; } static void usage(const char* progname) { printf("%s is a benchmarking tool for openGauss.\n\n" "Usage:\n" " %s [OPTION]... [DBNAME]\n" "\nInitialization options:\n" " -i invokes initialization mode\n" " -m use memory tables (mot)\n" " -F NUM fill factor\n" #ifdef PGXC " -k distribute by primary key branch id - bid\n" #endif " -s NUM scaling factor\n" " --index-tablespace=TABLESPACE\n" " create indexes in the specified tablespace\n" " --tablespace=TABLESPACE\n" " create tables in the specified tablespace\n" " --unlogged-tables\n" " create tables as unlogged tables\n" "\nBenchmarking options:\n" " -c NUM number of concurrent database clients (default: 1)\n" " -C establish new connection for each transaction\n" " -D VARNAME=VALUE\n" " define variable for use by custom script\n" " -f FILENAME read transaction script from FILENAME\n" #ifdef PGXC " -k query with default key and additional key branch id (bid)\n" #endif " -j NUM number of threads (default: 1)\n" " -l write transaction times to log file\n" " -M simple|extended|prepared\n" " protocol for submitting queries to server (default: simple)\n" " -n do not run VACUUM before tests\n" " -N do not update tables \"pgbench_tellers\" and \"pgbench_branches\"\n" " -O row|column|orc\n" " Table orientation option\n" " -P NUM show thread progress report every NUM 
seconds\n" " -r report average latency per command\n" " -R, --rate=NUM\n" " target rate in transactions per second\n" " -s NUM report this scale factor in output\n" " -S perform SELECT-only transactions\n" " -t NUM number of transactions each client runs (default: 10)\n" " -T NUM duration of benchmark test in seconds\n" " -v vacuum all four standard tables before tests\n" "\nCommon options:\n" " -d print debugging output\n" " -h HOSTNAME database server host or socket directory\n" " -p PORT database server port number\n" " -U USERNAME connect as specified database user\n" " -W PASSWORD connect as specified database user through explicit password\n" " -V, --version output version information, then exit\n" " -?, --help show this help, then exit\n", progname, progname); #if ((defined(ENABLE_MULTIPLE_NODES)) || (defined(ENABLE_PRIVATEGAUSS))) printf("\nReport bugs to GaussDB support.\n"); #else printf("\nReport bugs to community@opengauss.org> or join opengauss community .\n"); #endif } /* random number generator: uniform distribution from min to max inclusive */ static int getrand(TState* thread, int min, int max) { /* * Odd coding is so that min and max have approximately the same chance of * being selected as do numbers between them. * * pg_erand48() is thread-safe and concurrent, which is why we use it * rather than random(), which in glibc is non-reentrant, and therefore * protected by a mutex, and therefore a bottleneck on machines with many * CPUs. 
*/ return min + (int)((max - min + 1) * pg_erand48(thread->random_state)); } /* call PQexec() and exit() on failure */ static void executeStatement(PGconn* con, const char* sql, bool err_tolerant = false) { PGresult* res = NULL; res = PQexec(con, sql); if (PQresultStatus(res) != PGRES_COMMAND_OK) { fprintf(stderr, "%s", PQerrorMessage(con)); if (!err_tolerant) exit(1); } PQclear(res); } /* set up a connection to the backend */ static PGconn* doConnect(void) { PGconn* conn = NULL; static char* password = NULL; bool new_pass = false; if (secrete != NULL) { password = secrete; } /* * Start the connection. Loop until we have a password if requested by * backend. */ do { new_pass = false; conn = PQsetdbLogin(pghost, pgport, pgoptions, pgtty, dbName, login, password); if (!conn) { fprintf(stderr, "Connection to database \"%s\" failed\n", dbName); return NULL; } if (PQstatus(conn) == CONNECTION_BAD && PQconnectionNeedsPassword(conn) && password == NULL) { PQfinish(conn); password = simple_prompt("Password: ", 100, false); new_pass = true; } } while (new_pass); /* check to see that the backend connection was successfully made */ if (PQstatus(conn) == CONNECTION_BAD) { fprintf(stderr, "Connection to database \"%s\" failed:\n%s", dbName, PQerrorMessage(conn)); PQfinish(conn); return NULL; } return conn; } /* throw away response from backend */ static void discard_response(CState* state) { PGresult* res = NULL; do { res = PQgetResult(state->con); if (res) PQclear(res); } while (res); } static int compareVariables(const void* v1, const void* v2) { return strcmp(((const Variable*)v1)->name, ((const Variable*)v2)->name); } static char* getVariable(CState* st, char* name) { Variable key; Variable* var = NULL; /* On some versions of Solaris, bsearch of zero items dumps core */ if (st->nvariables <= 0) { return NULL; } key.name = name; var = (Variable*)bsearch((void*)&key, (void*)st->variables, st->nvariables, sizeof(Variable), compareVariables); if (var != NULL) { return 
var->value; } else { return NULL; } } /* check whether the name consists of alphabets, numerals and underscores. */ static bool isLegalVariableName(const char* name) { int i; for (i = 0; name[i] != '\0'; i++) { if (!isalnum((unsigned char)name[i]) && name[i] != '_') { return false; } } return true; } static int putVariable(CState* st, const char* context, char* name, char* value) { Variable key; Variable* var = NULL; key.name = name; /* On some versions of Solaris, bsearch of zero items dumps core */ if (st->nvariables > 0) { var = (Variable*)bsearch((void*)&key, (void*)st->variables, st->nvariables, sizeof(Variable), compareVariables); } else { var = NULL; } if (var == NULL) { Variable* newvars = NULL; /* * Check for the name only when declaring a new variable to avoid * overhead. */ if (!isLegalVariableName(name)) { fprintf(stderr, "%s: invalid variable name '%s'\n", context, name); return false; } if (st->variables) { newvars = (Variable*)xrealloc(st->variables, (st->nvariables + 1) * sizeof(Variable)); } else { newvars = (Variable*)xmalloc(sizeof(Variable)); } st->variables = newvars; var = &newvars[st->nvariables]; var->name = xstrdup(name); var->value = xstrdup(value); st->nvariables++; qsort((void*)st->variables, st->nvariables, sizeof(Variable), compareVariables); } else { char* val = NULL; /* dup then free, in case value is pointing at this variable */ val = xstrdup(value); free(var->value); var->value = val; } return true; } static char* parseVariable(const char* sql, int* eaten) { int i = 0; char* name = NULL; do { i++; } while (isalnum((unsigned char)sql[i]) || sql[i] == '_'); if (i == 1) { return NULL; } name = (char*)xmalloc(i); errno_t rc = memcpy_s(name, i, &sql[1], i - 1); securec_check_c(rc, "", ""); name[i - 1] = '\0'; *eaten = i; return name; } static char* replaceVariable(char** sql, char* param, int len, char* value) { int valueln = strlen(value); errno_t rc; if (valueln > len) { size_t offset = param - *sql; *sql = (char*)xrealloc(*sql, 
strlen(*sql) - len + valueln + 1); param = *sql + offset; } if (valueln != len) memmove(param + valueln, param + len, strlen(param + len) + 1); rc = memcpy_s(param, valueln, value, valueln); securec_check_c(rc, "", ""); return param + valueln; } static char* assignVariables(CState* st, char* sql) { char *p = NULL; char *name = NULL; char *val = NULL; p = sql; while ((p = strchr(p, ':')) != NULL) { int eaten; name = parseVariable(p, &eaten); if (name == NULL) { while (*p == ':') { p++; } continue; } val = getVariable(st, name); free(name); if (val == NULL) { p++; continue; } p = replaceVariable(&sql, p, eaten, val); } return sql; } static void getQueryParams(CState* st, const Command* command, const char** params) { int i; for (i = 0; i < command->argc - 1; i++) params[i] = getVariable(st, command->argv[i + 1]); } /* * Run a shell command. The result is assigned to the variable if not NULL. * Return true if succeeded, or false on error. */ static bool runShellCommand(CState* st, char* variable, char** argv, int argc) { char command[SHELL_COMMAND_SIZE]; int i, len = 0; FILE* fp = NULL; char res[64]; char* endptr = NULL; int retval; /*---------- * Join arguments with whitespace separators. Arguments starting with * exactly one colon are treated as variables: * name - append a string "name" * :var - append a variable named 'var' * ::name - append a string ":name" *---------- */ for (i = 0; i < argc; i++) { char* arg = NULL; int arglen; if (argv[i][0] != ':') { arg = argv[i]; /* a string literal */ } else if (argv[i][1] == ':') { arg = argv[i] + 1; /* a string literal starting with colons */ } else if ((arg = getVariable(st, argv[i] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[i]); return false; } arglen = strlen(arg); if ((len + arglen + ((i > 0) ? 
1 : 0)) >= (SHELL_COMMAND_SIZE - 1)) { fprintf(stderr, "%s: too long shell command\n", argv[0]); return false; } if (i > 0) command[len++] = ' '; errno_t rc = memcpy_s(command + len, SHELL_COMMAND_SIZE - len, arg, arglen); securec_check_c(rc, "", ""); len += arglen; } command[len] = '\0'; /* Fast path for non-assignment case */ if (variable == NULL) { if (system(command)) { if (!timer_exceeded) fprintf(stderr, "%s: cannot launch shell command\n", argv[0]); return false; } return true; } /* Execute the command with pipe and read the standard output. */ if ((fp = popen(command, "r")) == NULL) { fprintf(stderr, "%s: cannot launch shell command\n", argv[0]); return false; } if (fgets(res, sizeof(res), fp) == NULL) { if (!timer_exceeded) fprintf(stderr, "%s: cannot read the result\n", argv[0]); return false; } if (pclose(fp) < 0) { fprintf(stderr, "%s: cannot close shell command\n", argv[0]); return false; } /* Check whether the result is an integer and assign it to the variable */ retval = (int)strtol(res, &endptr, 10); while (*endptr != '\0' && isspace((unsigned char)*endptr)) { endptr++; } if (*res == '\0' || *endptr != '\0') { fprintf(stderr, "%s: must return an integer ('%s' returned)\n", argv[0], res); return false; } snprintf(res, sizeof(res), "%d", retval); if (!putVariable(st, "setshell", variable, res)) return false; #ifdef DEBUG printf("shell parameter name: %s, value: %s\n", argv[1], res); #endif return true; } #define MAX_PREPARE_NAME 32 static void preparedStatementName(char* buffer, int file, int state) { sprintf(buffer, "P%d_%d", file, state); } static bool clientDone(CState* st, bool ok) { (void)ok; /* unused */ if (st->con != NULL) { PQfinish(st->con); st->con = NULL; } return false; /* always false */ } /* return false if client should be disconnected */ static bool doCustom(TState* thread, CState* st, instr_time* conn_time, FILE* logfile) { PGresult* res = NULL; Command** commands; bool trans_needs_throttle = false; top: commands = 
sql_files[st->use_file]; /* * Handle throttling once per transaction by sleeping. It is simpler * to do this here rather than at the end, because so much complicated * logic happens below when statements finish. */ if (throttle_delay && !st->is_throttled) { /* * Use inverse transform sampling to randomly generate a delay, such * that the series of delays will approximate a Poisson distribution * centered on the throttle_delay time. * * 10000 implies a 9.2 (-log(1/10000)) to 0.0 (log 1) delay multiplier, * and results in a 0.055 % target underestimation bias: * * SELECT 1.0/AVG(-LN(i/10000.0)) FROM generate_series(1,10000) AS i; * = 1.000552717032611116335474 * * If transactions are too slow or a given wait is shorter than * a transaction, the next transaction will start right away. */ int64 wait = (int64)(throttle_delay * 1.00055271703 * -log(getrand(thread, 1, 10000) / 10000.0)); thread->throttle_trigger += wait; st->until = thread->throttle_trigger; st->sleeping = 1; st->throttling = true; st->is_throttled = true; if (debug) fprintf(stderr, "client %d throttling " INT64_FORMAT " us\n", st->id, wait); } if (st->sleeping) { /* are we sleeping? */ instr_time now; int64 now_us; INSTR_TIME_SET_CURRENT(now); now_us = INSTR_TIME_GET_MICROSEC(now); if (st->until <= (uint64)now_us) { st->sleeping = 0; /* Done sleeping, go ahead with next command */ if (st->throttling) { /* Measure lag of throttled transaction relative to target */ int64 lag = now_us - st->until; thread->throttle_lag += lag; if (lag > thread->throttle_lag_max) thread->throttle_lag_max = lag; st->throttling = false; } } else return true; /* Still sleeping, nothing to do here */ } if (st->listen) { /* are we receiver? */ if (commands[st->state]->type == SQL_COMMAND) { if (debug) fprintf(stderr, "client %d receiving\n", st->id); if (!PQconsumeInput(st->con)) { /* there's something wrong */ fprintf(stderr, "Client %d aborted in state %d. 
Probably the backend died while processing.\n", st->id, st->state); return clientDone(st, false); } if (PQisBusy(st->con)) return true; /* don't have the whole result yet */ } /* * command finished: accumulate per-command execution times in * thread-local data structure, if per-command latencies are requested */ if (is_latencies) { instr_time now; int cnum = commands[st->state]->command_num; INSTR_TIME_SET_CURRENT(now); INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum], now, st->stmt_begin); thread->exec_count[cnum]++; } /* transaction finished: record latency under progress or throttling */ if ((progress || throttle_delay) && commands[st->state + 1] == NULL) { instr_time diff; int64 latency; INSTR_TIME_SET_CURRENT(diff); INSTR_TIME_SUBTRACT(diff, st->txn_begin); latency = INSTR_TIME_GET_MICROSEC(diff); st->txn_latencies += latency; /* * XXX In a long benchmark run of high-latency transactions, this * int64 addition eventually overflows. For example, 100 threads * running 10s transactions will overflow it in 2.56 hours. With * a more-typical OLTP workload of .1s transactions, overflow * would take 256 hours. 
*/ st->txn_sqlats += latency * latency; } /* * if transaction finished, record the time it took in the log */ if (logfile && commands[st->state + 1] == NULL) { instr_time now; instr_time diff; double usec; INSTR_TIME_SET_CURRENT(now); diff = now; INSTR_TIME_SUBTRACT(diff, st->txn_begin); usec = (double)INSTR_TIME_GET_MICROSEC(diff); #ifndef WIN32 /* This is more than we really ought to know about instr_time */ fprintf(logfile, "%d %d %.0f %d %ld %ld\n", st->id, st->cnt, usec, st->use_file, (long)now.tv_sec, (long)now.tv_usec); #else /* On Windows, instr_time doesn't provide a timestamp anyway */ fprintf(logfile, "%d %d %.0f %d 0 0\n", st->id, st->cnt, usec, st->use_file); #endif } /* identify transaction errors */ bool error_found = false; if (commands[st->state]->type == SQL_COMMAND) { /* * Read and discard the query result; note this is not included in * the statement latency numbers. */ res = PQgetResult(st->con); switch (PQresultStatus(res)) { case PGRES_COMMAND_OK: case PGRES_TUPLES_OK: break; /* OK */ default: if (!is_mot) { fprintf( stderr, "Client %d aborted in state %d: %s", st->id, st->state, PQerrorMessage(st->con)); } else { error_found = true; } break; } PQclear(res); discard_response(st); } if (commands[st->state + 1] == NULL) { if (is_connect) { PQfinish(st->con); st->con = NULL; } if (!error_found) { ++st->cnt; } if ((st->cnt >= nxacts && duration <= 0) || timer_exceeded) return clientDone(st, true); /* exit success */ } /* increment state counter */ st->state++; if (commands[st->state] == NULL) { st->state = 0; st->use_file = getrand(thread, 0, num_files - 1); commands = sql_files[st->use_file]; st->is_throttled = false; /* * No transaction is underway anymore, which means there is nothing * to listen to right now. When throttling rate limits are active, * a sleep will happen next, as the next transaction starts. And * then in any case the next SQL command will set listen back to 1. 
*/ st->listen = 0; trans_needs_throttle = (throttle_delay > 0); } } if (st->con == NULL) { instr_time start, end; INSTR_TIME_SET_CURRENT(start); if ((st->con = doConnect()) == NULL) { fprintf(stderr, "Client %d aborted in establishing connection.\n", st->id); return clientDone(st, false); } INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(*conn_time, end, start); } /* * This ensures that a throttling delay is inserted before proceeding * with sql commands, after the first transaction. The first transaction * throttling is performed when first entering doCustom. */ if (trans_needs_throttle) { trans_needs_throttle = false; goto top; } /* Record transaction start time under logging, progress or throttling */ if ((logfile || progress || throttle_delay) && st->state == 0) INSTR_TIME_SET_CURRENT(st->txn_begin); /* Record statement start time if per-command latencies are requested */ if (is_latencies) INSTR_TIME_SET_CURRENT(st->stmt_begin); if (commands[st->state]->type == SQL_COMMAND) { const Command* command = commands[st->state]; int r; if (querymode == QUERY_SIMPLE) { char* sql = NULL; sql = xstrdup(command->argv[0]); sql = assignVariables(st, sql); if (debug) fprintf(stderr, "client %d sending %s\n", st->id, sql); r = PQsendQuery(st->con, sql); free(sql); } else if (querymode == QUERY_EXTENDED) { const char* sql = command->argv[0]; const char* params[MAX_ARGS]; getQueryParams(st, command, params); if (debug) fprintf(stderr, "client %d sending %s\n", st->id, sql); r = PQsendQueryParams(st->con, sql, command->argc - 1, NULL, params, NULL, NULL, 0); } else if (querymode == QUERY_PREPARED) { char name[MAX_PREPARE_NAME]; const char* params[MAX_ARGS]; if (!st->prepared[st->use_file]) { int j; for (j = 0; commands[j] != NULL; j++) { PGresult* res = NULL; char name[MAX_PREPARE_NAME]; if (commands[j]->type != SQL_COMMAND) { continue; } preparedStatementName(name, st->use_file, j); res = PQprepare(st->con, name, commands[j]->argv[0], commands[j]->argc - 1, NULL); if 
(PQresultStatus(res) != PGRES_COMMAND_OK) fprintf(stderr, "%s", PQerrorMessage(st->con)); PQclear(res); } st->prepared[st->use_file] = true; } getQueryParams(st, command, params); preparedStatementName(name, st->use_file, st->state); if (debug) fprintf(stderr, "client %d sending %s\n", st->id, name); r = PQsendQueryPrepared(st->con, name, command->argc - 1, params, NULL, NULL, 0); } else /* unknown sql mode */ r = 0; if (r == 0) { if (debug) fprintf(stderr, "client %d cannot send %s\n", st->id, command->argv[0]); st->ecnt++; } else st->listen = 1; /* flags that should be listened */ } else if (commands[st->state]->type == META_COMMAND) { int argc = commands[st->state]->argc, i; char** argv = commands[st->state]->argv; if (debug) { fprintf(stderr, "client %d executing \\%s", st->id, argv[0]); for (i = 1; i < argc; i++) fprintf(stderr, " %s", argv[i]); fprintf(stderr, "\n"); } if (pg_strcasecmp(argv[0], "setrandom") == 0) { char* var = NULL; int min, max; char res[64]; if (*argv[2] == ':') { if ((var = getVariable(st, argv[2] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]); st->ecnt++; return true; } min = atoi(var); } else min = atoi(argv[2]); #ifdef NOT_USED if (min < 0) { fprintf(stderr, "%s: invalid minimum number %d\n", argv[0], min); st->ecnt++; return; } #endif if (*argv[3] == ':') { if ((var = getVariable(st, argv[3] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[3]); st->ecnt++; return true; } max = atoi(var); } else max = atoi(argv[3]); if (max < min) { fprintf(stderr, "%s: maximum is less than minimum\n", argv[0]); st->ecnt++; return true; } /* * getrand() neeeds to be able to subtract max from min and add * one the result without overflowing. Since we know max > min, * we can detect overflow just by checking for a negative result. * But we must check both that the subtraction doesn't overflow, * and that adding one to the result doesn't overflow either. 
*/ if (max - min < 0 || (max - min) + 1 < 0) { fprintf(stderr, "%s: range too large\n", argv[0]); st->ecnt++; return true; } #ifdef DEBUG printf("min: %d max: %d random: %d\n", min, max, getrand(thread, min, max)); #endif snprintf(res, sizeof(res), "%d", getrand(thread, min, max)); if (!putVariable(st, argv[0], argv[1], res)) { st->ecnt++; return true; } st->listen = 1; } else if (pg_strcasecmp(argv[0], "setseq") == 0) { char* var = NULL; int min, max; char res[64]; if (*argv[2] == ':') { if ((var = getVariable(st, argv[2] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]); st->ecnt++; return true; } min = atoi(var); } else min = atoi(argv[2]); #ifdef NOT_USED if (min < 0) { fprintf(stderr, "%s: invalid minimum number %d\n", argv[0], min); st->ecnt++; return; } #endif if (*argv[3] == ':') { if ((var = getVariable(st, argv[3] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[3]); st->ecnt++; return true; } max = atoi(var); } else max = atoi(argv[3]); if (max < min) { fprintf(stderr, "%s: maximum is less than minimum\n", argv[0]); st->ecnt++; return true; } /* * getrand() neeeds to be able to subtract max from min and add * one the result without overflowing. Since we know max > min, * we can detect overflow just by checking for a negative result. * But we must check both that the subtraction doesn't overflow, * and that adding one to the result doesn't overflow either. 
*/ if (max - min < 0 || (max - min) + 1 < 0) { fprintf(stderr, "%s: range too large\n", argv[0]); st->ecnt++; return true; } seq_curr_index++; if (seq_curr_index > max) seq_curr_index = min; if (seq_curr_index < min) seq_curr_index = min; snprintf(res, sizeof(res), "%d", seq_curr_index); if (!putVariable(st, argv[0], argv[1], res)) { st->ecnt++; return true; } st->listen = 1; } else if (pg_strcasecmp(argv[0], "set") == 0) { char* var = NULL; int ope1, ope2; char res[64]; if (*argv[2] == ':') { if ((var = getVariable(st, argv[2] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]); st->ecnt++; return true; } ope1 = atoi(var); } else ope1 = atoi(argv[2]); if (argc < 5) snprintf(res, sizeof(res), "%d", ope1); else { if (*argv[4] == ':') { if ((var = getVariable(st, argv[4] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[4]); st->ecnt++; return true; } ope2 = atoi(var); } else ope2 = atoi(argv[4]); if (strcmp(argv[3], "+") == 0) snprintf(res, sizeof(res), "%d", ope1 + ope2); else if (strcmp(argv[3], "-") == 0) snprintf(res, sizeof(res), "%d", ope1 - ope2); else if (strcmp(argv[3], "*") == 0) snprintf(res, sizeof(res), "%d", ope1 * ope2); else if (strcmp(argv[3], "/") == 0) { if (ope2 == 0) { fprintf(stderr, "%s: division by zero\n", argv[0]); st->ecnt++; return true; } snprintf(res, sizeof(res), "%d", ope1 / ope2); } else { fprintf(stderr, "%s: unsupported operator %s\n", argv[0], argv[3]); st->ecnt++; return true; } } if (!putVariable(st, argv[0], argv[1], res)) { st->ecnt++; return true; } st->listen = 1; } else if (pg_strcasecmp(argv[0], "sleep") == 0) { char* var = NULL; int usec; instr_time now; if (*argv[1] == ':') { if ((var = getVariable(st, argv[1] + 1)) == NULL) { fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[1]); st->ecnt++; return true; } usec = atoi(var); } else usec = atoi(argv[1]); if (argc > 2) { if (pg_strcasecmp(argv[2], "ms") == 0) usec *= 1000; else if (pg_strcasecmp(argv[2], 
"s") == 0) usec *= 1000000; } else usec *= 1000000; INSTR_TIME_SET_CURRENT(now); st->until = INSTR_TIME_GET_MICROSEC(now) + usec; st->sleeping = 1; st->listen = 1; } else if (pg_strcasecmp(argv[0], "setshell") == 0) { bool ret = runShellCommand(st, argv[1], argv + 2, argc - 2); if (timer_exceeded) /* timeout */ return clientDone(st, true); else if (!ret) /* on error */ { st->ecnt++; return true; } else /* succeeded */ st->listen = 1; } else if (pg_strcasecmp(argv[0], "shell") == 0) { bool ret = runShellCommand(st, NULL, argv + 1, argc - 1); if (timer_exceeded) /* timeout */ return clientDone(st, true); else if (!ret) /* on error */ { st->ecnt++; return true; } else /* succeeded */ st->listen = 1; } goto top; } return true; } /* discard connections */ static void disconnect_all(CState* state, int length) { int i; for (i = 0; i < length; i++) { if (state[i].con) { PQfinish(state[i].con); state[i].con = NULL; } } } /* create tables and setup data */ static void init(void) { /* * Note: TPC-B requires at least 100 bytes per row, and the "filler" * fields in these table declarations were intended to comply with that. * But because they default to NULLs, they don't actually take any space. * We could fix that by giving them non-null default values. However, that * would completely break comparability of pgbench results with prior * versions. Since pgbench has never pretended to be fully TPC-B * compliant anyway, we stick with the historical behavior. 
*/ struct ddlinfo { char* table; char* cols; int declare_fillfactor; #ifdef PGXC char* distribute_by; #endif }; struct ddlinfo DDLs[] = {{"pgbench_branches", "bid int not null,bbalance int,filler char(88)", 1 #ifdef PGXC , "distribute by hash (bid)" #endif }, {"pgbench_tellers", "ttid int not null,bid int,tbalance int,filler char(84)", 1 #ifdef PGXC , "distribute by hash (bid)" #endif }, {"pgbench_accounts", "aid int not null,bid int,abalance int,filler char(84)", 1 #ifdef PGXC , "distribute by hash (bid)" #endif }, {"pgbench_history", "ttid int,bid int,aid int,delta int,mtime timestamp,filler char(22)", 0 #ifdef PGXC , "distribute by hash (bid)" #endif }}; static char* DDLAFTERs[] = {"alter table pgbench_branches add primary key (bid)", "alter table pgbench_tellers add primary key (ttid)", "alter table pgbench_accounts add primary key (aid)"}; static char* DDLAFTERs_mot[] = {"alter foreign table pgbench_branches add primary key (bid)", "alter foreign table pgbench_tellers add primary key (ttid)", "alter foreign table pgbench_accounts add primary key (aid)"}; #ifdef PGXC static char* DDLAFTERs_bid[] = {"alter table pgbench_branches add primary key (bid)", "alter table pgbench_tellers add primary key (ttid,bid)", "alter table pgbench_accounts add primary key (aid,bid)"}; #endif PGconn* con = NULL; PGresult* res = NULL; char sql[256]; int i; long ttl; if ((con = doConnect()) == NULL) exit(1); for (i = 0; i < (int)lengthof(DDLs); i++) { int hasWithOpts = 0; char opts[256]; char check_buffer[512]; char buffer[512]; struct ddlinfo* ddl = &DDLs[i]; /* Remove old table, if it exists. 
*/ snprintf(check_buffer, 512, "select table_type from information_schema.tables where table_name = '%s' and table_type = 'FOREIGN TABLE'", ddl->table); PGresult *res = PQexec(con, check_buffer); if (PQntuples(res) == 0) { snprintf(buffer, 512, "drop table if exists %s", ddl->table); } else { snprintf(buffer, 512, "drop foreign table if exists %s", ddl->table); } PQclear(res); executeStatement(con, buffer); /* Construct new create table statement. */ opts[0] = '\0'; hasWithOpts = 0; if (with_options || ddl->declare_fillfactor) { /* header */ snprintf(opts + strlen(opts), 256 - strlen(opts), " with ("); /* only row orientation support fillfactor */ if ((!orient || strcmp(orient, "row") == 0) && ddl->declare_fillfactor) { snprintf(opts + strlen(opts), 256 - strlen(opts), " fillfactor=%d", fillfactor); ++hasWithOpts; } if (with_options) { if (hasWithOpts) { snprintf(opts + strlen(opts), 256 - strlen(opts), ","); } snprintf(opts + strlen(opts), 256 - strlen(opts), " %s ", with_options); ++hasWithOpts; } /* tail */ snprintf(opts + strlen(opts), 256 - strlen(opts), " )"); } if (tablespace != NULL) { char* escape_tablespace = NULL; escape_tablespace = PQescapeIdentifier(con, tablespace, strlen(tablespace)); snprintf(opts + strlen(opts), 256 - strlen(opts), " tablespace %s", escape_tablespace); PQfreemem(escape_tablespace); } #ifdef PGXC /* Add distribution columns if necessary */ if (use_branch) snprintf(buffer, 512, "create%s table %s(%s)%s %s", unlogged_tables ? " unlogged" : "", ddl->table, ddl->cols, opts, ddl->distribute_by); else #endif if (!is_mot) { snprintf(buffer, 512, "create%s table %s(%s)%s", unlogged_tables ? 
" unlogged" : "", ddl->table, ddl->cols, opts); } else { snprintf(buffer, 512, "create foreign table %s(%s)", ddl->table, ddl->cols); } executeStatement(con, buffer); } #define MINI_BATCH 5000 /* if mot create primary keys before data load */ if (is_mot) { fprintf(stderr, "set primary key on memory tables...\n"); for (i = 0; i < (int)lengthof(DDLAFTERs_mot); i++) { char buffer[256]; strncpy(buffer, DDLAFTERs_mot[i], 256); executeStatement(con, buffer, true); } } for (i = 0; i < nbranches * scale;) { executeStatement(con, "start transaction"); int k = 0; while (k < MINI_BATCH && i < nbranches * scale) { snprintf(sql, 256, "insert into pgbench_branches(bid,bbalance) values(%d,0)", i + 1); executeStatement(con, sql); i++; k++; } executeStatement(con, "commit"); } for (i = 0; i < ntellers * scale;) { executeStatement(con, "start transaction"); int k = 0; while (k < MINI_BATCH && i < ntellers * scale) { snprintf( sql, 256, "insert into pgbench_tellers(ttid,bid,tbalance) values (%d,%d,0)", i + 1, i / ntellers + 1); executeStatement(con, sql); i++; k++; } executeStatement(con, "commit"); } /* * fill the pgbench_accounts table with some data */ fprintf(stderr, "creating tables...\n"); executeStatement(con, "start transaction"); executeStatement(con, "truncate pgbench_accounts"); res = PQexec(con, "copy pgbench_accounts from stdin"); if (PQresultStatus(res) != PGRES_COPY_IN) { fprintf(stderr, "%s", PQerrorMessage(con)); exit(1); } PQclear(res); ttl = (naccounts * scale) >> 4; for (i = 0; i < naccounts * scale; i++) { int j = i + 1; snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0); if (PQputline(con, sql)) { fprintf(stderr, "PQputline failed\n"); exit(1); } if (j % ttl == 0) fprintf(stderr, "%d tuples done.\n", j); } if (PQputline(con, "\\.\n")) { fprintf(stderr, "very last PQputline failed\n"); exit(1); } if (PQendcopy(con)) { fprintf(stderr, "PQendcopy failed\n"); exit(1); } executeStatement(con, "commit"); /* * create indexes */ if (!is_mot) { fprintf(stderr, 
"set primary key...\n"); #ifdef PGXC /* * If all the tables are distributed according to bid, create an index on it * instead. */ if (use_branch) { for (i = 0; i < (int)lengthof(DDLAFTERs_bid); i++) { char buffer[256] = {0}; errno_t sc_rc = strncpy_s(buffer, sizeof(buffer), DDLAFTERs_bid[i], strlen(DDLAFTERs[i])); securec_check(sc_rc, "\0", "\0"); if (index_tablespace != NULL) { char* escape_tablespace = NULL; escape_tablespace = PQescapeIdentifier(con, index_tablespace, strlen(index_tablespace)); snprintf( buffer + strlen(buffer), 256 - strlen(buffer), " using index tablespace %s", escape_tablespace); PQfreemem(escape_tablespace); } executeStatement(con, buffer, true); } } else #endif for (i = 0; i < (int)lengthof(DDLAFTERs); i++) { char buffer[256] = {0}; errno_t sc_rc = strncpy_s(buffer, sizeof(buffer), DDLAFTERs[i], strlen(DDLAFTERs[i])); securec_check(sc_rc, "\0", "\0"); if (index_tablespace != NULL) { char* escape_tablespace1 = NULL; escape_tablespace1 = PQescapeIdentifier(con, index_tablespace, strlen(index_tablespace)); snprintf(buffer + strlen(buffer), 256 - strlen(buffer), " using index tablespace %s", escape_tablespace1); PQfreemem(escape_tablespace1); } executeStatement(con, buffer, true); } } /* vacuum */ fprintf(stderr, "vacuum..."); executeStatement(con, "vacuum analyze pgbench_branches"); executeStatement(con, "vacuum analyze pgbench_tellers"); executeStatement(con, "vacuum analyze pgbench_accounts"); executeStatement(con, "vacuum analyze pgbench_history"); fprintf(stderr, "done.\n"); PQfinish(con); } /* * Parse the raw sql and replace :param to $n. 
*/ static bool parseQuery(Command* cmd, const char* raw_sql) { char *sql, *p; sql = xstrdup(raw_sql); cmd->argc = 1; p = sql; while ((p = strchr(p, ':')) != NULL) { char var[12]; char* name = NULL; int eaten; name = parseVariable(p, &eaten); if (name == NULL) { while (*p == ':') { p++; } continue; } if (cmd->argc >= MAX_ARGS) { fprintf(stderr, "statement has too many arguments (maximum is %d): %s\n", MAX_ARGS - 1, raw_sql); return false; } sprintf(var, "$%d", cmd->argc); p = replaceVariable(&sql, p, eaten, var); cmd->argv[cmd->argc] = name; cmd->argc++; } cmd->argv[0] = sql; return true; } /* Parse a command; return a Command struct, or NULL if it's a comment */ static Command* process_commands(char* buf) { const char delim[] = " \f\n\r\t\v"; Command* my_commands = NULL; int j; char *p = NULL;; char *tok = NULL; /* Make the string buf end at the next newline */ if ((p = strchr(buf, '\n')) != NULL) *p = '\0'; /* Skip leading whitespace */ p = buf; while (isspace((unsigned char)*p)) { p++; } /* If the line is empty or actually a comment, we're done */ if (*p == '\0' || strncmp(p, "--", 2) == 0) return NULL; /* Allocate and initialize Command structure */ my_commands = (Command*)xmalloc(sizeof(Command)); my_commands->line = xstrdup(buf); my_commands->command_num = num_commands++; my_commands->type = 0; /* until set */ my_commands->argc = 0; if (*p == '\\') { my_commands->type = META_COMMAND; j = 0; tok = strtok(++p, delim); while (tok != NULL) { my_commands->argv[j++] = xstrdup(tok); my_commands->argc++; tok = strtok(NULL, delim); } if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0) { if (my_commands->argc < 4) { fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]); exit(1); } for (j = 4; j < my_commands->argc; j++) fprintf(stderr, "%s: extra argument \"%s\" ignored\n", my_commands->argv[0], my_commands->argv[j]); } else if (pg_strcasecmp(my_commands->argv[0], "set") == 0) { if (my_commands->argc < 3) { fprintf(stderr, "%s: missing argument\n", 
my_commands->argv[0]); exit(1); } for (j = ((my_commands->argc < 5) ? 3 : 5); j < my_commands->argc; j++) fprintf(stderr, "%s: extra argument \"%s\" ignored\n", my_commands->argv[0], my_commands->argv[j]); } else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0) { if (my_commands->argc < 2) { fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]); exit(1); } /* * Split argument into number and unit to allow "sleep 1ms" etc. * We don't have to terminate the number argument with null * because it will be parsed with atoi, which ignores trailing * non-digit characters. */ if (my_commands->argv[1][0] != ':') { char* c = my_commands->argv[1]; while (isdigit((unsigned char)*c)) { c++; } if (*c) { my_commands->argv[2] = c; if (my_commands->argc < 3) my_commands->argc = 3; } } if (my_commands->argc >= 3) { if (pg_strcasecmp(my_commands->argv[2], "us") != 0 && pg_strcasecmp(my_commands->argv[2], "ms") != 0 && pg_strcasecmp(my_commands->argv[2], "s") != 0) { fprintf(stderr, "%s: unknown time unit '%s' - must be us, ms or s\n", my_commands->argv[0], my_commands->argv[2]); exit(1); } } for (j = 3; j < my_commands->argc; j++) fprintf(stderr, "%s: extra argument \"%s\" ignored\n", my_commands->argv[0], my_commands->argv[j]); } else if (pg_strcasecmp(my_commands->argv[0], "setshell") == 0) { if (my_commands->argc < 3) { fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]); exit(1); } } else if (pg_strcasecmp(my_commands->argv[0], "shell") == 0) { if (my_commands->argc < 1) { fprintf(stderr, "%s: missing command\n", my_commands->argv[0]); exit(1); } } else { fprintf(stderr, "Invalid command %s\n", my_commands->argv[0]); exit(1); } } else { my_commands->type = SQL_COMMAND; switch (querymode) { case QUERY_SIMPLE: my_commands->argv[0] = xstrdup(p); my_commands->argc++; break; case QUERY_EXTENDED: case QUERY_PREPARED: if (!parseQuery(my_commands, p)) exit(1); break; default: exit(1); } } return my_commands; } static int process_file(char* filename) { #define 
COMMANDS_ALLOC_NUM 128 Command** my_commands; FILE* fd = NULL; int lineno; char buf[BUFSIZ * 8]; int alloc_num; if (num_files >= MAX_FILES) { fprintf(stderr, "Up to only %d SQL files are allowed\n", MAX_FILES); exit(1); } alloc_num = COMMANDS_ALLOC_NUM; my_commands = (Command**)xmalloc(sizeof(Command*) * alloc_num); if (strcmp(filename, "-") == 0) fd = stdin; else if ((fd = fopen(filename, "r")) == NULL) { fprintf(stderr, "%s: %s\n", filename, strerror(errno)); return false; } lineno = 0; while (fgets(buf, sizeof(buf), fd) != NULL) { Command* command = NULL; command = process_commands(buf); if (command == NULL) continue; my_commands[lineno] = command; lineno++; if (lineno >= alloc_num) { alloc_num += COMMANDS_ALLOC_NUM; my_commands = (Command**)xrealloc(my_commands, sizeof(Command*) * alloc_num); } } fclose(fd); my_commands[lineno] = NULL; sql_files[num_files++] = my_commands; return true; } static Command** process_builtin(char* tb) { #define COMMANDS_ALLOC_NUM 128 Command** my_commands; int lineno; char buf[BUFSIZ]; int alloc_num; alloc_num = COMMANDS_ALLOC_NUM; my_commands = (Command**)xmalloc(sizeof(Command*) * alloc_num); lineno = 0; for (;;) { char* p = NULL; Command* command = NULL; p = buf; while (*tb && *tb != '\n') { *p++ = *tb++; } if (*tb == '\0') break; if (*tb == '\n') tb++; *p = '\0'; command = process_commands(buf); if (command == NULL) { continue; } my_commands[lineno] = command; lineno++; if (lineno >= alloc_num) { alloc_num += COMMANDS_ALLOC_NUM; my_commands = (Command**)xrealloc(my_commands, sizeof(Command*) * alloc_num); } } my_commands[lineno] = NULL; return my_commands; } /* print out results */ static void printResults(int ttype, int normal_xacts, int nclients, TState* threads, int nthreads, instr_time total_time, instr_time conn_total_time, int64 total_latencies, int64 total_sqlats, int64 throttle_lag, int64 throttle_lag_max) { double time_include, tps_include, tps_exclude; char* s = NULL; time_include = INSTR_TIME_GET_DOUBLE(total_time); 
tps_include = normal_xacts / time_include; tps_exclude = normal_xacts / (time_include - (INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads)); if (ttype == 0) s = "TPC-B (sort of)"; else if (ttype == 2) s = "Update only pgbench_accounts"; else if (ttype == 1) s = "SELECT only"; else s = "Custom query"; printf("transaction type: %s\n", s); printf("scaling factor: %d\n", scale); printf("query mode: %s\n", QUERYMODE[querymode]); printf("number of clients: %d\n", nclients); printf("number of threads: %d\n", nthreads); if (duration <= 0) { printf("number of transactions per client: %d\n", nxacts); printf("number of transactions actually processed: %d/%d\n", normal_xacts, nxacts * nclients); } else { printf("duration: %d s\n", duration); printf("number of transactions actually processed: %d\n", normal_xacts); } if (throttle_delay || progress) { /* compute and show latency average and standard deviation */ double latency = 0.001 * total_latencies / normal_xacts; double sqlat = (double)total_sqlats / normal_xacts; printf("latency average: %.3f ms\n" "latency stddev: %.3f ms\n", latency, 0.001 * sqrt(sqlat - 1000000.0 * latency * latency)); } else { /* only an average latency computed from the duration is available */ printf("latency average: %.3f ms\n", 1000.0 * duration * nclients / normal_xacts); } if (throttle_delay) { /* * Report average transaction lag under rate limit throttling. This * is the delay between scheduled and actual start times for the * transaction. The measured lag may be caused by thread/client load, * the database load, or the Poisson throttling process. 
*/ printf("rate limit schedule lag: avg %.3f (max %.3f) ms\n", 0.001 * throttle_lag / normal_xacts, 0.001 * throttle_lag_max); } printf("tps = %f (including connections establishing)\n", tps_include); printf("tps = %f (excluding connections establishing)\n", tps_exclude); /* Report per-command latencies */ if (is_latencies) { int i; for (i = 0; i < num_files; i++) { Command** commands; if (num_files > 1) printf("statement latencies in milliseconds, file %d:\n", i + 1); else printf("statement latencies in milliseconds:\n"); for (commands = sql_files[i]; *commands != NULL; commands++) { Command* command = *commands; int cnum = command->command_num; double total_time; instr_time total_exec_elapsed; int total_exec_count; int t; /* Accumulate per-thread data for command */ INSTR_TIME_SET_ZERO(total_exec_elapsed); total_exec_count = 0; for (t = 0; t < nthreads; t++) { TState* thread = &threads[t]; INSTR_TIME_ADD(total_exec_elapsed, thread->exec_elapsed[cnum]); total_exec_count += thread->exec_count[cnum]; } if (total_exec_count > 0) { total_time = INSTR_TIME_GET_MILLISEC(total_exec_elapsed) / (double)total_exec_count; } else { total_time = 0.0; } printf("\t%f\t%s\n", total_time, command->line); } } } } int main(int argc, char** argv) { int c; int nclients = 1; /* default number of simulated clients */ int nthreads = 1; /* default number of threads */ int is_init_mode = 0; /* initialize mode? */ int is_no_vacuum = 0; /* no vacuum at all before testing? */ int do_vacuum_accounts = 0; /* do vacuum accounts before testing? */ int ttype = 0; /* transaction type. 
0: TPC-B, 1: SELECT only, * 2: skip update of branches and tellers */ int optindex; char* filename = NULL; bool scale_given = false; CState* state = NULL; /* status of clients */ TState* threads = NULL; /* array of thread */ instr_time start_time; /* start up time */ instr_time total_time; instr_time conn_total_time; int total_xacts = 0; int64 total_latencies = 0; int64 total_sqlats = 0; int64 throttle_lag = 0; int64 throttle_lag_max = 0; int i; static struct option long_options[] = {{"index-tablespace", required_argument, NULL, 3}, {"tablespace", required_argument, NULL, 2}, {"unlogged-tables", no_argument, &unlogged_tables, 1}, {NULL, 0, NULL, 0}}; #ifdef HAVE_GETRLIMIT struct rlimit rlim; #endif PGconn* con = NULL; PGresult* res = NULL; char* env = NULL; char val[64]; const char* progname = NULL; progname = get_progname(argv[0]); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { usage(progname); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { puts("pgbench (PostgreSQL) " PG_VERSION); exit(0); } } #ifdef WIN32 /* stderr is buffered on Win32. 
*/ setvbuf(stderr, NULL, _IONBF, 0); #endif if ((env = getenv("PGHOST")) != NULL && *env != '\0') pghost = env; if ((env = getenv("PGPORT")) != NULL && *env != '\0') pgport = env; else if ((env = getenv("PGUSER")) != NULL && *env != '\0') login = env; state = (CState*)xmalloc(sizeof(CState)); memset(state, 0, sizeof(CState)); #ifdef PGXC while ((c = getopt_long(argc, argv, "ih:mknvp:dSNc:j:Crs:t:T:U:lf:D:F:M:O:P:R:W:", long_options, &optindex)) != -1) #else while ((c = getopt_long(argc, argv, "ih:mnvp:dSNc:j:Crs:t:T:U:lf:D:F:M:P:R:W:", long_options, &optindex)) != -1) #endif { switch (c) { case 'i': is_init_mode++; break; #ifdef PGXC case 'k': use_branch = true; break; #endif case 'm': is_mot = true; break; case 'h': pghost = optarg; break; case 'n': is_no_vacuum++; break; case 'v': do_vacuum_accounts++; break; case 'p': pgport = optarg; break; case 'd': debug++; break; case 'S': ttype = 1; break; case 'N': ttype = 2; break; case 'c': nclients = atoi(optarg); #ifdef HAVE_POLL if (nclients <= 0) #else if (nclients <= 0 || nclients > MAXCLIENTS) #endif { fprintf(stderr, "invalid number of clients: %d\n", nclients); exit(1); } #ifdef HAVE_GETRLIMIT #ifdef RLIMIT_NOFILE /* most platforms use RLIMIT_NOFILE */ if (getrlimit(RLIMIT_NOFILE, &rlim) == -1) #else /* but BSD doesn't ... 
*/ if (getrlimit(RLIMIT_OFILE, &rlim) == -1) #endif /* RLIMIT_NOFILE */ { fprintf(stderr, "getrlimit failed: %s\n", strerror(errno)); exit(1); } if ((int)rlim.rlim_cur <= (nclients + 2)) { fprintf(stderr, "You need at least %d open files but you are only allowed to use %ld.\n", nclients + 2, (long)rlim.rlim_cur); fprintf(stderr, "Use limit/ulimit to increase the limit before using pgbench.\n"); exit(1); } #endif /* HAVE_GETRLIMIT */ break; case 'j': /* jobs */ nthreads = atoi(optarg); if (nthreads <= 0) { fprintf(stderr, "invalid number of threads: %d\n", nthreads); exit(1); } break; case 'C': is_connect = true; break; case 'r': is_latencies = true; break; case 's': scale_given = true; scale = atoi(optarg); if (scale <= 0) { fprintf(stderr, "invalid scaling factor: %d\n", scale); exit(1); } break; case 't': if (duration > 0) { fprintf(stderr, "specify either a number of transactions (-t) or a duration (-T), not both.\n"); exit(1); } nxacts = atoi(optarg); if (nxacts <= 0) { fprintf(stderr, "invalid number of transactions: %d\n", nxacts); exit(1); } break; case 'T': if (nxacts > 0) { fprintf(stderr, "specify either a number of transactions (-t) or a duration (-T), not both.\n"); exit(1); } duration = atoi(optarg); if (duration <= 0) { fprintf(stderr, "invalid duration: %d\n", duration); exit(1); } break; case 'U': login = optarg; break; case 'W': secrete = optarg; break; case 'l': use_log = true; break; case 'f': ttype = 3; filename = optarg; if (process_file(filename) == false || *sql_files[num_files - 1] == NULL) exit(1); break; case 'D': { char* p = NULL; if ((p = strchr(optarg, '=')) == NULL || p == optarg || *(p + 1) == '\0') { fprintf(stderr, "invalid variable definition: %s\n", optarg); exit(1); } *p++ = '\0'; if (!putVariable(&state[0], "option", optarg, p)) exit(1); } break; case 'F': fillfactor = atoi(optarg); if (is_mot) { fprintf(stderr, "fillfactor is not supported with memory tables\n"); exit(1); } if ((fillfactor < 10) || (fillfactor > 100)) { 
fprintf(stderr, "invalid fillfactor: %d\n", fillfactor); exit(1); } break; case 'M': if (num_files > 0) { fprintf(stderr, "query mode (-M) should be specifiled before transaction scripts (-f)\n"); exit(1); } for (int iquerymode = 0; iquerymode < NUM_QUERYMODE; iquerymode++) { querymode = (QueryMode)iquerymode; if (strcmp(optarg, QUERYMODE[querymode]) == 0) break; } if (querymode >= NUM_QUERYMODE) { fprintf(stderr, "invalid query mode (-M): %s\n", optarg); exit(1); } break; #ifdef PGXC case 'O': orient = xstrdup(optarg); break; #endif case 'P': progress = atoi(optarg); if (progress <= 0) { fprintf(stderr, "thread progress delay (-P) must be positive (%s)\n", optarg); exit(1); } break; case 'R': { /* get a double from the beginning of option value */ double throttle_value = atof(optarg); if (throttle_value <= 0.0) { fprintf(stderr, "invalid rate limit: %s\n", optarg); exit(1); } /* Invert rate limit into a time offset */ throttle_delay = (int64)(1000000.0 / throttle_value); } break; case 0: /* This covers long options which take no argument. */ break; case 2: /* tablespace */ if (is_mot) { fprintf(stderr, "tablespace is not supported with memory tables\n"); exit(1); } tablespace = optarg; break; case 3: /* index-tablespace */ if (is_mot) { fprintf(stderr, "index_tablespace is not supported with memory tables\n"); exit(1); } index_tablespace = optarg; break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); break; } } /* compute a per thread delay */ throttle_delay *= nthreads; if (orient) { #define MIN_LEN(s, x) (((strlen(s) > x) ? 
x : strlen(s))) if (strncmp(orient, "row", MIN_LEN(orient, 3)) == 0) { with_options = xstrdup("orientation = row"); } else if (strncmp(orient, "column", MIN_LEN(orient, 6)) == 0) { with_options = xstrdup("orientation = column"); } else if (strncmp(orient, "orc", MIN_LEN(orient, 3)) == 0) { with_options = xstrdup("orientation = orc, version=0.12"); } else { fprintf( stderr, "Unknown orientation option: %s.\nTry \"%s --help\" for more information.\n", orient, progname); exit(1); } } if (argc > optind) dbName = argv[optind]; else { if ((env = getenv("PGDATABASE")) != NULL && *env != '\0') { dbName = env; } else if (login != NULL && *login != '\0') { dbName = login; } else { dbName = ""; } } if (is_init_mode) { init(); if (orient) { free(orient); orient = NULL; } if (with_options) { free(with_options); with_options = NULL; } exit(0); } /* Use DEFAULT_NXACTS if neither nxacts nor duration is specified. */ if (nxacts <= 0 && duration <= 0) nxacts = DEFAULT_NXACTS; if (nclients % nthreads != 0) { fprintf(stderr, "number of clients (%d) must be a multiple of number of threads (%d)\n", nclients, nthreads); exit(1); } /* * is_latencies only works with multiple threads in thread-based * implementations, not fork-based ones, because it supposes that the * parent can see changes made to the per-thread execution stats by child * threads. It seems useful enough to accept despite this limitation, but * perhaps we should FIXME someday (by passing the stats data back up * through the parent-to-child pipes). */ #ifndef ENABLE_THREAD_SAFETY if (is_latencies && nthreads > 1) { fprintf(stderr, "-r does not work with -j larger than 1 on this platform.\n"); exit(1); } #endif // Prepared protocol only prepares the statement once so reconnect for each // query submission won't work. 
// if (querymode == QUERY_PREPARED && is_connect) { fprintf(stderr, "-C does not work with -M prepared protocol.\n"); exit(1); } /* * save main process id in the global variable because process id will be * changed after fork. */ main_pid = (int)getpid(); progress_nclients = nclients; progress_nthreads = nthreads; if (nclients > 1) { state = (CState*)xrealloc(state, sizeof(CState) * nclients); memset(state + 1, 0, sizeof(CState) * (nclients - 1)); /* copy any -D switch values to all clients */ for (i = 1; i < nclients; i++) { int j; state[i].id = i; for (j = 0; j < state[0].nvariables; j++) { if (!putVariable(&state[i], "startup", state[0].variables[j].name, state[0].variables[j].value)) exit(1); } } } if (debug) { if (duration <= 0) printf( "pghost: %s pgport: %s nclients: %d nxacts: %d dbName: %s\n", pghost, pgport, nclients, nxacts, dbName); else printf("pghost: %s pgport: %s nclients: %d duration: %d dbName: %s\n", pghost, pgport, nclients, duration, dbName); } /* opening connection... 
*/ con = doConnect(); if (con == NULL) exit(1); if (PQstatus(con) == CONNECTION_BAD) { fprintf(stderr, "Connection to database '%s' failed.\n", dbName); fprintf(stderr, "%s", PQerrorMessage(con)); exit(1); } if (ttype != 3) { /* * get the scaling factor that should be same as count(*) from * pgbench_branches if this is not a custom query */ res = PQexec(con, "select count(*) from pgbench_branches"); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "%s", PQerrorMessage(con)); exit(1); } scale = atoi(PQgetvalue(res, 0, 0)); if (scale < 0) { fprintf(stderr, "count(*) from pgbench_branches invalid (%d)\n", scale); exit(1); } PQclear(res); /* warn if we override user-given -s switch */ if (scale_given) fprintf(stderr, "Scale option ignored, using pgbench_branches table count = %d\n", scale); } /* * :scale variables normally get -s or database scale, but don't override * an explicit -D switch */ if (getVariable(&state[0], "scale") == NULL) { snprintf(val, sizeof(val), "%d", scale); for (i = 0; i < nclients; i++) { if (!putVariable(&state[i], "startup", "scale", val)) exit(1); } } if (!is_no_vacuum) { fprintf(stderr, "starting vacuum..."); executeStatement(con, "vacuum pgbench_branches"); executeStatement(con, "vacuum pgbench_tellers"); executeStatement(con, "truncate pgbench_history"); fprintf(stderr, "end.\n"); if (do_vacuum_accounts) { fprintf(stderr, "starting vacuum pgbench_accounts..."); executeStatement(con, "vacuum analyze pgbench_accounts"); fprintf(stderr, "end.\n"); } } PQfinish(con); /* set random seed */ INSTR_TIME_SET_CURRENT(start_time); srandom((unsigned int)INSTR_TIME_GET_MICROSEC(start_time)); /* process builtin SQL scripts */ switch (ttype) { case 0: #ifdef PGXC if (use_branch) sql_files[0] = process_builtin(tpc_b_bid); else #endif sql_files[0] = process_builtin(tpc_b); num_files = 1; break; case 1: sql_files[0] = process_builtin(select_only); num_files = 1; break; case 2: #ifdef PGXC if (use_branch) sql_files[0] = 
process_builtin(simple_update_bid); else #endif sql_files[0] = process_builtin(simple_update); num_files = 1; break; default: break; } /* set up thread data structures */ threads = (TState*)xmalloc(sizeof(TState) * nthreads); for (i = 0; i < nthreads; i++) { TState* thread = &threads[i]; thread->tid = i; thread->state = &state[nclients / nthreads * i]; thread->nstate = nclients / nthreads; thread->random_state[0] = random(); thread->random_state[1] = random(); thread->random_state[2] = random(); if (is_latencies) { /* Reserve memory for the thread to store per-command latencies */ int t; thread->exec_elapsed = (instr_time*)xmalloc(sizeof(instr_time) * num_commands); thread->exec_count = (int*)xmalloc(sizeof(int) * num_commands); for (t = 0; t < num_commands; t++) { INSTR_TIME_SET_ZERO(thread->exec_elapsed[t]); thread->exec_count[t] = 0; } } else { thread->exec_elapsed = NULL; thread->exec_count = NULL; } } /* get start up time */ INSTR_TIME_SET_CURRENT(start_time); /* set alarm if duration is specified. 
*/ if (duration > 0) setalarm(duration); /* start threads */ for (i = 0; i < nthreads; i++) { TState* thread = &threads[i]; INSTR_TIME_SET_CURRENT(thread->start_time); /* the first thread (i = 0) is executed by main thread */ if (i > 0) { int err = pthread_create(&thread->thread, NULL, threadRun, thread); if (err != 0 || thread->thread == INVALID_THREAD) { fprintf(stderr, "cannot create thread: %s\n", strerror(err)); exit(1); } } else { thread->thread = INVALID_THREAD; } } /* wait for threads and accumulate results */ total_xacts = 0; INSTR_TIME_SET_ZERO(conn_total_time); for (i = 0; i < nthreads; i++) { void* ret = NULL; if (threads[i].thread == INVALID_THREAD) ret = threadRun(&threads[i]); else pthread_join(threads[i].thread, &ret); if (ret != NULL) { TResult* r = (TResult*)ret; total_xacts += r->xacts; total_latencies += r->latencies; total_sqlats += r->sqlats; throttle_lag += r->throttle_lag; if (r->throttle_lag_max > throttle_lag_max) throttle_lag_max = r->throttle_lag_max; INSTR_TIME_ADD(conn_total_time, r->conn_time); free(ret); } } disconnect_all(state, nclients); /* get end time */ INSTR_TIME_SET_CURRENT(total_time); INSTR_TIME_SUBTRACT(total_time, start_time); printResults(ttype, total_xacts, nclients, threads, nthreads, total_time, conn_total_time, total_latencies, total_sqlats, throttle_lag, throttle_lag_max); return 0; } static void* threadRun(void* arg) { TState* thread = (TState*)arg; CState* state = thread->state; TResult* result = NULL; FILE* logfile = NULL; /* per-thread log file */ instr_time start, end; int nstate = thread->nstate; int remains = nstate; /* number of remaining clients */ int i, j; /* for reporting progress: */ int64 thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time); int64 last_report = thread_start; int64 next_report = last_report + (int64)progress * 1000000; int64 last_count = 0, last_lats = 0, last_sqlats = 0, last_lags = 0; #ifdef HAVE_POLL struct pollfd* ufds = (pollfd*)xmalloc(nstate * sizeof(pollfd)); /*nstate = 
nclients /nthreads */ int nfds = 0; /*the count number of fd */ #else fd_set input_mask; int maxsock = -1; /* max socket number to be waited, initialized as -1 */ #endif /* * Initialize throttling rate target for all of the thread's clients. It * might be a little more accurate to reset thread->start_time here too. * The possible drift seems too small relative to typical throttle delay * times to worry about it. */ INSTR_TIME_SET_CURRENT(start); thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start); thread->throttle_lag = 0; thread->throttle_lag_max = 0; result = (TResult*)xmalloc(sizeof(TResult)); INSTR_TIME_SET_ZERO(result->conn_time); /* open log file if requested */ if (use_log) { char logpath[64]; if (thread->tid == 0) snprintf(logpath, sizeof(logpath), "pgbench_log.%d", main_pid); else snprintf(logpath, sizeof(logpath), "pgbench_log.%d.%d", main_pid, thread->tid); logfile = fopen(logpath, "w"); if (logfile == NULL) { fprintf(stderr, "Couldn't open logfile \"%s\": %s", logpath, strerror(errno)); goto done; } } if (!is_connect) { /* make connections to the database */ for (i = 0; i < nstate; i++) { if ((state[i].con = doConnect()) == NULL) goto done; } } /* time after thread and connections set up */ INSTR_TIME_SET_CURRENT(result->conn_time); INSTR_TIME_SUBTRACT(result->conn_time, thread->start_time); /* send start up queries in async manner */ for (i = 0; i < nstate; i++) { CState* st = &state[i]; Command** commands = sql_files[st->use_file]; int prev_ecnt = st->ecnt; st->use_file = getrand(thread, 0, num_files - 1); if (!doCustom(thread, st, &result->conn_time, logfile)) remains--; /* I've aborted */ if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND) { fprintf(stderr, "Client %d aborted in state %d. 
Execution meta-command failed.\n", i, st->state); remains--; /* I've aborted */ PQfinish(st->con); st->con = NULL; } } while (remains > 0) { #ifdef HAVE_POLL nfds = 0; /*the count number of fd */ memset(ufds, 0, nstate * sizeof(pollfd)); #else FD_ZERO(&input_mask); maxsock = -1; /* max socket number to be waited, initialized as -1 */ #endif int64 now_usec = 0; int64 min_usec = INT64_MAX; for (i = 0; i < nstate; i++) { CState* st = &state[i]; Command** commands = sql_files[st->use_file]; int sock; if (st->con == NULL) { continue; } else if (st->sleeping) { if (st->throttling && timer_exceeded) { /* interrupt client which has not started a transaction */ remains--; st->sleeping = 0; st->throttling = false; PQfinish(st->con); st->con = NULL; continue; } else /* just a nap from the script */ { int this_usec; if (min_usec == INT64_MAX) { instr_time now; INSTR_TIME_SET_CURRENT(now); now_usec = INSTR_TIME_GET_MICROSEC(now); } this_usec = st->until - now_usec; if (min_usec > this_usec) min_usec = this_usec; } } else if (commands[st->state]->type == META_COMMAND) { min_usec = 0; /* the connection is ready to run */ break; } sock = PQsocket(st->con); if (sock < 0) { fprintf(stderr, "bad socket: %s\n", strerror(errno)); goto done; } #ifdef HAVE_POLL ufds[nfds].fd = sock; ufds[nfds].events = POLLIN | POLLPRI | POLLRDHUP | POLLERR | POLLHUP; ++nfds; #else FD_SET(sock, &input_mask); if (maxsock < sock) maxsock = sock; #endif } #ifdef HAVE_POLL if (min_usec > 0 && nfds > 0) { int nsocks; /* return from select(2) */ if (min_usec != INT64_MAX) { nsocks = poll(ufds, nfds, min_usec / 1000); } else { nsocks = poll(ufds, nfds, -1); /*wait for ever, until some events happend in the sockets*/ } if (nsocks < 0) { if (errno == EINTR) continue; /* must be something wrong */ fprintf(stderr, "poll failed: %s\n", strerror(errno)); goto done; } } for (i = 0; i < nfds; i++) { CState* st = NULL; for (j = 0; j < nstate; j++) { st = &state[j]; if (ufds[i].fd == PQsocket(st->con)) break; } Command** 
commands = sql_files[st->use_file]; int prev_ecnt = st->ecnt; if (st->con && (ufds[i].revents & (POLLIN | POLLPRI | POLLHUP) || commands[st->state]->type == META_COMMAND)) { if (!doCustom(thread, st, &result->conn_time, logfile)) remains--; /* I've aborted */ ufds[i].revents = 0; } if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND) { fprintf(stderr, "Client %d aborted in state %d. Execution of meta-command failed.\n", i, st->state); remains--; /* I've aborted */ PQfinish(st->con); st->con = NULL; } } #else if (min_usec > 0 && maxsock != -1) { int nsocks; /* return from select(2) */ if (min_usec != INT64_MAX) { struct timeval timeout; timeout.tv_sec = min_usec / 1000000; timeout.tv_usec = min_usec % 1000000; nsocks = select(maxsock + 1, &input_mask, NULL, NULL, &timeout); } else { nsocks = select(maxsock + 1, &input_mask, NULL, NULL, NULL); } if (nsocks < 0) { if (errno == EINTR) continue; /* must be something wrong */ fprintf(stderr, "select failed: %s\n", strerror(errno)); goto done; } } /* ok, backend returns reply */ for (i = 0; i < nstate; i++) { CState* st = &state[i]; Command** commands = sql_files[st->use_file]; int prev_ecnt = st->ecnt; if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask) || commands[st->state]->type == META_COMMAND)) { if (!doCustom(thread, st, &result->conn_time, logfile)) remains--; /* I've aborted */ } if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND) { fprintf(stderr, "Client %d aborted in state %d. 
Execution of meta-command failed.\n", i, st->state); remains--; /* I've aborted */ PQfinish(st->con); st->con = NULL; } } #endif #ifdef PTHREAD_FORK_EMULATION /* each process reports its own progression */ if (progress) { instr_time now_time; int64 now; INSTR_TIME_SET_CURRENT(now_time); now = INSTR_TIME_GET_MICROSEC(now_time); if (now >= next_report) { /* generate and show report */ int64 count = 0, lats = 0, sqlats = 0; int64 lags = thread->throttle_lag; int64 run = now - last_report; double tps, total_run, latency, sqlat, stdev, lag; for (i = 0; i < nstate; i++) { count += state[i].cnt; lats += state[i].txn_latencies; sqlats += state[i].txn_sqlats; } total_run = (now - thread_start) / 1000000.0; tps = 1000000.0 * (count - last_count) / run; latency = 0.001 * (lats - last_lats) / (count - last_count); sqlat = 1.0 * (sqlats - last_sqlats) / (count - last_count); stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency); lag = 0.001 * (lags - last_lags) / (count - last_count); if (throttle_delay) fprintf(stderr, "progress %d: %.1f s, %.1f tps, " "lat %.3f ms stddev %.3f, lag %.3f ms\n", thread->tid, total_run, tps, latency, stdev, lag); else fprintf(stderr, "progress %d: %.1f s, %.1f tps, " "lat %.3f ms stddev %.3f\n", thread->tid, total_run, tps, latency, stdev); last_count = count; last_lats = lats; last_sqlats = sqlats; last_lags = lags; last_report = now; next_report += progress * 1000000; } } #else /* progress report by thread 0 for all threads */ if (progress && thread->tid == 0) { instr_time now_time; int64 now; INSTR_TIME_SET_CURRENT(now_time); now = INSTR_TIME_GET_MICROSEC(now_time); if (now >= next_report) { /* generate and show report */ int64 count = 0, lats = 0, sqlats = 0, lags = 0; int64 run = now - last_report; double tps, total_run, latency, sqlat, lag, stdev; for (i = 0; i < progress_nclients; i++) { count += state[i].cnt; lats += state[i].txn_latencies; sqlats += state[i].txn_sqlats; } for (i = 0; i < progress_nthreads; i++) lags += 
thread[i].throttle_lag; total_run = (now - thread_start) / 1000000.0; tps = 1000000.0 * (count - last_count) / run; latency = 0.001 * (lats - last_lats) / (count - last_count); sqlat = 1.0 * (sqlats - last_sqlats) / (count - last_count); stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency); lag = 0.001 * (lags - last_lags) / (count - last_count); if (throttle_delay) fprintf(stderr, "progress: %.1f s, %.1f tps, " "lat %.3f ms stddev %.3f, lag %.3f ms\n", total_run, tps, latency, stdev, lag); else fprintf(stderr, "progress: %.1f s, %.1f tps, " "lat %.3f ms stddev %.3f\n", total_run, tps, latency, stdev); last_count = count; last_lats = lats; last_sqlats = sqlats; last_lags = lags; last_report = now; next_report += progress * 1000000; } } #endif /* PTHREAD_FORK_EMULATION */ } done: #ifdef HAVE_POLL free(ufds); #endif INSTR_TIME_SET_CURRENT(start); disconnect_all(state, nstate); result->xacts = 0; result->latencies = 0; result->sqlats = 0; for (i = 0; i < nstate; i++) { result->xacts += state[i].cnt; result->latencies += state[i].txn_latencies; result->sqlats += state[i].txn_sqlats; } result->throttle_lag = thread->throttle_lag; result->throttle_lag_max = thread->throttle_lag_max; INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(result->conn_time, end, start); if (logfile) fclose(logfile); return result; } /* * Support for duration option: set timer_exceeded after so many seconds. */ #ifndef WIN32 static void handle_sig_alarm(SIGNAL_ARGS) { timer_exceeded = true; } static void setalarm(int seconds) { pqsignal(SIGALRM, handle_sig_alarm); alarm(seconds); } #ifndef ENABLE_THREAD_SAFETY /* * implements pthread using fork. 
*/ typedef struct fork_pthread { pid_t pid; int pipes[2]; } fork_pthread; static int pthread_create(pthread_t* thread, pthread_attr_t* attr, void* (*start_routine)(void*), void* arg) { fork_pthread* th = NULL; void* ret = NULL; th = (fork_pthread*)xmalloc(sizeof(fork_pthread)); if (pipe(th->pipes) < 0) { free(th); return errno; } th->pid = fork(); if (th->pid == -1) /* error */ { free(th); return errno; } if (th->pid != 0) /* in parent process */ { close(th->pipes[1]); *thread = th; return 0; } /* in child process */ close(th->pipes[0]); /* set alarm again because the child does not inherit timers */ if (duration > 0) setalarm(duration); ret = start_routine(arg); write(th->pipes[1], ret, sizeof(TResult)); close(th->pipes[1]); free(th); exit(0); } static int pthread_join(pthread_t th, void** thread_return) { int status; while (waitpid(th->pid, &status, 0) != th->pid) { if (errno != EINTR) return errno; } if (thread_return != NULL) { /* assume result is TResult */ *thread_return = xmalloc(sizeof(TResult)); if (read(th->pipes[0], *thread_return, sizeof(TResult)) != sizeof(TResult)) { free(*thread_return); *thread_return = NULL; } } close(th->pipes[0]); free(th); return 0; } #endif #else /* WIN32 */ static VOID CALLBACK win32_timer_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) { timer_exceeded = true; } static void setalarm(int seconds) { HANDLE queue; HANDLE timer; /* This function will be called at most once, so we can cheat a bit. 
*/ queue = CreateTimerQueue(); if (seconds > ((DWORD)-1) / 1000 || !CreateTimerQueueTimer(&timer, queue, win32_timer_callback, NULL, seconds * 1000, 0, WT_EXECUTEINTIMERTHREAD | WT_EXECUTEONLYONCE)) { fprintf(stderr, "Failed to set timer\n"); exit(1); } } /* partial pthread implementation for Windows */ typedef struct win32_pthread { HANDLE handle; void* (*routine)(void*); void* arg; void* result; } win32_pthread; static unsigned __stdcall win32_pthread_run(void* arg) { win32_pthread* th = (win32_pthread*)arg; th->result = th->routine(th->arg); return 0; } static int pthread_create(pthread_t* thread, pthread_attr_t* attr, void* (*start_routine)(void*), void* arg) { int save_errno; win32_pthread* th = NULL; th = (win32_pthread*)xmalloc(sizeof(win32_pthread)); th->routine = start_routine; th->arg = arg; th->result = NULL; th->handle = (HANDLE)_beginthreadex(NULL, 0, win32_pthread_run, th, 0, NULL); if (th->handle == NULL) { save_errno = errno; free(th); return save_errno; } *thread = th; return 0; } static int pthread_join(pthread_t th, void** thread_return) { if (th == NULL || th->handle == NULL) return errno = EINVAL; if (WaitForSingleObject(th->handle, INFINITE) != WAIT_OBJECT_0) { _dosmaperr(GetLastError()); return errno; } if (thread_return) *thread_return = th->result; CloseHandle(th->handle); free(th); return 0; } #endif /* WIN32 */