Merge branch '2.2' into develop
@@ -58,7 +58,7 @@ int atomic_load_int(const int *variable)
#ifdef MXS_USE_ATOMIC_BUILTINS
    return __atomic_load_n(variable, __ATOMIC_SEQ_CST);
#else
    return __sync_fetch_and_or(variable, 0);
    return __sync_fetch_and_or((int*)variable, 0);
#endif
}

@@ -67,7 +67,7 @@ int32_t atomic_load_int32(const int32_t *variable)
#ifdef MXS_USE_ATOMIC_BUILTINS
    return __atomic_load_n(variable, __ATOMIC_SEQ_CST);
#else
    return __sync_fetch_and_or(variable, 0);
    return __sync_fetch_and_or((int32_t*)variable, 0);
#endif
}

@@ -76,7 +76,7 @@ int64_t atomic_load_int64(const int64_t *variable)
#ifdef MXS_USE_ATOMIC_BUILTINS
    return __atomic_load_n(variable, __ATOMIC_SEQ_CST);
#else
    return __sync_fetch_and_or(variable, 0);
    return __sync_fetch_and_or((int64_t*)variable, 0);
#endif
}

@@ -85,7 +85,7 @@ uint32_t atomic_load_uint32(const uint32_t *variable)
#ifdef MXS_USE_ATOMIC_BUILTINS
    return __atomic_load_n(variable, __ATOMIC_SEQ_CST);
#else
    return __sync_fetch_and_or(variable, 0);
    return __sync_fetch_and_or((uint32_t*)variable, 0);
#endif
}

@@ -94,7 +94,7 @@ uint64_t atomic_load_uint64(const uint64_t *variable)
#ifdef MXS_USE_ATOMIC_BUILTINS
    return __atomic_load_n(variable, __ATOMIC_SEQ_CST);
#else
    return __sync_fetch_and_or(variable, 0);
    return __sync_fetch_and_or((uint64_t*)variable, 0);
#endif
}

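A minimal sketch of the fallback pattern used in the hunks above (illustration only, not part of the commit), assuming GCC/Clang builtins: the legacy __sync builtins have no plain load and take a non-const pointer, while the newer __atomic_load_n() accepts the const-qualified argument directly, so the fallback casts away const and emulates an atomic load by OR-ing with 0.

/* Illustrative only, mirroring the pattern above. */
static int load_int_seq_cst(const int *variable)
{
#ifdef MXS_USE_ATOMIC_BUILTINS
    /* __atomic_load_n() accepts a pointer to const directly. */
    return __atomic_load_n(variable, __ATOMIC_SEQ_CST);
#else
    /* OR-ing with 0 leaves the value unchanged but is performed atomically
     * and acts as a full barrier; the cast drops the const qualifier that
     * the legacy builtin cannot accept. */
    return __sync_fetch_and_or((int *)variable, 0);
#endif
}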
@@ -146,6 +146,7 @@ const char CN_TYPE[] = "type";
const char CN_UNIX[] = "unix";
const char CN_USER[] = "user";
const char CN_USERS[] = "users";
const char CN_USERS_REFRESH_TIME[] = "users_refresh_time";
const char CN_VERSION_STRING[] = "version_string";
const char CN_WEIGHTBY[] = "weightby";
const char CN_SESSION_TRACK_TRX_STATE[] = "session_track_trx_state";
@@ -1668,6 +1669,44 @@ handle_global_item(const char *name, const char *value)
    {
        gateway.local_address = MXS_STRDUP_A(value);
    }
    else if (strcmp(name, CN_USERS_REFRESH_TIME) == 0)
    {
        char* endptr;
        long users_refresh_time = strtol(value, &endptr, 0);
        if (*endptr == '\0')
        {
            if (users_refresh_time < 0)
            {
                MXS_NOTICE("Value of '%s' is less than 0, users will "
                           "not be automatically refreshed.", CN_USERS_REFRESH_TIME);
                // Strictly speaking they will be refreshed once every 68 years,
                // but I just don't believe the uptime will be that long.
                users_refresh_time = INT32_MAX;
            }
            else if (users_refresh_time < USERS_REFRESH_TIME_MIN)
            {
                MXS_WARNING("%s is less than the allowed minimum value of %d for the "
                            "configuration option '%s', using the minimum value.",
                            value, USERS_REFRESH_TIME_MIN, CN_USERS_REFRESH_TIME);
                users_refresh_time = USERS_REFRESH_TIME_MIN;
            }

            if (users_refresh_time > INT32_MAX)
            {
                // To ensure that there will be no overflows when
                // we later do arithmetic.
                users_refresh_time = INT32_MAX;
            }

            gateway.users_refresh_time = users_refresh_time;
        }
        else
        {
            MXS_ERROR("%s is an invalid value for '%s', using default %d instead.",
                      value, CN_USERS_REFRESH_TIME, USERS_REFRESH_TIME_DEFAULT);
            gateway.users_refresh_time = USERS_REFRESH_TIME_DEFAULT;
        }
    }
    else
    {
        for (i = 0; lognames[i].name; i++)

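A sketch of the parsing and clamping rules added above (illustrative only, not part of the commit; the helper name and standalone form are assumptions): a non-numeric value falls back to the default, a negative value effectively disables refreshing by clamping to INT32_MAX, values below the minimum are raised to it, and anything above INT32_MAX is capped so later time arithmetic cannot overflow.

#include <cstdint>
#include <cstdlib>

// Hypothetical helper; the minimum and default come from the real
// configuration constants (USERS_REFRESH_TIME_MIN, USERS_REFRESH_TIME_DEFAULT).
long parse_users_refresh_time(const char* value, long minimum, long dflt)
{
    char* endptr;
    long t = strtol(value, &endptr, 0);
    if (*endptr != '\0')
    {
        return dflt;        // not a number: use the default
    }
    if (t < 0)
    {
        t = INT32_MAX;      // negative: never refresh in practice
    }
    else if (t < minimum)
    {
        t = minimum;        // enforce the configured minimum
    }
    if (t > INT32_MAX)
    {
        t = INT32_MAX;      // keep later arithmetic overflow-free
    }
    return t;
}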
@@ -515,15 +515,15 @@ monitorShow(DCB *dcb, MXS_MONITOR *monitor)
        break;
    }

    dcb_printf(dcb, "Monitor: %p\n", monitor);
    dcb_printf(dcb, "Name: %s\n", monitor->name);
    dcb_printf(dcb, "State: %s\n", state);
    dcb_printf(dcb, "Sampling interval: %lu milliseconds\n", monitor->interval);
    dcb_printf(dcb, "Connect Timeout: %i seconds\n", monitor->connect_timeout);
    dcb_printf(dcb, "Read Timeout: %i seconds\n", monitor->read_timeout);
    dcb_printf(dcb, "Write Timeout: %i seconds\n", monitor->write_timeout);
    dcb_printf(dcb, "Connect attempts: %i \n", monitor->connect_attempts);
    dcb_printf(dcb, "Monitored servers: ");
    dcb_printf(dcb, "Monitor: %p\n", monitor);
    dcb_printf(dcb, "Name: %s\n", monitor->name);
    dcb_printf(dcb, "State: %s\n", state);
    dcb_printf(dcb, "Sampling interval: %lu milliseconds\n", monitor->interval);
    dcb_printf(dcb, "Connect Timeout: %i seconds\n", monitor->connect_timeout);
    dcb_printf(dcb, "Read Timeout: %i seconds\n", monitor->read_timeout);
    dcb_printf(dcb, "Write Timeout: %i seconds\n", monitor->write_timeout);
    dcb_printf(dcb, "Connect attempts: %i \n", monitor->connect_attempts);
    dcb_printf(dcb, "Monitored servers: ");

    const char *sep = "";

@@ -543,12 +543,12 @@ monitorShow(DCB *dcb, MXS_MONITOR *monitor)
        }
        else
        {
            dcb_printf(dcb, "\t(no diagnostics)\n");
            dcb_printf(dcb, " (no diagnostics)\n");
        }
    }
    else
    {
        dcb_printf(dcb, "\tMonitor failed\n");
        dcb_printf(dcb, " Monitor failed\n");
    }
    dcb_printf(dcb, "\n");
}
@@ -2444,41 +2444,41 @@ MXS_MONITORED_SERVER* mon_get_monitored_server(const MXS_MONITOR* mon, SERVER* s
int mon_config_get_servers(const MXS_CONFIG_PARAMETER* params, const char* key, const MXS_MONITOR* mon,
                           MXS_MONITORED_SERVER*** monitored_servers_out)
{
    ss_dassert(*monitored_servers_out == NULL);
    ss_dassert(monitored_servers_out != NULL && *monitored_servers_out == NULL);
    SERVER** servers = NULL;
    int servers_size = config_get_server_list(params, key, &servers);
    int rval = 0;
    int found = 0;
    // All servers in the array must be monitored by the given monitor.
    if (servers_size > 0)
    {
        MXS_MONITORED_SERVER** monitored_array =
            (MXS_MONITORED_SERVER**)MXS_CALLOC(servers_size, sizeof(MXS_MONITORED_SERVER*));
        bool error = false;
        for (int i = 0; i < servers_size && !error; i++)
        for (int i = 0; i < servers_size; i++)
        {
            MXS_MONITORED_SERVER* mon_serv = mon_get_monitored_server(mon, servers[i]);
            if (mon_serv != NULL)
            {
                monitored_array[i] = mon_serv;
                monitored_array[found++] = mon_serv;
            }
            else
            {
                MXS_ERROR("Server '%s' is not monitored by monitor '%s'.", servers[i]->unique_name, mon->name);
                error = true;
                MXS_WARNING("Server '%s' is not monitored by monitor '%s'.",
                            servers[i]->unique_name, mon->name);
            }
        }
        MXS_FREE(servers);

        if (error)
        ss_dassert(found <= servers_size);
        if (found == 0)
        {
            MXS_FREE(monitored_array);
            rval = -1;
            monitored_array = NULL;
        }
        else
        else if (found < servers_size)
        {
            *monitored_servers_out = monitored_array;
            rval = servers_size;
            monitored_array = (MXS_MONITORED_SERVER**)MXS_REALLOC(monitored_array, found);
        }
        *monitored_servers_out = monitored_array;
    }
    return rval;
}
    return found;
}

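The rewritten mon_config_get_servers() above follows a common collect-then-shrink pattern: allocate for the worst case, keep only matching entries, then free or shrink the array. A generic sketch under assumed names follows (illustration only, not MaxScale API); note that realloc() takes a size in bytes, so the element size belongs in the shrink calculation.

#include <cstdlib>

// Generic illustration only.
template <typename T, typename Pred>
T** collect_matching(T* const* input, int input_size, Pred matches, int* found_out)
{
    T** result = static_cast<T**>(calloc(input_size, sizeof(T*)));
    int found = 0;
    for (int i = 0; i < input_size; i++)
    {
        if (matches(input[i]))
        {
            result[found++] = input[i];
        }
    }
    if (found == 0)
    {
        free(result);       // nothing matched, hand back NULL
        result = NULL;
    }
    else if (found < input_size)
    {
        // Shrink to the number of matches; the size argument is in bytes.
        result = static_cast<T**>(realloc(result, found * sizeof(T*)));
    }
    *found_out = found;
    return result;
}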
@@ -335,6 +335,34 @@ serviceStartPort(SERVICE *service, SERV_LISTENER *port)
        }
    }

    MXS_CONFIG* config = config_get_global_options();
    time_t last;
    bool warned;

    /**
     * At service start last update is set to config->users_refresh_time seconds earlier.
     * This way MaxScale could try reloading users just after startup. But only if user
     * refreshing has not been turned off.
     */
    if (config->users_refresh_time == INT32_MAX)
    {
        last = time(NULL);
        warned = true; // So that there will not be a refresh rate warning.
    }
    else
    {
        last = time(NULL) - config->users_refresh_time;
        warned = false;
    }

    int nthreads = config_threadcount();

    for (int i = 0; i < nthreads; ++i)
    {
        service->rate_limits[i].last = last;
        service->rate_limits[i].warned = warned;
    }

    if (port->listener->func.listen(port->listener, config_bind))
    {
        port->listener->session = session_alloc(service, port->listener);
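A sketch of the seeding logic added above (illustrative only, not part of the commit; the standalone helper is an assumption): when user refreshing is effectively disabled (users_refresh_time == INT32_MAX) the last-refresh timestamp is set to the current time and the warning flag is pre-set so the rate-limit warning is never logged; otherwise the timestamp is backdated by one full interval so the first reload after startup is not throttled.

#include <cstdint>
#include <ctime>

// Hypothetical standalone form of the per-thread initialisation shown above.
void seed_refresh_limit(int32_t users_refresh_time, time_t* last_out, bool* warned_out)
{
    if (users_refresh_time == INT32_MAX)
    {
        *last_out = time(NULL);
        *warned_out = true;     // suppresses the refresh rate warning
    }
    else
    {
        *last_out = time(NULL) - users_refresh_time;
        *warned_out = false;
    }
}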
@@ -1629,22 +1657,24 @@ int service_refresh_users(SERVICE *service)
        self = 0;
    }

    MXS_CONFIG* config = config_get_global_options();

    /* Check if refresh rate limit has been exceeded */
    if ((now < service->rate_limits[self].last + USERS_REFRESH_TIME) ||
        (service->rate_limits[self].nloads >= USERS_REFRESH_MAX_PER_TIME))
    if (now < service->rate_limits[self].last + config->users_refresh_time)
    {
        MXS_ERROR("[%s] Refresh rate limit exceeded for load of users' table.", service->name);
        if (!service->rate_limits[self].warned)
        {
            MXS_WARNING("[%s] Refresh rate limit (once every %ld seconds) exceeded for "
                        "load of users' table.",
                        service->name, config->users_refresh_time);
            service->rate_limits[self].warned = true;
        }
    }
    else
    {
        service->rate_limits[self].nloads++;
        service->rate_limits[self].last = now;
        service->rate_limits[self].warned = false;

        /** If we have reached the limit on users refreshes, reset refresh time and count */
        if (service->rate_limits[self].nloads >= USERS_REFRESH_MAX_PER_TIME)
        {
            service->rate_limits[self].nloads = 0;
            service->rate_limits[self].last = now;
        }

        ret = 0;
        LISTENER_ITERATOR iter;

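A sketch of the refresh gate in the hunk above (illustrative only, not part of the commit; field and constant names are assumptions): a reload is refused while the configured interval has not elapsed, the warning is emitted only once per throttled stretch, and the per-interval load counter is reset when it reaches its maximum.

#include <ctime>

struct RefreshLimit
{
    time_t last;    // time of the last reload
    int    nloads;  // reloads during the current interval
    bool   warned;  // rate-limit warning already logged?
};

// Returns true when a users reload may proceed now.
bool may_refresh_users(RefreshLimit* rl, time_t now, long users_refresh_time,
                       int max_loads_per_interval)
{
    if (now < rl->last + users_refresh_time)
    {
        if (!rl->warned)
        {
            // The caller would log the "refresh rate limit exceeded" warning here.
            rl->warned = true;
        }
        return false;
    }
    rl->nloads++;
    rl->last = now;
    rl->warned = false;
    if (rl->nloads >= max_loads_per_interval)
    {
        rl->nloads = 0;     // start a new counting interval
    }
    return true;
}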
@@ -703,8 +703,8 @@ static bool get_hostname(DCB *dcb, char *client_hostname, size_t size)

    if (lookup_result != 0 && lookup_result != EAI_NONAME)
    {
        MXS_ERROR("Client hostname lookup failed for '%s', getnameinfo() returned: '%s'.",
                  dcb->remote, gai_strerror(lookup_result));
        MXS_WARNING("Client hostname lookup failed for '%s', getnameinfo() returned: '%s'.",
                    dcb->remote, gai_strerror(lookup_result));
    }

    return lookup_result == 0;

@@ -129,6 +129,7 @@ static bool wait_cluster_stabilization(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER*
                                       const ServerVector& slaves, int seconds_remaining);
static string get_connection_errors(const ServerVector& servers);
static int64_t scan_server_id(const char* id_string);
static string generate_change_master_cmd(MYSQL_MONITOR* mon, const string& master_host, int master_port);

static bool report_version_err = true;
static const char* hb_table_name = "maxscale_schema.replication_heartbeat";
@@ -160,6 +161,9 @@ static const char CN_REPLICATION_PASSWORD[] = "replication_password";
/** Server id default value */
static const int64_t SERVER_ID_UNKNOWN = -1;

/** Default port */
static const int PORT_UNKNOWN = 0;

class Gtid
{
public:
@@ -1056,6 +1060,8 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
        handle->id = config_get_global_options()->id;
        handle->warn_set_standalone_master = true;
        handle->master_gtid_domain = -1;
        handle->external_master_host[0] = '\0';
        handle->external_master_port = PORT_UNKNOWN;
        handle->monitor = monitor;
    }

@@ -1170,6 +1176,21 @@ static bool stop_monitor(MXS_MONITOR* mon)
    return actually_stopped;
}

static string monitored_servers_to_string(MXS_MONITORED_SERVER** array, size_t array_size)
{
    string rval;
    if (array_size > 0)
    {
        const char* separator = "";
        for (size_t i = 0; i < array_size; i++)
        {
            rval += separator;
            rval += array[i]->server->unique_name;
            separator = ",";
        }
    }
    return rval;
}
/**
 * Diagnostic interface
 *
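The monitored_servers_to_string() helper added above is a plain comma-join; its generic shape, with illustrative names (not part of the commit), is:

#include <string>
#include <vector>

std::string join_names(const std::vector<std::string>& names)
{
    std::string rval;
    const char* separator = "";
    for (size_t i = 0; i < names.size(); i++)
    {
        rval += separator;      // empty before the first element
        rval += names[i];
        separator = ",";
    }
    return rval;
}
// join_names({"server1", "server2"}) yields "server1,server2"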
@@ -1180,35 +1201,57 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
{
    const MYSQL_MONITOR *handle = (const MYSQL_MONITOR *)mon->handle;

    dcb_printf(dcb, "Automatic failover:\t%s\n", handle->auto_failover ? "Enabled" : "Disabled");
    dcb_printf(dcb, "Failcount:\t\t%d\n", handle->failcount);
    dcb_printf(dcb, "Failover Timeout:\t%u\n", handle->failover_timeout);
    dcb_printf(dcb, "Switchover Timeout:\t%u\n", handle->switchover_timeout);
    dcb_printf(dcb, "Auto rejoin:\t\t%s\n", handle->auto_rejoin ? "Enabled" : "Disabled");
    dcb_printf(dcb, "MaxScale MonitorId:\t%lu\n", handle->id);
    dcb_printf(dcb, "Replication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? "enabled" : "disabled");
    dcb_printf(dcb, "Detect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? "enabled" : "disabled");
    dcb_printf(dcb, "Server information\n\n");
    dcb_printf(dcb, "Automatic failover: %s\n", handle->auto_failover ? "Enabled" : "Disabled");
    dcb_printf(dcb, "Failcount: %d\n", handle->failcount);
    dcb_printf(dcb, "Failover timeout: %u\n", handle->failover_timeout);
    dcb_printf(dcb, "Switchover timeout: %u\n", handle->switchover_timeout);
    dcb_printf(dcb, "Automatic rejoin: %s\n", handle->auto_rejoin ? "Enabled" : "Disabled");
    dcb_printf(dcb, "MaxScale monitor ID: %lu\n", handle->id);
    dcb_printf(dcb, "Detect replication lag: %s\n", (handle->replicationHeartbeat == 1) ?
               "Enabled" : "Disabled");
    dcb_printf(dcb, "Detect stale master: %s\n", (handle->detectStaleMaster == 1) ?
               "Enabled" : "Disabled");
    if (handle->n_excluded > 0)
    {
        dcb_printf(dcb, "Non-promotable servers (failover): ");
        dcb_printf(dcb, "%s\n",
                   monitored_servers_to_string(handle->excluded_servers, handle->n_excluded).c_str());
    }

    dcb_printf(dcb, "\nServer information:\n-------------------\n\n");
    for (MXS_MONITORED_SERVER *db = mon->monitored_servers; db; db = db->next)
    {
        MySqlServerInfo *serv_info = get_server_info(handle, db);
        dcb_printf(dcb, "Server: %s\n", db->server->unique_name);
        dcb_printf(dcb, "Server ID: %" PRId64 "\n", serv_info->server_id);
        dcb_printf(dcb, "Read only: %s\n", serv_info->read_only ? "ON" : "OFF");
        dcb_printf(dcb, "Slave configured: %s\n", serv_info->slave_configured ? "YES" : "NO");
        dcb_printf(dcb, "Slave IO running: %s\n", serv_info->slave_status.slave_io_running ? "YES" : "NO");
        dcb_printf(dcb, "Slave SQL running: %s\n", serv_info->slave_status.slave_sql_running ? "YES" : "NO");
        dcb_printf(dcb, "Master ID: %" PRId64 "\n", serv_info->slave_status.master_server_id);
        dcb_printf(dcb, "Master binlog file: %s\n", serv_info->slave_status.master_log_file.c_str());
        dcb_printf(dcb, "Master binlog position: %lu\n", serv_info->slave_status.read_master_log_pos);
        dcb_printf(dcb, "Server: %s\n", db->server->unique_name);
        dcb_printf(dcb, "Server ID: %" PRId64 "\n", serv_info->server_id);
        dcb_printf(dcb, "Read only: %s\n", serv_info->read_only ? "YES" : "NO");
        dcb_printf(dcb, "Slave configured: %s\n", serv_info->slave_configured ? "YES" : "NO");
        if (serv_info->slave_configured)
        {
            dcb_printf(dcb, "Slave IO running: %s\n", serv_info->slave_status.slave_io_running ? "YES" : "NO");
            dcb_printf(dcb, "Slave SQL running: %s\n", serv_info->slave_status.slave_sql_running ? "YES" : "NO");
            dcb_printf(dcb, "Master ID: %" PRId64 "\n", serv_info->slave_status.master_server_id);
            dcb_printf(dcb, "Master binlog file: %s\n", serv_info->slave_status.master_log_file.c_str());
            dcb_printf(dcb, "Master binlog position: %lu\n", serv_info->slave_status.read_master_log_pos);
        }
        if (serv_info->gtid_current_pos.server_id != SERVER_ID_UNKNOWN)
        {
            dcb_printf(dcb, "Gtid current position: %s\n",
                       serv_info->gtid_current_pos.to_string().c_str());
        }
        if (serv_info->gtid_binlog_pos.server_id != SERVER_ID_UNKNOWN)
        {
            dcb_printf(dcb, "Gtid binlog position: %s\n",
                       serv_info->gtid_current_pos.to_string().c_str());
        }
        if (serv_info->slave_status.gtid_io_pos.server_id != SERVER_ID_UNKNOWN)
        {
            dcb_printf(dcb, "Gtid_IO_Pos: %s\n", serv_info->slave_status.gtid_io_pos.to_string().c_str());
            dcb_printf(dcb, "Gtid slave IO position: %s\n",
                       serv_info->slave_status.gtid_io_pos.to_string().c_str());
        }
        if (handle->multimaster)
        {
            dcb_printf(dcb, "Master group: %d\n", serv_info->group);
            dcb_printf(dcb, "Master group: %d\n", serv_info->group);
        }

        dcb_printf(dcb, "\n");
@@ -1243,7 +1286,11 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
    {
        json_object_set_new(rval, "script", json_string(handle->script));
    }

    if (handle->n_excluded > 0)
    {
        string list = monitored_servers_to_string(handle->excluded_servers, handle->n_excluded);
        json_object_set_new(rval, CN_NO_PROMOTE_SERVERS, json_string(list.c_str()));
    }
    if (mon->monitored_servers)
    {
        json_t* arr = json_array();
@@ -1267,11 +1314,12 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
                                json_string(serv_info->slave_status.master_log_file.c_str()));
            json_object_set_new(srv, "master_binlog_position",
                                json_integer(serv_info->slave_status.read_master_log_pos));
            if (serv_info->slave_status.gtid_io_pos.server_id != SERVER_ID_UNKNOWN)
            {
                json_object_set_new(srv, "gtid_io_pos",
                json_object_set_new(srv, "gtid_current_pos",
                                    json_string(serv_info->gtid_current_pos.to_string().c_str()));
                json_object_set_new(srv, "gtid_binlog_pos",
                                    json_string(serv_info->gtid_binlog_pos.to_string().c_str()));
                json_object_set_new(srv, "gtid_io_pos",
                                    json_string(serv_info->slave_status.gtid_io_pos.to_string().c_str()));
            }
            if (handle->multimaster)
            {
                json_object_set_new(srv, "master_group", json_integer(serv_info->group));
@@ -1707,6 +1755,11 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITORED_SERVER *database)
    }
    /* Query a few settings. */
    read_server_variables(database, serv_info);
    /* If gtid domain exists and server is 10.0, update gtid:s */
    if (handle->master_gtid_domain >= 0 && serv_info->version == MYSQL_SERVER_VERSION_100)
    {
        update_gtids(handle, database, serv_info);
    }
    /* Check for MariaDB 10.x.x and get status for multi-master replication */
    if (serv_info->version == MYSQL_SERVER_VERSION_100 || serv_info->version == MYSQL_SERVER_VERSION_55)
    {
@@ -2216,13 +2269,49 @@ monitorMain(void *arg)

        if (handle->master != NULL && SERVER_IS_MASTER(handle->master->server))
        {
            int64_t domain = get_server_info(handle, handle->master)->gtid_domain_id;
            MySqlServerInfo* master_info = get_server_info(handle, handle->master);
            // Update cluster gtid domain
            int64_t domain = master_info->gtid_domain_id;
            if (handle->master_gtid_domain >= 0 && domain != handle->master_gtid_domain)
            {
                MXS_INFO("gtid_domain_id of master has changed: %" PRId64 " -> %" PRId64 ".",
                MXS_NOTICE("Gtid domain id of master has changed: %" PRId64 " -> %" PRId64 ".",
                           handle->master_gtid_domain, domain);
            }
            handle->master_gtid_domain = domain;

            // Update cluster external master
            if (SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(handle->master->server))
            {
                if (master_info->slave_status.master_host != handle->external_master_host ||
                    master_info->slave_status.master_port != handle->external_master_port)
                {
                    const char* new_ext_host = master_info->slave_status.master_host.c_str();
                    const int new_ext_port = master_info->slave_status.master_port;
                    if (handle->external_master_port == PORT_UNKNOWN)
                    {
                        MXS_NOTICE("Cluster master server is replicating from an external master: %s:%d",
                                   new_ext_host, new_ext_port);
                    }
                    else
                    {
                        MXS_NOTICE("The external master of the cluster has changed: %s:%d -> %s:%d.",
                                   handle->external_master_host, handle->external_master_port,
                                   new_ext_host, new_ext_port);
                    }
                    snprintf(handle->external_master_host, sizeof(handle->external_master_host),
                             "%s", new_ext_host);
                    handle->external_master_port = new_ext_port;
                }
            }
            else
            {
                if (handle->external_master_port != PORT_UNKNOWN)
                {
                    MXS_NOTICE("Cluster lost the external master.");
                }
                handle->external_master_host[0] = '\0';
                handle->external_master_port = PORT_UNKNOWN;
            }
        }

        ptr = mon->monitored_servers;
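A condensed sketch of the external-master bookkeeping added above (illustrative only, not part of the commit; the struct and function names are assumptions): the stored host/port pair is updated only when it changes, a notice distinguishes an external master that appeared from one that changed, and the state is cleared when the cluster master stops replicating from an external server.

#include <cstdio>
#include <string>

struct ExternalMaster
{
    std::string host;
    int port;               // 0 stands in for PORT_UNKNOWN
};

void track_external_master(ExternalMaster* state, bool master_has_external,
                           const std::string& host, int port)
{
    if (master_has_external)
    {
        if (host != state->host || port != state->port)
        {
            if (state->port == 0)
            {
                std::printf("External master appeared: %s:%d\n", host.c_str(), port);
            }
            else
            {
                std::printf("External master changed: %s:%d -> %s:%d\n",
                            state->host.c_str(), state->port, host.c_str(), port);
            }
            state->host = host;
            state->port = port;
        }
    }
    else if (state->port != 0)
    {
        std::printf("External master lost\n");
        state->host.clear();
        state->port = 0;
    }
}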
@@ -3359,17 +3448,14 @@ static bool check_replication_settings(const MXS_MONITORED_SERVER* server, MySql
}

/**
 * Check that the given slave is a valid promotion candidate. Update the server info structs of all slaves.
 * Also populate the output vector with other slave servers.
 * Check that the given slave is a valid promotion candidate.
 *
 * @param mon Cluster monitor
 * @param preferred Preferred new master
 * @param slaves_out Output array for other slaves. These should be redirected to the new master. Can be NULL.
 * @param err_out Json object for error printing. Can be NULL.
 * @return True, if given slave is a valid promotion candidate.
 */
bool switchover_check_preferred_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* preferred,
                                       ServerVector* slaves_out, json_t** err_out)
bool switchover_check_preferred_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* preferred, json_t** err_out)
{
    ss_dassert(preferred);
    bool rval = true;
@@ -3380,20 +3466,6 @@ bool switchover_check_preferred_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER*
                             preferred->server->unique_name);
        rval = false;
    }
    for (MXS_MONITORED_SERVER *slave = mon->monitor->monitored_servers; slave; slave = slave->next)
    {
        if (slave != preferred)
        {
            // The update_slave_info()-call is not strictly necessary here, but it should be ran to keep this
            // function analogous with failover_select_new_master(). The later functions can then assume that
            // slave server info is up to date.
            MySqlServerInfo* slave_info = update_slave_info(mon, slave);
            if (slave_info && slaves_out)
            {
                slaves_out->push_back(slave);
            }
        }
    }
    return rval;
}
@@ -3459,7 +3531,8 @@ MXS_MONITORED_SERVER* select_new_master(MYSQL_MONITOR* mon, ServerVector* slaves
        // If a server cannot be connected to, it won't be considered for promotion or redirected.
        // Do not worry about the exclusion list yet, querying the excluded servers is ok.
        MySqlServerInfo* cand_info = update_slave_info(mon, cand);
        if (cand_info)
        // If master is replicating from external master, it is updated but not added to array.
        if (cand_info && cand != mon->master)
        {
            slaves_out->push_back(cand);
            // Check that server is not in the exclusion list while still being a valid choice.
@@ -3587,6 +3660,25 @@ bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_maste
    return rval;
}

bool start_external_replication(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master, json_t** err_out)
{
    bool rval = false;
    string change_cmd = generate_change_master_cmd(mon, mon->external_master_host, mon->external_master_port);
    if (mxs_mysql_query(new_master->con, change_cmd.c_str()) == 0 &&
        mxs_mysql_query(new_master->con, "START SLAVE;") == 0)
    {
        MXS_NOTICE("New master starting replication from external master %s:%d.",
                   mon->external_master_host, mon->external_master_port);
        rval = true;
    }
    else
    {
        PRINT_MXS_JSON_ERROR(err_out, "Could not start replication from external master: '%s'.",
                             mysql_error(new_master->con));
    }
    return rval;
}

/**
 * Prepares a server for the replication master role.
 *
@@ -3595,7 +3687,7 @@ bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_maste
 * @param err_out json object for error printing. Can be NULL.
 * @return True if successful
 */
bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out)
bool promote_new_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master, json_t** err_out)
{
    bool success = false;
    MXS_NOTICE("Promoting server '%s' to master.", new_master->server->unique_name);
@@ -3612,19 +3704,35 @@ bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out)
            }
        }
    }

    if (!success)
    {
        PRINT_MXS_JSON_ERROR(err_out, "Promotion failed: '%s'. Query: '%s'.",
                             mysql_error(new_master->con), query);
    }
    // If the previous master was a slave to an external master, start the equivalent slave connection on
    // the new master. Success of replication is not checked.
    else if (mon->external_master_port != PORT_UNKNOWN &&
             !start_external_replication(mon, new_master, err_out))
    {
        success = false;
    }
    return success;
}

string generate_change_master_cmd(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master)
/**
 * Generate a CHANGE MASTER TO-query.
 *
 * @param mon Cluster monitor, needed for username & password
 * @param master_host Master hostname/address
 * @param master_port Master port
 * @return Generated query
 */
string generate_change_master_cmd(MYSQL_MONITOR* mon, const string& master_host, int master_port)
{
    std::stringstream change_cmd;
    change_cmd << "CHANGE MASTER TO MASTER_HOST = '" << new_master->server->name << "', ";
    change_cmd << "MASTER_PORT = " << new_master->server->port << ", ";
    change_cmd << "CHANGE MASTER TO MASTER_HOST = '" << master_host << "', ";
    change_cmd << "MASTER_PORT = " << master_port << ", ";
    change_cmd << "MASTER_USE_GTID = current_pos, ";
    change_cmd << "MASTER_USER = '" << mon->replication_user << "', ";
    const char MASTER_PW[] = "MASTER_PASSWORD = '";
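A sketch of the statement the refactored generate_change_master_cmd() builds (illustrative only, not part of the commit; the credential values and the exact terminator after the password, which the hunk does not show, are assumptions):

#include <sstream>
#include <string>

std::string example_change_master_cmd(const std::string& master_host, int master_port,
                                      const std::string& repl_user, const std::string& repl_pw)
{
    std::stringstream change_cmd;
    change_cmd << "CHANGE MASTER TO MASTER_HOST = '" << master_host << "', ";
    change_cmd << "MASTER_PORT = " << master_port << ", ";
    change_cmd << "MASTER_USE_GTID = current_pos, ";
    change_cmd << "MASTER_USER = '" << repl_user << "', ";
    change_cmd << "MASTER_PASSWORD = '" << repl_pw << "';";
    return change_cmd.str();
}
// example_change_master_cmd("192.0.2.10", 3306, "repl", "secret") produces:
// CHANGE MASTER TO MASTER_HOST = '192.0.2.10', MASTER_PORT = 3306,
//   MASTER_USE_GTID = current_pos, MASTER_USER = 'repl', MASTER_PASSWORD = 'secret';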
@@ -3678,7 +3786,8 @@ int redirect_slaves(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master, const
                    ServerVector* redirected_slaves = NULL)
{
    MXS_NOTICE("Redirecting slaves to new master.");
    std::string change_cmd = generate_change_master_cmd(mon, new_master);
    std::string change_cmd = generate_change_master_cmd(mon,
                                                        new_master->server->name, new_master->server->port);
    int successes = 0;
    for (ServerVector::const_iterator iter = slaves.begin(); iter != slaves.end(); iter++)
    {
@@ -3763,7 +3872,7 @@ static bool do_failover(MYSQL_MONITOR* mon, json_t** err_out)
    seconds_remaining -= seconds_step2;

    // Step 3: Stop and reset slave, set read-only to 0.
    if (promote_new_master(new_master, err_out))
    if (promote_new_master(mon, new_master, err_out))
    {
        // Step 4: Redirect slaves.
        ServerVector redirected_slaves;
@@ -3776,8 +3885,20 @@ static bool do_failover(MYSQL_MONITOR* mon, json_t** err_out)

        // Step 5: Finally, add an event to the new master to advance gtid and wait for the slaves
        // to receive it. seconds_remaining can be 0 or less at this point. Even in such a case
        // wait_cluster_stabilization() may succeed if replication is fast enough.
        if (wait_cluster_stabilization(mon, new_master, redirected_slaves, seconds_remaining))
        // wait_cluster_stabilization() may succeed if replication is fast enough. If using external
        // replication, skip this step. Come up with an alternative later.
        if (mon->external_master_port != PORT_UNKNOWN)
        {
            MXS_WARNING("Replicating from external master, skipping final check.");
            rval = true;
        }
        else if (redirected_slaves.empty())
        {
            // No slaves to check. Assume success.
            rval = true;
            MXS_DEBUG("Failover: no slaves to redirect, skipping stabilization check.");
        }
        else if (wait_cluster_stabilization(mon, new_master, redirected_slaves, seconds_remaining))
        {
            rval = true;
            time_t step5_time = time(NULL);
@@ -3910,47 +4031,79 @@ static bool switchover_demote_master(MYSQL_MONITOR* mon,
                                     json_t** err_out)
{
    MXS_NOTICE("Demoting server '%s'.", current_master->server->unique_name);
    string error;
    bool success = false;
    const char* query = "SET GLOBAL read_only=1;";
    if (mxs_mysql_query(current_master->con, query) == 0)
    bool query_error = false;
    MYSQL* conn = current_master->con;
    const char* query = "";
    // The presence of an external master changes several things.
    const bool external_master = SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(current_master->server);

    if (external_master)
    {
        query = "FLUSH TABLES;";
        if (mxs_mysql_query(current_master->con, query) == 0)
        // First need to stop slave. read_only is probably on already, although not certain.
        query = "STOP SLAVE;";
        query_error = (mxs_mysql_query(conn, query) != 0);
        if (!query_error)
        {
            query = "FLUSH LOGS;";
            if (mxs_mysql_query(current_master->con, query) == 0)
            {
                query = "";
                if (update_gtids(mon, current_master, info))
                {
                    success = true;
                }
            }
        }
        if (!success)
        {
            // Somehow, a step after "SET read_only" failed. Try to set read_only back to 0. It may not
            // work since the connection is likely broken.
            error = mysql_error(current_master->con);
            mxs_mysql_query(current_master->con, "SET GLOBAL read_only=0;");
            query = "RESET SLAVE ALL;";
            query_error = (mxs_mysql_query(conn, query) != 0);
        }
    }
    else

    string error_desc;
    if (!query_error)
    {
        error = mysql_error(current_master->con);
        query = "SET GLOBAL read_only=1;";
        query_error = (mxs_mysql_query(conn, query) != 0);
        if (!query_error)
        {
            // If have external master, no writes are allowed so skip this step. It's not essential, just
            // adds one to gtid.
            if (!external_master)
            {
                query = "FLUSH TABLES;";
                query_error = (mxs_mysql_query(conn, query) != 0);
            }

            if (!query_error)
            {
                query = "FLUSH LOGS;";
                query_error = (mxs_mysql_query(conn, query) != 0);
                if (!query_error)
                {
                    query = "";
                    if (update_gtids(mon, current_master, info))
                    {
                        success = true;
                    }
                }
            }

            if (!success)
            {
                // Somehow, a step after "SET read_only" failed. Try to set read_only back to 0. It may not
                // work since the connection is likely broken.
                error_desc = mysql_error(conn);
                mxs_mysql_query(conn, "SET GLOBAL read_only=0;");
            }
        }
    }

    if (query_error)
    {
        error_desc = mysql_error(conn);
    }

    if (!success)
    {
        if (error.empty())
        if (error_desc.empty())
        {
            PRINT_MXS_JSON_ERROR(err_out, "Demotion failed due to an error in updating gtid:s.");
        }
        else
        {
            PRINT_MXS_JSON_ERROR(err_out, "Demotion failed due to a query error: '%s'. Query: '%s'.",
                                 error.c_str(), query);
                                 error_desc.c_str(), query);
        }
    }
    return success;
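A condensed summary of the demotion sequence implied by the hunk above (illustrative only, not part of the commit; error handling and rollback are omitted): with an external master the old master's own slave connection is stopped and discarded first, FLUSH TABLES is skipped because no writes are possible anyway, and on any failure after SET GLOBAL read_only=1 the flag is switched back off on a best-effort basis.

#include <string>
#include <vector>

// Ordered queries as listed in the hunk above.
std::vector<std::string> demotion_queries(bool external_master)
{
    std::vector<std::string> queries;
    if (external_master)
    {
        queries.push_back("STOP SLAVE;");       // stop replicating from the external master
        queries.push_back("RESET SLAVE ALL;");  // and forget that slave connection
    }
    queries.push_back("SET GLOBAL read_only=1;");
    if (!external_master)
    {
        // Skipped with an external master: it only adds one event to the gtid.
        queries.push_back("FLUSH TABLES;");
    }
    queries.push_back("FLUSH LOGS;");
    return queries;
}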
@@ -4075,16 +4228,15 @@ static bool switchover_wait_slaves_catchup(const ServerVector& slaves, const Gti
 * @return True if commands were accepted. This does not guarantee that replication proceeds
 * successfully.
 */
static bool switchover_start_slave(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* old_master,
                                   MXS_MONITORED_SERVER* new_master)
static bool switchover_start_slave(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* old_master, SERVER* new_master)
{
    bool rval = false;
    std::string change_cmd = generate_change_master_cmd(mon, new_master);
    std::string change_cmd = generate_change_master_cmd(mon, new_master->name, new_master->port);
    if (mxs_mysql_query(old_master->con, change_cmd.c_str()) == 0 &&
        mxs_mysql_query(old_master->con, "START SLAVE;") == 0)
    {
        MXS_NOTICE("Old master '%s' starting replication from '%s'.",
                   old_master->server->unique_name, new_master->server->unique_name);
                   old_master->server->unique_name, new_master->unique_name);
        rval = true;
    }
    else
@@ -4245,9 +4397,26 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
    ServerVector redirectable_slaves;
    if (new_master)
    {
        if (switchover_check_preferred_master(mon, new_master, &redirectable_slaves, err_out))
        if (switchover_check_preferred_master(mon, new_master, err_out))
        {
            promotion_target = new_master;
            /* User-given candidate is good. Update info on all slave servers.
             * The update_slave_info()-call is not strictly necessary here, but it should be run to keep this
             * path analogous with failover_select_new_master(). The latter functions can then assume that
             * slave server info is up to date.
             */
            for (MXS_MONITORED_SERVER* slave = mon->monitor->monitored_servers; slave; slave = slave->next)
            {
                if (slave != promotion_target)
                {
                    MySqlServerInfo* slave_info = update_slave_info(mon, slave);
                    // If master is replicating from external master, it is updated but not added to array.
                    if (slave_info && slave != current_master)
                    {
                        redirectable_slaves.push_back(slave);
                    }
                }
            }
        }
    }
    else
@@ -4262,7 +4431,7 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
    bool rval = false;
    MySqlServerInfo* curr_master_info = get_server_info(mon, demotion_target);

    // Step 2: Set read-only to on, flush logs.
    // Step 2: Set read-only to on, flush logs, update master gtid:s
    if (switchover_demote_master(mon, demotion_target, curr_master_info, err_out))
    {
        bool catchup_and_promote_success = false;
@@ -4281,12 +4450,12 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
        seconds_remaining -= seconds_step3;

        // Step 4: On new master STOP and RESET SLAVE, set read-only to off.
        if (promote_new_master(promotion_target, err_out))
        if (promote_new_master(mon, promotion_target, err_out))
        {
            catchup_and_promote_success = true;
            // Step 5: Redirect slaves and start replication on old master.
            ServerVector redirected_slaves;
            bool start_ok = switchover_start_slave(mon, demotion_target, promotion_target);
            bool start_ok = switchover_start_slave(mon, demotion_target, promotion_target->server);
            if (start_ok)
            {
                redirected_slaves.push_back(demotion_target);
@@ -4301,9 +4470,15 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
            seconds_remaining -= difftime(step5_time, step3_time);

            // Step 6: Finally, add an event to the new master to advance gtid and wait for the slaves
            // to receive it.
            if (wait_cluster_stabilization(mon, promotion_target, redirected_slaves,
                                           seconds_remaining))
            // to receive it. If using external replication, skip this step. Come up with an
            // alternative later.
            if (mon->external_master_port != PORT_UNKNOWN)
            {
                MXS_WARNING("Replicating from external master, skipping final check.");
                rval = true;
            }
            else if (wait_cluster_stabilization(mon, promotion_target, redirected_slaves,
                                                seconds_remaining))
            {
                rval = true;
                time_t step6_time = time(NULL);
@@ -4334,6 +4509,12 @@ static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_mast
                PRINT_MXS_JSON_ERROR(err_out, "Could not disable read_only on server %s: '%s'.",
                                     demotion_target->server->unique_name, mysql_error(demotion_target->con));
            }

            // Try to reactivate external replication if any.
            if (mon->external_master_port != PORT_UNKNOWN)
            {
                start_external_replication(mon, new_master, err_out);
            }
        }
    }
    return rval;
@@ -4517,18 +4698,18 @@ static bool get_joinable_servers(MYSQL_MONITOR* mon, ServerVector* output)
 */
static uint32_t do_rejoin(MYSQL_MONITOR* mon, const ServerVector& joinable_servers)
{
    MXS_MONITORED_SERVER* master = mon->master;
    SERVER* master = mon->master->server;
    uint32_t servers_joined = 0;
    if (!joinable_servers.empty())
    {
        string change_cmd = generate_change_master_cmd(mon, master);
        string change_cmd = generate_change_master_cmd(mon, master->name, master->port);
        for (ServerVector::const_iterator iter = joinable_servers.begin();
             iter != joinable_servers.end();
             iter++)
        {
            MXS_MONITORED_SERVER* joinable = *iter;
            const char* name = joinable->server->unique_name;
            const char* master_name = master->server->unique_name;
            const char* master_name = master->unique_name;
            MySqlServerInfo* redir_info = get_server_info(mon, joinable);

            bool op_success;

@@ -71,6 +71,8 @@ typedef struct
    bool verify_master_failure;   /**< Whether master failure is verified via slaves */
    int master_failure_timeout;   /**< Time in seconds to wait before doing failover */
    int64_t master_gtid_domain;   /**< Gtid domain currently used by the master */
    char external_master_host[MAX_SERVER_ADDRESS_LEN]; /**< External master host, for fail/switchover */
    int external_master_port;     /**< External master port */
    bool auto_rejoin;             /**< Attempt to start slave replication on standalone servers or servers
                                       replicating from the wrong master. */
    int n_excluded;               /**< Number of excluded servers */