Files
MaxScale/server/modules/monitor/auroramon/auroramon.c
ekorh475 259e944b3d Server status changes now happen under a lock
MXS-873 To prevent monitors and MaxAdmin from interfering with each other,
changes to the server status flags now happen under a lock. To avoid
interfering with monitor logic, the monitors now acquire locks to all
of their servers at the start of the monitor loop and release them
before sleeping.
2016-12-12 15:04:05 +02:00

386 lines
10 KiB
C

/*
* Copyright (c) 2016 MariaDB Corporation Ab
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file and at www.mariadb.com/bsl.
*
* Change Date: 2019-07-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2 or later of the General
* Public License.
*/
/**
* @file auroramon.c - Amazon RDS Aurora monitor
*/
#include <maxscale/modinfo.h>
#include <maxscale/thread.h>
#include <maxscale/monitor.h>
#include <mysqld_error.h>
#include <maxscale/alloc.h>
#include <maxscale/debug.h>
/** Version string handed out by the version() entry point of this module */
static char *version_str = (char*)"V1.0.0";
/*lint -e14 */
/**
 * Module information record for the Aurora monitor.
 *
 * The initializer is positional; the field names live in the MODULE_INFO
 * declaration, which is not visible in this file. Judging by the constant
 * names the values are, in order: module API type, release status, monitor
 * API version and a human readable description -- TODO confirm against
 * maxscale/modinfo.h.
 */
MODULE_INFO info =
{
MODULE_API_MONITOR,
MODULE_BETA_RELEASE,
MONITOR_VERSION,
"Aurora monitor"
};
/*lint +e14 */
/**
 * Per-monitor state of the Aurora monitor. An instance is allocated by
 * startMonitor() and released with auroramon_free().
 */
typedef struct aurora_monitor
{
    /** Set to true by stopMonitor(); the monitoring loop exits once it sees it */
    bool shutdown;

    /** Monitor thread created by startMonitor() and joined by stopMonitor() */
    THREAD thread;

    /** Script launched on enabled events, NULL if no script is configured */
    char* script;

    /** Per-event flags, indexed by event type; true means the event triggers the script */
    bool events[MAX_MONITOR_EVENT];
} AURORA_MONITOR;
/**
 * Mandatory version entry point of the module.
 *
 * @return The module's version string (statically allocated, must not be freed)
 */
/*lint -e14 */
char *version()
{
    return version_str;
}
/**
 * Module initialisation entry point, invoked when the module is first loaded.
 *
 * The Aurora monitor has no global state to set up, so this does nothing.
 */
/*lint -e14 */
void ModuleInit()
{
    /* Intentionally empty */
}
/*lint +e14 */
/**
 * @brief Update the status of a server
 *
 * Servers in maintenance mode are left untouched. For all other servers the
 * new status is assembled in a temporary SERVER copy: the RUNNING, MASTER,
 * SLAVE and AUTH_ERROR bits are cleared, the server is connected to (or
 * pinged) and, if that succeeds, it is marked as running and queried for its
 * replication role. The assembled status is finally applied to the real
 * server with server_transfer_status().
 *
 * The previous status of the real server is stored in
 * database->mon_prev_status before it is modified.
 *
 * @param monitor  Monitor object
 * @param database Server whose status should be updated
 */
void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database)
{
    if (SERVER_IN_MAINT(database->server))
    {
        return;
    }

    /** Work on a copy so that the real server status changes only once */
    SERVER temp_server = {.status = database->server->status};
    server_clear_status_nolock(&temp_server, SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE | SERVER_AUTH_ERROR);
    database->mon_prev_status = database->server->status;

    /** Try to connect to or ping the database */
    connect_result_t rval = mon_connect_to_db(monitor, database);

    if (rval == MONITOR_CONN_OK)
    {
        server_set_status_nolock(&temp_server, SERVER_RUNNING);
        MYSQL_RES *result;

        /** Connection is OK, query for replica status */
        if (mysql_query(database->con, "SELECT @@aurora_server_id, server_id FROM "
                        "information_schema.replica_host_status "
                        "WHERE session_id = 'MASTER_SESSION_ID'") == 0 &&
            (result = mysql_store_result(database->con)))
        {
            ss_dassert(mysql_field_count(database->con) == 2);
            MYSQL_ROW row = mysql_fetch_row(result);
            int status = SERVER_SLAVE;

            /**
             * The master will return a row with two identical non-NULL fields.
             * mysql_fetch_row() returns NULL for an empty result set, so the
             * row itself must be checked before its fields are read; a server
             * that returns no row is treated as a slave.
             */
            if (row && row[0] && row[1] && strcmp(row[0], row[1]) == 0)
            {
                status = SERVER_MASTER;
            }

            server_set_status_nolock(&temp_server, status);
            mysql_free_result(result);
        }
        else
        {
            /** The server is reachable but its role is unknown: only RUNNING stays set */
            MXS_ERROR("Failed to query server %s (%s:%d): %d, %s",
                      database->server->unique_name, database->server->name,
                      database->server->port, mysql_errno(database->con),
                      mysql_error(database->con));
        }
    }
    else
    {
        /** Failed to connect to the database */
        if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
        {
            server_set_status_nolock(&temp_server, SERVER_AUTH_ERROR);
        }

        if (mon_status_changed(database) && mon_print_fail_status(database))
        {
            mon_log_connect_error(database, rval);
        }
    }

    server_transfer_status(database->server, &temp_server);
}
/**
 * @brief Tell whether the Aurora monitor reacts to a monitor event
 *
 * The event is looked up in a fixed table of handled event types.
 *
 * @param event Event to check
 * @return True if the event is one of the handled types, false otherwise
 */
bool is_aurora_event(monitor_event_t event)
{
    /** Handled event types; MAX_MONITOR_EVENT terminates the table */
    static monitor_event_t handled_events[] =
    {
        MASTER_DOWN_EVENT,
        MASTER_UP_EVENT,
        SLAVE_DOWN_EVENT,
        SLAVE_UP_EVENT,
        SERVER_DOWN_EVENT,
        SERVER_UP_EVENT,
        LOST_MASTER_EVENT,
        LOST_SLAVE_EVENT,
        NEW_MASTER_EVENT,
        NEW_SLAVE_EVENT,
        MAX_MONITOR_EVENT
    };

    bool handled = false;
    const monitor_event_t *cursor = handled_events;

    while (!handled && *cursor != MAX_MONITOR_EVENT)
    {
        handled = (*cursor == event);
        cursor++;
    }

    return handled;
}
/**
 * @brief Main monitoring loop
 *
 * Runs until the shutdown flag of the monitor handle is set. Every round
 * locks the monitored servers, refreshes the status of each one, hangs up
 * the DCBs of servers that are down, logs handled state changes (launching
 * the configured script for enabled events), releases the servers and then
 * sleeps for the monitor interval in MON_BASE_INTERVAL_MS steps.
 *
 * @param arg The MONITOR object for this monitor
 */
static void
monitorMain(void *arg)
{
    MONITOR *mon = (MONITOR*)arg;
    AURORA_MONITOR *aurora = mon->handle;

    if (mysql_thread_init() != 0)
    {
        MXS_ERROR("mysql_thread_init failed in Aurora monitor. Exiting.");
        return;
    }

    while (!aurora->shutdown)
    {
        MONITOR_SERVERS *srv;

        lock_monitor_servers(mon);

        /** First pass: refresh the status of every server */
        for (srv = mon->databases; srv != NULL; srv = srv->next)
        {
            update_server_status(mon, srv);

            if (SERVER_IS_DOWN(srv->server))
            {
                /** Hang up all DCBs connected to the failed server */
                dcb_hangup_foreach(srv->server);
            }
        }

        /**
         * Second pass: with all statuses up to date, check if monitor events
         * need to be launched.
         *
         * TODO: Move this functionality into monitor.c, it is duplicated in
         * every monitor.
         */
        for (srv = mon->databases; srv != NULL; srv = srv->next)
        {
            if (!mon_status_changed(srv))
            {
                continue;
            }

            monitor_event_t evtype = mon_get_event_type(srv);

            if (!is_aurora_event(evtype))
            {
                continue;
            }

            mon_log_state_change(srv);

            if (aurora->script && aurora->events[evtype])
            {
                monitor_launch_script(mon, srv, aurora->script);
            }
        }

        release_monitor_servers(mon);

        /** Sleep in short steps so that a shutdown request is noticed quickly */
        for (int slept_ms = 0;
             slept_ms < mon->interval && !aurora->shutdown;
             slept_ms += MON_BASE_INTERVAL_MS)
        {
            thread_millisleep(MON_BASE_INTERVAL_MS);
        }
    }

    mysql_thread_end();
}
/**
 * Release a monitor handle together with the script string it owns.
 *
 * @param handle Handle to free, may be NULL in which case nothing is done
 */
static void auroramon_free(AURORA_MONITOR *handle)
{
    if (handle == NULL)
    {
        return;
    }

    MXS_FREE(handle->script);
    MXS_FREE(handle);
}
/**
 * @brief Start the monitor
 *
 * If the monitor already has a handle, it is reused as-is and only its
 * shutdown flag is reset; the configuration parameters are not read again.
 * Otherwise a new handle is allocated and configured from the "script" and
 * "events" parameters, and the monitor user's permissions are checked.
 * In both cases the monitoring thread is started at the end.
 *
 * @param mon    The MONITOR structure for this monitor
 * @param params The configuration parameters for this monitor
 * @return Monitor handle, or NULL if the monitor could not be started
 */
static void *
startMonitor(MONITOR *mon, const CONFIG_PARAMETER *params)
{
    AURORA_MONITOR *handle = mon->handle;

    if (handle != NULL)
    {
        handle->shutdown = false;
    }
    else
    {
        handle = (AURORA_MONITOR *) MXS_MALLOC(sizeof(AURORA_MONITOR));

        if (handle == NULL)
        {
            return NULL;
        }

        bool events_given = false;
        bool config_error = false;

        handle->shutdown = false;
        handle->script = NULL;
        memset(handle->events, false, sizeof(handle->events));

        for (const CONFIG_PARAMETER *param = params; param != NULL; param = param->next)
        {
            if (strcmp(param->name, "script") == 0)
            {
                if (externcmd_can_execute(param->value))
                {
                    handle->script = MXS_STRDUP_A(param->value);
                }
                else
                {
                    config_error = true;
                }
            }
            else if (strcmp(param->name, "events") == 0)
            {
                if (mon_parse_event_string(handle->events, sizeof(handle->events), param->value) == 0)
                {
                    events_given = true;
                }
                else
                {
                    config_error = true;
                }
            }
        }

        /** Verify the monitor user with the same query the monitoring loop uses */
        if (!check_monitor_permissions(mon, "SELECT @@aurora_server_id, server_id FROM "
                                       "information_schema.replica_host_status "
                                       "WHERE session_id = 'MASTER_SESSION_ID'"))
        {
            MXS_ERROR("Failed to start monitor. See earlier errors for more information.");
            auroramon_free(handle);
            return NULL;
        }

        if (config_error)
        {
            MXS_ERROR("Errors were found in the script configuration parameters "
                      "for the monitor '%s'.", mon->name);
            auroramon_free(handle);
            return NULL;
        }

        /** If no specific events are given, enable them all */
        if (!events_given)
        {
            memset(handle->events, true, sizeof(handle->events));
        }
    }

    if (thread_start(&handle->thread, monitorMain, mon) == NULL)
    {
        MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
        auroramon_free(handle);
        return NULL;
    }

    return handle;
}
/**
 * Stop a running monitor
 *
 * Raises the shutdown flag of the monitor handle and then waits for the
 * monitor thread to finish.
 *
 * @param mon The monitor to stop; its handle must refer to a running monitor
 */
static void
stopMonitor(MONITOR *mon)
{
    AURORA_MONITOR *aurora = mon->handle;

    aurora->shutdown = true;
    thread_wait(aurora->thread);
}
/**
 * Diagnostic interface
 *
 * The Aurora monitor currently prints no diagnostic output.
 *
 * @param dcb DCB to send output
 * @param mon The monitor
 */
static void
diagnostics(DCB *dcb, const MONITOR *mon)
{
    /* Both parameters are deliberately unused */
    (void)dcb;
    (void)mon;
}
/**
 * Entry points of this monitor, returned to the caller by GetModuleObject().
 *
 * The initializer is positional: start, stop and diagnostics functions, in
 * that order. The matching field names are declared with MONITOR_OBJECT,
 * which is not visible in this file.
 */
static MONITOR_OBJECT MyObject =
{
startMonitor,
stopMonitor,
diagnostics
};
/**
 * The module entry point routine. It hands out the "module object": the
 * structure that holds the set of external entry points of this module.
 *
 * @return Pointer to the statically allocated module object
 */
MONITOR_OBJECT *
GetModuleObject()
{
    MONITOR_OBJECT *module_object = &MyObject;

    return module_object;
}
/*lint +e14 */