Files
MaxScale/server/modules/routing/readwritesplit/rwsplit_select_backends.c
2017-04-05 11:35:13 +03:00

558 lines
17 KiB
C

/*
* Copyright (c) 2016 MariaDB Corporation Ab
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
*
* Change Date: 2019-07-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2 or later of the General
* Public License.
*/
#include "readwritesplit.h"
#include <stdio.h>
#include <strings.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <maxscale/router.h>
#include "rwsplit_internal.h"
/**
* @file rwsplit_select_backends.c The functions that implement back end
* selection for the read write split router. All of these functions are
* internal to that router and not intended to be called from elsewhere.
*
* @verbatim
* Revision History
*
* Date Who Description
* 08/08/2016 Martin Brampton Initial implementation
*
* @endverbatim
*/
static bool connect_server(backend_ref_t *bref, MXS_SESSION *session, bool execute_history);
static void log_server_connections(select_criteria_t select_criteria,
backend_ref_t *backend_ref, int router_nservers);
static SERVER_REF *get_root_master(backend_ref_t *servers, int router_nservers);
static int bref_cmp_global_conn(const void *bref1, const void *bref2);
static int bref_cmp_router_conn(const void *bref1, const void *bref2);
static int bref_cmp_behind_master(const void *bref1, const void *bref2);
static int bref_cmp_current_load(const void *bref1, const void *bref2);
/**
* The order of functions _must_ match with the order the select criteria are
* listed in select_criteria_t definition in readwritesplit.h
*/
int (*criteria_cmpfun[LAST_CRITERIA])(const void *, const void *) =
{
NULL,
bref_cmp_global_conn,
bref_cmp_router_conn,
bref_cmp_behind_master,
bref_cmp_current_load
};
/**
* @brief Check whether it's possible to connect to this server
*
* @param bref Backend reference
* @return True if a connection to this server can be attempted
*/
static bool bref_valid_for_connect(const backend_ref_t *bref)
{
return !BREF_HAS_FAILED(bref) && SERVER_IS_RUNNING(bref->ref->server);
}
/**
* Check whether it's possible to use this server as a slave
*
* @param bref Backend reference
* @param master_host The master server
* @return True if this server is a valid slave candidate
*/
static bool bref_valid_for_slave(const backend_ref_t *bref, const SERVER *master_host)
{
SERVER *server = bref->ref->server;
return (SERVER_IS_SLAVE(server) || SERVER_IS_RELAY_SERVER(server)) &&
(master_host == NULL || (server != master_host));
}
/**
* @brief Find the best slave candidate
*
* This function iterates through @c bref and tries to find the best backend
* reference that is not in use. @c cmpfun will be called to compare the backends.
*
* @param bref Backend reference
* @param n Size of @c bref
* @param master The master server
* @param cmpfun qsort() compatible comparison function
* @return The best slave backend reference or NULL if no candidates could be found
*/
backend_ref_t* get_slave_candidate(backend_ref_t *bref, int n, const SERVER *master,
int (*cmpfun)(const void *, const void *))
{
backend_ref_t *candidate = NULL;
for (int i = 0; i < n; i++)
{
if (!BREF_IS_IN_USE(&bref[i]) &&
bref_valid_for_connect(&bref[i]) &&
bref_valid_for_slave(&bref[i], master))
{
if (candidate)
{
if (cmpfun(candidate, &bref[i]) > 0)
{
candidate = &bref[i];
}
}
else
{
candidate = &bref[i];
}
}
}
return candidate;
}
/**
* @brief Search suitable backend servers from those of router instance
*
* It is assumed that there is only one master among servers of a router instance.
* As a result, the first master found is chosen. There will possibly be more
* backend references than connected backends because only those in correct state
* are connected to.
*
* @param p_master_ref Pointer to location where master's backend reference is to be stored
* @param backend_ref Pointer to backend server reference object array
* @param router_nservers Number of backend server pointers pointed to by @p backend_ref
* @param max_nslaves Upper limit for the number of slaves
* @param max_slave_rlag Maximum allowed replication lag for any slave
* @param select_criteria Slave selection criteria
* @param session Client session
* @param router Router instance
* @return true, if at least one master and one slave was found.
*/
bool select_connect_backend_servers(backend_ref_t **p_master_ref,
backend_ref_t *backend_ref,
int router_nservers, int max_nslaves,
int max_slave_rlag,
select_criteria_t select_criteria,
MXS_SESSION *session,
ROUTER_INSTANCE *router,
bool active_session)
{
if (p_master_ref == NULL || backend_ref == NULL)
{
MXS_ERROR("Master reference (%p) or backend reference (%p) is NULL.",
p_master_ref, backend_ref);
ss_dassert(false);
return false;
}
/* get the root Master */
SERVER_REF *master_backend = get_root_master(backend_ref, router_nservers);
SERVER *master_host = master_backend ? master_backend->server : NULL;
if (router->rwsplit_config.master_failure_mode == RW_FAIL_INSTANTLY &&
(master_host == NULL || SERVER_IS_DOWN(master_host)))
{
MXS_ERROR("Couldn't find suitable Master from %d candidates.", router_nservers);
return false;
}
/**
* New session:
*
* Connect to both master and slaves
*
* Existing session:
*
* Master is already connected or we don't have a master. The function was
* called because new slaves must be selected to replace failed ones.
*/
bool master_connected = active_session || *p_master_ref != NULL;
/** Check slave selection criteria and set compare function */
int (*p)(const void *, const void *) = criteria_cmpfun[select_criteria];
ss_dassert(p);
SERVER *old_master = *p_master_ref ? (*p_master_ref)->ref->server : NULL;
if (MXS_LOG_PRIORITY_IS_ENABLED(LOG_INFO))
{
log_server_connections(select_criteria, backend_ref, router_nservers);
}
int slaves_found = 0;
int slaves_connected = 0;
const int min_nslaves = 0; /*< not configurable at the time */
bool succp = false;
if (!master_connected)
{
/** Find a master server */
for (int i = 0; i < router_nservers; i++)
{
SERVER *serv = backend_ref[i].ref->server;
if (bref_valid_for_connect(&backend_ref[i]) &&
master_host && serv == master_host)
{
if (connect_server(&backend_ref[i], session, false))
{
*p_master_ref = &backend_ref[i];
break;
}
}
}
}
/** Calculate how many connections we already have */
for (int i = 0; i < router_nservers; i++)
{
if (bref_valid_for_connect(&backend_ref[i]) &&
bref_valid_for_slave(&backend_ref[i], master_host))
{
slaves_found += 1;
if (BREF_IS_IN_USE(&backend_ref[i]))
{
slaves_connected += 1;
}
}
}
ss_dassert(slaves_connected < max_nslaves || max_nslaves == 0);
backend_ref_t *bref = get_slave_candidate(backend_ref, router_nservers, master_host, p);
/** Connect to all possible slaves */
while (bref && slaves_connected < max_nslaves)
{
if (connect_server(bref, session, true))
{
slaves_connected += 1;
}
else
{
/** Failed to connect, mark server as failed */
bref_set_state(bref, BREF_FATAL_FAILURE);
}
bref = get_slave_candidate(backend_ref, router_nservers, master_host, p);
}
/**
* Successful cases
*/
if (slaves_connected >= min_nslaves && slaves_connected <= max_nslaves)
{
succp = true;
if (MXS_LOG_PRIORITY_IS_ENABLED(LOG_INFO))
{
if (slaves_connected < max_nslaves)
{
MXS_INFO("Couldn't connect to maximum number of "
"slaves. Connected successfully to %d slaves "
"of %d of them.", slaves_connected, slaves_found);
}
for (int i = 0; i < router_nservers; i++)
{
if (BREF_IS_IN_USE((&backend_ref[i])))
{
MXS_INFO("Selected %s in \t[%s]:%d",
STRSRVSTATUS(backend_ref[i].ref->server),
backend_ref[i].ref->server->name,
backend_ref[i].ref->server->port);
}
} /* for */
}
}
/** Failure cases */
else
{
MXS_ERROR("Couldn't establish required amount of slave connections for "
"router session. Would need between %d and %d slaves but only have %d.",
min_nslaves, max_nslaves, slaves_connected);
/** Clean up connections */
for (int i = 0; i < router_nservers; i++)
{
if (BREF_IS_IN_USE((&backend_ref[i])))
{
ss_dassert(backend_ref[i].ref->connections > 0);
close_failed_bref(&backend_ref[i], true);
/** Decrease backend's connection counter. */
atomic_add(&backend_ref[i].ref->connections, -1);
RW_CHK_DCB(&backend_ref[i], backend_ref[i].bref_dcb);
dcb_close(backend_ref[i].bref_dcb);
RW_CLOSE_BREF(&backend_ref[i]);
}
}
}
return succp;
}
/** Compare number of connections from this router in backend servers */
static int bref_cmp_router_conn(const void *bref1, const void *bref2)
{
SERVER_REF *b1 = ((backend_ref_t *)bref1)->ref;
SERVER_REF *b2 = ((backend_ref_t *)bref2)->ref;
if (b1->weight == 0 && b2->weight == 0)
{
return b1->connections - b2->connections;
}
else if (b1->weight == 0)
{
return 1;
}
else if (b2->weight == 0)
{
return -1;
}
return ((1000 + 1000 * b1->connections) / b1->weight) -
((1000 + 1000 * b2->connections) / b2->weight);
}
/** Compare number of global connections in backend servers */
static int bref_cmp_global_conn(const void *bref1, const void *bref2)
{
SERVER_REF *b1 = ((backend_ref_t *)bref1)->ref;
SERVER_REF *b2 = ((backend_ref_t *)bref2)->ref;
if (b1->weight == 0 && b2->weight == 0)
{
return b1->server->stats.n_current -
b2->server->stats.n_current;
}
else if (b1->weight == 0)
{
return 1;
}
else if (b2->weight == 0)
{
return -1;
}
return ((1000 + 1000 * b1->server->stats.n_current) / b1->weight) -
((1000 + 1000 * b2->server->stats.n_current) / b2->weight);
}
/** Compare replication lag between backend servers */
static int bref_cmp_behind_master(const void *bref1, const void *bref2)
{
SERVER_REF *b1 = ((backend_ref_t *)bref1)->ref;
SERVER_REF *b2 = ((backend_ref_t *)bref2)->ref;
if (b1->weight == 0 && b2->weight == 0)
{
return b1->server->rlag -
b2->server->rlag;
}
else if (b1->weight == 0)
{
return 1;
}
else if (b2->weight == 0)
{
return -1;
}
return ((1000 + 1000 * b1->server->rlag) / b1->weight) -
((1000 + 1000 * b2->server->rlag) / b2->weight);
}
/** Compare number of current operations in backend servers */
static int bref_cmp_current_load(const void *bref1, const void *bref2)
{
SERVER_REF *b1 = ((backend_ref_t *)bref1)->ref;
SERVER_REF *b2 = ((backend_ref_t *)bref2)->ref;
if (b1->weight == 0 && b2->weight == 0)
{
return b1->server->stats.n_current_ops - b2->server->stats.n_current_ops;
}
else if (b1->weight == 0)
{
return 1;
}
else if (b2->weight == 0)
{
return -1;
}
return ((1000 + 1000 * b1->server->stats.n_current_ops) / b1->weight) -
((1000 + 1000 * b2->server->stats.n_current_ops) / b2->weight);
}
/**
* @brief Connect a server
*
* Connects to a server, adds callbacks to the created DCB and updates
* router statistics. If @p execute_history is true, the session command
* history will be executed on this server.
*
* @param b Router's backend structure for the server
* @param session Client's session object
* @param execute_history Execute session command history
* @return True if successful, false if an error occurred
*/
static bool connect_server(backend_ref_t *bref, MXS_SESSION *session, bool execute_history)
{
SERVER *serv = bref->ref->server;
bool rval = false;
bref->bref_dcb = dcb_connect(serv, session, serv->protocol);
if (bref->bref_dcb != NULL)
{
bref_clear_state(bref, BREF_CLOSED);
bref->closed_at = 0;
if (!execute_history || execute_sescmd_history(bref))
{
bref->bref_state = 0;
bref_set_state(bref, BREF_IN_USE);
atomic_add(&bref->ref->connections, 1);
rval = true;
}
else
{
MXS_ERROR("Failed to execute session command in %s ([%s]:%d). See earlier "
"errors for more details.",
bref->ref->server->unique_name,
bref->ref->server->name,
bref->ref->server->port);
RW_CHK_DCB(bref, bref->bref_dcb);
dcb_close(bref->bref_dcb);
RW_CLOSE_BREF(bref);
bref->bref_dcb = NULL;
}
}
else
{
MXS_ERROR("Unable to establish connection with server [%s]:%d",
serv->name, serv->port);
}
return rval;
}
/**
* @brief Log server connections
*
* @param select_criteria Slave selection criteria
* @param backend_ref Backend reference array
* @param router_nservers Number of backends in @p backend_ref
*/
static void log_server_connections(select_criteria_t select_criteria,
backend_ref_t *backend_ref, int router_nservers)
{
if (select_criteria == LEAST_GLOBAL_CONNECTIONS ||
select_criteria == LEAST_ROUTER_CONNECTIONS ||
select_criteria == LEAST_BEHIND_MASTER ||
select_criteria == LEAST_CURRENT_OPERATIONS)
{
MXS_INFO("Servers and %s connection counts:",
select_criteria == LEAST_GLOBAL_CONNECTIONS ? "all MaxScale"
: "router");
for (int i = 0; i < router_nservers; i++)
{
SERVER_REF *b = backend_ref[i].ref;
switch (select_criteria)
{
case LEAST_GLOBAL_CONNECTIONS:
MXS_INFO("MaxScale connections : %d in \t[%s]:%d %s",
b->server->stats.n_current, b->server->name,
b->server->port, STRSRVSTATUS(b->server));
break;
case LEAST_ROUTER_CONNECTIONS:
MXS_INFO("RWSplit connections : %d in \t[%s]:%d %s",
b->connections, b->server->name,
b->server->port, STRSRVSTATUS(b->server));
break;
case LEAST_CURRENT_OPERATIONS:
MXS_INFO("current operations : %d in \t[%s]:%d %s",
b->server->stats.n_current_ops,
b->server->name, b->server->port,
STRSRVSTATUS(b->server));
break;
case LEAST_BEHIND_MASTER:
MXS_INFO("replication lag : %d in \t[%s]:%d %s",
b->server->rlag, b->server->name,
b->server->port, STRSRVSTATUS(b->server));
default:
break;
}
}
}
}
/********************************
* This routine returns the root master server from MySQL replication tree
* Get the root Master rule:
*
* find server with the lowest replication depth level
* and the SERVER_MASTER bitval
* Servers are checked even if they are in 'maintenance'
*
* @param servers The list of servers
* @param router_nservers The number of servers
* @return The Master found
*
*/
static SERVER_REF *get_root_master(backend_ref_t *servers, int router_nservers)
{
int i = 0;
SERVER_REF *master_host = NULL;
for (i = 0; i < router_nservers; i++)
{
if (servers[i].ref == NULL)
{
/** This should not happen */
ss_dassert(false);
continue;
}
SERVER_REF *b = servers[i].ref;
if (SERVER_IS_MASTER(b->server))
{
if (master_host == NULL ||
(b->server->depth < master_host->server->depth))
{
master_host = b;
}
}
}
return master_host;
}