readwritesplit.c: prevent switching the master during session. Added logging to cases where master has changed.
Moved DCB's member errhandle_called behing DEBUG flags to Release build. It shows if handleError is called for a DCB and makes it possible to avoid redundant calls.
This commit is contained in:
parent
3b07449daa
commit
62270412cf
@ -121,8 +121,8 @@ DCB *rval;
|
||||
#if defined(SS_DEBUG)
|
||||
rval->dcb_chk_top = CHK_NUM_DCB;
|
||||
rval->dcb_chk_tail = CHK_NUM_DCB;
|
||||
rval->dcb_errhandle_called = false;
|
||||
#endif
|
||||
rval->dcb_errhandle_called = false;
|
||||
rval->dcb_role = role;
|
||||
spinlock_init(&rval->dcb_initlock);
|
||||
spinlock_init(&rval->writeqlock);
|
||||
|
@ -205,9 +205,9 @@ typedef struct dcb_callback {
|
||||
typedef struct dcb {
|
||||
#if defined(SS_DEBUG)
|
||||
skygw_chk_t dcb_chk_top;
|
||||
bool dcb_errhandle_called;
|
||||
#endif
|
||||
dcb_role_t dcb_role;
|
||||
bool dcb_errhandle_called; /*< this can be called only once */
|
||||
dcb_role_t dcb_role;
|
||||
SPINLOCK dcb_initlock;
|
||||
DCBEVENTQ evq; /**< The event queue for this DCB */
|
||||
int fd; /**< The descriptor */
|
||||
|
@ -828,6 +828,17 @@ static void handleError(
|
||||
SESSION *session = backend_dcb->session;
|
||||
session_state_t sesstate;
|
||||
|
||||
/** Don't handle same error twice on same DCB */
|
||||
if (backend_dcb->dcb_errhandle_called)
|
||||
{
|
||||
/** we optimistically assume that previous call succeed */
|
||||
*succp = true;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
backend_dcb->dcb_errhandle_called = true;
|
||||
}
|
||||
spinlock_acquire(&session->ses_lock);
|
||||
sesstate = session->state;
|
||||
client_dcb = session->client;
|
||||
|
@ -1029,6 +1029,9 @@ static void freeSession(
|
||||
|
||||
/**
|
||||
* Provide the router with a pointer to a suitable backend dcb.
|
||||
*
|
||||
* As of Nov. 2014, slave which has least connections is always chosen.
|
||||
*
|
||||
* Detect failures in server statuses and reselect backends if necessary.
|
||||
* If name is specified, server name becomes primary selection criteria.
|
||||
*
|
||||
@ -2074,14 +2077,11 @@ static int routeQuery(
|
||||
"route to master "
|
||||
"but couldn't find "
|
||||
"master in a "
|
||||
"suitable state "
|
||||
"failed.")));
|
||||
"suitable state.")));
|
||||
}
|
||||
/**
|
||||
* Master has changed. Set the dcb pointer NULL and
|
||||
* return with error indicator.
|
||||
* Master has changed. Return with error indicator.
|
||||
*/
|
||||
router_cli_ses->rses_master_ref->bref_dcb = NULL;
|
||||
rses_end_locked_router_action(router_cli_ses);
|
||||
succp = false;
|
||||
ret = 0;
|
||||
@ -2655,7 +2655,7 @@ static bool select_connect_backend_servers(
|
||||
const int min_nslaves = 0; /*< not configurable at the time */
|
||||
bool is_synced_master;
|
||||
int (*p)(const void *, const void *);
|
||||
BACKEND* master_host = NULL;
|
||||
BACKEND* master_host;
|
||||
|
||||
if (p_master_ref == NULL || backend_ref == NULL)
|
||||
{
|
||||
@ -2667,46 +2667,32 @@ static bool select_connect_backend_servers(
|
||||
/* get the root Master */
|
||||
master_host = get_root_master(backend_ref, router_nservers);
|
||||
|
||||
/**
|
||||
* Master is already chosen and connected. It means that the function
|
||||
* was called from error handling function or from some other similar
|
||||
* function where session was already established but new slaves needed
|
||||
* to be selected.
|
||||
/**
|
||||
* Existing session : master is already chosen and connected.
|
||||
* The function was called because new slave must be selected to replace
|
||||
* failed one.
|
||||
*/
|
||||
if (*p_master_ref != NULL &&
|
||||
BREF_IS_IN_USE((*p_master_ref)))
|
||||
{
|
||||
LOGIF(LD, (skygw_log_write(
|
||||
LOGFILE_DEBUG,
|
||||
"%lu [select_connect_backend_servers] Master %p fd %d found.",
|
||||
pthread_self(),
|
||||
(*p_master_ref)->bref_dcb,
|
||||
(*p_master_ref)->bref_dcb->fd)));
|
||||
|
||||
master_found = true;
|
||||
master_connected = true;
|
||||
|
||||
if (*p_master_ref != NULL)
|
||||
{
|
||||
/**
|
||||
* Ensure that *p_master_ref and master_host point to same backend
|
||||
* and it has a master role.
|
||||
* Ensure that backend reference is in use, stored master is
|
||||
* still current root master.
|
||||
*/
|
||||
ss_dassert(master_host &&
|
||||
((*p_master_ref)->bref_backend->backend_server ==
|
||||
master_host->backend_server) &&
|
||||
(master_host->backend_server->status &
|
||||
(SERVER_MASTER|SERVER_MAINT)) == SERVER_MASTER);
|
||||
}
|
||||
/** New session or master failure case */
|
||||
if (!BREF_IS_IN_USE((*p_master_ref)) ||
|
||||
!SERVER_IS_MASTER((*p_master_ref)->bref_backend->backend_server) ||
|
||||
master_host != (*p_master_ref)->bref_backend)
|
||||
{
|
||||
succp = false;
|
||||
goto return_succp;
|
||||
}
|
||||
master_found = true;
|
||||
master_connected = true;
|
||||
}
|
||||
/**
|
||||
* New session : select master and slaves
|
||||
*/
|
||||
else
|
||||
{
|
||||
LOGIF(LD, (skygw_log_write(
|
||||
LOGFILE_DEBUG,
|
||||
"%lu [select_connect_backend_servers] Session %p doesn't "
|
||||
"currently have a master chosen. Proceeding to master "
|
||||
"selection.",
|
||||
pthread_self(),
|
||||
session)));
|
||||
|
||||
{
|
||||
master_found = false;
|
||||
master_connected = false;
|
||||
}
|
||||
@ -2745,11 +2731,6 @@ static bool select_connect_backend_servers(
|
||||
b->backend_conn_count)));
|
||||
}
|
||||
#endif
|
||||
/* assert with master_host */
|
||||
ss_dassert(!master_connected ||
|
||||
(master_host &&
|
||||
((*p_master_ref)->bref_backend->backend_server == master_host->backend_server) &&
|
||||
SERVER_MASTER));
|
||||
/**
|
||||
* Sort the pointer list to servers according to connection counts. As
|
||||
* a consequence those backends having least connections are in the
|
||||
@ -2903,6 +2884,9 @@ static bool select_connect_backend_servers(
|
||||
else if (master_host &&
|
||||
(b->backend_server == master_host->backend_server))
|
||||
{
|
||||
/** not allowed to replace old master with new */
|
||||
ss_dassert(*p_master_ref == NULL);
|
||||
|
||||
*p_master_ref = &backend_ref[i];
|
||||
|
||||
if (master_connected)
|
||||
@ -4072,7 +4056,6 @@ static void rwsplit_process_router_options(
|
||||
* Even if succp == true connecting to new slave may have failed. succp is to
|
||||
* tell whether router has enough master/slave connections to continue work.
|
||||
*/
|
||||
|
||||
static void handleError (
|
||||
ROUTER* instance,
|
||||
void* router_session,
|
||||
@ -4086,10 +4069,17 @@ static void handleError (
|
||||
ROUTER_CLIENT_SES* rses = (ROUTER_CLIENT_SES *)router_session;
|
||||
|
||||
CHK_DCB(backend_dcb);
|
||||
#if defined(SS_DEBUG)
|
||||
ss_dassert(!backend_dcb->dcb_errhandle_called);
|
||||
backend_dcb->dcb_errhandle_called = true;
|
||||
#endif
|
||||
/** Don't handle same error twice on same DCB */
|
||||
if (backend_dcb->dcb_errhandle_called)
|
||||
{
|
||||
/** we optimistically assume that previous call succeed */
|
||||
*succp = true;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
backend_dcb->dcb_errhandle_called = true;
|
||||
}
|
||||
session = backend_dcb->session;
|
||||
|
||||
if (session != NULL)
|
||||
@ -4107,7 +4097,8 @@ static void handleError (
|
||||
return;
|
||||
}
|
||||
|
||||
if (rses->rses_master_ref->bref_dcb == backend_dcb)
|
||||
if (rses->rses_master_ref->bref_dcb == backend_dcb &&
|
||||
!SERVER_IS_MASTER(rses->rses_master_ref->bref_backend->backend_server))
|
||||
{
|
||||
/** Master failed, can't recover */
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
@ -4206,6 +4197,11 @@ static bool handle_error_new_connection(
|
||||
}
|
||||
CHK_BACKEND_REF(bref);
|
||||
|
||||
/**
|
||||
* If query was sent through the bref and it is waiting for reply from
|
||||
* the backend server it is necessary to send an error to the client
|
||||
* because it is waiting for reply.
|
||||
*/
|
||||
if (BREF_IS_WAITING_RESULT(bref))
|
||||
{
|
||||
DCB* client_dcb;
|
||||
@ -4473,6 +4469,10 @@ static backend_ref_t* get_bref_from_dcb(
|
||||
return bref;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls hang-up function for DCB if it is not both running and in
|
||||
* master/slave/joined/ndb role. Called by DCB's callback routine.
|
||||
*/
|
||||
static int router_handle_state_switch(
|
||||
DCB* dcb,
|
||||
DCB_REASON reason,
|
||||
@ -4513,6 +4513,7 @@ return_rc:
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static sescmd_cursor_t* backend_ref_get_sescmd_cursor (
|
||||
backend_ref_t* bref)
|
||||
{
|
||||
@ -4634,7 +4635,7 @@ static BACKEND *get_root_master(
|
||||
* Servers are checked even if they are in 'maintenance'
|
||||
*
|
||||
* @param rses pointer to router session
|
||||
* @return pointer to backend reference of the root master
|
||||
* @return pointer to backend reference of the root master or NULL
|
||||
*
|
||||
*/
|
||||
static backend_ref_t* get_root_master_bref(
|
||||
@ -4664,6 +4665,14 @@ static backend_ref_t* get_root_master_bref(
|
||||
bref++;
|
||||
i += 1;
|
||||
}
|
||||
if (candidate_bref == NULL)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error : Could not find master among the backend "
|
||||
"servers. Previous master state : %s",
|
||||
STRSRVSTATUS(BREFSRV(rses->rses_master_ref)))));
|
||||
}
|
||||
return candidate_bref;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user