readwritesplit.c: prevent switching the master during session. Added logging to cases where master has changed.

Moved DCB's member errhandle_called behing DEBUG flags to Release build. It shows if handleError is called for a DCB and makes it possible to avoid redundant calls.
This commit is contained in:
VilhoRaatikka 2014-11-10 14:15:32 +02:00
parent 3b07449daa
commit 62270412cf
4 changed files with 77 additions and 57 deletions

View File

@ -121,8 +121,8 @@ DCB *rval;
#if defined(SS_DEBUG)
rval->dcb_chk_top = CHK_NUM_DCB;
rval->dcb_chk_tail = CHK_NUM_DCB;
rval->dcb_errhandle_called = false;
#endif
rval->dcb_errhandle_called = false;
rval->dcb_role = role;
spinlock_init(&rval->dcb_initlock);
spinlock_init(&rval->writeqlock);

View File

@ -205,9 +205,9 @@ typedef struct dcb_callback {
typedef struct dcb {
#if defined(SS_DEBUG)
skygw_chk_t dcb_chk_top;
bool dcb_errhandle_called;
#endif
dcb_role_t dcb_role;
bool dcb_errhandle_called; /*< this can be called only once */
dcb_role_t dcb_role;
SPINLOCK dcb_initlock;
DCBEVENTQ evq; /**< The event queue for this DCB */
int fd; /**< The descriptor */

View File

@ -828,6 +828,17 @@ static void handleError(
SESSION *session = backend_dcb->session;
session_state_t sesstate;
/** Don't handle same error twice on same DCB */
if (backend_dcb->dcb_errhandle_called)
{
/** we optimistically assume that previous call succeed */
*succp = true;
return;
}
else
{
backend_dcb->dcb_errhandle_called = true;
}
spinlock_acquire(&session->ses_lock);
sesstate = session->state;
client_dcb = session->client;

View File

@ -1029,6 +1029,9 @@ static void freeSession(
/**
* Provide the router with a pointer to a suitable backend dcb.
*
* As of Nov. 2014, slave which has least connections is always chosen.
*
* Detect failures in server statuses and reselect backends if necessary.
* If name is specified, server name becomes primary selection criteria.
*
@ -2074,14 +2077,11 @@ static int routeQuery(
"route to master "
"but couldn't find "
"master in a "
"suitable state "
"failed.")));
"suitable state.")));
}
/**
* Master has changed. Set the dcb pointer NULL and
* return with error indicator.
* Master has changed. Return with error indicator.
*/
router_cli_ses->rses_master_ref->bref_dcb = NULL;
rses_end_locked_router_action(router_cli_ses);
succp = false;
ret = 0;
@ -2655,7 +2655,7 @@ static bool select_connect_backend_servers(
const int min_nslaves = 0; /*< not configurable at the time */
bool is_synced_master;
int (*p)(const void *, const void *);
BACKEND* master_host = NULL;
BACKEND* master_host;
if (p_master_ref == NULL || backend_ref == NULL)
{
@ -2667,46 +2667,32 @@ static bool select_connect_backend_servers(
/* get the root Master */
master_host = get_root_master(backend_ref, router_nservers);
/**
* Master is already chosen and connected. It means that the function
* was called from error handling function or from some other similar
* function where session was already established but new slaves needed
* to be selected.
/**
* Existing session : master is already chosen and connected.
* The function was called because new slave must be selected to replace
* failed one.
*/
if (*p_master_ref != NULL &&
BREF_IS_IN_USE((*p_master_ref)))
{
LOGIF(LD, (skygw_log_write(
LOGFILE_DEBUG,
"%lu [select_connect_backend_servers] Master %p fd %d found.",
pthread_self(),
(*p_master_ref)->bref_dcb,
(*p_master_ref)->bref_dcb->fd)));
master_found = true;
master_connected = true;
if (*p_master_ref != NULL)
{
/**
* Ensure that *p_master_ref and master_host point to same backend
* and it has a master role.
* Ensure that backend reference is in use, stored master is
* still current root master.
*/
ss_dassert(master_host &&
((*p_master_ref)->bref_backend->backend_server ==
master_host->backend_server) &&
(master_host->backend_server->status &
(SERVER_MASTER|SERVER_MAINT)) == SERVER_MASTER);
}
/** New session or master failure case */
if (!BREF_IS_IN_USE((*p_master_ref)) ||
!SERVER_IS_MASTER((*p_master_ref)->bref_backend->backend_server) ||
master_host != (*p_master_ref)->bref_backend)
{
succp = false;
goto return_succp;
}
master_found = true;
master_connected = true;
}
/**
* New session : select master and slaves
*/
else
{
LOGIF(LD, (skygw_log_write(
LOGFILE_DEBUG,
"%lu [select_connect_backend_servers] Session %p doesn't "
"currently have a master chosen. Proceeding to master "
"selection.",
pthread_self(),
session)));
{
master_found = false;
master_connected = false;
}
@ -2745,11 +2731,6 @@ static bool select_connect_backend_servers(
b->backend_conn_count)));
}
#endif
/* assert with master_host */
ss_dassert(!master_connected ||
(master_host &&
((*p_master_ref)->bref_backend->backend_server == master_host->backend_server) &&
SERVER_MASTER));
/**
* Sort the pointer list to servers according to connection counts. As
* a consequence those backends having least connections are in the
@ -2903,6 +2884,9 @@ static bool select_connect_backend_servers(
else if (master_host &&
(b->backend_server == master_host->backend_server))
{
/** not allowed to replace old master with new */
ss_dassert(*p_master_ref == NULL);
*p_master_ref = &backend_ref[i];
if (master_connected)
@ -4072,7 +4056,6 @@ static void rwsplit_process_router_options(
* Even if succp == true connecting to new slave may have failed. succp is to
* tell whether router has enough master/slave connections to continue work.
*/
static void handleError (
ROUTER* instance,
void* router_session,
@ -4086,10 +4069,17 @@ static void handleError (
ROUTER_CLIENT_SES* rses = (ROUTER_CLIENT_SES *)router_session;
CHK_DCB(backend_dcb);
#if defined(SS_DEBUG)
ss_dassert(!backend_dcb->dcb_errhandle_called);
backend_dcb->dcb_errhandle_called = true;
#endif
/** Don't handle same error twice on same DCB */
if (backend_dcb->dcb_errhandle_called)
{
/** we optimistically assume that previous call succeed */
*succp = true;
return;
}
else
{
backend_dcb->dcb_errhandle_called = true;
}
session = backend_dcb->session;
if (session != NULL)
@ -4107,7 +4097,8 @@ static void handleError (
return;
}
if (rses->rses_master_ref->bref_dcb == backend_dcb)
if (rses->rses_master_ref->bref_dcb == backend_dcb &&
!SERVER_IS_MASTER(rses->rses_master_ref->bref_backend->backend_server))
{
/** Master failed, can't recover */
LOGIF(LE, (skygw_log_write_flush(
@ -4206,6 +4197,11 @@ static bool handle_error_new_connection(
}
CHK_BACKEND_REF(bref);
/**
* If query was sent through the bref and it is waiting for reply from
* the backend server it is necessary to send an error to the client
* because it is waiting for reply.
*/
if (BREF_IS_WAITING_RESULT(bref))
{
DCB* client_dcb;
@ -4473,6 +4469,10 @@ static backend_ref_t* get_bref_from_dcb(
return bref;
}
/**
* Calls hang-up function for DCB if it is not both running and in
* master/slave/joined/ndb role. Called by DCB's callback routine.
*/
static int router_handle_state_switch(
DCB* dcb,
DCB_REASON reason,
@ -4513,6 +4513,7 @@ return_rc:
return rc;
}
static sescmd_cursor_t* backend_ref_get_sescmd_cursor (
backend_ref_t* bref)
{
@ -4634,7 +4635,7 @@ static BACKEND *get_root_master(
* Servers are checked even if they are in 'maintenance'
*
* @param rses pointer to router session
* @return pointer to backend reference of the root master
* @return pointer to backend reference of the root master or NULL
*
*/
static backend_ref_t* get_root_master_bref(
@ -4664,6 +4665,14 @@ static backend_ref_t* get_root_master_bref(
bref++;
i += 1;
}
if (candidate_bref == NULL)
{
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"Error : Could not find master among the backend "
"servers. Previous master state : %s",
STRSRVSTATUS(BREFSRV(rses->rses_master_ref)))));
}
return candidate_bref;
}