diff --git a/server/core/dcb.c b/server/core/dcb.c index 4d22c1338..fe571e845 100644 --- a/server/core/dcb.c +++ b/server/core/dcb.c @@ -121,8 +121,8 @@ DCB *rval; #if defined(SS_DEBUG) rval->dcb_chk_top = CHK_NUM_DCB; rval->dcb_chk_tail = CHK_NUM_DCB; - rval->dcb_errhandle_called = false; #endif + rval->dcb_errhandle_called = false; rval->dcb_role = role; spinlock_init(&rval->dcb_initlock); spinlock_init(&rval->writeqlock); diff --git a/server/include/dcb.h b/server/include/dcb.h index c455fcbb8..a2d220732 100644 --- a/server/include/dcb.h +++ b/server/include/dcb.h @@ -205,9 +205,9 @@ typedef struct dcb_callback { typedef struct dcb { #if defined(SS_DEBUG) skygw_chk_t dcb_chk_top; - bool dcb_errhandle_called; #endif - dcb_role_t dcb_role; + bool dcb_errhandle_called; /*< this can be called only once */ + dcb_role_t dcb_role; SPINLOCK dcb_initlock; DCBEVENTQ evq; /**< The event queue for this DCB */ int fd; /**< The descriptor */ diff --git a/server/modules/routing/readconnroute.c b/server/modules/routing/readconnroute.c index 949794e67..f5d15ee5d 100644 --- a/server/modules/routing/readconnroute.c +++ b/server/modules/routing/readconnroute.c @@ -828,6 +828,17 @@ static void handleError( SESSION *session = backend_dcb->session; session_state_t sesstate; + /** Don't handle same error twice on same DCB */ + if (backend_dcb->dcb_errhandle_called) + { + /** we optimistically assume that previous call succeed */ + *succp = true; + return; + } + else + { + backend_dcb->dcb_errhandle_called = true; + } spinlock_acquire(&session->ses_lock); sesstate = session->state; client_dcb = session->client; diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index d7f68b5c3..554c951e7 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -1029,6 +1029,9 @@ static void freeSession( /** * Provide the router with a pointer to a suitable backend dcb. + * + * As of Nov. 2014, slave which has least connections is always chosen. + * * Detect failures in server statuses and reselect backends if necessary. * If name is specified, server name becomes primary selection criteria. * @@ -2074,14 +2077,11 @@ static int routeQuery( "route to master " "but couldn't find " "master in a " - "suitable state " - "failed."))); + "suitable state."))); } /** - * Master has changed. Set the dcb pointer NULL and - * return with error indicator. + * Master has changed. Return with error indicator. */ - router_cli_ses->rses_master_ref->bref_dcb = NULL; rses_end_locked_router_action(router_cli_ses); succp = false; ret = 0; @@ -2655,7 +2655,7 @@ static bool select_connect_backend_servers( const int min_nslaves = 0; /*< not configurable at the time */ bool is_synced_master; int (*p)(const void *, const void *); - BACKEND* master_host = NULL; + BACKEND* master_host; if (p_master_ref == NULL || backend_ref == NULL) { @@ -2667,46 +2667,32 @@ static bool select_connect_backend_servers( /* get the root Master */ master_host = get_root_master(backend_ref, router_nservers); - /** - * Master is already chosen and connected. It means that the function - * was called from error handling function or from some other similar - * function where session was already established but new slaves needed - * to be selected. + /** + * Existing session : master is already chosen and connected. + * The function was called because new slave must be selected to replace + * failed one. */ - if (*p_master_ref != NULL && - BREF_IS_IN_USE((*p_master_ref))) - { - LOGIF(LD, (skygw_log_write( - LOGFILE_DEBUG, - "%lu [select_connect_backend_servers] Master %p fd %d found.", - pthread_self(), - (*p_master_ref)->bref_dcb, - (*p_master_ref)->bref_dcb->fd))); - - master_found = true; - master_connected = true; - + if (*p_master_ref != NULL) + { /** - * Ensure that *p_master_ref and master_host point to same backend - * and it has a master role. + * Ensure that backend reference is in use, stored master is + * still current root master. */ - ss_dassert(master_host && - ((*p_master_ref)->bref_backend->backend_server == - master_host->backend_server) && - (master_host->backend_server->status & - (SERVER_MASTER|SERVER_MAINT)) == SERVER_MASTER); - } - /** New session or master failure case */ + if (!BREF_IS_IN_USE((*p_master_ref)) || + !SERVER_IS_MASTER((*p_master_ref)->bref_backend->backend_server) || + master_host != (*p_master_ref)->bref_backend) + { + succp = false; + goto return_succp; + } + master_found = true; + master_connected = true; + } + /** + * New session : select master and slaves + */ else - { - LOGIF(LD, (skygw_log_write( - LOGFILE_DEBUG, - "%lu [select_connect_backend_servers] Session %p doesn't " - "currently have a master chosen. Proceeding to master " - "selection.", - pthread_self(), - session))); - + { master_found = false; master_connected = false; } @@ -2745,11 +2731,6 @@ static bool select_connect_backend_servers( b->backend_conn_count))); } #endif - /* assert with master_host */ - ss_dassert(!master_connected || - (master_host && - ((*p_master_ref)->bref_backend->backend_server == master_host->backend_server) && - SERVER_MASTER)); /** * Sort the pointer list to servers according to connection counts. As * a consequence those backends having least connections are in the @@ -2903,6 +2884,9 @@ static bool select_connect_backend_servers( else if (master_host && (b->backend_server == master_host->backend_server)) { + /** not allowed to replace old master with new */ + ss_dassert(*p_master_ref == NULL); + *p_master_ref = &backend_ref[i]; if (master_connected) @@ -4072,7 +4056,6 @@ static void rwsplit_process_router_options( * Even if succp == true connecting to new slave may have failed. succp is to * tell whether router has enough master/slave connections to continue work. */ - static void handleError ( ROUTER* instance, void* router_session, @@ -4086,10 +4069,17 @@ static void handleError ( ROUTER_CLIENT_SES* rses = (ROUTER_CLIENT_SES *)router_session; CHK_DCB(backend_dcb); -#if defined(SS_DEBUG) - ss_dassert(!backend_dcb->dcb_errhandle_called); - backend_dcb->dcb_errhandle_called = true; -#endif + /** Don't handle same error twice on same DCB */ + if (backend_dcb->dcb_errhandle_called) + { + /** we optimistically assume that previous call succeed */ + *succp = true; + return; + } + else + { + backend_dcb->dcb_errhandle_called = true; + } session = backend_dcb->session; if (session != NULL) @@ -4107,7 +4097,8 @@ static void handleError ( return; } - if (rses->rses_master_ref->bref_dcb == backend_dcb) + if (rses->rses_master_ref->bref_dcb == backend_dcb && + !SERVER_IS_MASTER(rses->rses_master_ref->bref_backend->backend_server)) { /** Master failed, can't recover */ LOGIF(LE, (skygw_log_write_flush( @@ -4206,6 +4197,11 @@ static bool handle_error_new_connection( } CHK_BACKEND_REF(bref); + /** + * If query was sent through the bref and it is waiting for reply from + * the backend server it is necessary to send an error to the client + * because it is waiting for reply. + */ if (BREF_IS_WAITING_RESULT(bref)) { DCB* client_dcb; @@ -4473,6 +4469,10 @@ static backend_ref_t* get_bref_from_dcb( return bref; } +/** + * Calls hang-up function for DCB if it is not both running and in + * master/slave/joined/ndb role. Called by DCB's callback routine. + */ static int router_handle_state_switch( DCB* dcb, DCB_REASON reason, @@ -4513,6 +4513,7 @@ return_rc: return rc; } + static sescmd_cursor_t* backend_ref_get_sescmd_cursor ( backend_ref_t* bref) { @@ -4634,7 +4635,7 @@ static BACKEND *get_root_master( * Servers are checked even if they are in 'maintenance' * * @param rses pointer to router session - * @return pointer to backend reference of the root master + * @return pointer to backend reference of the root master or NULL * */ static backend_ref_t* get_root_master_bref( @@ -4664,6 +4665,14 @@ static backend_ref_t* get_root_master_bref( bref++; i += 1; } + if (candidate_bref == NULL) + { + LOGIF(LE, (skygw_log_write_flush( + LOGFILE_ERROR, + "Error : Could not find master among the backend " + "servers. Previous master state : %s", + STRSRVSTATUS(BREFSRV(rses->rses_master_ref))))); + } return candidate_bref; }