Fixes for MXS-196 and other related problems.

This commit is contained in:
counterpoint
2015-07-06 08:48:27 +01:00
parent 03b5a634f4
commit a72f462e2d
4 changed files with 92 additions and 32 deletions

View File

@ -724,7 +724,7 @@ int rc;
*/
rc = poll_add_dcb(dcb);
if (rc == DCBFD_CLOSED) {
if (rc) {
dcb_set_state(dcb, DCB_STATE_DISCONNECTED, NULL);
dcb_final_free(dcb);
return NULL;
@ -2362,6 +2362,23 @@ bool dcb_set_state(
return succp;
}
void dcb_revert_state(
DCB* dcb,
const dcb_state_t new_state,
dcb_state_t old_state)
{
CHK_DCB(dcb);
spinlock_acquire(&dcb->dcb_initlock);
if ((DCB_STATE_POLLING == new_state || DCB_STATE_LISTENING == new_state) && (DCB_STATE_ALLOC == old_state || DCB_STATE_NOPOLLING == old_state))
{
dcb->state = old_state;
spinlock_release(&dcb->dcb_initlock);
return;
}
else assert(false);
}
static bool dcb_set_state_nomutex(
DCB* dcb,
const dcb_state_t new_state,

View File

@ -186,6 +186,11 @@ static struct {
*/
static void poll_loadav(void *);
/**
* Function to analyse error return from epoll_ctl
*/
static int poll_resolve_error(int, bool);
/**
* Initialise the polling system we are using for the gateway.
*
@ -275,20 +280,9 @@ poll_add_dcb(DCB *dcb)
*/
if (dcb_set_state(dcb, new_state, &old_state)) {
rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, dcb->fd, &ev);
if (rc != 0) {
int eno = errno;
errno = 0;
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"Error : Adding dcb %p in state %s "
"to poll set failed. epoll_ctl failed due "
"%d, %s.",
dcb,
STRDCBSTATE(dcb->state),
eno,
strerror(eno))));
} else {
if (rc) rc = poll_resolve_error(errno, true);
if (0 == rc)
{
LOGIF(LD, (skygw_log_write(
LOGFILE_DEBUG,
"%lu [poll_add_dcb] Added dcb %p in state %s to "
@ -297,7 +291,7 @@ poll_add_dcb(DCB *dcb)
dcb,
STRDCBSTATE(dcb->state))));
}
ss_info_dassert(rc == 0, "Unable to add poll"); /*< trap in debug */
else dcb_revert_state(dcb, new_state, old_state);
} else {
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
@ -351,17 +345,7 @@ poll_remove_dcb(DCB *dcb)
if (dcb->fd > 0)
{
rc = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, dcb->fd, &ev);
if (rc != 0) {
int eno = errno;
errno = 0;
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"Error : epoll_ctl failed due %d, %s.",
eno,
strerror(eno))));
}
ss_dassert(rc == 0); /*< trap in debug */
if (rc) rc = poll_resolve_error(errno, false);
}
}
/*<
@ -380,6 +364,63 @@ return_rc:
return rc;
}
/**
* Check error returns from epoll_ctl. Most result in a crash since they
* are "impossible". Adding when already present is assumed non-fatal.
* Likewise, removing when not present is assumed non-fatal.
* It is assumed that callers to poll routines can handle the failure
* that results from hitting system limit, although an error is written
* here to record the problem.
*
* @param errornum The errno set by epoll_ctl
* @param adding True for adding to poll list, false for removing
* @return -1 on error or 0 for possibly revised return code
*/
static int
poll_resolve_error(int errornum, bool adding)
{
if (adding)
{
if (EEXIST == errornum)
{
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"Error : epoll_ctl could not add, already exists.")));
// Assume another thread added and no serious harm done
return 0;
}
if (ENOSPC == errornum)
{
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"The limit imposed by /proc/sys/fs/epoll/max_user_watches was "
"encountered while trying to register (EPOLL_CTL_ADD) a new "
"file descriptor on an epoll instance.")));
/* Failure - assume handled by callers */
return -1;
}
}
else
{
/* Must be removing */
if (ENOENT == errornum)
{
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"Error : epoll_ctl could not remove, not found.")));
// Assume another thread removed and no serious harm done
return 0;
}
}
/* Common checks for add or remove - crash MaxScale */
if (EBADF == errornum) assert (!(EBADF == errornum));
if (EINVAL == errornum) assert (!(EINVAL == errornum));
if (ENOMEM == errornum) assert (!(ENOMEM == errornum));
if (EPERM == errornum) assert (!(EPERM == errornum));
/* Undocumented error number */
assert(false);
}
#define BLOCKINGPOLL 0 /*< Set BLOCKING POLL to 1 if using a single thread and to make
* debugging easier.
*/
@ -1605,7 +1646,7 @@ RESULT_ROW *row;
}
/**
* Return a resultset that has the current set of services in it
* Return a result set that has the current set of services in it
*
* @return A Result set
*/

View File

@ -338,7 +338,8 @@ int dcb_remove_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON, vo
int dcb_isvalid(DCB *); /* Check the DCB is in the linked list */
int dcb_count_by_usage(DCB_USAGE); /* Return counts of DCBs */
bool dcb_set_state(DCB* dcb, dcb_state_t new_state, dcb_state_t* old_state);
bool dcb_set_state(DCB *dcb, dcb_state_t new_state, dcb_state_t *old_state);
void dcb_revert_state(DCB *dcb, const dcb_state_t new_state, dcb_state_t old_state);
void dcb_call_foreach (struct server* server, DCB_REASON reason);
size_t dcb_get_session_id(DCB* dcb);
bool dcb_get_ses_log_info(DCB* dcb, size_t* sesid, int* enabled_logs);

View File

@ -1451,8 +1451,8 @@ int gw_MySQLListener(
// add listening socket to poll structure
if (poll_add_dcb(listen_dcb) == -1) {
fprintf(stderr,
"\n* Failed to start polling the socket due error "
"%i, %s.\n\n",
"\n* MaxScale encountered system limit while "
"attempting to register on an epoll instance.\n\n",
errno,
strerror(errno));
return 0;
@ -1687,7 +1687,8 @@ int gw_MySQLAccept(DCB *listener)
client_dcb,
1,
0,
"MaxScale internal error.");
"MaxScale encountered system limit while "
"attempting to register on an epoll instance.");
/** close client_dcb */
dcb_close(client_dcb);