diff --git a/server/core/dcb.c b/server/core/dcb.c
index 8f2f159d4..99a01a640 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -724,7 +724,7 @@ int rc;
          */
         rc = poll_add_dcb(dcb);
 
-        if (rc == DCBFD_CLOSED) {
+        if (rc) {
                 dcb_set_state(dcb, DCB_STATE_DISCONNECTED, NULL);
                 dcb_final_free(dcb);
                 return NULL;
@@ -2362,6 +2362,38 @@ bool dcb_set_state(
         return succp;
 }
 
+/**
+ * Restore the state a DCB had before dcb_set_state changed it.
+ *
+ * Only a revert from POLLING or LISTENING back to ALLOC or NOPOLLING is
+ * accepted; any other combination leaves the state untouched and asserts.
+ *
+ * @param dcb        DCB whose state is restored
+ * @param new_state  State being abandoned
+ * @param old_state  State to put back
+ */
+void dcb_revert_state(
+        DCB*              dcb,
+        const dcb_state_t new_state,
+        dcb_state_t       old_state)
+{
+        bool revertible;
+
+        CHK_DCB(dcb);
+        revertible =
+                (DCB_STATE_POLLING == new_state || DCB_STATE_LISTENING == new_state) &&
+                (DCB_STATE_ALLOC == old_state || DCB_STATE_NOPOLLING == old_state);
+
+        spinlock_acquire(&dcb->dcb_initlock);
+        if (revertible)
+        {
+                dcb->state = old_state;
+        }
+        /* Release on every path: asserting with the lock held leaks it under NDEBUG */
+        spinlock_release(&dcb->dcb_initlock);
+        assert(revertible);
+}
+
 static bool dcb_set_state_nomutex(
         DCB*              dcb,
         const dcb_state_t new_state,
diff --git a/server/core/poll.c b/server/core/poll.c
index 9a1a5565d..24b27006f 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -186,6 +186,11 @@ static struct {
  */
 static void poll_loadav(void *);
 
+/**
+ * Function to analyse error return from epoll_ctl
+ */
+static int poll_resolve_error(int, bool);
+
 /**
  * Initialise the polling system we are using for the gateway.
  *
@@ -275,20 +280,9 @@ poll_add_dcb(DCB *dcb)
          */
         if (dcb_set_state(dcb, new_state, &old_state)) {
                 rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, dcb->fd, &ev);
-
-                if (rc != 0) {
-                        int eno = errno;
-                        errno = 0;
-                        LOGIF(LE, (skygw_log_write_flush(
-                                LOGFILE_ERROR,
-                                "Error : Adding dcb %p in state %s "
-                                "to poll set failed. epoll_ctl failed due "
-                                "%d, %s.",
-                                dcb,
-                                STRDCBSTATE(dcb->state),
-                                eno,
-                                strerror(eno))));
-                } else {
+                if (rc) rc = poll_resolve_error(errno, true);
+                if (0 == rc)
+                {
                         LOGIF(LD, (skygw_log_write(
                                 LOGFILE_DEBUG,
                                 "%lu [poll_add_dcb] Added dcb %p in state %s to "
@@ -297,7 +291,7 @@ poll_add_dcb(DCB *dcb)
                                 dcb,
                                 STRDCBSTATE(dcb->state))));
                 }
-                ss_info_dassert(rc == 0, "Unable to add poll"); /*< trap in debug */
+                else dcb_revert_state(dcb, new_state, old_state);
         } else {
                 LOGIF(LE, (skygw_log_write_flush(
                         LOGFILE_ERROR,
@@ -351,17 +345,7 @@ poll_remove_dcb(DCB *dcb)
                 if (dcb->fd > 0)
                 {
                         rc = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, dcb->fd, &ev);
-
-                        if (rc != 0) {
-                                int eno = errno;
-                                errno = 0;
-                                LOGIF(LE, (skygw_log_write_flush(
-                                        LOGFILE_ERROR,
-                                        "Error : epoll_ctl failed due %d, %s.",
-                                        eno,
-                                        strerror(eno))));
-                        }
-                        ss_dassert(rc == 0); /*< trap in debug */
+                        if (rc) rc = poll_resolve_error(errno, false);
                 }
         }
         /*<
@@ -380,6 +364,65 @@ return_rc:
         return rc;
 }
 
+/**
+ * Check error returns from epoll_ctl. Most result in a crash since they
+ * are "impossible". Adding when already present is assumed non-fatal.
+ * Likewise, removing when not present is assumed non-fatal.
+ * It is assumed that callers to poll routines can handle the failure
+ * that results from hitting system limit, although an error is written
+ * here to record the problem.
+ *
+ * @param errornum The errno set by epoll_ctl
+ * @param adding True for adding to poll list, false for removing
+ * @return -1 on error or 0 for possibly revised return code
+ */
+static int
+poll_resolve_error(int errornum, bool adding)
+{
+    if (adding)
+    {
+        if (EEXIST == errornum)
+        {
+            LOGIF(LE, (skygw_log_write_flush(
+                LOGFILE_ERROR,
+                "Error : epoll_ctl could not add, already exists.")));
+            // Assume another thread added and no serious harm done
+            return 0;
+        }
+        if (ENOSPC == errornum)
+        {
+            LOGIF(LE, (skygw_log_write_flush(
+                LOGFILE_ERROR,
+                "The limit imposed by /proc/sys/fs/epoll/max_user_watches was "
+                "encountered while trying to register (EPOLL_CTL_ADD) a new "
+                "file descriptor on an epoll instance.")));
+            /* Failure - assume handled by callers */
+            return -1;
+        }
+    }
+    else
+    {
+        /* Must be removing */
+        if (ENOENT == errornum)
+        {
+            LOGIF(LE, (skygw_log_write_flush(
+                LOGFILE_ERROR,
+                "Error : epoll_ctl could not remove, not found.")));
+            // Assume another thread removed and no serious harm done
+            return 0;
+        }
+    }
+    /* Common checks for add or remove - crash MaxScale */
+    if (EBADF == errornum) assert (!(EBADF == errornum));
+    if (EINVAL == errornum) assert (!(EINVAL == errornum));
+    if (ENOMEM == errornum) assert (!(ENOMEM == errornum));
+    if (EPERM == errornum) assert (!(EPERM == errornum));
+    /* Undocumented error number */
+    assert(false);
+    /* Reached only when assertions are compiled out; still report failure */
+    return -1;
+}
+
 #define BLOCKINGPOLL 0 /*< Set BLOCKING POLL to 1 if using a single thread and to make
                         * debugging easier.
                         */
@@ -1605,7 +1648,7 @@ RESULT_ROW *row;
 }
 
 /**
- * Return a resultset that has the current set of services in it
+ * Return a result set that has the current set of services in it
  *
  * @return A Result set
  */
diff --git a/server/include/dcb.h b/server/include/dcb.h
index 19f1e72ea..7f0997a9a 100644
--- a/server/include/dcb.h
+++ b/server/include/dcb.h
@@ -338,7 +338,8 @@ int dcb_remove_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON, vo
 int dcb_isvalid(DCB *); /* Check the DCB is in the linked list */
 int dcb_count_by_usage(DCB_USAGE); /* Return counts of DCBs */
 
-bool dcb_set_state(DCB* dcb, dcb_state_t new_state, dcb_state_t* old_state);
+bool dcb_set_state(DCB *dcb, dcb_state_t new_state, dcb_state_t *old_state);
+void dcb_revert_state(DCB *dcb, const dcb_state_t new_state, dcb_state_t old_state);
 void dcb_call_foreach (struct server* server, DCB_REASON reason);
 size_t dcb_get_session_id(DCB* dcb);
 bool dcb_get_ses_log_info(DCB* dcb, size_t* sesid, int* enabled_logs);
diff --git a/server/modules/protocol/mysql_client.c b/server/modules/protocol/mysql_client.c
index c3e463139..f4d3663a9 100644
--- a/server/modules/protocol/mysql_client.c
+++ b/server/modules/protocol/mysql_client.c
@@ -1451,8 +1451,6 @@ int gw_MySQLListener(
         // add listening socket to poll structure
         if (poll_add_dcb(listen_dcb) == -1) {
                 fprintf(stderr,
-                        "\n* Failed to start polling the socket due error "
-                        "%i, %s.\n\n",
-                        errno,
-                        strerror(errno));
+                        "\n* MaxScale encountered system limit while "
+                        "attempting to register on an epoll instance.\n\n");
                 return 0;
@@ -1687,7 +1685,8 @@ int gw_MySQLAccept(DCB *listener)
                         client_dcb,
                         1,
                         0,
-                        "MaxScale internal error.");
+                        "MaxScale encountered system limit while "
+                        "attempting to register on an epoll instance.");
 
                 /** close client_dcb */
                 dcb_close(client_dcb);