Introduce a new polling mechanism to make thread usage fairer and to prevent
busy DCBs from blocking the execution of events on less busy DCBs.
parent 7562d49a95
commit 92ce8a47ba
@@ -141,6 +141,13 @@ DCB *rval;
    rval->polloutbusy = 0;
    rval->writecheck = 0;
    rval->fd = -1;

    rval->evq.next = NULL;
    rval->evq.prev = NULL;
    rval->evq.pending_events = 0;
    rval->evq.processing = 0;
    spinlock_init(&rval->evq.eventqlock);

    memset(&rval->stats, 0, sizeof(DCBSTATS)); // Zero the statistics
    rval->state = DCB_STATE_ALLOC;
    bitmask_init(&rval->memdata.bitmask);
@@ -446,9 +453,10 @@ bool succp = false;
        CHK_DCB(ptr);

        /*
         * Skip processing of the excluded DCB
         * Skip processing of the excluded DCB or DCB's that are
         * in the event queue waiting to be processed.
         */
        if (ptr == excluded)
        if (ptr == excluded || ptr->evq.next || ptr->evq.prev)
        {
            lptr = ptr;
            ptr = ptr->memdata.next;
@@ -1316,10 +1324,6 @@ DCB *dcb;
        dcb_printf(pdcb, "\t\tNo. of Writes: %d\n", dcb->stats.n_writes);
        dcb_printf(pdcb, "\t\tNo. of Buffered Writes: %d\n", dcb->stats.n_buffered);
        dcb_printf(pdcb, "\t\tNo. of Accepts: %d\n", dcb->stats.n_accepts);
        dcb_printf(pdcb, "\t\tNo. of busy polls: %d\n", dcb->stats.n_busypolls);
        dcb_printf(pdcb, "\t\tNo. of read rechecks: %d\n", dcb->stats.n_readrechecks);
        dcb_printf(pdcb, "\t\tNo. of busy write polls: %d\n", dcb->stats.n_busywrpolls);
        dcb_printf(pdcb, "\t\tNo. of write rechecks: %d\n", dcb->stats.n_writerechecks);
        dcb_printf(pdcb, "\t\tNo. of High Water Events: %d\n", dcb->stats.n_high_water);
        dcb_printf(pdcb, "\t\tNo. of Low Water Events: %d\n", dcb->stats.n_low_water);
        if (dcb->flags & DCBF_CLONE)
@@ -1427,10 +1431,6 @@ dprintDCB(DCB *pdcb, DCB *dcb)
            dcb->stats.n_buffered);
    dcb_printf(pdcb, "\t\tNo. of Accepts: %d\n",
            dcb->stats.n_accepts);
    dcb_printf(pdcb, "\t\tNo. of busy polls: %d\n", dcb->stats.n_busypolls);
    dcb_printf(pdcb, "\t\tNo. of read rechecks: %d\n", dcb->stats.n_readrechecks);
    dcb_printf(pdcb, "\t\tNo. of busy write polls: %d\n", dcb->stats.n_busywrpolls);
    dcb_printf(pdcb, "\t\tNo. of write rechecks: %d\n", dcb->stats.n_writerechecks);
    dcb_printf(pdcb, "\t\tNo. of High Water Events: %d\n",
            dcb->stats.n_high_water);
    dcb_printf(pdcb, "\t\tNo. of Low Water Events: %d\n",
@@ -1955,99 +1955,6 @@ int rval = 0;
    return rval;
}

/**
 * Called by the EPOLLIN event. Take care of calling the protocol
 * read entry point and managing multiple threads competing for the DCB
 * without blocking those threads.
 *
 * This mechanism does away with the need for a mutex on the EPOLLIN event
 * and instead implements a queuing mechanism in which nested events are
 * queued on the DCB such that when the thread processing the first event
 * returns it will read the queued event and process it. This allows the
 * thread that would otherwise have to wait to process the nested event
 * to return immediately and process other events.
 *
 * @param dcb       The DCB that has data available
 * @param thread_id The ID of the calling thread
 * @param nozombies If non-zero then do not do zombie processing
 */
void
dcb_pollin(DCB *dcb, int thread_id, int nozombies)
{

    spinlock_acquire(&dcb->pollinlock);
    if (dcb->pollinbusy == 0)
    {
        dcb->pollinbusy = 1;
        do {
            if (dcb->readcheck)
            {
                dcb->stats.n_readrechecks++;
                if (!nozombies)
                    dcb_process_zombies(thread_id, dcb);
            }
            dcb->readcheck = 0;
            spinlock_release(&dcb->pollinlock);
            dcb->func.read(dcb);
            spinlock_acquire(&dcb->pollinlock);
        } while (dcb->readcheck);
        dcb->pollinbusy = 0;
    }
    else
    {
        dcb->stats.n_busypolls++;
        dcb->readcheck = 1;
    }
    spinlock_release(&dcb->pollinlock);
}
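For illustration, the serialization pattern used by dcb_pollin() above reduces to the following minimal standalone sketch; pthread_mutex_t stands in for MaxScale's SPINLOCK and fake_read() for dcb->func.read(), so guard_t, handle_event() and all other names here are illustrative rather than part of this commit.

/* Minimal standalone sketch of the busy-flag/recheck pattern above. */
#include <pthread.h>
#include <stdio.h>

typedef struct {
    pthread_mutex_t lock;
    int busy;       /* a thread is currently running the handler   */
    int recheck;    /* an event arrived while the handler was busy */
} guard_t;

static void fake_read(void)
{
    printf("handler invoked\n");
}

/* Every thread that sees an event calls this; only one actually runs the
 * handler, the others just flag that another pass is needed and return. */
static void handle_event(guard_t *g)
{
    pthread_mutex_lock(&g->lock);
    if (g->busy == 0)
    {
        g->busy = 1;
        do {
            g->recheck = 0;
            pthread_mutex_unlock(&g->lock);
            fake_read();                /* run without holding the lock */
            pthread_mutex_lock(&g->lock);
        } while (g->recheck);           /* loop while events queued up  */
        g->busy = 0;
    }
    else
    {
        g->recheck = 1;                 /* defer to the busy thread */
    }
    pthread_mutex_unlock(&g->lock);
}

int main(void)
{
    guard_t g = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
    handle_event(&g);
    return 0;
}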


/**
 * Called by the EPOLLOUT event. Take care of calling the protocol
 * write_ready entry point and managing multiple threads competing for the DCB
 * without blocking those threads.
 *
 * This mechanism does away with the need for a mutex on the EPOLLOUT event
 * and instead implements a queuing mechanism in which nested events are
 * queued on the DCB such that when the thread processing the first event
 * returns it will read the queued event and process it. This allows the
 * thread that would otherwise have to wait to process the nested event
 * to return immediately and process other events.
 *
 * @param dcb       The DCB that is available for writes
 * @param thread_id The ID of the calling thread
 * @param nozombies If non-zero then do not do zombie processing
 */
void
dcb_pollout(DCB *dcb, int thread_id, int nozombies)
{

    spinlock_acquire(&dcb->polloutlock);
    if (dcb->polloutbusy == 0)
    {
        dcb->polloutbusy = 1;
        do {
            if (dcb->writecheck)
            {
                if (!nozombies)
                    dcb_process_zombies(thread_id, dcb);
                dcb->stats.n_writerechecks++;
            }
            dcb->writecheck = 0;
            spinlock_release(&dcb->polloutlock);
            dcb->func.write_ready(dcb);
            spinlock_acquire(&dcb->polloutlock);
        } while (dcb->writecheck);
        dcb->polloutbusy = 0;
    }
    else
    {
        dcb->stats.n_busywrpolls++;
        dcb->writecheck = 1;
    }
    spinlock_release(&dcb->polloutlock);
}
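For reference, a condensed, compile-only sketch of how a poll loop might hand event bits to these two entry points; the real dispatch (including the socket-error checks omitted here, and passing nfds as the nozombies argument) appears in poll_waitevents() later in this diff, and the dispatch() wrapper itself is hypothetical.

#include <stdint.h>
#include <sys/epoll.h>

/* Hypothetical stand-ins for the real DCB type and entry points. */
typedef struct dcb DCB;
void dcb_pollin(DCB *, int, int);
void dcb_pollout(DCB *, int, int);

void dispatch(DCB *dcb, uint32_t ev, int thread_id, int nfds)
{
    if (ev & EPOLLOUT)          /* writes first, as in poll_waitevents() */
        dcb_pollout(dcb, thread_id, nfds);
    if (ev & EPOLLIN)
        dcb_pollin(dcb, thread_id, nfds);
}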


/**
 * Get the next DCB in the list of all DCB's
@@ -69,6 +69,11 @@ static GWBITMASK poll_mask;
static simple_mutex_t epoll_wait_mutex; /*< serializes calls to epoll_wait */
#endif
static int n_waiting = 0;   /*< No. of threads in epoll_wait */
static int process_pollq(int thread_id);


DCB *eventq = NULL;
SPINLOCK pollqlock = SPINLOCK_INIT;

/**
 * Thread load average, this is the average number of descriptors in each
@@ -129,6 +134,8 @@ static struct {
    int n_nothreads;    /*< Number of times no threads are polling */
    int n_fds[MAXNFDS]; /*< Number of wakeups with particular
                            n_fds value */
    int evq_length;     /*< Event queue length */
    int evq_max;        /*< Maximum event queue length */
} pollStats;

/**
@@ -338,6 +345,17 @@ return_rc:
 * deschedule a process if a timeout is included, but will not do this if a 0 timeout
 * value is given. This improves performance when the gateway is under heavy load.
 *
 * In order to provide a fairer means of sharing the threads between the different
 * DCB's the poll mechanism has been decoupled from the processing of the events.
 * The events are now received via the epoll_wait call, a queue of DCB's that have
 * events pending is maintained and as new events arrive the DCB is added to the end
 * of this queue. If an event arrives for a DCB already in the queue, then the event
 * bits are added to the DCB but the DCB maintains the same point in the queue unless
 * the original events are already being processed. If they are being processed then
 * the DCB is moved to the back of the queue, this means that a DCB that is receiving
 * events at a high rate will not block the execution of events for other DCB's and
 * should result in a fairer polling strategy.
 *
 * @param arg   The thread ID passed as a void * to satisfy the threading package
 */
void
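For illustration, here is a minimal standalone sketch of the enqueue policy described in the comment above: OR new event bits into an already-queued entry, otherwise append to the tail of a circular doubly linked list. node_t, eventq and queue_event() are cut-down stand-ins for the real DCB, event queue and poll-loop code, not part of this commit.

#include <stdint.h>
#include <stddef.h>

typedef struct node {
    struct node *next, *prev;
    uint32_t pending_events;
    int processing;
} node_t;

static node_t *eventq;          /* head of the circular queue */

static void queue_event(node_t *n, uint32_t ev)
{
    if (n->next != NULL)        /* already queued */
    {
        n->pending_events |= ev;    /* just OR in the new bits */
        return;
    }
    n->pending_events = ev;
    if (eventq == NULL)         /* queue was empty */
    {
        eventq = n;
        n->next = n->prev = n;
    }
    else                        /* append at the tail */
    {
        n->prev = eventq->prev;
        n->next = eventq;
        eventq->prev->next = n;
        eventq->prev = n;
    }
}

int main(void)
{
    node_t a = { NULL, NULL, 0, 0 };
    queue_event(&a, 1);     /* first event queues the node   */
    queue_event(&a, 4);     /* second event just ORs in bits */
    return a.pending_events == 5 ? 0 : 1;
}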
@@ -346,12 +364,7 @@ poll_waitevents(void *arg)
struct epoll_event events[MAX_EVENTS];
int i, nfds;
int thread_id = (int)arg;
bool no_op = false;
static bool process_zombies_only = false; /*< flag for all threads */
DCB *zombies = NULL;
#if PROFILE_POLL
CYCLES cycles[2];
#endif

    /** Add this thread to the bitmask of running polling threads */
    bitmask_set(&poll_mask, thread_id);
@@ -365,20 +378,15 @@ CYCLES cycles[2];

    while (1)
    {
        /* Process the queue of waiting requests */
        while (process_pollq(thread_id))
            zombies = dcb_process_zombies(thread_id, NULL);

        atomic_add(&n_waiting, 1);
#if BLOCKINGPOLL
        nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
        atomic_add(&n_waiting, -1);
#else /* BLOCKINGPOLL */
        if (!no_op) {
            LOGIF(LD, (skygw_log_write(
                LOGFILE_DEBUG,
                "%lu [poll_waitevents] MaxScale thread "
                "%d > epoll_wait <",
                pthread_self(),
                thread_id)));
            no_op = TRUE;
        }
#if MUTEX_EPOLL
        simple_mutex_lock(&epoll_wait_mutex, TRUE);
#endif
@@ -399,32 +407,14 @@ CYCLES cycles[2];
                pthread_self(),
                nfds,
                eno)));
            no_op = FALSE;
        }
        else if (nfds == 0)
        {
            atomic_add(&n_waiting, -1);
            if (process_zombies_only) {
#if MUTEX_EPOLL
                simple_mutex_unlock(&epoll_wait_mutex);
#endif
                goto process_zombies;
            } else {
                nfds = epoll_wait(epoll_fd,
                atomic_add(&n_waiting, 1);
                nfds = epoll_wait(epoll_fd,
                        events,
                        MAX_EVENTS,
                        EPOLL_TIMEOUT);
                /*<
                 * When there are zombies to be cleaned up but
                 * no client requests, allow all threads to call
                 * dcb_process_zombies without having to wait
                 * for the timeout.
                 */
                if (nfds == 0 && dcb_get_zombies() != NULL)
                {
                    process_zombies_only = true;
                }
            }
        }
        else
        {
@@ -460,241 +450,60 @@ CYCLES cycles[2];
            atomic_add(&load_samples, 1);
            atomic_add(&load_nfds, nfds);

            /*
             * Process every DCB that has a new event and add
             * it to the poll queue.
             * If the DCB is currently being processed then we
             * OR in the new event bits to the pending event bits
             * and leave it in the queue.
             * If the DCB was not already in the queue then it was
             * idle and is added to the queue to process after
             * setting the event bits.
             */
            for (i = 0; i < nfds; i++)
            {
                DCB *dcb = (DCB *)events[i].data.ptr;
                __uint32_t ev = events[i].events;

#if PROFILE_POLL
                if (i > 0)
                spinlock_acquire(&pollqlock);
                if (DCB_POLL_BUSY(dcb))
                {
                    LOGIF(LT, (skygw_log_write(
                        LOGFILE_TRACE,
                        "Delayed behind event that "
                        "took %ld cycles",
                        cycles[1] - cycles[0])));
                    dcb->evq.pending_events |= ev;
                }
                cycles[0] = rdtsc();
#endif

                CHK_DCB(dcb);
                if (thread_data)
                else
                {
                    thread_data[thread_id].cur_dcb = dcb;
                    thread_data[thread_id].event = ev;
                }

#if defined(SS_DEBUG)
                if (dcb_fake_write_ev[dcb->fd] != 0) {
                    LOGIF(LD, (skygw_log_write(
                        LOGFILE_DEBUG,
                        "%lu [poll_waitevents] "
                        "Added fake events %d to ev %d.",
                        pthread_self(),
                        dcb_fake_write_ev[dcb->fd],
                        ev)));
                    ev |= dcb_fake_write_ev[dcb->fd];
                    dcb_fake_write_ev[dcb->fd] = 0;
                }
#endif
                ss_debug(spinlock_acquire(&dcb->dcb_initlock);)
                ss_dassert(dcb->state != DCB_STATE_ALLOC);
                ss_dassert(dcb->state != DCB_STATE_DISCONNECTED);
                ss_dassert(dcb->state != DCB_STATE_FREED);
                ss_debug(spinlock_release(&dcb->dcb_initlock);)

                LOGIF(LD, (skygw_log_write(
                    LOGFILE_DEBUG,
                    "%lu [poll_waitevents] event %d dcb %p "
                    "role %s",
                    pthread_self(),
                    ev,
                    dcb,
                    STRDCBROLE(dcb->dcb_role))));

                if (ev & EPOLLOUT)
                {
                    int eno = 0;
                    eno = gw_getsockerrno(dcb->fd);

                    if (eno == 0) {
#if MUTEX_BLOCK
                        simple_mutex_lock(
                            &dcb->dcb_write_lock,
                            true);
                        ss_info_dassert(
                            !dcb->dcb_write_active,
                            "Write already active");
                        dcb->dcb_write_active = TRUE;
                        atomic_add(
                            &pollStats.n_write,
                            1);
                        dcb->func.write_ready(dcb);
                        dcb->dcb_write_active = FALSE;
                        simple_mutex_unlock(
                            &dcb->dcb_write_lock);
#else
                        atomic_add(&pollStats.n_write,
                            1);
                        dcb_pollout(dcb, thread_id, nfds);
#endif
                    } else {
                        LOGIF(LD, (skygw_log_write(
                            LOGFILE_DEBUG,
                            "%lu [poll_waitevents] "
                            "EPOLLOUT due %d, %s. "
                            "dcb %p, fd %i",
                            pthread_self(),
                            eno,
                            strerror(eno),
                            dcb,
                            dcb->fd)));
                    }
                }
                if (ev & EPOLLIN)
                {
#if MUTEX_BLOCK
                    simple_mutex_lock(&dcb->dcb_read_lock,
                        true);
                    ss_info_dassert(!dcb->dcb_read_active,
                        "Read already active");
                    dcb->dcb_read_active = TRUE;
#endif

                    if (dcb->state == DCB_STATE_LISTENING)
                    dcb->evq.pending_events = ev;
                    if (eventq)
                    {
                        LOGIF(LD, (skygw_log_write(
                            LOGFILE_DEBUG,
                            "%lu [poll_waitevents] "
                            "Accept in fd %d",
                            pthread_self(),
                            dcb->fd)));
                        atomic_add(
                            &pollStats.n_accept, 1);
                        dcb->func.accept(dcb);
                    }
                    else
                    {
                        LOGIF(LD, (skygw_log_write(
                            LOGFILE_DEBUG,
                            "%lu [poll_waitevents] "
                            "Read in dcb %p fd %d",
                            pthread_self(),
                            dcb,
                            dcb->fd)));
                        atomic_add(&pollStats.n_read, 1);
#if MUTEX_BLOCK
                        dcb->func.read(dcb);
#else
                        dcb_pollin(dcb, thread_id, nfds);
#endif
                    }
#if MUTEX_BLOCK
                    dcb->dcb_read_active = FALSE;
                    simple_mutex_unlock(
                        &dcb->dcb_read_lock);
#endif
                }
                if (ev & EPOLLERR)
                {
                    int eno = gw_getsockerrno(dcb->fd);
#if defined(SS_DEBUG)
                    if (eno == 0) {
                        eno = dcb_fake_write_errno[dcb->fd];
                        LOGIF(LD, (skygw_log_write(
                            LOGFILE_DEBUG,
                            "%lu [poll_waitevents] "
                            "Added fake errno %d. "
                            "%s",
                            pthread_self(),
                            eno,
                            strerror(eno))));
                    }
                    dcb_fake_write_errno[dcb->fd] = 0;
#endif
                    if (eno != 0) {
                        LOGIF(LD, (skygw_log_write(
                            LOGFILE_DEBUG,
                            "%lu [poll_waitevents] "
                            "EPOLLERR due %d, %s.",
                            pthread_self(),
                            eno,
                            strerror(eno))));
                    }
                    atomic_add(&pollStats.n_error, 1);
                    dcb->func.error(dcb);
                }

                if (ev & EPOLLHUP)
                {
                    int eno = 0;
                    eno = gw_getsockerrno(dcb->fd);

                    LOGIF(LD, (skygw_log_write(
                        LOGFILE_DEBUG,
                        "%lu [poll_waitevents] "
                        "EPOLLHUP on dcb %p, fd %d. "
                        "Errno %d, %s.",
                        pthread_self(),
                        dcb,
                        dcb->fd,
                        eno,
                        strerror(eno))));
                    atomic_add(&pollStats.n_hup, 1);
                    spinlock_acquire(&dcb->dcb_initlock);
                    if ((dcb->flags & DCBF_HUNG) == 0)
                    {
                        dcb->flags |= DCBF_HUNG;
                        spinlock_release(&dcb->dcb_initlock);
                        dcb->func.hangup(dcb);
                        dcb->evq.prev = eventq->evq.prev;
                        eventq->evq.prev->evq.next = dcb;
                        eventq->evq.prev = dcb;
                        dcb->evq.next = eventq;
                    }
                    else
                        spinlock_release(&dcb->dcb_initlock);
                }

                if (ev & EPOLLRDHUP)
                {
                    int eno = 0;
                    eno = gw_getsockerrno(dcb->fd);

                    LOGIF(LD, (skygw_log_write(
                        LOGFILE_DEBUG,
                        "%lu [poll_waitevents] "
                        "EPOLLRDHUP on dcb %p, fd %d. "
                        "Errno %d, %s.",
                        pthread_self(),
                        dcb,
                        dcb->fd,
                        eno,
                        strerror(eno))));
                    atomic_add(&pollStats.n_hup, 1);
                    spinlock_acquire(&dcb->dcb_initlock);
                    if ((dcb->flags & DCBF_HUNG) == 0)
                    {
                        dcb->flags |= DCBF_HUNG;
                        spinlock_release(&dcb->dcb_initlock);
                        dcb->func.hangup(dcb);
                        eventq = dcb;
                        dcb->evq.prev = dcb;
                        dcb->evq.next = dcb;
                    }
                    pollStats.evq_length++;
                    if (pollStats.evq_length > pollStats.evq_max)
                    {
                        pollStats.evq_max = pollStats.evq_length;
                    }
                    else
                        spinlock_release(&dcb->dcb_initlock);
                }
#if PROFILE_POLL
                cycles[1] = rdtsc();
#endif
            } /*< for */
            no_op = FALSE;
            spinlock_release(&pollqlock);
        }

        /*< for */
    }
process_zombies:

    if (thread_data)
    {
        thread_data[thread_id].state = THREAD_ZPROCESSING;
    }
    zombies = dcb_process_zombies(thread_id, NULL);

    if (zombies == NULL) {
        process_zombies_only = false;
    }

    if (do_shutdown)
    {
        /*<
@@ -717,6 +526,316 @@ process_zombies:
    mysql_thread_end();
}

/**
 * Process the queue of DCB's that have outstanding events
 *
 * The first event on the queue will be chosen to be executed by this thread,
 * all other events will be left on the queue and may be picked up by other
 * threads. When the processing is complete the thread will take the DCB off the
 * queue if there are no pending events that have arrived since the thread started
 * to process the DCB. If there are pending events the DCB will be moved to the
 * back of the queue so that other DCB's will have a share of the threads to
 * execute events for them.
 *
 * @param thread_id The thread ID of the calling thread
 * @return          0 if no DCB's have been processed
 */
static int
process_pollq(int thread_id)
{
DCB *dcb;
int found = 0;
uint32_t ev;

    spinlock_acquire(&pollqlock);
    if (eventq == NULL)
    {
        /* Nothing to process */
        spinlock_release(&pollqlock);
        return 0;
    }
    dcb = eventq;
    if (dcb->evq.next == dcb->evq.prev && dcb->evq.processing == 0)
    {
        found = 1;
        dcb->evq.processing = 1;
    }
    else if (dcb->evq.next == dcb->evq.prev)
    {
        /* Only item in queue is being processed */
        spinlock_release(&pollqlock);
        return 0;
    }
    else
    {
        do {
            dcb = dcb->evq.next;
        } while (dcb != eventq && dcb->evq.processing == 1);

        if (dcb->evq.processing == 0)
        {
            /* Found DCB to process */
            dcb->evq.processing = 1;
            found = 1;
        }
    }
    if (found)
    {
        ev = dcb->evq.pending_events;
        dcb->evq.pending_events = 0;
    }
    spinlock_release(&pollqlock);

    if (found == 0)
        return 0;


    CHK_DCB(dcb);
    if (thread_data)
    {
        thread_data[thread_id].state = THREAD_PROCESSING;
        thread_data[thread_id].cur_dcb = dcb;
        thread_data[thread_id].event = ev;
    }

#if defined(SS_DEBUG)
    if (dcb_fake_write_ev[dcb->fd] != 0) {
        LOGIF(LD, (skygw_log_write(
            LOGFILE_DEBUG,
            "%lu [poll_waitevents] "
            "Added fake events %d to ev %d.",
            pthread_self(),
            dcb_fake_write_ev[dcb->fd],
            ev)));
        ev |= dcb_fake_write_ev[dcb->fd];
        dcb_fake_write_ev[dcb->fd] = 0;
    }
#endif
    ss_debug(spinlock_acquire(&dcb->dcb_initlock);)
    ss_dassert(dcb->state != DCB_STATE_ALLOC);
    ss_dassert(dcb->state != DCB_STATE_DISCONNECTED);
    ss_dassert(dcb->state != DCB_STATE_FREED);
    ss_debug(spinlock_release(&dcb->dcb_initlock);)

    LOGIF(LD, (skygw_log_write(
        LOGFILE_DEBUG,
        "%lu [poll_waitevents] event %d dcb %p "
        "role %s",
        pthread_self(),
        ev,
        dcb,
        STRDCBROLE(dcb->dcb_role))));

    if (ev & EPOLLOUT)
    {
        int eno = 0;
        eno = gw_getsockerrno(dcb->fd);

        if (eno == 0) {
#if MUTEX_BLOCK
            simple_mutex_lock(
                &dcb->dcb_write_lock,
                true);
            ss_info_dassert(
                !dcb->dcb_write_active,
                "Write already active");
            dcb->dcb_write_active = TRUE;
            atomic_add(
                &pollStats.n_write,
                1);
            dcb->func.write_ready(dcb);
            dcb->dcb_write_active = FALSE;
            simple_mutex_unlock(
                &dcb->dcb_write_lock);
#else
            atomic_add(&pollStats.n_write,
                1);
            dcb->func.write_ready(dcb);
#endif
        } else {
            LOGIF(LD, (skygw_log_write(
                LOGFILE_DEBUG,
                "%lu [poll_waitevents] "
                "EPOLLOUT due %d, %s. "
                "dcb %p, fd %i",
                pthread_self(),
                eno,
                strerror(eno),
                dcb,
                dcb->fd)));
        }
    }
    if (ev & EPOLLIN)
    {
#if MUTEX_BLOCK
        simple_mutex_lock(&dcb->dcb_read_lock,
            true);
        ss_info_dassert(!dcb->dcb_read_active,
            "Read already active");
        dcb->dcb_read_active = TRUE;
#endif

        if (dcb->state == DCB_STATE_LISTENING)
        {
            LOGIF(LD, (skygw_log_write(
                LOGFILE_DEBUG,
                "%lu [poll_waitevents] "
                "Accept in fd %d",
                pthread_self(),
                dcb->fd)));
            atomic_add(
                &pollStats.n_accept, 1);
            dcb->func.accept(dcb);
        }
        else
        {
            LOGIF(LD, (skygw_log_write(
                LOGFILE_DEBUG,
                "%lu [poll_waitevents] "
                "Read in dcb %p fd %d",
                pthread_self(),
                dcb,
                dcb->fd)));
            atomic_add(&pollStats.n_read, 1);
            dcb->func.read(dcb);
        }
#if MUTEX_BLOCK
        dcb->dcb_read_active = FALSE;
        simple_mutex_unlock(
            &dcb->dcb_read_lock);
#endif
    }
    if (ev & EPOLLERR)
    {
        int eno = gw_getsockerrno(dcb->fd);
#if defined(SS_DEBUG)
        if (eno == 0) {
            eno = dcb_fake_write_errno[dcb->fd];
            LOGIF(LD, (skygw_log_write(
                LOGFILE_DEBUG,
                "%lu [poll_waitevents] "
                "Added fake errno %d. "
                "%s",
                pthread_self(),
                eno,
                strerror(eno))));
        }
        dcb_fake_write_errno[dcb->fd] = 0;
#endif
        if (eno != 0) {
            LOGIF(LD, (skygw_log_write(
                LOGFILE_DEBUG,
                "%lu [poll_waitevents] "
                "EPOLLERR due %d, %s.",
                pthread_self(),
                eno,
                strerror(eno))));
        }
        atomic_add(&pollStats.n_error, 1);
        dcb->func.error(dcb);
    }

    if (ev & EPOLLHUP)
    {
        int eno = 0;
        eno = gw_getsockerrno(dcb->fd);

        LOGIF(LD, (skygw_log_write(
            LOGFILE_DEBUG,
            "%lu [poll_waitevents] "
            "EPOLLHUP on dcb %p, fd %d. "
            "Errno %d, %s.",
            pthread_self(),
            dcb,
            dcb->fd,
            eno,
            strerror(eno))));
        atomic_add(&pollStats.n_hup, 1);
        spinlock_acquire(&dcb->dcb_initlock);
        if ((dcb->flags & DCBF_HUNG) == 0)
        {
            dcb->flags |= DCBF_HUNG;
            spinlock_release(&dcb->dcb_initlock);
            dcb->func.hangup(dcb);
        }
        else
            spinlock_release(&dcb->dcb_initlock);
    }

    if (ev & EPOLLRDHUP)
    {
        int eno = 0;
        eno = gw_getsockerrno(dcb->fd);

        LOGIF(LD, (skygw_log_write(
            LOGFILE_DEBUG,
            "%lu [poll_waitevents] "
            "EPOLLRDHUP on dcb %p, fd %d. "
            "Errno %d, %s.",
            pthread_self(),
            dcb,
            dcb->fd,
            eno,
            strerror(eno))));
        atomic_add(&pollStats.n_hup, 1);
        spinlock_acquire(&dcb->dcb_initlock);
        if ((dcb->flags & DCBF_HUNG) == 0)
        {
            dcb->flags |= DCBF_HUNG;
            spinlock_release(&dcb->dcb_initlock);
            dcb->func.hangup(dcb);
        }
        else
            spinlock_release(&dcb->dcb_initlock);
    }

    spinlock_acquire(&pollqlock);
    if (dcb->evq.pending_events == 0)
    {
        /* No pending events so remove from the queue */
        if (dcb->evq.prev != dcb)
        {
            dcb->evq.prev->evq.next = dcb->evq.next;
            dcb->evq.next->evq.prev = dcb->evq.prev;
            if (eventq == dcb)
                eventq = dcb->evq.next;
        }
        else
        {
            eventq = NULL;
        }
        dcb->evq.next = NULL;
        dcb->evq.prev = NULL;
        pollStats.evq_length--;
    }
    else
    {
        /*
         * We have a pending event, move to the end of the queue
         * if there are any other DCB's in the queue.
         *
         * If we are the first item on the queue this is easy, we
         * just bump the eventq pointer.
         */
        if (dcb->evq.prev != dcb)
        {
            if (eventq == dcb)
                eventq = dcb->evq.next;
            else
            {
                dcb->evq.prev->evq.next = dcb->evq.next;
                dcb->evq.next->evq.prev = dcb->evq.prev;
                dcb->evq.prev = eventq->evq.prev;
                dcb->evq.next = eventq;
                eventq->evq.prev = dcb;
                dcb->evq.prev->evq.next = dcb;
            }
        }
    }
    dcb->evq.processing = 0;
    spinlock_release(&pollqlock);
}
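For illustration, the completion-side policy implemented above reduces to the following standalone sketch: unlink the node when no new events arrived while it was being processed, otherwise rotate it to the back of the circular queue. node_t, eventq and finish_processing() are cut-down, illustrative stand-ins for DCB, the real event queue and the code above, not part of this commit.

#include <stdint.h>
#include <stddef.h>

typedef struct node {
    struct node *next, *prev;
    uint32_t pending_events;
    int processing;
} node_t;

static node_t *eventq;

static void finish_processing(node_t *n)
{
    if (n->pending_events == 0)
    {
        if (n->prev != n)       /* other nodes remain */
        {
            n->prev->next = n->next;
            n->next->prev = n->prev;
            if (eventq == n)
                eventq = n->next;
        }
        else
            eventq = NULL;      /* queue is now empty */
        n->next = n->prev = NULL;
    }
    else if (n->prev != n)      /* pending events: rotate to the tail */
    {
        if (eventq == n)
            eventq = n->next;   /* head moves; n is now the tail */
        else
        {
            n->prev->next = n->next;
            n->next->prev = n->prev;
            n->prev = eventq->prev;
            n->next = eventq;
            eventq->prev = n;
            n->prev->next = n;
        }
    }
    n->processing = 0;
}

int main(void)
{
    node_t a;
    a.next = a.prev = &a;       /* sole element of the queue */
    a.pending_events = 0;
    a.processing = 1;
    eventq = &a;
    finish_processing(&a);      /* unlinks it; eventq becomes NULL */
    return eventq == NULL ? 0 : 1;
}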

/**
 * Shutdown the polling loop
 */
@@ -761,6 +880,10 @@ int i;
            pollStats.n_accept);
    dcb_printf(dcb, "Number of times no threads polling: %d\n",
            pollStats.n_nothreads);
    dcb_printf(dcb, "Current event queue length: %d\n",
            pollStats.evq_length);
    dcb_printf(dcb, "Maximum event queue length: %d\n",
            pollStats.evq_max);

    dcb_printf(dcb, "No of poll completions with descriptors\n");
    dcb_printf(dcb, "\tNo. of descriptors\tNo. of poll completions.\n");
@@ -53,7 +53,8 @@ struct service;
 * 07/02/2014 Massimiliano Pinto Added ipv4 data struct into for dcb
 * 07/05/2014 Mark Riddoch       Addition of callback mechanism
 * 08/05/2014 Mark Riddoch       Addition of writeq high and low watermarks
 * 27/08/2014 Mark Ridddoch      Addition of write event queuing
 * 27/08/2014 Mark Riddoch       Addition of write event queuing
 * 23/09/2014 Mark Riddoch       New poll processing queue
 *
 * @endverbatim
 */
@@ -97,6 +98,14 @@ typedef struct gw_protocol {
    int (*session)(struct dcb *, void *);
} GWPROTOCOL;

typedef struct {
    struct dcb *next;
    struct dcb *prev;
    uint32_t   pending_events;
    int        processing;
    SPINLOCK   eventqlock;
} DCBEVENTQ;
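A note on usage, based on the rest of this diff: a DCB whose evq.next is non-NULL is either queued or being processed, which is what the DCB_POLL_BUSY() macro added near the end of this header tests, and why the zombie-processing loop in dcb.c above skips such DCBs. A hypothetical, self-contained illustration of that idea (POLL_BUSY and struct evq are stand-ins, not part of this commit):

#include <stddef.h>

struct evq { struct evq *next, *prev; };

#define POLL_BUSY(x) ((x)->next != NULL)    /* stand-in for DCB_POLL_BUSY */

int main(void)
{
    struct evq e = { NULL, NULL };
    return POLL_BUSY(&e);   /* 0: not on the event queue, safe to reap */
}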

/**
 * The GWPROTOCOL version data. The following should be updated whenever
 * the GWPROTOCOL structure is changed. See the rules defined in modinfo.h
@@ -114,10 +123,6 @@ typedef struct dcbstats {
    int n_buffered;      /*< Number of buffered writes */
    int n_high_water;    /*< Number of crosses of high water mark */
    int n_low_water;     /*< Number of crosses of low water mark */
    int n_busypolls;     /*< Number of read polls while reading */
    int n_readrechecks;  /*< Number of rechecks for reads */
    int n_busywrpolls;   /*< Number of write polls while writing */
    int n_writerechecks; /*< Number of rechecks for writes */
} DCBSTATS;

/**
@@ -204,6 +209,7 @@ typedef struct dcb {
#endif
    dcb_role_t dcb_role;
    SPINLOCK   dcb_initlock;
    DCBEVENTQ  evq;        /**< The event queue for this DCB */
#if 1
    simple_mutex_t dcb_read_lock;
    simple_mutex_t dcb_write_lock;
@@ -271,8 +277,8 @@ int fail_accept_errno;
#define DCB_BELOW_LOW_WATER(x) ((x)->low_water && (x)->writeqlen < (x)->low_water)
#define DCB_ABOVE_HIGH_WATER(x) ((x)->high_water && (x)->writeqlen > (x)->high_water)

void dcb_pollin(DCB *, int, int);
void dcb_pollout(DCB *, int, int);
#define DCB_POLL_BUSY(x) ((x)->evq.next != NULL)

DCB *dcb_get_zombies(void);
int gw_write(
#if defined(SS_DEBUG)