Move fake events to a thread-specific queue

The fake poll events are now stored in thread-specific queues. This
removes the need for the global poll event queue.
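
In short, each poll thread now owns a singly linked list of fake_event_t
nodes, and poll_add_event_to_dcb() appends to the queue of the thread that
owns the DCB. Below is a minimal standalone sketch of the append logic; the
names fake_event_enqueue and queue_lock are illustrative only, a plain
pthread mutex stands in for MaxScale's pollqlock spinlock, and opaque
pointers stand in for DCB/GWBUF:

    #include <pthread.h>
    #include <stdint.h>
    #include <stddef.h>

    typedef struct fake_event
    {
        void *dcb;               /* DCB that owns the event (opaque in this sketch) */
        void *data;              /* fake data destined for the DCB's read queue */
        uint32_t event;          /* emulated EPOLL event bits */
        struct fake_event *tail; /* last node; meaningful on the head node only */
        struct fake_event *next; /* next node in the queue */
    } fake_event_t;

    static fake_event_t **fake_events;  /* one queue head per poll thread */
    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Append an event to the queue of thread 'thr' in O(1): the head node
     * caches a pointer to the current tail, so no traversal is needed. */
    static void fake_event_enqueue(int thr, fake_event_t *event)
    {
        event->next = NULL;
        event->tail = event;

        pthread_mutex_lock(&queue_lock);

        if (fake_events[thr])
        {
            fake_events[thr]->tail->next = event;
            fake_events[thr]->tail = event;
        }
        else
        {
            fake_events[thr] = event;
        }

        pthread_mutex_unlock(&queue_lock);
    }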
Markus Makela
2016-10-25 13:19:25 +03:00
parent b79210c760
commit 8efdaa1ea6
7 changed files with 92 additions and 199 deletions


@@ -81,8 +81,19 @@ int max_poll_sleep;
*/
#define MUTEX_EPOLL 0
/** Fake epoll event struct */
typedef struct fake_event
{
DCB *dcb; /*< The DCB where this event was generated */
GWBUF *data; /*< Fake data, placed in the DCB's read queue */
uint32_t event; /*< The EPOLL event type */
struct fake_event *tail; /*< The last event */
struct fake_event *next; /*< The next event */
} fake_event_t;
static int *epoll_fd; /*< The epoll file descriptor */
static int next_epoll_fd = 0; /*< Which thread handles the next DCB */
static fake_event_t **fake_events; /*< Thread-specific fake event queue */
static int do_shutdown = 0; /*< Flag the shutdown of the poll subsystem */
static GWBITMASK poll_mask;
#if MUTEX_EPOLL
@@ -91,7 +102,7 @@ static simple_mutex_t epoll_wait_mutex; /*< serializes calls to epoll_wait */
static int n_waiting = 0; /*< No. of threads in epoll_wait */
static int process_pollq(int thread_id, struct epoll_event *event);
static void poll_add_event_to_dcb(DCB* dcb, GWBUF* buf, __uint32_t ev);
static void poll_add_event_to_dcb(DCB* dcb, GWBUF* buf, uint32_t ev);
static bool poll_dcb_session_check(DCB *dcb, const char *);
DCB *eventq = NULL;
@@ -224,6 +235,11 @@ poll_init()
}
}
if ((fake_events = MXS_CALLOC(sizeof(fake_event_t*), n_threads)) == NULL)
{
exit(-1);
}
memset(&pollStats, 0, sizeof(pollStats));
memset(&queueStats, 0, sizeof(queueStats));
bitmask_init(&poll_mask);
@@ -339,7 +355,7 @@ poll_add_dcb(DCB *dcb)
STRDCBSTATE(dcb->state));
}
dcb->state = new_state;
spinlock_release(&dcb->dcb_initlock);
/*
* The only possible failure that will not cause a crash is
* running out of system resources.
@@ -356,6 +372,7 @@ poll_add_dcb(DCB *dcb)
}
dcb->owner = owner;
spinlock_release(&dcb->dcb_initlock);
rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev);
if (rc)
@@ -702,6 +719,19 @@ poll_waitevents(void *arg)
process_pollq(thread_id, &events[i]);
}
/** Process fake events */
while (fake_events[thread_id])
{
fake_event_t *event = fake_events[thread_id];
fake_events[thread_id] = fake_events[thread_id]->next;
struct epoll_event ev;
event->dcb->dcb_fakequeue = event->data;
ev.data.ptr = event->dcb;
ev.events = event->event;
process_pollq(thread_id, &ev);
}
if (check_timeouts && hkheartbeat >= next_timeout_check)
{
process_idle_sessions();
@@ -795,7 +825,6 @@ process_pollq(int thread_id, struct epoll_event *event)
unsigned long qtime;
DCB *dcb = event->data.ptr;
atomic_add(&pollStats.evq_pending, -1);
#if PROFILE_POLL
memlog_log(plog, hkheartbeat - dcb->evq.inserted);
@@ -1132,8 +1161,6 @@ dprintPollStats(DCB *dcb)
pollStats.evq_length);
dcb_printf(dcb, "Maximum event queue length: %" PRId32 "\n",
pollStats.evq_max);
dcb_printf(dcb, "No. of DCBs with pending events: %" PRId32 "\n",
pollStats.evq_pending);
dcb_printf(dcb, "No. of wakeups with pending queue: %" PRId32 "\n",
pollStats.wake_evqpending);
@@ -1366,7 +1393,6 @@ poll_loadav(void *data)
current_avg = 0.0;
}
avg_samples[next_sample] = current_avg;
evqp_samples[next_sample] = pollStats.evq_pending;
next_sample++;
if (next_sample >= n_avg_samples)
{
@@ -1396,50 +1422,30 @@ void poll_add_epollin_event_to_dcb(DCB* dcb,
static void poll_add_event_to_dcb(DCB* dcb,
GWBUF* buf,
__uint32_t ev)
uint32_t ev)
{
/** Add buf to readqueue */
spinlock_acquire(&dcb->authlock);
dcb->dcb_readqueue = gwbuf_append(dcb->dcb_readqueue, buf);
spinlock_release(&dcb->authlock);
fake_event_t *event = MXS_MALLOC(sizeof(*event));
spinlock_acquire(&pollqlock);
if (event)
{
event->data = buf;
event->dcb = dcb;
event->event = ev;
event->next = NULL;
event->tail = event;
/** Set event to DCB */
if (DCB_POLL_BUSY(dcb))
{
if (dcb->evq.pending_events == 0)
int thr = dcb->owner;
if (fake_events[thr])
{
pollStats.evq_pending++;
}
dcb->evq.pending_events |= ev;
}
else
{
dcb->evq.pending_events = ev;
/** Add DCB to eventqueue if it isn't already there */
if (eventq)
{
dcb->evq.prev = eventq->evq.prev;
eventq->evq.prev->evq.next = dcb;
eventq->evq.prev = dcb;
dcb->evq.next = eventq;
fake_events[thr]->tail->next = event;
fake_events[thr]->tail = event;
}
else
{
eventq = dcb;
dcb->evq.prev = dcb;
dcb->evq.next = dcb;
}
pollStats.evq_length++;
pollStats.evq_pending++;
if (pollStats.evq_length > pollStats.evq_max)
{
pollStats.evq_max = pollStats.evq_length;
fake_events[thr] = event;
}
}
spinlock_release(&pollqlock);
}
/*
@@ -1458,7 +1464,7 @@ static void poll_add_event_to_dcb(DCB* dcb,
void
poll_fake_write_event(DCB *dcb)
{
poll_fake_event(dcb, EPOLLOUT);
poll_add_event_to_dcb(dcb, NULL, EPOLLOUT);
}
/*
@@ -1477,79 +1483,7 @@ poll_fake_write_event(DCB *dcb)
void
poll_fake_read_event(DCB *dcb)
{
poll_fake_event(dcb, EPOLLIN);
}
/*
* Insert a fake completion event for a DCB into the polling queue.
*
* This is used to trigger transmission activity on another DCB from
within the event processing routine of a DCB, or to allow a DCB
* to defer some further output processing, to allow for other DCBs
* to receive a slice of the processing time. Fake events are added
* to the tail of the event queue, in the same way that real events
are, to maintain the "fairness" of processing.
*
* @param dcb DCB to emulate an event for
* @param ev Event to emulate
*/
void
poll_fake_event(DCB *dcb, enum EPOLL_EVENTS ev)
{
spinlock_acquire(&pollqlock);
/*
* If the DCB is already on the queue, there are no pending events and
* there are other events on the queue, then
* take it off the queue. This stops the DCB hogging the threads.
*/
if (DCB_POLL_BUSY(dcb) && dcb->evq.pending_events == 0 && dcb->evq.prev != dcb)
{
dcb->evq.prev->evq.next = dcb->evq.next;
dcb->evq.next->evq.prev = dcb->evq.prev;
if (eventq == dcb)
{
eventq = dcb->evq.next;
}
dcb->evq.next = NULL;
dcb->evq.prev = NULL;
pollStats.evq_length--;
}
if (DCB_POLL_BUSY(dcb))
{
if (dcb->evq.pending_events == 0)
{
pollStats.evq_pending++;
}
dcb->evq.pending_events |= ev;
}
else
{
dcb->evq.pending_events = ev;
dcb->evq.inserted = hkheartbeat;
if (eventq)
{
dcb->evq.prev = eventq->evq.prev;
eventq->evq.prev->evq.next = dcb;
eventq->evq.prev = dcb;
dcb->evq.next = eventq;
}
else
{
eventq = dcb;
dcb->evq.prev = dcb;
dcb->evq.next = dcb;
}
pollStats.evq_length++;
pollStats.evq_pending++;
dcb->evq.inserted = hkheartbeat;
if (pollStats.evq_length > pollStats.evq_max)
{
pollStats.evq_max = pollStats.evq_length;
}
}
spinlock_release(&pollqlock);
poll_add_event_to_dcb(dcb, NULL, EPOLLIN);
}
/*
@@ -1567,42 +1501,7 @@ poll_fake_hangup_event(DCB *dcb)
#else
uint32_t ev = EPOLLHUP;
#endif
spinlock_acquire(&pollqlock);
if (DCB_POLL_BUSY(dcb))
{
if (dcb->evq.pending_events == 0)
{
pollStats.evq_pending++;
}
dcb->evq.pending_events |= ev;
}
else
{
dcb->evq.pending_events = ev;
dcb->evq.inserted = hkheartbeat;
if (eventq)
{
dcb->evq.prev = eventq->evq.prev;
eventq->evq.prev->evq.next = dcb;
eventq->evq.prev = dcb;
dcb->evq.next = eventq;
}
else
{
eventq = dcb;
dcb->evq.prev = dcb;
dcb->evq.next = dcb;
}
pollStats.evq_length++;
pollStats.evq_pending++;
dcb->evq.inserted = hkheartbeat;
if (pollStats.evq_length > pollStats.evq_max)
{
pollStats.evq_max = pollStats.evq_length;
}
}
spinlock_release(&pollqlock);
poll_add_event_to_dcb(dcb, NULL, ev);
}
/**
@@ -1696,14 +1595,14 @@ poll_get_stat(POLL_STAT stat)
return ts_stats_sum(pollStats.n_accept);
case POLL_STAT_EVQ_LEN:
return pollStats.evq_length;
case POLL_STAT_EVQ_PENDING:
return pollStats.evq_pending;
case POLL_STAT_EVQ_MAX:
return pollStats.evq_max;
case POLL_STAT_MAX_QTIME:
return (int)queueStats.maxqtime;
case POLL_STAT_MAX_EXECTIME:
return (int)queueStats.maxexectime;
default:
break;
}
return 0;
}
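
For completeness, a sketch of the consumer side implemented in
poll_waitevents() above, building on the enqueue sketch earlier: each
thread drains only its own queue, converting every node into a synthetic
struct epoll_event handed to the same processing path as real events. The
names fake_event_drain and process_event are illustrative; detaching the
whole list under the lock and freeing nodes after processing are
simplifications of this sketch, not what the patch literally does (it pops
one node at a time):

    #include <stdlib.h>
    #include <sys/epoll.h>

    /* Drain this thread's fake event queue; process_event() stands in for
     * the patch's process_pollq(). */
    static void fake_event_drain(int thread_id,
                                 void (*process_event)(struct epoll_event *))
    {
        pthread_mutex_lock(&queue_lock);
        fake_event_t *event = fake_events[thread_id];
        fake_events[thread_id] = NULL;
        pthread_mutex_unlock(&queue_lock);

        while (event)
        {
            fake_event_t *next = event->next;
            struct epoll_event ev;

            ev.data.ptr = event->dcb; /* the DCB travels in the epoll payload */
            ev.events = event->event;
            process_event(&ev);

            free(event); /* the sketch frees nodes after processing */
            event = next;
        }
    }

Because each queue has exactly one consumer (its owning thread), fake
events for a DCB are always handled by the thread that owns that DCB,
which is what allows the shared eventq bookkeeping and the evq_pending
statistic to be dropped in the hunks above.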