MXS-1674 Add worker load calculation

By definition, the load is calculated using the following formula: L = 100 * ((T - t) / T), where T is the length of a time period and t is the part of that period that the worker spends in epoll_wait(). So, if there is so much work that epoll_wait() always returns immediately, then the load is 100, and if the thread spends the entire period in epoll_wait(), then the load is 0. The basic idea is that the timeout given to epoll_wait() is adjusted so that epoll_wait() will always return at roughly 10-second intervals. By making a note of when we are about to enter epoll_wait() and when we return from it, we have all the information we need for calculating the load. Due to the nature of things, we will not be able to calculate the load at exact 10-second boundaries, but it will be pretty close, and the load is always calculated using the true length of the period. We will then calculate the 1-minute load by averaging the load values of 6 consecutive 10-second periods, and the 1-hour load by averaging the load values of 60 consecutive 1-minute loads. So, while the 10-second load represents the load of the most recently measured 10-second period (and not the load of the most recent 10 seconds), the 1-minute load and the 1-hour load represent the load of the most recent minute and hour, respectively.
2018-02-20 08:41:53 +02:00
parent 6c6baebc65
commit fd4fd4eead
3 changed files with 441 additions and 5 deletions
--- a/server/core/worker.cc
+++ b/server/core/worker.cc
@@ -41,6 +41,7 @@
 #define WORKER_ABSENT_ID -1

 using maxscale::Worker;
+using maxscale::WorkerLoad;
 using maxscale::Closer;
 using maxscale::Semaphore;
 using std::vector;
@@ -154,6 +155,49 @@ void poll_resolve_error(int fd, int errornum, int op)
 static bool modules_thread_init();
 static void modules_thread_finish();

+WorkerLoad::WorkerLoad() // Zeroes the time bookkeeping and links the averages to one another.
+    : m_start_time(0) // start of the current measurement period; 0 until set elsewhere
+    , m_wait_start(0) // subtracted from 'now' in about_to_work() to get the length of a wait
+    , m_wait_time(0) // accumulated wait time of the current measurement period
+    , m_load_1_minute(&m_load_1_hour) // NOTE(review): presumably the 1 hour average is fed by this one - confirm in the Average declaration
+    , m_load_10_seconds(&m_load_1_minute) // NOTE(review): presumably the 1 minute average is fed by this one - confirm in the Average declaration
+{
+}
+
+void WorkerLoad::about_to_work(uint64_t now) // Accounts for a finished wait; 'now' is presumably a WorkerLoad::get_time() value (ms) - confirm against caller.
+{
+    uint64_t duration = now - m_start_time; // Length of the measurement period so far.
+    // Add the wait that just ended to the total wait time of this period.
+    m_wait_time += (now - m_wait_start);
+    // A load value is produced only once the period has grown past TEN_SECONDS.
+    if (duration > TEN_SECONDS)
+    {
+        int load_percentage = 100 * ((duration - m_wait_time) / (double)duration); // L = 100 * ((T - t) / T); the fraction is dropped by the conversion to int.
+        // Begin a new measurement period at this instant.
+        m_start_time = now;
+        m_wait_time = 0;
+        // Hand the value of the finished period to the 10 second load.
+        m_load_10_seconds.add_value(load_percentage);
+    }
+}
+
+WorkerLoad::Average::~Average() // Empty destructor, defined out of line; there is nothing to release here.
+{
+}
+
+//static
+uint64_t WorkerLoad::get_time()
+{
+    // Returns the current CLOCK_MONOTONIC reading converted to milliseconds.
+    timespec t;
+
+    ss_debug(int rv=)clock_gettime(CLOCK_MONOTONIC, &t);
+    ss_dassert(rv == 0);
+
+    // Widen tv_sec before scaling; in time_t width a 32-bit time_t would overflow in tv_sec * 1000.
+    return static_cast<uint64_t>(t.tv_sec) * 1000 + (t.tv_nsec / 1000000);
+}
+
 Worker::Worker(int id,
               int epoll_fd)
    : m_id(id)
@@ -1112,13 +1156,31 @@ void Worker::poll_waitevents()

    m_state = IDLE;

+    m_load.reset();
+
    while (!should_shutdown())
    {
+        int nfds;
+
        m_state = POLLING;

        atomic_add_int64(&m_statistics.n_polls, 1);
-        int nfds;
-        if ((nfds = epoll_wait(m_epoll_fd, events, MAX_EVENTS, -1)) == -1)
+
+        uint64_t now = Load::get_time();
+        int timeout = Load::GRANULARITY - (now - m_load.start_time());
+
+        if (timeout < 0)
+        {
+            // If the processing of the last batch of events took us past the next
+            // time boundary, we ensure we return immediately.
+            timeout = 0;
+        }
+
+        m_load.about_to_wait(now);
+        nfds = epoll_wait(m_epoll_fd, events, MAX_EVENTS, timeout);
+        m_load.about_to_work();
+
+        if (nfds == -1)
        {
            int eno = errno;
            errno = 0;