From 1701979696bae88c1560a526fb705e0ed4a6521c Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 09:42:08 +0200
Subject: [PATCH 01/42] Cache: Add LRU storage

The LRUStorage hierarchy implements the decorator pattern and
is capable of providing LRU behaviour to any underlying storage.
There are two concrete classes - LRUStorageST and LRUStorageMT -
that can be used in single thread and multiple thread contexts,
respectively.

Also tests the convention of placing _ as suffix on member
variables.
---
 server/modules/filter/cache/CMakeLists.txt  |  17 +-
 server/modules/filter/cache/lrustorage.cc   | 314 ++++++++++++++++++++
 server/modules/filter/cache/lrustorage.h    | 177 +++++++++++
 server/modules/filter/cache/lrustoragemt.cc |  63 ++++
 server/modules/filter/cache/lrustoragemt.h  |  43 +++
 server/modules/filter/cache/lrustoragest.cc |  50 ++++
 server/modules/filter/cache/lrustoragest.h  |  39 +++
 7 files changed, 702 insertions(+), 1 deletion(-)
 create mode 100644 server/modules/filter/cache/lrustorage.cc
 create mode 100644 server/modules/filter/cache/lrustorage.h
 create mode 100644 server/modules/filter/cache/lrustoragemt.cc
 create mode 100644 server/modules/filter/cache/lrustoragemt.h
 create mode 100644 server/modules/filter/cache/lrustoragest.cc
 create mode 100644 server/modules/filter/cache/lrustoragest.h

diff --git a/server/modules/filter/cache/CMakeLists.txt b/server/modules/filter/cache/CMakeLists.txt
index 94ba9e012..b085753ef 100644
--- a/server/modules/filter/cache/CMakeLists.txt
+++ b/server/modules/filter/cache/CMakeLists.txt
@@ -1,5 +1,20 @@
 if (JANSSON_FOUND)
-  add_library(cache SHARED cache.cc cachefilter.cc cachemt.cc cachept.cc cachesimple.cc cachest.cc rules.cc sessioncache.cc storage.cc storagefactory.cc storagereal.cc)
+  add_library(cache SHARED
+    cache.cc
+    cachefilter.cc
+    cachemt.cc
+    cachept.cc
+    cachesimple.cc
+    cachest.cc
+    lrustorage.cc
+    lrustoragemt.cc
+    lrustoragest.cc
+    rules.cc
+    sessioncache.cc
+    storage.cc
+    storagefactory.cc
+    storagereal.cc
+    )
   target_link_libraries(cache maxscale-common jansson)
   set_target_properties(cache PROPERTIES VERSION "1.0.0")
   set_target_properties(cache PROPERTIES LINK_FLAGS -Wl,-z,defs)
diff --git a/server/modules/filter/cache/lrustorage.cc b/server/modules/filter/cache/lrustorage.cc
new file mode 100644
index 000000000..72e1de16f
--- /dev/null
+++ b/server/modules/filter/cache/lrustorage.cc
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include "lrustorage.h"
+
+
+LRUStorage::LRUStorage(Storage* pstorage, size_t max_count, size_t max_size)
+    : pstorage_(pstorage)
+    , max_count_(max_count)
+    , max_size_(max_size)
+    , count_(0)
+    , size_(0)
+    , phead_(NULL)
+    , ptail_(NULL)
+{
+}
+
+LRUStorage::~LRUStorage()
+{
+}
+
+cache_result_t LRUStorage::get_key(const char* zdefault_db,
+                                   const GWBUF* pquery,
+                                   CACHE_KEY* pkey)
+{
+    return pstorage_->get_key(zdefault_db, pquery, pkey);
+}
+
+cache_result_t LRUStorage::do_get_value(const CACHE_KEY& key,
+                                        uint32_t flags,
+                                        GWBUF** ppvalue)
+{
+    NodesPerKey::iterator i = nodes_per_key_.find(key);
+    bool existed = (i != nodes_per_key_.end());
+
+    cache_result_t result = pstorage_->get_value(key, flags, ppvalue);
+
+    if (result == CACHE_RESULT_OK)
+    {
+        if (existed)
+        {
+            Node* pnode = i->second;
+            if (pnode != phead_) // The head is already the most recently used.
+            {
+                ptail_ = (ptail_ == pnode) ? pnode->prev() : ptail_; // Tail moves towards the head.
+                phead_ = pnode->prepend(phead_);
+            }
+        }
+        else
+        {
+            MXS_ERROR("Item found in storage, but not in key mapping.");
+        }
+    }
+
+    return result;
+}
+
+cache_result_t LRUStorage::do_put_value(const CACHE_KEY& key,
+                                        const GWBUF* pvalue)
+{
+    cache_result_t result = CACHE_RESULT_ERROR;
+
+    size_t value_size = GWBUF_LENGTH(pvalue);
+    size_t new_size = size_ + value_size;
+
+    Node* pnode = NULL;
+
+    NodesPerKey::iterator i = nodes_per_key_.find(key);
+    bool existed = (i != nodes_per_key_.end());
+
+    if (existed)
+    {
+        // TODO: Also in this case max_size_ needs to be honoured.
+        pnode = i->second;
+    }
+    else
+    {
+        if ((new_size > max_size_) || (count_ == max_count_))
+        {
+            if (new_size > max_size_)
+            {
+                MXS_NOTICE("New size %lu > max size %lu. Removing least recently used.",
+                           new_size, max_size_);
+
+                pnode = free_lru(value_size);
+            }
+            else
+            {
+                ss_dassert(count_ == max_count_);
+                MXS_NOTICE("Max count %lu reached, removing least recently used.", max_count_);
+                pnode = free_lru();
+            }
+        }
+        else
+        {
+            pnode = new (std::nothrow) Node;
+        }
+
+        if (pnode)
+        {
+            try
+            {
+                std::pair<NodesPerKey::iterator, bool>
+                    rv = nodes_per_key_.insert(std::make_pair(key, pnode));
+                ss_dassert(rv.second);
+
+                i = rv.first;
+            }
+            catch (const std::exception& x)
+            {
+                delete pnode;
+                pnode = NULL;
+                result = CACHE_RESULT_OUT_OF_RESOURCES;
+            }
+        }
+    }
+
+    if (pnode)
+    {
+        result = pstorage_->put_value(key, pvalue);
+
+        if (result == CACHE_RESULT_OK)
+        {
+            if (existed)
+            {
+                size_ -= pnode->size();
+            }
+            else
+            {
+                ++count_;
+            }
+
+            pnode->reset(&i->first, value_size);
+            size_ += pnode->size();
+
+            if (ptail_ == pnode)
+            {
+                ptail_ = pnode->prev();
+            }
+
+            phead_ = pnode->prepend(phead_);
+
+            if (!ptail_)
+            {
+                ptail_ = phead_;
+            }
+        }
+        else if (!existed)
+        {
+            MXS_ERROR("Could not put a value to the storage.");
+            nodes_per_key_.erase(i);
+            delete pnode;
+        }
+    }
+
+    return result;
+}
+
+cache_result_t LRUStorage::do_del_value(const CACHE_KEY& key)
+{
+    NodesPerKey::iterator i = nodes_per_key_.find(key);
+
+    cache_result_t result = pstorage_->del_value(key);
+
+    if (result == CACHE_RESULT_OK)
+    {
+        if (i != nodes_per_key_.end())
+        {
+            Node* pnode = i->second;
+
+            ss_dassert(size_ >= pnode->size());
+            ss_dassert(count_ > 0);
+
+            size_ -= pnode->size();
+            --count_;
+
+            // remove() returns a neighbour, not necessarily the head, so fix up both ends here.
+            const bool was_head = (phead_ == pnode);
+            phead_ = was_head ? pnode->next() : phead_;
+            ptail_ = (ptail_ != pnode) ? ptail_ : (was_head ? NULL : pnode->prev());
+
+            pnode->remove();
+            delete pnode;
+
+            nodes_per_key_.erase(i);
+        }
+        else
+        {
+            MXS_ERROR("Key was found from storage, but not from LRU register.");
+        }
+    }
+
+    return result;
+}
+
+/**
+ * Free the data associated with the least recently used node.
+ *
+ * @return The node itself, for reuse.
+ */
+LRUStorage::Node* LRUStorage::free_lru()
+{
+    ss_dassert(ptail_);
+
+    Node* pnode = NULL;
+
+    if (free_node_data(ptail_))
+    {
+        pnode = ptail_;
+    }
+
+    return pnode;
+}
+
+/**
+ * Free the data associated with sufficient number of least recently used nodes,
+ * to make the required space available.
+ *
+ * @return The last node whose data was freed, for reuse.
+ */
+LRUStorage::Node* LRUStorage::free_lru(size_t needed_space)
+{
+    Node* pnode = NULL;
+
+    size_t freed_space = 0;
+    bool error = false;
+
+    while (!error && ptail_ && (freed_space < needed_space))
+    {
+        size_t size = ptail_->size();
+
+        if (free_node_data(ptail_))
+        {
+            freed_space += size;
+
+            pnode = ptail_;
+            ptail_ = ptail_->remove();
+
+            if (freed_space < needed_space)
+            {
+                delete pnode;
+                pnode = NULL;
+            }
+        }
+        else
+        {
+            error = true;
+        }
+    }
+
+    if (pnode)
+    {
+        pnode->reset();
+    }
+
+    return pnode;
+}
+
+/**
+ * Free the data associated with a node.
+ *
+ * @return True, if the data could be freed, false otherwise.
+ */
+bool LRUStorage::free_node_data(Node* pnode)
+{
+    bool success = true;
+
+    const CACHE_KEY* pkey = pnode->key();
+    ss_dassert(pkey);
+
+    NodesPerKey::iterator i = nodes_per_key_.find(*pkey);
+
+    if (i == nodes_per_key_.end())
+    {
+        MXS_ERROR("Item in LRU list was not found in key mapping.");
+    }
+
+    cache_result_t result = pstorage_->del_value(*pkey);
+
+    switch (result)
+    {
+    case CACHE_RESULT_NOT_FOUND:
+        MXS_ERROR("Item in LRU list was not found in storage."); // Deliberate fallthrough.
+    case CACHE_RESULT_OK:
+        if (i != nodes_per_key_.end())
+        {
+            nodes_per_key_.erase(i);
+        }
+
+        ss_dassert(size_ >= pnode->size());
+        ss_dassert(count_ > 0);
+
+        size_ -= pnode->size();
+        count_ -= 1;
+        break;
+
+    default:
+        MXS_ERROR("Could not remove value from storage, cannot "
+                  "remove from LRU list or key mapping either.");
+        success = false;
+    }
+
+    return success;
+}
diff --git a/server/modules/filter/cache/lrustorage.h b/server/modules/filter/cache/lrustorage.h
new file mode 100644
index 000000000..59bc57e2f
--- /dev/null
+++ b/server/modules/filter/cache/lrustorage.h
@@ -0,0 +1,177 @@
+#pragma once
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include <maxscale/cdefs.h>
+#include <tr1/unordered_map>
+#include "storage.h"
+#include "cachefilter.h"
+
+class LRUStorage : public Storage
+{
+public:
+    ~LRUStorage();
+
+    /**
+     * @see Storage::get_key
+     */
+    cache_result_t get_key(const char* zDefaultDb,
+                           const GWBUF* pQuery,
+                           CACHE_KEY* pKey);
+
+protected:
+    LRUStorage(Storage* pstorage, size_t max_count, size_t max_size);
+
+    /**
+     * Fetches the value from the underlying storage and, if found, moves the
+     * entry to the top of the LRU list.
+     *
+     * @see Storage::get_value
+     */
+    cache_result_t do_get_value(const CACHE_KEY& key,
+                                uint32_t flags,
+                                GWBUF** ppValue);
+
+    /**
+     * Stores the value to the underlying storage and, if successful, either
+     * places the entry at or moves the existing entry to the top of the LRU
+     * list.
+     *
+     * @see Storage::put_value
+     */
+    cache_result_t do_put_value(const CACHE_KEY& key,
+                                const GWBUF* pValue);
+
+    /**
+     * Deletes the value from the underlying storage and, if successful, removes
+     * the entry from the LRU list.
+     *
+     * @see Storage::del_value
+     */
+    cache_result_t do_del_value(const CACHE_KEY& key);
+
+private:
+    LRUStorage(const LRUStorage&);
+    LRUStorage& operator = (const LRUStorage&);
+
+    /**
+     * The Node class is used for maintaining LRU information.
+     */
+    class Node
+    {
+    public:
+        Node()
+            : pkey_(NULL)
+            , size_(0)
+            , pnext_(NULL)
+            , pprev_(NULL)
+        {}
+        ~Node()
+        {
+            if (pnext_)
+            {
+                pnext_->pprev_ = pprev_;
+            }
+
+            if (pprev_)
+            {
+                pprev_->pnext_ = pnext_;
+            }
+        }
+
+        const CACHE_KEY* key() const { return pkey_; }
+        size_t size() const { return size_; }
+        Node* next() const { return pnext_; }
+        Node* prev() const { return pprev_; }
+
+        /**
+         * Move the node before the node provided as argument. A NULL argument
+         * or the node itself leaves the list untouched.
+         * @param  pnode  The node in front of which this should be moved.
+         * @return This node.
+         */
+        Node* prepend(Node* pnode)
+        {
+            if (pnode && (pnode != this))
+            {
+                if (pprev_)
+                {
+                    pprev_->pnext_ = pnext_;
+                }
+
+                if (pnext_)
+                {
+                    pnext_->pprev_ = pprev_;
+                }
+
+                pprev_ = pnode->pprev_; // Take over the predecessor of pnode.
+                if (pprev_)
+                {
+                    pprev_->pnext_ = this;
+                }
+                pnode->pprev_ = this;
+                pnext_ = pnode;
+            }
+
+            return this;
+        }
+
+        /**
+         * Remove this node from the list.
+         *
+         * @return The previous node if there is one, or the next node.
+         */
+        Node* remove()
+        {
+            if (pprev_)
+            {
+                pprev_->pnext_ = pnext_;
+            }
+
+            if (pnext_)
+            {
+                pnext_->pprev_ = pprev_;
+            }
+
+            return pprev_ ? pprev_ : pnext_;
+        }
+
+        void reset(const CACHE_KEY* pkey = NULL, size_t size = 0)
+        {
+            pkey_ = pkey;
+            size_ = size;
+        }
+
+    private:
+        const CACHE_KEY* pkey_;  /*< Points at the key stored in nodes_per_key_ below. */
+        size_t           size_;  /*< The size of the data referred to by pkey_. */
+        Node*            pnext_; /*< The next node in the LRU list. */
+        Node*            pprev_; /*< The previous node in the LRU list. */
+    };
+
+    Node* free_lru();
+    Node* free_lru(size_t space);
+    bool free_node_data(Node* pnode);
+
+private:
+    typedef std::tr1::unordered_map<CACHE_KEY, Node*> NodesPerKey;
+
+    Storage*    pstorage_;      /*< The actual storage. */
+    size_t      max_count_;     /*< The maximum number of items in the LRU list. */
+    size_t      max_size_;      /*< The maximum size of all cached items. */
+    size_t      count_;         /*< The current count of cached items. */
+    size_t      size_;          /*< The current size of all cached items. */
+    NodesPerKey nodes_per_key_; /*< Mapping from cache keys to corresponding Node. */
+    Node*       phead_;         /*< The node at top of the LRU list. */
+    Node*       ptail_;         /*< The node at bottom of the LRU list.*/
+};
diff --git a/server/modules/filter/cache/lrustoragemt.cc b/server/modules/filter/cache/lrustoragemt.cc
new file mode 100644
index 000000000..608d66997
--- /dev/null
+++ b/server/modules/filter/cache/lrustoragemt.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include "lrustoragemt.h"
+
+LRUStorageMT::LRUStorageMT(Storage* pstorage, size_t max_count, size_t max_size)
+    : LRUStorage(pstorage, max_count, max_size)
+{
+    spinlock_init(&lock_);
+}
+
+LRUStorageMT::~LRUStorageMT()
+{
+}
+
+LRUStorageMT* LRUStorageMT::create(Storage* pstorage, size_t max_count, size_t max_size)
+{
+    LRUStorageMT* plru_storage = NULL;
+
+    CPP_GUARD(plru_storage = new LRUStorageMT(pstorage, max_count, max_size));
+
+    return plru_storage;
+}
+
+cache_result_t LRUStorageMT::get_value(const CACHE_KEY& key,
+                                       uint32_t flags,
+                                       GWBUF** ppvalue)
+{
+    spinlock_acquire(&lock_);
+    cache_result_t rv =  LRUStorage::do_get_value(key, flags, ppvalue);
+    spinlock_release(&lock_);
+
+    return rv;
+}
+
+cache_result_t LRUStorageMT::put_value(const CACHE_KEY& key,
+                                       const GWBUF* pvalue)
+{
+    spinlock_acquire(&lock_);
+    cache_result_t rv =  LRUStorage::do_put_value(key, pvalue);
+    spinlock_release(&lock_);
+
+    return rv;
+}
+
+cache_result_t LRUStorageMT::del_value(const CACHE_KEY& key)
+{
+    spinlock_acquire(&lock_);
+    cache_result_t rv =  LRUStorage::do_del_value(key);
+    spinlock_release(&lock_);
+
+    return rv;
+}
diff --git a/server/modules/filter/cache/lrustoragemt.h b/server/modules/filter/cache/lrustoragemt.h
new file mode 100644
index 000000000..0a13ca49f
--- /dev/null
+++ b/server/modules/filter/cache/lrustoragemt.h
@@ -0,0 +1,43 @@
+#pragma once
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include <maxscale/cdefs.h>
+#include <maxscale/spinlock.h>
+#include "lrustorage.h"
+
+class LRUStorageMT : public LRUStorage
+{
+public:
+    ~LRUStorageMT();
+
+    LRUStorageMT* create(Storage* pstorage, size_t max_count, size_t max_size);
+
+    cache_result_t get_value(const CACHE_KEY& key,
+                             uint32_t flags,
+                             GWBUF** ppvalue);
+
+    cache_result_t put_value(const CACHE_KEY& key,
+                             const GWBUF* pvalue);
+
+    cache_result_t del_value(const CACHE_KEY& key);
+
+private:
+    LRUStorageMT(Storage* pstorage, size_t max_count, size_t max_size);
+
+    LRUStorageMT(const LRUStorageMT&);
+    LRUStorageMT& operator = (const LRUStorageMT&);
+
+private:
+    SPINLOCK lock_;
+};
diff --git a/server/modules/filter/cache/lrustoragest.cc b/server/modules/filter/cache/lrustoragest.cc
new file mode 100644
index 000000000..89e1f73a5
--- /dev/null
+++ b/server/modules/filter/cache/lrustoragest.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include "lrustoragest.h"
+
+LRUStorageST::LRUStorageST(Storage* pstorage, size_t max_count, size_t max_size)
+    : LRUStorage(pstorage, max_count, max_size)
+{
+}
+
+LRUStorageST::~LRUStorageST()
+{
+}
+
+LRUStorageST* LRUStorageST::create(Storage* pstorage, size_t max_count, size_t max_size)
+{
+    LRUStorageST* plru_storage = NULL;
+
+    CPP_GUARD(plru_storage = new LRUStorageST(pstorage, max_count, max_size));
+
+    return plru_storage;
+}
+
+cache_result_t LRUStorageST::get_value(const CACHE_KEY& key,
+                                       uint32_t flags,
+                                       GWBUF** ppvalue)
+{
+    return LRUStorage::do_get_value(key, flags, ppvalue);
+}
+
+cache_result_t LRUStorageST::put_value(const CACHE_KEY& key,
+                                       const GWBUF* pvalue)
+{
+    return LRUStorage::do_put_value(key, pvalue);
+}
+
+cache_result_t LRUStorageST::del_value(const CACHE_KEY& key)
+{
+    return LRUStorage::do_del_value(key);
+}
diff --git a/server/modules/filter/cache/lrustoragest.h b/server/modules/filter/cache/lrustoragest.h
new file mode 100644
index 000000000..f707bdbc8
--- /dev/null
+++ b/server/modules/filter/cache/lrustoragest.h
@@ -0,0 +1,39 @@
+#pragma once
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include <maxscale/cdefs.h>
+#include "lrustorage.h"
+
+class LRUStorageST : public LRUStorage
+{
+public:
+    ~LRUStorageST();
+
+    LRUStorageST* create(Storage* pstorage, size_t max_count, size_t max_size);
+
+    cache_result_t get_value(const CACHE_KEY& key,
+                             uint32_t flags,
+                             GWBUF** ppValue);
+
+    cache_result_t put_value(const CACHE_KEY& key,
+                             const GWBUF* pValue);
+
+    cache_result_t del_value(const CACHE_KEY& key);
+
+private:
+    LRUStorageST(Storage* pstorage, size_t max_count, size_t max_size);
+
+    LRUStorageST(const LRUStorageST&);
+    LRUStorageST& operator = (const LRUStorageST&);
+};

From e597523c4722295cacc38c3b27c9984331edb773 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 13:06:46 +0200
Subject: [PATCH 02/42] Cache: Allow storage modules to specify capabilities

Will be used to decide whether a LRUStorage facade is needed in front
of the actual storage or not.
---
 .../modules/filter/cache/cache_storage_api.h  | 42 ++++++++++++++-----
 .../storage_rocksdb/storage_rocksdb.cc        | 18 +++++++-
 server/modules/filter/cache/storagefactory.cc | 24 ++++++++---
 server/modules/filter/cache/storagefactory.h  |  7 ++--
 4 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/server/modules/filter/cache/cache_storage_api.h b/server/modules/filter/cache/cache_storage_api.h
index 76cade33c..aafd54483 100644
--- a/server/modules/filter/cache/cache_storage_api.h
+++ b/server/modules/filter/cache/cache_storage_api.h
@@ -55,35 +55,57 @@ typedef struct cache_key
     char data[CACHE_KEY_MAXLEN];
 } CACHE_KEY;
 
+typedef enum cache_storage_capabilities
+{
+    CACHE_STORAGE_CAP_NONE      = 0x00,
+    CACHE_STORAGE_CAP_ST        = 0x01, /*< Storage can optimize for single thread. */
+    CACHE_STORAGE_CAP_MT        = 0x02, /*< Storage can handle multiple threads. */
+    CACHE_STORAGE_CAP_LRU       = 0x04, /*< Storage capable of LRU eviction. */
+    CACHE_STORAGE_CAP_MAX_COUNT = 0x08, /*< Storage capable of capping number of entries.*/
+    CACHE_STORAGE_CAP_MAX_SIZE  = 0x10, /*< Storage capable of capping size of cache.*/
+} cache_storage_capabilities_t;
+
 typedef struct cache_storage_api
 {
     /**
      * Called immediately after the storage module has been loaded.
      *
+     * @param capabilities On successful return, contains a bitmask of
+     *                     cache_storage_capabilities_t values.
      * @return True if the initialization succeeded, false otherwise.
      */
-    bool (*initialize)();
+    bool (*initialize)(uint32_t* capabilities);
 
     /**
      * Creates an instance of cache storage. This function should, if necessary,
      * create the actual storage, initialize it and prepare to put and get
      * cache items.
      *
-     * @param model Whether the storage will be used in a single thread or
-     *              multi thread context. In the latter case the storage must
-     *              perform thread synchronization as appropriate, in the former
-     *              case it need not.
-     * @param name  The name of the cache instance.
-     * @param ttl   Time to live; number of seconds the value is valid.
-     * @param argc  The number of elements in the argv array.
-     * @param argv  Array of arguments, as passed in the `storage_options` parameter
-     *              in the cache section in the MaxScale configuration file.
+     * @param model     Whether the storage will be used in a single thread or
+     *                  multi thread context. In the latter case the storage must
+     *                  perform thread synchronization as appropriate, in the former
+     *                  case it need not.
+     * @param name      The name of the cache instance.
+     * @param ttl       Time to live; number of seconds the value is valid.
+     * @param max_count The maximum number of items the storage may store, before
+     *                  it should evict some items. Caller should specify 0, unless
+     *                  CACHE_STORAGE_CAP_MAX_COUNT is returned at initialization.
+     * @param max_size  The maximum size the storage may occupy, before it
+     *                  should evict some items. Caller should specify 0, unless
+     *                  CACHE_STORAGE_CAP_MAX_SIZE is returned at initialization.
+     * @param argc      The number of elements in the argv array.
+     * @param argv      Array of arguments, as passed in the `storage_options`
+     *                  parameter in the cache section in the MaxScale configuration
+     *                  file.
+     *
      * @return A new cache instance, or NULL if the instance could not be
      *         created.
      */
     CACHE_STORAGE* (*createInstance)(cache_thread_model_t model,
                                      const char *name,
                                      uint32_t ttl,
+                                     uint32_t max_count,
+                                     uint32_t max_size,
                                      int argc, char* argv[]);
 
     /**
diff --git a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
index 704b39dab..baeff5844 100644
--- a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
+++ b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
@@ -18,20 +18,36 @@
 namespace
 {
 
-bool initialize()
+bool initialize(uint32_t* pCapabilities)
 {
+    *pCapabilities = CACHE_STORAGE_CAP_MT;
+
     return RocksDBStorage::Initialize();
 }
 
 CACHE_STORAGE* createInstance(cache_thread_model_t, // Ignored, RocksDB always MT safe.
                               const char* zName,
                               uint32_t ttl,
+                              uint32_t maxCount,
+                              uint32_t maxSize,
                               int argc, char* argv[])
 {
     ss_dassert(zName);
 
     CACHE_STORAGE* pStorage = 0;
 
+    if (maxCount != 0)
+    {
+        MXS_WARNING("A maximum item count of %u specifed, although 'storage_rocksdb' "
+                    "does not enforce such a limit.", maxCount);
+    }
+
+    if (maxSize != 0)
+    {
+        MXS_WARNING("A maximum size of %u specified, although 'storage_rocksdb' "
+                    "does not enforce such a limit.", maxSize);
+    }
+
     try
     {
         pStorage = reinterpret_cast<CACHE_STORAGE*>(RocksDBStorage::Create(zName, ttl, argc, argv));
diff --git a/server/modules/filter/cache/storagefactory.cc b/server/modules/filter/cache/storagefactory.cc
index a80861adb..7bbe021ca 100644
--- a/server/modules/filter/cache/storagefactory.cc
+++ b/server/modules/filter/cache/storagefactory.cc
@@ -26,7 +26,10 @@
 namespace
 {
 
-bool open_cache_storage(const char* zName, void** pHandle, CACHE_STORAGE_API** ppApi)
+bool open_cache_storage(const char* zName,
+                        void** pHandle,
+                        CACHE_STORAGE_API** ppApi,
+                        uint32_t* pCapabilities)
 {
     bool rv = false;
 
@@ -45,7 +48,7 @@ bool open_cache_storage(const char* zName, void** pHandle, CACHE_STORAGE_API** p
 
             if (pApi)
             {
-                if ((pApi->initialize)())
+                if ((pApi->initialize)(pCapabilities))
                 {
                     *pHandle = handle;
                     *ppApi = pApi;
@@ -96,9 +99,12 @@ void close_cache_storage(void* handle, CACHE_STORAGE_API* pApi)
 
 }
 
-StorageFactory::StorageFactory(void* handle, CACHE_STORAGE_API* pApi)
+StorageFactory::StorageFactory(void* handle,
+                               CACHE_STORAGE_API* pApi,
+                               uint32_t capabilities)
     : m_handle(handle)
     , m_pApi(pApi)
+    , m_capabilities(capabilities)
 {
     ss_dassert(handle);
     ss_dassert(pApi);
@@ -118,10 +124,11 @@ StorageFactory* StorageFactory::Open(const char* zName)
 
     void* handle;
     CACHE_STORAGE_API* pApi;
+    uint32_t capabilities;
 
-    if (open_cache_storage(zName, &handle, &pApi))
+    if (open_cache_storage(zName, &handle, &pApi, &capabilities))
     {
-        CPP_GUARD(pFactory = new StorageFactory(handle, pApi));
+        CPP_GUARD(pFactory = new StorageFactory(handle, pApi, capabilities));
 
         if (!pFactory)
         {
@@ -141,7 +148,12 @@ Storage* StorageFactory::createStorage(cache_thread_model_t model,
     ss_dassert(m_pApi);
 
     Storage* pStorage = 0;
-    CACHE_STORAGE* pRawStorage = m_pApi->createInstance(model, zName, ttl, argc, argv);
+    // TODO: Handle max_count and max_size.
+    uint32_t max_count = 0;
+    uint32_t max_size = 0;
+
+    CACHE_STORAGE* pRawStorage = m_pApi->createInstance(model, zName, ttl, max_count, max_size,
+                                                        argc, argv);
 
     if (pRawStorage)
     {
diff --git a/server/modules/filter/cache/storagefactory.h b/server/modules/filter/cache/storagefactory.h
index f2c1a9d22..b6ddf55fa 100644
--- a/server/modules/filter/cache/storagefactory.h
+++ b/server/modules/filter/cache/storagefactory.h
@@ -32,14 +32,15 @@ public:
                            int argc, char* argv[]);
 
 private:
-    StorageFactory(void* handle, CACHE_STORAGE_API* pApi);
+    StorageFactory(void* handle, CACHE_STORAGE_API* pApi, uint32_t capabilities);
 
     StorageFactory(const StorageFactory&);
     StorageFactory& operator = (const StorageFactory&);
 
 private:
-    void*              m_handle;
-    CACHE_STORAGE_API* m_pApi;
+    void*              m_handle;       /*< dl handle of storage. */
+    CACHE_STORAGE_API* m_pApi;         /*< API of storage. */
+    uint32_t           m_capabilities; /*< Capabilities of storage. */
 };
 
 #endif

From 9c1b9c188ee12a353437479ef3ab94817ccb178a Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 15:03:52 +0200
Subject: [PATCH 03/42] Cache: Enable LRU eviction

The maximum count and maximum size of the cache can now be
specified and a storage can declare what capabilities it has.
If a storage module cannot enforce the maximum count or maximum
size limits, the storage is decorated with an LRU storage that
can.
---
 Documentation/Filters/Cache.md                | 35 ++++++-
 .../modules/filter/cache/cache_storage_api.h  |  7 +-
 server/modules/filter/cache/cachefilter.cc    | 91 +++++++++++++++++--
 server/modules/filter/cache/cachefilter.h     |  8 +-
 server/modules/filter/cache/cachemt.cc        | 10 +-
 server/modules/filter/cache/cachept.cc        |  2 +
 server/modules/filter/cache/cachesimple.cc    |  1 +
 server/modules/filter/cache/cachest.cc        |  9 +-
 server/modules/filter/cache/lrustorage.cc     |  2 +-
 server/modules/filter/cache/lrustoragemt.cc   |  3 +
 server/modules/filter/cache/lrustoragemt.h    |  2 +-
 server/modules/filter/cache/lrustoragest.cc   |  2 +
 server/modules/filter/cache/lrustoragest.h    |  2 +-
 .../storage_rocksdb/rocksdbinternals.cc       |  1 +
 .../storage/storage_rocksdb/rocksdbstorage.cc |  1 +
 .../storage_rocksdb/storage_rocksdb.cc        |  8 +-
 server/modules/filter/cache/storagefactory.cc | 55 +++++++++--
 server/modules/filter/cache/storagefactory.h  |  2 +
 18 files changed, 216 insertions(+), 25 deletions(-)

diff --git a/Documentation/Filters/Cache.md b/Documentation/Filters/Cache.md
index b466dca6c..932d031ff 100644
--- a/Documentation/Filters/Cache.md
+++ b/Documentation/Filters/Cache.md
@@ -91,6 +91,34 @@ If nothing is specified, the default _ttl_ value is 10.
 ttl=60
 ```
 
+#### `max_count`
+
+The maximum number of items the cache may contain. If the limit has been
+reached and a new item should be stored, then an older item will be evicted.
+
+Note that if `cached_data` is `thread_specific` then this limit will be
+applied to each cache _separately_.
+```
+max_size=1000
+```
+The default value is 0, which means no limit.
+
+#### `max_size`
+
+The maximum size - expressed in kibibytes - the cache may occupy. If the limit
+has been reached and a new item should be stored, then some older item(s) will
+be evicted to make space.
+
+Note that the value of `max_size` must be at least as large as the value of
+`max_resultset_size`.
+
+Note that if `cached_data` is `thread_specific` then this limit will be
+applied to each cache _separately_.
+```
+max_count=10000
+```
+The default value is 0, which means no limit.
+
 #### `rules`
 
 Specifies the path of the file where the caching rules are stored. A relative
@@ -113,7 +141,12 @@ allowed values are:
      on the other hand that the very same data may be fetched and stored
      multiple times.
 
-Default is `shared`.
+```
+cached_data=thread_specific
+```
+
+Default is `shared`. See `max_count` and `max_size` for the implications of
+changing this setting to `thread_specific`.
 
 #### `debug`
 
diff --git a/server/modules/filter/cache/cache_storage_api.h b/server/modules/filter/cache/cache_storage_api.h
index aafd54483..3395dc8bc 100644
--- a/server/modules/filter/cache/cache_storage_api.h
+++ b/server/modules/filter/cache/cache_storage_api.h
@@ -65,6 +65,11 @@ typedef enum cache_storage_capabilities
     CACHE_STORAGE_CAP_MAX_SIZE  = 0x10, /*< Storage capable of capping size of cache.*/
 } cache_storage_capabilities_t;
 
+static inline bool cache_storage_has_cap(uint32_t capabilities, uint32_t mask)
+{
+    return (capabilities & mask) == mask; /* True only if every bit of mask is also set in capabilities. */
+}
+
 typedef struct cache_storage_api
 {
     /**
@@ -105,7 +110,7 @@ typedef struct cache_storage_api
                                      const char *name,
                                      uint32_t ttl,
                                      uint32_t max_count,
-                                     uint32_t max_size,
+                                     uint64_t max_size,
                                      int argc, char* argv[]);
 
     /**
diff --git a/server/modules/filter/cache/cachefilter.cc b/server/modules/filter/cache/cachefilter.cc
index 69335ebb1..984be83be 100644
--- a/server/modules/filter/cache/cachefilter.cc
+++ b/server/modules/filter/cache/cachefilter.cc
@@ -36,6 +36,8 @@ static const CACHE_CONFIG DEFAULT_CONFIG =
     NULL,
     0,
     CACHE_DEFAULT_TTL,
+    CACHE_DEFAULT_MAX_COUNT,
+    CACHE_DEFAULT_MAX_SIZE,
     CACHE_DEFAULT_DEBUG,
     CACHE_DEFAULT_THREAD_MODEL,
 };
@@ -333,24 +335,42 @@ static bool process_params(char **pzOptions, FILTER_PARAMETER **ppParams, CACHE_
 
         if (strcmp(pParam->name, "max_resultset_rows") == 0)
         {
-            int v = atoi(pParam->value);
+            char* end;
+            int32_t value = strtol(pParam->value, &end, 0);
 
-            if (v > 0)
+            if ((*end == 0) && (value >= 0))
             {
-                config.max_resultset_rows = v;
+                if (value != 0)
+                {
+                    config.max_resultset_rows = value;
+                }
+                else
+                {
+                    config.max_resultset_rows = CACHE_DEFAULT_MAX_RESULTSET_ROWS;
+                }
             }
             else
             {
-                config.max_resultset_rows = CACHE_DEFAULT_MAX_RESULTSET_ROWS;
+                MXS_ERROR("The value of the configuration entry '%s' must "
+                          "be an integer larger than 0.", pParam->name);
+                error = true;
             }
         }
         else if (strcmp(pParam->name, "max_resultset_size") == 0)
         {
-            int v = atoi(pParam->value);
+            char* end;
+            int64_t value = strtoll(pParam->value, &end, 0);
 
-            if (v > 0)
+            if ((*end == 0) && (value >= 0))
             {
-                config.max_resultset_size = v * 1024;
+                if (value != 0)
+                {
+                    config.max_resultset_size = value * 1024;
+                }
+                else
+                {
+                    config.max_resultset_size = CACHE_DEFAULT_MAX_RESULTSET_SIZE;
+                }
             }
             else
             {
@@ -452,6 +472,52 @@ static bool process_params(char **pzOptions, FILTER_PARAMETER **ppParams, CACHE_
                 error = true;
             }
         }
+        else if (strcmp(pParam->name, "max_count") == 0)
+        {
+            char* end;
+            int32_t value = strtoul(pParam->value, &end, 0);
+
+            if ((*end == 0) && (value >= 0))
+            {
+                if (value != 0)
+                {
+                    config.max_count = value;
+                }
+                else
+                {
+                    config.max_count = CACHE_DEFAULT_MAX_COUNT;
+                }
+            }
+            else
+            {
+                MXS_ERROR("The value of the configuration entry '%s' must "
+                          "be an integer larger than or equal to 0.", pParam->name);
+                error = true;
+            }
+        }
+        else if (strcmp(pParam->name, "max_size") == 0)
+        {
+            char* end;
+            int64_t value = strtoull(pParam->value, &end, 0);
+
+            if ((*end == 0) && (value >= 0))
+            {
+                if (value != 0)
+                {
+                    config.max_size = value * 1024;
+                }
+                else
+                {
+                    config.max_size = CACHE_DEFAULT_MAX_SIZE;
+                }
+            }
+            else
+            {
+                MXS_ERROR("The value of the configuration entry '%s' must "
+                          "be an integer larger than or equal to 0.", pParam->name);
+                error = true;
+            }
+        }
         else if (strcmp(pParam->name, "debug") == 0)
         {
             int v = atoi(pParam->value);
@@ -492,6 +558,17 @@ static bool process_params(char **pzOptions, FILTER_PARAMETER **ppParams, CACHE_
         }
     }
 
+    if (!error)
+    {
+        // A resultset of the maximum allowed size must be able to fit in the cache.
+        if (config.max_size < config.max_resultset_size)
+        {
+            MXS_ERROR("The value of 'max_size' must be at least as large as that "
+                      "of 'max_resultset_size'.");
+            error = true;
+        }
+    }
+
     if (error)
     {
         cache_config_finish(config);
diff --git a/server/modules/filter/cache/cachefilter.h b/server/modules/filter/cache/cachefilter.h
index 92c99bbf8..bc6ce5e21 100644
--- a/server/modules/filter/cache/cachefilter.h
+++ b/server/modules/filter/cache/cachefilter.h
@@ -39,13 +39,17 @@ class StorageFactory;
 #define CACHE_DEBUG_MAX          (CACHE_DEBUG_RULES | CACHE_DEBUG_USAGE | CACHE_DEBUG_DECISIONS)
 
 // Count
-#define CACHE_DEFAULT_MAX_RESULTSET_ROWS UINT_MAX
+#define CACHE_DEFAULT_MAX_RESULTSET_ROWS UINT32_MAX
 // Bytes
 #define CACHE_DEFAULT_MAX_RESULTSET_SIZE 64 * 1024
 // Seconds
 #define CACHE_DEFAULT_TTL                10
 // Integer value
 #define CACHE_DEFAULT_DEBUG              0
+// Positive integer
+#define CACHE_DEFAULT_MAX_COUNT          UINT32_MAX
+// Positive integer
+#define CACHE_DEFAULT_MAX_SIZE           UINT64_MAX
 // Thread model
 #define CACHE_DEFAULT_THREAD_MODEL       CACHE_THREAD_MODEL_MT
 
@@ -59,6 +63,8 @@ typedef struct cache_config
     char** storage_argv;               /**< Cooked options for storage module. */
     int storage_argc;                  /**< Number of cooked options. */
     uint32_t ttl;                      /**< Time to live. */
+    uint32_t max_count;                /**< Maximum number of entries in the cache.*/
+    uint64_t max_size;                 /**< Maximum size of the cache.*/
     uint32_t debug;                    /**< Debug settings. */
     cache_thread_model_t thread_model; /**< Thread model. */
 } CACHE_CONFIG;
diff --git a/server/modules/filter/cache/cachemt.cc b/server/modules/filter/cache/cachemt.cc
index 6694032fe..53658115b 100644
--- a/server/modules/filter/cache/cachemt.cc
+++ b/server/modules/filter/cache/cachemt.cc
@@ -11,6 +11,7 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "cache"
 #include "cachemt.h"
 #include "storage.h"
 #include "storagefactory.h"
@@ -23,6 +24,8 @@ CacheMT::CacheMT(const std::string& name,
     : CacheSimple(name, pConfig, pRules, pFactory, pStorage)
 {
     spinlock_init(&m_lockPending);
+
+    MXS_NOTICE("Created multi threaded cache.");
 }
 
 CacheMT::~CacheMT()
@@ -89,10 +92,15 @@ CacheMT* CacheMT::Create(const std::string&  name,
     CacheMT* pCache = NULL;
 
     uint32_t ttl = pConfig->ttl;
+    uint32_t maxCount = pConfig->max_count;
+    uint64_t maxSize = pConfig->max_size; // Must be 64 bits wide: CACHE_CONFIG::max_size and createStorage() both use uint64_t.
+
     int argc = pConfig->storage_argc;
     char** argv = pConfig->storage_argv;
 
-    Storage* pStorage = pFactory->createStorage(CACHE_THREAD_MODEL_MT, name.c_str(), ttl, argc, argv);
+    Storage* pStorage = pFactory->createStorage(CACHE_THREAD_MODEL_MT, name.c_str(),
+                                                ttl, maxCount, maxSize,
+                                                argc, argv);
 
     if (pStorage)
     {
diff --git a/server/modules/filter/cache/cachept.cc b/server/modules/filter/cache/cachept.cc
index a9eb3e3d2..38884391a 100644
--- a/server/modules/filter/cache/cachept.cc
+++ b/server/modules/filter/cache/cachept.cc
@@ -11,6 +11,7 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "cache"
 #include "cachept.h"
 #include <maxscale/atomic.h>
 #include <maxscale/platform.h>
@@ -52,6 +53,7 @@ CachePT::CachePT(const std::string&  name,
     : Cache(name, pConfig, pRules, pFactory)
     , m_caches(caches)
 {
+    MXS_NOTICE("Created cache per thread.");
 }
 
 CachePT::~CachePT()
diff --git a/server/modules/filter/cache/cachesimple.cc b/server/modules/filter/cache/cachesimple.cc
index 052821e80..fbe33a0b9 100644
--- a/server/modules/filter/cache/cachesimple.cc
+++ b/server/modules/filter/cache/cachesimple.cc
@@ -11,6 +11,7 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "cache"
 #include "cachesimple.h"
 #include "storage.h"
 #include "storagefactory.h"
diff --git a/server/modules/filter/cache/cachest.cc b/server/modules/filter/cache/cachest.cc
index 65e2db078..6591c5b50 100644
--- a/server/modules/filter/cache/cachest.cc
+++ b/server/modules/filter/cache/cachest.cc
@@ -11,6 +11,7 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "cache"
 #include "cachest.h"
 #include "storage.h"
 #include "storagefactory.h"
@@ -22,6 +23,7 @@ CacheST::CacheST(const std::string& name,
                  Storage* pStorage)
     : CacheSimple(name, pConfig, pRules, pFactory, pStorage)
 {
+    MXS_NOTICE("Created single threaded cache.");
 }
 
 CacheST::~CacheST()
@@ -82,10 +84,15 @@ CacheST* CacheST::Create(const std::string&  name,
     CacheST* pCache = NULL;
 
     uint32_t ttl = pConfig->ttl;
+    uint32_t maxCount = pConfig->max_count;
+    uint64_t maxSize = pConfig->max_size; // Must be 64 bits wide: CACHE_CONFIG::max_size and createStorage() both use uint64_t.
+
     int argc = pConfig->storage_argc;
     char** argv = pConfig->storage_argv;
 
-    Storage* pStorage = pFactory->createStorage(CACHE_THREAD_MODEL_ST, name.c_str(), ttl, argc, argv);
+    Storage* pStorage = pFactory->createStorage(CACHE_THREAD_MODEL_ST, name.c_str(),
+                                                ttl, maxCount, maxSize,
+                                                argc, argv);
 
     if (pStorage)
     {
diff --git a/server/modules/filter/cache/lrustorage.cc b/server/modules/filter/cache/lrustorage.cc
index 72e1de16f..3e3711cd8 100644
--- a/server/modules/filter/cache/lrustorage.cc
+++ b/server/modules/filter/cache/lrustorage.cc
@@ -11,9 +11,9 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "cache"
 #include "lrustorage.h"
 
-
 LRUStorage::LRUStorage(Storage* pstorage, size_t max_count, size_t max_size)
     : pstorage_(pstorage)
     , max_count_(max_count)
diff --git a/server/modules/filter/cache/lrustoragemt.cc b/server/modules/filter/cache/lrustoragemt.cc
index 608d66997..5d61c3c68 100644
--- a/server/modules/filter/cache/lrustoragemt.cc
+++ b/server/modules/filter/cache/lrustoragemt.cc
@@ -11,12 +11,15 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "cache"
 #include "lrustoragemt.h"
 
 LRUStorageMT::LRUStorageMT(Storage* pstorage, size_t max_count, size_t max_size)
     : LRUStorage(pstorage, max_count, max_size)
 {
     spinlock_init(&lock_);
+
+    MXS_NOTICE("Created multi threaded LRU storage.");
 }
 
 LRUStorageMT::~LRUStorageMT()
diff --git a/server/modules/filter/cache/lrustoragemt.h b/server/modules/filter/cache/lrustoragemt.h
index 0a13ca49f..cd13427f4 100644
--- a/server/modules/filter/cache/lrustoragemt.h
+++ b/server/modules/filter/cache/lrustoragemt.h
@@ -21,7 +21,7 @@ class LRUStorageMT : public LRUStorage
 public:
     ~LRUStorageMT();
 
-    LRUStorageMT* create(Storage* pstorage, size_t max_count, size_t max_size);
+    static LRUStorageMT* create(Storage* pstorage, size_t max_count, size_t max_size);
 
     cache_result_t get_value(const CACHE_KEY& key,
                              uint32_t flags,
diff --git a/server/modules/filter/cache/lrustoragest.cc b/server/modules/filter/cache/lrustoragest.cc
index 89e1f73a5..166f8724a 100644
--- a/server/modules/filter/cache/lrustoragest.cc
+++ b/server/modules/filter/cache/lrustoragest.cc
@@ -11,11 +11,13 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "cache"
 #include "lrustoragest.h"
 
 LRUStorageST::LRUStorageST(Storage* pstorage, size_t max_count, size_t max_size)
     : LRUStorage(pstorage, max_count, max_size)
 {
+    MXS_NOTICE("Created single threaded LRU storage.");
 }
 
 LRUStorageST::~LRUStorageST()
diff --git a/server/modules/filter/cache/lrustoragest.h b/server/modules/filter/cache/lrustoragest.h
index f707bdbc8..8c0ac5d91 100644
--- a/server/modules/filter/cache/lrustoragest.h
+++ b/server/modules/filter/cache/lrustoragest.h
@@ -20,7 +20,7 @@ class LRUStorageST : public LRUStorage
 public:
     ~LRUStorageST();
 
-    LRUStorageST* create(Storage* pstorage, size_t max_count, size_t max_size);
+    static LRUStorageST* create(Storage* pstorage, size_t max_count, size_t max_size);
 
     cache_result_t get_value(const CACHE_KEY& key,
                              uint32_t flags,
diff --git a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc
index c60519549..6687a96f7 100644
--- a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc
+++ b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc
@@ -11,6 +11,7 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "storage_rocksdb"
 #include "rocksdbinternals.h"
 #include <rocksdb/env.h>
 #include <util/coding.h>
diff --git a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc
index 9217d647a..7ae61e62e 100644
--- a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc
+++ b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc
@@ -11,6 +11,7 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "storage_rocksdb"
 #include "rocksdbstorage.h"
 #include <openssl/sha.h>
 #include <sys/stat.h>
diff --git a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
index baeff5844..4de3d6868 100644
--- a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
+++ b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
@@ -11,7 +11,9 @@
  * Public License.
  */
 
+#define MXS_MODULE_NAME "storage_rocksdb"
 #include "storage_rocksdb.h"
+#include <inttypes.h>
 #include "../../cache_storage_api.h"
 #include "rocksdbstorage.h"
 
@@ -29,7 +31,7 @@ CACHE_STORAGE* createInstance(cache_thread_model_t, // Ignored, RocksDB always M
                               const char* zName,
                               uint32_t ttl,
                               uint32_t maxCount,
-                              uint32_t maxSize,
+                              uint64_t maxSize,
                               int argc, char* argv[])
 {
     ss_dassert(zName);
@@ -38,13 +40,13 @@ CACHE_STORAGE* createInstance(cache_thread_model_t, // Ignored, RocksDB always M
 
     if (maxCount != 0)
     {
-        MXS_WARNING("A maximum item count of %u specifed, although 'storage_rocksdb' "
+        MXS_WARNING("A maximum item count of %" PRIu32 " specifed, although 'storage_rocksdb' "
                     "does not enforce such a limit.", maxCount);
     }
 
     if (maxSize != 0)
     {
-        MXS_WARNING("A maximum size of %u specified, although 'storage_rocksdb' "
+        MXS_WARNING("A maximum size of %" PRIu64 " specified, although 'storage_rocksdb' "
                     "does not enforce such a limit.", maxSize);
     }
 
diff --git a/server/modules/filter/cache/storagefactory.cc b/server/modules/filter/cache/storagefactory.cc
index 7bbe021ca..0e74af99b 100644
--- a/server/modules/filter/cache/storagefactory.cc
+++ b/server/modules/filter/cache/storagefactory.cc
@@ -20,6 +20,8 @@
 #include <maxscale/gwdirs.h>
 #include <maxscale/log_manager.h>
 #include "cachefilter.h"
+#include "lrustoragest.h"
+#include "lrustoragemt.h"
 #include "storagereal.h"
 
 
@@ -142,24 +144,63 @@ StorageFactory* StorageFactory::Open(const char* zName)
 Storage* StorageFactory::createStorage(cache_thread_model_t model,
                                        const char* zName,
                                        uint32_t ttl,
+                                       uint32_t maxCount,
+                                       uint64_t maxSize,
                                        int argc, char* argv[])
 {
     ss_dassert(m_handle);
     ss_dassert(m_pApi);
 
     Storage* pStorage = 0;
-    // TODO: Handle max_count and max_size.
-    uint32_t max_count = 0;
-    uint32_t max_size = 0;
 
-    CACHE_STORAGE* pRawStorage = m_pApi->createInstance(model, zName, ttl, max_count, max_size,
-                                                        argc, argv);
+    uint32_t mc = cache_storage_has_cap(m_capabilities, CACHE_STORAGE_CAP_MAX_COUNT) ? maxCount : 0;
+    uint64_t ms = cache_storage_has_cap(m_capabilities, CACHE_STORAGE_CAP_MAX_SIZE) ? maxSize : 0;
+
+    CACHE_STORAGE* pRawStorage = m_pApi->createInstance(model, zName, ttl, mc, ms, argc, argv);
 
     if (pRawStorage)
     {
-        CPP_GUARD(pStorage = new StorageReal(m_pApi, pRawStorage));
+        StorageReal* pStorageReal = NULL;
 
-        if (!pStorage)
+        CPP_GUARD(pStorageReal = new StorageReal(m_pApi, pRawStorage));
+
+        if (pStorageReal)
+        {
+            uint32_t mask = CACHE_STORAGE_CAP_MAX_COUNT | CACHE_STORAGE_CAP_MAX_SIZE;
+
+            if (!cache_storage_has_cap(m_capabilities, mask))
+            {
+                // Ok, so the cache cannot handle eviction. Let's decorate the
+                // real storage with a storage that can.
+
+                LRUStorage *pLruStorage = NULL;
+
+                if (model == CACHE_THREAD_MODEL_ST)
+                {
+                    pLruStorage = LRUStorageST::create(pStorageReal, maxCount, maxSize);
+                }
+                else
+                {
+                    ss_dassert(model == CACHE_THREAD_MODEL_MT);
+
+                    pLruStorage = LRUStorageMT::create(pStorageReal, maxCount, maxSize);
+                }
+
+                if (pLruStorage)
+                {
+                    pStorage = pLruStorage;
+                }
+                else
+                {
+                    delete pStorageReal;
+                }
+            }
+            else
+            {
+                pStorage = pStorageReal;
+            }
+        }
+        else
         {
             m_pApi->freeInstance(pRawStorage);
         }
diff --git a/server/modules/filter/cache/storagefactory.h b/server/modules/filter/cache/storagefactory.h
index b6ddf55fa..24f0e7871 100644
--- a/server/modules/filter/cache/storagefactory.h
+++ b/server/modules/filter/cache/storagefactory.h
@@ -29,6 +29,8 @@ public:
     Storage* createStorage(cache_thread_model_t model,
                            const char* zName,
                            uint32_t ttl,
+                           uint32_t max_count,
+                           uint64_t max_size,
                            int argc, char* argv[]);
 
 private:

From e5a9eceff45a673f5dba0a6fa4e5125eeb16b933 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 17:23:53 +0200
Subject: [PATCH 04/42] Cache: Fix prev/next twiddling

---
 server/modules/filter/cache/lrustorage.cc | 5 +++--
 server/modules/filter/cache/lrustorage.h  | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/server/modules/filter/cache/lrustorage.cc b/server/modules/filter/cache/lrustorage.cc
index 3e3711cd8..2c5c63e67 100644
--- a/server/modules/filter/cache/lrustorage.cc
+++ b/server/modules/filter/cache/lrustorage.cc
@@ -51,7 +51,7 @@ cache_result_t LRUStorage::do_get_value(const CACHE_KEY& key,
         {
             if (ptail_ == i->second)
             {
-                ptail_ = i->second->next();
+                ptail_ = i->second->prev();
             }
 
             phead_ = i->second->prepend(phead_);
@@ -217,6 +217,7 @@ LRUStorage::Node* LRUStorage::free_lru()
     if (free_node_data(ptail_))
     {
         pnode = ptail_;
+        ptail_ = ptail_->remove();
     }
 
     return pnode;
@@ -280,7 +281,7 @@ bool LRUStorage::free_node_data(Node* pnode)
 
     NodesPerKey::iterator i = nodes_per_key_.find(*pkey);
 
-    if (i != nodes_per_key_.end())
+    if (i == nodes_per_key_.end())
     {
         MXS_ERROR("Item in LRU list was not found in key mapping.");
     }
diff --git a/server/modules/filter/cache/lrustorage.h b/server/modules/filter/cache/lrustorage.h
index 59bc57e2f..15ef0a9f3 100644
--- a/server/modules/filter/cache/lrustorage.h
+++ b/server/modules/filter/cache/lrustorage.h
@@ -119,8 +119,10 @@ private:
                     pnode->pprev_->pnext_ = this;
                 }
 
-                pnode->pprev_ = this;
+                pprev_ = pnode->pprev_;
                 pnext_ = pnode;
+
+                pnode->pprev_ = this;
             }
 
             return this;

From b87ea735fbd44a4f63094bdba965b1bf54de0bac Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 17:48:07 +0200
Subject: [PATCH 05/42] Cache: Correct examples in documentation

---
 Documentation/Filters/Cache.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/Filters/Cache.md b/Documentation/Filters/Cache.md
index 932d031ff..a65f5c369 100644
--- a/Documentation/Filters/Cache.md
+++ b/Documentation/Filters/Cache.md
@@ -99,7 +99,7 @@ reached and a new item should be stored, then an older item will be evicted.
 Note that if `cached_data` is `thread_specific` then this limit will be
 applied to each cache _separately_.
 ```
-max_size=1000
+max_count=1000
 ```
 The default value is 0, which means no limit.
 
@@ -115,7 +115,7 @@ Note that the value of `max_size` must be at least as large as the value of
 Note that if `cached_data` is `thread_specific` then this limit will be
 applied to each cache _separately_.
 ```
-max_count=10000
+max_size=1000
 ```
 The default value is 0, which means no limit.
 

From 23bb7c4de798dbfea8b2ff416730c204b9e73bf6 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 21:18:14 +0200
Subject: [PATCH 06/42] Cache: Add in memory storage

Storage implementation that simply uses std::unordered_map as
storage structure.
---
 .../storage/storage_inmemory/CMakeLists.txt   |  10 +
 .../storage_inmemory/inmemorystorage.cc       | 195 +++++++++++++++
 .../storage_inmemory/inmemorystorage.h        |  62 +++++
 .../storage_inmemory/inmemorystoragemt.cc     |  60 +++++
 .../storage_inmemory/inmemorystoragemt.h      |  39 +++
 .../storage_inmemory/inmemorystoragest.cc     |  47 ++++
 .../storage_inmemory/inmemorystoragest.h      |  35 +++
 .../storage_inmemory/storage_inmemory.cc      | 233 ++++++++++++++++++
 8 files changed, 681 insertions(+)
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/CMakeLists.txt
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.h
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.h
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.h
 create mode 100644 server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc

diff --git a/server/modules/filter/cache/storage/storage_inmemory/CMakeLists.txt b/server/modules/filter/cache/storage/storage_inmemory/CMakeLists.txt
new file mode 100644
index 000000000..93a39ea58
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_library(storage_inmemory SHARED
+    inmemorystorage.cc
+    inmemorystoragest.cc
+    inmemorystoragemt.cc
+    storage_inmemory.cc
+    )
+target_link_libraries(storage_inmemory cache maxscale-common)
+set_target_properties(storage_inmemory PROPERTIES VERSION "1.0.0")
+set_target_properties(storage_inmemory PROPERTIES LINK_FLAGS -Wl,-z,defs) # -z defs: the link fails if the library has unresolved symbols.
+install_module(storage_inmemory experimental)
diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc
new file mode 100644
index 000000000..874c68f2b
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#define MXS_MODULE_NAME "storage_inmemory"
+#include "inmemorystorage.h"
+#include <openssl/sha.h>
+#include <algorithm>
+#include <set>
+#include <maxscale/alloc.h>
+#include <maxscale/modutil.h>
+#include <maxscale/query_classifier.h>
+
+using std::set;
+using std::string;
+
+
+namespace
+{
+// Number of key bytes get_key() fills in: two SHA512 digests, one after the other.
+const size_t INMEMORY_KEY_LENGTH = 2 * SHA512_DIGEST_LENGTH;
+// NOTE(review): INMEMORY_KEY_LENGTH is a C++ constant, not a macro, so the preprocessor reads it as 0 in the #if below - confirm this check can ever fire.
+#if INMEMORY_KEY_LENGTH > CACHE_KEY_MAXLEN
+#error storage_inmemory key is too long.
+#endif
+
+}
+
+InMemoryStorage::InMemoryStorage(const string& name,
+                                 uint32_t ttl)
+    : name_(name) // Name of this storage instance, as passed in by the creator.
+    , ttl_(ttl)   // Time-to-live; do_get_value() compares it with an entry's age in seconds.
+{
+}
+
+InMemoryStorage::~InMemoryStorage() // Nothing to do explicitly; the members release what they hold.
+{
+}
+
+cache_result_t InMemoryStorage::get_key(const char* zdefault_db, const GWBUF* pquery, CACHE_KEY* pkey)
+{
+    ss_dassert(GWBUF_IS_CONTIGUOUS(pquery));
+    // Builds the key for pquery: SHA512 of the accessed database names, then SHA512 of the SQL. Always returns CACHE_RESULT_OK.
+    int n;
+    bool fullnames = true;
+    char** pztables = qc_get_table_names(const_cast<GWBUF*>(pquery), &n, fullnames);
+    // NOTE(review): n and pztables are used unchecked; presumably qc_get_table_names() always sets n - confirm.
+    set<string> dbs; // Elements in set are sorted.
+
+    for (int i = 0; i < n; ++i)
+    {
+        char *ztable = pztables[i];
+        char *zdot = strchr(ztable, '.');
+        // With a '.' in the name, the part before the dot is taken as the database name.
+        if (zdot)
+        {
+            *zdot = 0;
+            dbs.insert(ztable);
+        }
+        else if (zdefault_db)
+        {
+            // If zdefault_db is NULL, then there will be a table for which we
+            // do not know the database. However, that will fail in the server,
+            // so nothing will be stored.
+            dbs.insert(zdefault_db);
+        }
+        MXS_FREE(ztable);
+    }
+    MXS_FREE(pztables);
+
+    // dbs now contain each accessed database in sorted order. Now copy them to a single string.
+    string tag;
+    for (set<string>::const_iterator i = dbs.begin(); i != dbs.end(); ++i)
+    {
+        tag.append(*i);
+    }
+
+    memset(pkey->data, 0, CACHE_KEY_MAXLEN);
+
+    const unsigned char* pdata;
+
+    // We store the databases in the first half of the key. That will ensure that
+    // identical queries targeting different default databases will not clash.
+    // This will also mean that entries related to the same databases will
+    // be placed near each other.
+    pdata = reinterpret_cast<const unsigned char*>(tag.data());
+    SHA512(pdata, tag.length(), reinterpret_cast<unsigned char*>(pkey->data));
+
+    char *psql;
+    int length;
+    // NOTE(review): the outcome of modutil_extract_SQL() is not checked before psql and length are used - confirm it cannot fail here.
+    modutil_extract_SQL(const_cast<GWBUF*>(pquery), &psql, &length);
+
+    // Then we store the query itself in the second half of the key.
+    pdata = reinterpret_cast<const unsigned char*>(psql);
+    SHA512(pdata, length, reinterpret_cast<unsigned char*>(pkey->data) + SHA512_DIGEST_LENGTH);
+
+    return CACHE_RESULT_OK;
+}
+
+cache_result_t InMemoryStorage::do_get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult)
+{
+    cache_result_t result = CACHE_RESULT_NOT_FOUND;
+    // Looks up key and, on a usable hit, hands a newly allocated copy of the stored bytes to the caller via *ppresult.
+    Entries::iterator i = entries_.find(key);
+
+    if (i != entries_.end())
+    {
+        Entry& entry = i->second;
+
+        uint32_t now = time(NULL);
+        // Stale means more than ttl_ seconds have passed since entry.time, which do_put_value() sets when storing.
+        bool is_stale = (now - entry.time > ttl_);
+        // A stale entry is handed out only if the caller passed CACHE_FLAGS_INCLUDE_STALE.
+        if (!is_stale || ((flags & CACHE_FLAGS_INCLUDE_STALE) != 0))
+        {
+            size_t length = entry.value.size();
+
+            *ppresult = gwbuf_alloc(length);
+            // If the allocation failed, result is left as CACHE_RESULT_NOT_FOUND.
+            if (*ppresult)
+            {
+                memcpy(GWBUF_DATA(*ppresult), entry.value.data(), length);
+
+                if (is_stale)
+                {
+                    result = CACHE_RESULT_STALE;
+                }
+                else
+                {
+                    result = CACHE_RESULT_OK;
+                }
+            }
+        }
+        else
+        {
+            MXS_NOTICE("Cache item is stale, not using.");
+            result = CACHE_RESULT_NOT_FOUND;
+        }
+    }
+    else
+    {
+        result = CACHE_RESULT_NOT_FOUND;
+    }
+
+    return result;
+}
+
+cache_result_t InMemoryStorage::do_put_value(const CACHE_KEY& key, const GWBUF* pvalue)
+{
+    ss_dassert(GWBUF_IS_CONTIGUOUS(pvalue));
+    // Stores a copy of the contiguous buffer under key, replacing any earlier value. Always returns CACHE_RESULT_OK.
+    const uint8_t* pdata = GWBUF_DATA(pvalue);
+    size_t size = GWBUF_LENGTH(pvalue);
+    // operator[] creates a default-constructed entry if key is not stored yet.
+    Entry& entry = entries_[key];
+
+    if (size < entry.value.capacity())
+    {
+        // If the needed value is less than what is currently stored,
+        // we shrink the buffer so as not to waste space.
+        Value value(size);
+        entry.value.swap(value);
+    }
+    else
+    {
+        entry.value.resize(size);
+    }
+
+    std::copy(pdata, pdata + size, entry.value.begin());
+    entry.time = time(NULL);
+
+    return CACHE_RESULT_OK;
+}
+
+// Removes the value stored under key, if there is one.
+// Returns CACHE_RESULT_OK if an entry was erased and CACHE_RESULT_NOT_FOUND
+// if nothing was stored under key.
+cache_result_t InMemoryStorage::do_del_value(const CACHE_KEY& key)
+{
+    // erase(key) reports the number of erased entries, so no iterator has to
+    // be inspected after the erase has invalidated it.
+    const size_t nerased = entries_.erase(key);
+
+    return nerased != 0 ? CACHE_RESULT_OK : CACHE_RESULT_NOT_FOUND;
+}
diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.h b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.h
new file mode 100644
index 000000000..d5bfeb610
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.h
@@ -0,0 +1,62 @@
+#pragma once
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include <maxscale/cdefs.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include <tr1/unordered_map>
+#include "../../cachefilter.h"
+
+class InMemoryStorage
+{
+public:
+    virtual ~InMemoryStorage();
+
+    cache_result_t get_key(const char* zdefault_db, const GWBUF* pquery, CACHE_KEY* pkey);
+
+    virtual cache_result_t get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult) = 0;
+    virtual cache_result_t put_value(const CACHE_KEY& key, const GWBUF* pvalue) = 0;
+    virtual cache_result_t del_value(const CACHE_KEY& key) = 0;
+
+protected:
+    InMemoryStorage(const std::string& name, uint32_t ttl);
+
+    cache_result_t do_get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult);
+    cache_result_t do_put_value(const CACHE_KEY& key, const GWBUF* pvalue);
+    cache_result_t do_del_value(const CACHE_KEY& key);
+
+private:
+    InMemoryStorage(const InMemoryStorage&);
+    InMemoryStorage& operator = (const InMemoryStorage&);
+
+private:
+    typedef std::vector<uint8_t> Value;
+
+    struct Entry
+    {
+        Entry()
+        : time(0)
+        {}
+
+        uint32_t time;
+        Value    value;
+    };
+
+    typedef std::tr1::unordered_map<CACHE_KEY, Entry> Entries;
+
+    std::string name_;
+    uint32_t    ttl_;
+    Entries     entries_;
+};
diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc
new file mode 100644
index 000000000..b5497c27f
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#define MXS_MODULE_NAME "storage_inmemory"
+#include "inmemorystoragemt.h"
+
+InMemoryStorageMT::InMemoryStorageMT(const std::string& name, uint32_t ttl)
+    : InMemoryStorage(name, ttl)
+{
+    spinlock_init(&lock_);
+}
+
+InMemoryStorageMT::~InMemoryStorageMT()
+{
+}
+
+// static
+InMemoryStorageMT* InMemoryStorageMT::create(const std::string& name,
+                                             uint32_t ttl,
+                                             int argc, char* argv[])
+{
+    return new InMemoryStorageMT(name, ttl);
+}
+
+cache_result_t InMemoryStorageMT::get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult)
+{
+    spinlock_acquire(&lock_);
+    cache_result_t result = do_get_value(key, flags, ppresult);
+    spinlock_release(&lock_);
+
+    return result;
+}
+
+cache_result_t InMemoryStorageMT::put_value(const CACHE_KEY& key, const GWBUF* pvalue)
+{
+    spinlock_acquire(&lock_);
+    cache_result_t result = do_put_value(key, pvalue);
+    spinlock_release(&lock_);
+
+    return result;
+}
+
+cache_result_t InMemoryStorageMT::del_value(const CACHE_KEY& key)
+{
+    spinlock_acquire(&lock_);
+    cache_result_t result = do_del_value(key);
+    spinlock_release(&lock_);
+
+    return result;
+}
diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.h b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.h
new file mode 100644
index 000000000..0ad14d36e
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.h
@@ -0,0 +1,39 @@
+#pragma once
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include <maxscale/cdefs.h>
+#include <maxscale/spinlock.h>
+#include "inmemorystorage.h"
+
+class InMemoryStorageMT : public InMemoryStorage
+{
+public:
+    ~InMemoryStorageMT();
+
+    static InMemoryStorageMT* create(const std::string& name, uint32_t ttl, int argc, char* argv[]);
+
+    cache_result_t get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult);
+    cache_result_t put_value(const CACHE_KEY& key, const GWBUF* pvalue);
+    cache_result_t del_value(const CACHE_KEY& key);
+
+private:
+    InMemoryStorageMT(const std::string& name, uint32_t ttl);
+
+private:
+    InMemoryStorageMT(const InMemoryStorageMT&);
+    InMemoryStorageMT& operator = (const InMemoryStorageMT&);
+
+private:
+    SPINLOCK lock_;
+};
diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc
new file mode 100644
index 000000000..0b81dd799
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#define MXS_MODULE_NAME "storage_inmemory"
+#include "inmemorystoragest.h"
+
+InMemoryStorageST::InMemoryStorageST(const std::string& name, uint32_t ttl)
+    : InMemoryStorage(name, ttl)
+{
+}
+
+InMemoryStorageST::~InMemoryStorageST()
+{
+}
+
+// static
+InMemoryStorageST* InMemoryStorageST::create(const std::string& name,
+                                             uint32_t ttl,
+                                             int argc, char* argv[])
+{
+    return new InMemoryStorageST(name, ttl);
+}
+
+cache_result_t InMemoryStorageST::get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult)
+{
+    return do_get_value(key, flags, ppresult);
+}
+
+cache_result_t InMemoryStorageST::put_value(const CACHE_KEY& key, const GWBUF* pvalue)
+{
+    return do_put_value(key, pvalue);
+}
+
+cache_result_t InMemoryStorageST::del_value(const CACHE_KEY& key)
+{
+    return do_del_value(key);
+}
diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.h b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.h
new file mode 100644
index 000000000..4dac01c5d
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.h
@@ -0,0 +1,35 @@
+#pragma once
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include <maxscale/cdefs.h>
+#include "inmemorystorage.h"
+
+class InMemoryStorageST : public InMemoryStorage
+{
+public:
+    ~InMemoryStorageST();
+
+    static InMemoryStorageST* create(const std::string& name, uint32_t ttl, int argc, char* argv[]);
+
+    cache_result_t get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult);
+    cache_result_t put_value(const CACHE_KEY& key, const GWBUF* pvalue);
+    cache_result_t del_value(const CACHE_KEY& key);
+
+private:
+    InMemoryStorageST(const std::string& name, uint32_t ttl);
+
+private:
+    InMemoryStorageST(const InMemoryStorageST&);
+    InMemoryStorageST& operator = (const InMemoryStorageST&);
+};
diff --git a/server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc b/server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc
new file mode 100644
index 000000000..b9cb1cd69
--- /dev/null
+++ b/server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#define MXS_MODULE_NAME "storage_inmemory"
+#include <inttypes.h>
+#include "../../cache_storage_api.h"
+#include "inmemorystoragest.h"
+#include "inmemorystoragemt.h"
+
+namespace
+{
+
+bool initialize(uint32_t* pcapabilities)
+{
+    // Both thread models are offered (see createInstance), so the flags are
+    // combined; a second plain assignment would discard CACHE_STORAGE_CAP_ST.
+    *pcapabilities = CACHE_STORAGE_CAP_ST | CACHE_STORAGE_CAP_MT;
+    return true;
+}
+
+CACHE_STORAGE* createInstance(cache_thread_model_t model,
+                              const char* zname,
+                              uint32_t ttl,
+                              uint32_t max_count,
+                              uint64_t max_size,
+                              int argc, char* argv[])
+{
+    ss_dassert(zname);
+
+    CACHE_STORAGE* pStorage = 0; // Remains 0 if creation throws; returned as-is.
+    // max_count and max_size are only reported here, not applied to the storage.
+    if (max_count != 0)
+    {
+        MXS_WARNING("A maximum item count of %" PRIu32 " specified, although 'storage_inMemory' "
+                    "does not enforce such a limit.", max_count);
+    }
+
+    if (max_size != 0)
+    {
+        MXS_WARNING("A maximum size of %" PRIu64 " specified, although 'storage_inMemory' "
+                    "does not enforce such a limit.", max_size);
+    }
+    // Every exception is caught and logged below, so none leaves this function.
+    try
+    {
+        switch (model)
+        {
+        case CACHE_THREAD_MODEL_ST:
+            pStorage = reinterpret_cast<CACHE_STORAGE*>(InMemoryStorageST::create(zname, ttl, argc, argv));
+            break;
+
+        default:
+            MXS_ERROR("Unknown thread model %d, creating multi-thread aware storage.", (int)model);
+            // Deliberate fall-through: an unknown model gets the MT storage.
+        case CACHE_THREAD_MODEL_MT:
+            pStorage = reinterpret_cast<CACHE_STORAGE*>(InMemoryStorageMT::create(zname, ttl, argc, argv));
+        }
+        MXS_NOTICE("Storage module created.");
+    }
+    catch (const std::bad_alloc&)
+    {
+        MXS_OOM();
+    }
+    catch (const std::exception& x)
+    {
+        MXS_ERROR("Standard exception caught: %s", x.what());
+    }
+    catch (...)
+    {
+        MXS_ERROR("Unknown exception caught.");
+    }
+
+    return pStorage;
+}
+
+void freeInstance(CACHE_STORAGE* pinstance)
+{
+    delete reinterpret_cast<InMemoryStorage*>(pinstance);
+}
+
+cache_result_t getKey(CACHE_STORAGE* pstorage,
+                      const char* zdefault_db,
+                      const GWBUF* pquery,
+                      CACHE_KEY* pkey)
+{
+    ss_dassert(pstorage);
+    // zdefault_db may be NULL.
+    ss_dassert(pquery);
+    ss_dassert(pkey);
+
+    cache_result_t result = CACHE_RESULT_ERROR;
+
+    try
+    {
+        result = reinterpret_cast<InMemoryStorage*>(pstorage)->get_key(zdefault_db, pquery, pkey);
+    }
+    catch (const std::bad_alloc&)
+    {
+        MXS_OOM();
+    }
+    catch (const std::exception& x)
+    {
+        MXS_ERROR("Standard exception caught: %s", x.what());
+    }
+    catch (...)
+    {
+        MXS_ERROR("Unknown exception caught.");
+    }
+
+    return result;
+}
+
+cache_result_t getValue(CACHE_STORAGE* pstorage,
+                        const CACHE_KEY* pkey,
+                        uint32_t flags,
+                        GWBUF** ppresult)
+{
+    ss_dassert(pstorage);
+    ss_dassert(pkey);
+    ss_dassert(ppresult);
+
+    cache_result_t result = CACHE_RESULT_ERROR;
+
+    try
+    {
+        result = reinterpret_cast<InMemoryStorage*>(pstorage)->get_value(*pkey, flags, ppresult);
+    }
+    catch (const std::bad_alloc&)
+    {
+        MXS_OOM();
+    }
+    catch (const std::exception& x)
+    {
+        MXS_ERROR("Standard exception caught: %s", x.what());
+    }
+    catch (...)
+    {
+        MXS_ERROR("Unknown exception caught.");
+    }
+
+    return result;
+}
+
+cache_result_t putValue(CACHE_STORAGE* pstorage,
+                        const CACHE_KEY* pkey,
+                        const GWBUF* pvalue)
+{
+    ss_dassert(pstorage);
+    ss_dassert(pkey);
+    ss_dassert(pvalue);
+
+    cache_result_t result = CACHE_RESULT_ERROR;
+
+    try
+    {
+        result = reinterpret_cast<InMemoryStorage*>(pstorage)->put_value(*pkey, pvalue);
+    }
+    catch (const std::bad_alloc&)
+    {
+        MXS_OOM();
+    }
+    catch (const std::exception& x)
+    {
+        MXS_ERROR("Standard exception caught: %s", x.what());
+    }
+    catch (...)
+    {
+        MXS_ERROR("Unknown exception caught.");
+    }
+
+    return result;
+}
+
+cache_result_t delValue(CACHE_STORAGE* pstorage,
+                        const CACHE_KEY* pkey)
+{
+    ss_dassert(pstorage);
+    ss_dassert(pkey);
+
+    cache_result_t result = CACHE_RESULT_ERROR;
+
+    try
+    {
+        result = reinterpret_cast<InMemoryStorage*>(pstorage)->del_value(*pkey);
+    }
+    catch (const std::bad_alloc&)
+    {
+        MXS_OOM();
+    }
+    catch (const std::exception& x)
+    {
+        MXS_ERROR("Standard exception caught: %s", x.what());
+    }
+    catch (...)
+    {
+        MXS_ERROR("Unknown exception caught.");
+    }
+
+    return result;
+}
+
+}
+
+extern "C"
+{
+
+CACHE_STORAGE_API* CacheGetStorageAPI()
+{
+    static CACHE_STORAGE_API api =
+        {
+            initialize,
+            createInstance,
+            freeInstance,
+            getKey,
+            getValue,
+            putValue,
+            delValue,
+        };
+
+    return &api;
+}
+
+}

From efa8713b175dfaa7142ccb4c422b739e1c8ee224 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 22:07:45 +0200
Subject: [PATCH 07/42] Cache: Include inmemory storage in build

Add logging note to storage_rocksdb at the same time.
---
 server/modules/filter/cache/storage/CMakeLists.txt              | 1 +
 .../filter/cache/storage/storage_rocksdb/storage_rocksdb.cc     | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/server/modules/filter/cache/storage/CMakeLists.txt b/server/modules/filter/cache/storage/CMakeLists.txt
index fbf6d7e57..52287a9f9 100644
--- a/server/modules/filter/cache/storage/CMakeLists.txt
+++ b/server/modules/filter/cache/storage/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(storage_rocksdb)
+add_subdirectory(storage_inmemory)
diff --git a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
index 4de3d6868..8f68de876 100644
--- a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
+++ b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc
@@ -53,6 +53,8 @@ CACHE_STORAGE* createInstance(cache_thread_model_t, // Ignored, RocksDB always M
     try
     {
         pStorage = reinterpret_cast<CACHE_STORAGE*>(RocksDBStorage::Create(zName, ttl, argc, argv));
+
+        MXS_NOTICE("Storage module created.");
     }
     catch (const std::bad_alloc&)
     {

From 2423031df8d78e8301fa4ebed10c67df82c21120 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 22:09:27 +0200
Subject: [PATCH 08/42] Cache: Use lock guard for locking spinlocks

Now the locks will always be released, also in the presence of an
unanticipated exception.
---
 server/modules/filter/cache/cachefilter.h     | 25 +++++++++++++++++++
 server/modules/filter/cache/cachemt.cc        | 12 ++++-----
 server/modules/filter/cache/lrustoragemt.cc   | 18 +++++--------
 .../storage_inmemory/inmemorystoragemt.cc     | 18 +++++--------
 4 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/server/modules/filter/cache/cachefilter.h b/server/modules/filter/cache/cachefilter.h
index bc6ce5e21..2da6bcfaa 100644
--- a/server/modules/filter/cache/cachefilter.h
+++ b/server/modules/filter/cache/cachefilter.h
@@ -106,6 +106,31 @@ struct hash<CACHE_KEY>
 
 }
 
+/**
+ * LockGuard is a RAII class whose constructor acquires a spinlock and
+ * destructor releases the same spinlock. To be used for locking a spinlock
+ * in an exception-safe manner for the duration of a scope.
+ */
+class LockGuard
+{
+public:
+    LockGuard(SPINLOCK* plock)
+        : lock_(*plock)
+    {
+        spinlock_acquire(&lock_);
+    }
+    ~LockGuard()
+    {
+        spinlock_release(&lock_);
+    }
+
+private:
+    LockGuard(const LockGuard&);
+    LockGuard& operator = (const LockGuard&);
+
+    SPINLOCK& lock_;
+};
+
 #define CPP_GUARD(statement)\
     do { try { statement; }                                              \
     catch (const std::exception& x) { MXS_ERROR("Caught standard exception: %s", x.what()); }\
diff --git a/server/modules/filter/cache/cachemt.cc b/server/modules/filter/cache/cachemt.cc
index 53658115b..784b84b2a 100644
--- a/server/modules/filter/cache/cachemt.cc
+++ b/server/modules/filter/cache/cachemt.cc
@@ -69,18 +69,16 @@ CacheMT* CacheMT::Create(const std::string& name, StorageFactory* pFactory, cons
 
 bool CacheMT::must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache)
 {
-    spinlock_acquire(&m_lockPending);
-    bool rv = CacheSimple::do_must_refresh(key, pSessionCache);
-    spinlock_release(&m_lockPending);
+    LockGuard guard(&m_lockPending);
 
-    return rv;
+    return do_must_refresh(key, pSessionCache);
 }
 
 void CacheMT::refreshed(const CACHE_KEY& key,  const SessionCache* pSessionCache)
 {
-    spinlock_acquire(&m_lockPending);
-    CacheSimple::do_refreshed(key, pSessionCache);
-    spinlock_release(&m_lockPending);
+    LockGuard guard(&m_lockPending);
+
+    do_refreshed(key, pSessionCache);
 }
 
 // static
diff --git a/server/modules/filter/cache/lrustoragemt.cc b/server/modules/filter/cache/lrustoragemt.cc
index 5d61c3c68..7a3c5a8fb 100644
--- a/server/modules/filter/cache/lrustoragemt.cc
+++ b/server/modules/filter/cache/lrustoragemt.cc
@@ -39,28 +39,22 @@ cache_result_t LRUStorageMT::get_value(const CACHE_KEY& key,
                                        uint32_t flags,
                                        GWBUF** ppvalue)
 {
-    spinlock_acquire(&lock_);
-    cache_result_t rv =  LRUStorage::do_get_value(key, flags, ppvalue);
-    spinlock_release(&lock_);
+    LockGuard guard(&lock_);
 
-    return rv;
+    return do_get_value(key, flags, ppvalue);
 }
 
 cache_result_t LRUStorageMT::put_value(const CACHE_KEY& key,
                                        const GWBUF* pvalue)
 {
-    spinlock_acquire(&lock_);
-    cache_result_t rv =  LRUStorage::do_put_value(key, pvalue);
-    spinlock_release(&lock_);
+    LockGuard guard(&lock_);
 
-    return rv;
+    return do_put_value(key, pvalue);
 }
 
 cache_result_t LRUStorageMT::del_value(const CACHE_KEY& key)
 {
-    spinlock_acquire(&lock_);
-    cache_result_t rv =  LRUStorage::do_del_value(key);
-    spinlock_release(&lock_);
+    LockGuard guard(&lock_);
 
-    return rv;
+    return do_del_value(key);
 }
diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc
index b5497c27f..17ce84ec7 100644
--- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc
@@ -34,27 +34,21 @@ InMemoryStorageMT* InMemoryStorageMT::create(const std::string& name,
 
 cache_result_t InMemoryStorageMT::get_value(const CACHE_KEY& key, uint32_t flags, GWBUF** ppresult)
 {
-    spinlock_acquire(&lock_);
-    cache_result_t result = do_get_value(key, flags, ppresult);
-    spinlock_release(&lock_);
+    LockGuard guard(&lock_);
 
-    return result;
+    return do_get_value(key, flags, ppresult);
 }
 
 cache_result_t InMemoryStorageMT::put_value(const CACHE_KEY& key, const GWBUF* pvalue)
 {
-    spinlock_acquire(&lock_);
-    cache_result_t result = do_put_value(key, pvalue);
-    spinlock_release(&lock_);
+    LockGuard guard(&lock_);
 
-    return result;
+    return do_put_value(key, pvalue);
 }
 
 cache_result_t InMemoryStorageMT::del_value(const CACHE_KEY& key)
 {
-    spinlock_acquire(&lock_);
-    cache_result_t result = do_del_value(key);
-    spinlock_release(&lock_);
+    LockGuard guard(&lock_);
 
-    return result;
+    return do_del_value(key);
 }

From 2a2c53085799e4846fb07ae54f56ba2bad7bfbcb Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 29 Nov 2016 22:17:38 +0200
Subject: [PATCH 09/42] Cache: Trivial cleanup of inmemory storage

---
 .../storage/storage_inmemory/inmemorystorage.cc    | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc
index 874c68f2b..3844e3768 100644
--- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc
+++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc
@@ -140,17 +140,16 @@ cache_result_t InMemoryStorage::do_get_value(const CACHE_KEY& key, uint32_t flag
                     result = CACHE_RESULT_OK;
                 }
             }
+            else
+            {
+                result = CACHE_RESULT_OUT_OF_RESOURCES;
+            }
         }
         else
         {
             MXS_NOTICE("Cache item is stale, not using.");
-            result = CACHE_RESULT_NOT_FOUND;
         }
     }
-    else
-    {
-        result = CACHE_RESULT_NOT_FOUND;
-    }
 
     return result;
 }
@@ -159,7 +158,6 @@ cache_result_t InMemoryStorage::do_put_value(const CACHE_KEY& key, const GWBUF*
 {
     ss_dassert(GWBUF_IS_CONTIGUOUS(pvalue));
 
-    const uint8_t* pdata = GWBUF_DATA(pvalue);
     size_t size = GWBUF_LENGTH(pvalue);
 
     Entry& entry = entries_[key];
@@ -176,7 +174,9 @@ cache_result_t InMemoryStorage::do_put_value(const CACHE_KEY& key, const GWBUF*
         entry.value.resize(size);
     }
 
-    copy(GWBUF_DATA(pvalue), GWBUF_DATA(pvalue) + size, entry.value.begin());
+    const uint8_t* pdata = GWBUF_DATA(pvalue);
+
+    copy(pdata, pdata + size, entry.value.begin());
     entry.time = time(NULL);
 
     return CACHE_RESULT_OK;

From 2e4ac55aa498a9f18814df92a6a99d34d8123b22 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Wed, 30 Nov 2016 09:09:08 +0200
Subject: [PATCH 10/42] cache: Add CacheRules class

Easier to use in a C++ context than the C API
---
 server/modules/filter/cache/rules.cc | 110 +++++++++++-------------
 server/modules/filter/cache/rules.h  | 122 +++++++++++++++++++++++++++
 2 files changed, 171 insertions(+), 61 deletions(-)

diff --git a/server/modules/filter/cache/rules.cc b/server/modules/filter/cache/rules.cc
index 26d6ee95a..473249043 100644
--- a/server/modules/filter/cache/rules.cc
+++ b/server/modules/filter/cache/rules.cc
@@ -156,13 +156,6 @@ static mysql_account_kind_t mysql_to_pcre(char *pcre, const char *mysql, pcre_qu
  * API begin
  */
 
-/**
- * Returns a string representation of a attribute.
- *
- * @param attribute An attribute type.
- *
- * @return Corresponding string, not to be freed.
- */
 const char *cache_rule_attribute_to_string(cache_rule_attribute_t attribute)
 {
     switch (attribute)
@@ -188,13 +181,6 @@ const char *cache_rule_attribute_to_string(cache_rule_attribute_t attribute)
     }
 }
 
-/**
- * Returns a string representation of an operator.
- *
- * @param op An operator.
- *
- * @return Corresponding string, not to be freed.
- */
 const char *cache_rule_op_to_string(cache_rule_op_t op)
 {
     switch (op)
@@ -217,13 +203,6 @@ const char *cache_rule_op_to_string(cache_rule_op_t op)
     }
 }
 
-/**
- * Create a default cache rules object.
- *
- * @param debug The debug level.
- *
- * @return The rules object or NULL is allocation fails.
- */
 CACHE_RULES *cache_rules_create(uint32_t debug)
 {
     CACHE_RULES *rules = (CACHE_RULES*)MXS_CALLOC(1, sizeof(CACHE_RULES));
@@ -236,14 +215,6 @@ CACHE_RULES *cache_rules_create(uint32_t debug)
     return rules;
 }
 
-/**
- * Loads the caching rules from a file and returns corresponding object.
- *
- * @param path  The path of the file containing the rules.
- * @param debug The debug level.
- *
- * @return The corresponding rules object, or NULL in case of error.
- */
 CACHE_RULES *cache_rules_load(const char *path, uint32_t debug)
 {
     CACHE_RULES *rules = NULL;
@@ -279,14 +250,6 @@ CACHE_RULES *cache_rules_load(const char *path, uint32_t debug)
     return rules;
 }
 
-/**
- * Parses the caching rules from a string and returns corresponding object.
- *
- * @param json  String containing json.
- * @param debug The debug level.
- *
- * @return The corresponding rules object, or NULL in case of error.
- */
 CACHE_RULES *cache_rules_parse(const char *json, uint32_t debug)
 {
     CACHE_RULES *rules = NULL;
@@ -308,13 +271,6 @@ CACHE_RULES *cache_rules_parse(const char *json, uint32_t debug)
     return rules;
 }
 
-/**
- * Frees the rules object.
- *
- * @param path The path of the file containing the rules.
- *
- * @return The corresponding rules object, or NULL in case of error.
- */
 void cache_rules_free(CACHE_RULES *rules)
 {
     if (rules)
@@ -325,15 +281,6 @@ void cache_rules_free(CACHE_RULES *rules)
     }
 }
 
-/**
- * Returns boolean indicating whether the result of the query should be stored.
- *
- * @param self       The CACHE_RULES object.
- * @param default_db The current default database, NULL if there is none.
- * @param query      The query, expected to contain a COM_QUERY.
- *
- * @return True, if the results should be stored.
- */
 bool cache_rules_should_store(CACHE_RULES *self, const char *default_db, const GWBUF* query)
 {
     bool should_store = false;
@@ -356,14 +303,6 @@ bool cache_rules_should_store(CACHE_RULES *self, const char *default_db, const G
     return should_store;
 }
 
-/**
- * Returns boolean indicating whether the cache should be used, that is consulted.
- *
- * @param self     The CACHE_RULES object.
- * @param session  The current session.
- *
- * @return True, if the cache should be used.
- */
 bool cache_rules_should_use(CACHE_RULES *self, const SESSION *session)
 {
     bool should_use = false;
@@ -401,6 +340,55 @@ bool cache_rules_should_use(CACHE_RULES *self, const SESSION *session)
     return should_use;
 }
 
+
+CacheRules::CacheRules(CACHE_RULES* prules)
+    : prules_(prules)
+{
+}
+
+CacheRules::~CacheRules()
+{
+    cache_rules_free(prules_);
+}
+
+CacheRules* CacheRules::create(uint32_t debug)
+{
+    CacheRules* pthis = NULL;
+
+    CACHE_RULES* prules = cache_rules_create(debug);
+
+    if (prules)
+    {
+        pthis = new (std::nothrow) CacheRules(prules);
+    }
+
+    return pthis;
+}
+
+CacheRules* CacheRules::load(const char *zpath, uint32_t debug)
+{
+    CacheRules* pthis = NULL;
+
+    CACHE_RULES* prules = cache_rules_load(zpath, debug);
+
+    if (prules)
+    {
+        pthis = new (std::nothrow) CacheRules(prules);
+    }
+
+    return pthis;
+}
+
+bool CacheRules::should_store(const char* zdefault_db, const GWBUF* pquery) const
+{
+    return cache_rules_should_store(prules_, zdefault_db, pquery);
+}
+
+bool CacheRules::should_use(const SESSION* psession) const
+{
+    return cache_rules_should_use(prules_, psession);
+}
+
 /*
  * API end
  */
diff --git a/server/modules/filter/cache/rules.h b/server/modules/filter/cache/rules.h
index 703c24d91..13bcaf55c 100644
--- a/server/modules/filter/cache/rules.h
+++ b/server/modules/filter/cache/rules.h
@@ -68,18 +68,140 @@ typedef struct cache_rules
     CACHE_RULE *use_rules;    // The rules for when to use data from the cache.
 } CACHE_RULES;
 
+/**
+ * Returns a string representation of an attribute.
+ *
+ * @param attribute An attribute type.
+ *
+ * @return Corresponding string, not to be freed.
+ */
 const char *cache_rule_attribute_to_string(cache_rule_attribute_t attribute);
+
+/**
+ * Returns a string representation of an operator.
+ *
+ * @param op An operator.
+ *
+ * @return Corresponding string, not to be freed.
+ */
 const char *cache_rule_op_to_string(cache_rule_op_t op);
 
+/**
+ * Create a default cache rules object.
+ *
+ * @param debug The debug level.
+ *
+ * @return The rules object or NULL if allocation fails.
+ */
 CACHE_RULES *cache_rules_create(uint32_t debug);
+
+/**
+ * Frees the rules object.
+ *
+ * @param rules The rules object to free; NULL is accepted and ignored.
+ *
+ * The pointer must not be used after the call.
+ */
 void cache_rules_free(CACHE_RULES *rules);
 
+/**
+ * Loads the caching rules from a file and returns corresponding object.
+ *
+ * @param path  The path of the file containing the rules.
+ * @param debug The debug level.
+ *
+ * @return The corresponding rules object, or NULL in case of error.
+ */
 CACHE_RULES *cache_rules_load(const char *path, uint32_t debug);
+
+/**
+ * Parses the caching rules from a string and returns corresponding object.
+ *
+ * @param json  String containing json.
+ * @param debug The debug level.
+ *
+ * @return The corresponding rules object, or NULL in case of error.
+ */
 CACHE_RULES *cache_rules_parse(const char *json, uint32_t debug);
 
+/**
+ * Returns boolean indicating whether the result of the query should be stored.
+ *
+ * @param rules      The CACHE_RULES object.
+ * @param default_db The current default database, NULL if there is none.
+ * @param query      The query, expected to contain a COM_QUERY.
+ *
+ * @return True, if the results should be stored.
+ */
 bool cache_rules_should_store(CACHE_RULES *rules, const char *default_db, const GWBUF* query);
+
+/**
+ * Returns boolean indicating whether the cache should be used, that is consulted.
+ *
+ * @param rules    The CACHE_RULES object.
+ * @param session  The current session.
+ *
+ * @return True, if the cache should be used.
+ */
 bool cache_rules_should_use(CACHE_RULES *rules, const SESSION *session);
 
 MXS_END_DECLS
 
+#if defined(__cplusplus)
+
+class CacheRules
+{
+public:
+    ~CacheRules();
+
+    /**
+     * Creates an empty rules object.
+     *
+     * @param debug The debug level.
+     *
+     * @return An empty rules object, or NULL in case of error.
+     */
+    CacheRules* create(uint32_t debug);
+
+    /**
+     * Loads the caching rules from a file and returns corresponding object.
+     *
+     * @param path  The path of the file containing the rules.
+     * @param debug The debug level.
+     *
+     * @return The corresponding rules object, or NULL in case of error.
+     */
+    CacheRules* load(const char *zpath, uint32_t debug);
+
+   /**
+    * Returns boolean indicating whether the result of the query should be stored.
+    *
+    * @param zdefault_db The current default database, NULL if there is none.
+    * @param pquery      The query, expected to contain a COM_QUERY.
+    *
+    * @return True, if the results should be stored.
+    */
+    bool should_store(const char* zdefault_db, const GWBUF* pquery) const;
+
+   /**
+    * Returns boolean indicating whether the cache should be used, that is consulted.
+    *
+    * @param psession  The current session.
+    *
+    * @return True, if the cache should be used.
+    */
+    bool should_use(const SESSION* psession) const;
+
+private:
+    CacheRules(CACHE_RULES* prules);
+
+    CacheRules(const CacheRules&);
+    CacheRules& operator = (const CacheRules&);
+
+private:
+    CACHE_RULES* prules_;
+};
+
+#endif
+
 #endif

From d52145054f8d046dfb9dde5f4dadaa7c71ae7451 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Wed, 30 Nov 2016 09:27:30 +0200
Subject: [PATCH 11/42] Cache: Now use CacheRules instead of CACHE_RULES

---
 server/modules/filter/cache/cache.cc       | 26 +++++++++++-----------
 server/modules/filter/cache/cache.h        |  8 +++----
 server/modules/filter/cache/cachemt.cc     | 16 ++++++-------
 server/modules/filter/cache/cachemt.h      |  4 ++--
 server/modules/filter/cache/cachept.cc     | 10 ++++-----
 server/modules/filter/cache/cachept.h      |  4 ++--
 server/modules/filter/cache/cachesimple.cc | 10 ++++-----
 server/modules/filter/cache/cachesimple.h  |  6 ++---
 server/modules/filter/cache/cachest.cc     | 16 ++++++-------
 server/modules/filter/cache/cachest.h      |  4 ++--
 server/modules/filter/cache/rules.cc       |  2 ++
 server/modules/filter/cache/rules.h        |  4 ++--
 12 files changed, 56 insertions(+), 54 deletions(-)

diff --git a/server/modules/filter/cache/cache.cc b/server/modules/filter/cache/cache.cc
index 3ea4e4fd7..5c6ec8f43 100644
--- a/server/modules/filter/cache/cache.cc
+++ b/server/modules/filter/cache/cache.cc
@@ -19,10 +19,10 @@
 #include "storagefactory.h"
 #include "storage.h"
 
-Cache::Cache(const std::string& name,
+Cache::Cache(const std::string&  name,
              const CACHE_CONFIG* pConfig,
-             CACHE_RULES* pRules,
-             StorageFactory* pFactory)
+             CacheRules*         pRules,
+             StorageFactory*     pFactory)
     : m_name(name)
     , m_config(*pConfig)
     , m_pRules(pRules)
@@ -32,23 +32,23 @@ Cache::Cache(const std::string& name,
 
 Cache::~Cache()
 {
-    cache_rules_free(m_pRules);
+    delete m_pRules;
     delete m_pFactory;
 }
 
 //static
 bool Cache::Create(const CACHE_CONFIG& config,
-                   CACHE_RULES**       ppRules)
+                   CacheRules**        ppRules)
 {
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
 
     if (config.rules)
     {
-        pRules = cache_rules_load(config.rules, config.debug);
+        pRules = CacheRules::load(config.rules, config.debug);
     }
     else
     {
-        pRules = cache_rules_create(config.debug);
+        pRules = CacheRules::create(config.debug);
     }
 
     if (pRules)
@@ -65,10 +65,10 @@ bool Cache::Create(const CACHE_CONFIG& config,
 
 //static
 bool Cache::Create(const CACHE_CONFIG& config,
-                   CACHE_RULES**       ppRules,
+                   CacheRules**        ppRules,
                    StorageFactory**    ppFactory)
 {
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
     StorageFactory* pFactory = NULL;
 
     if (Create(config, &pRules))
@@ -83,7 +83,7 @@ bool Cache::Create(const CACHE_CONFIG& config,
         else
         {
             MXS_ERROR("Could not open storage factory '%s'.", config.storage);
-            cache_rules_free(pRules);
+            delete pRules;
         }
     }
 
@@ -92,10 +92,10 @@ bool Cache::Create(const CACHE_CONFIG& config,
 
 bool Cache::should_store(const char* zDefaultDb, const GWBUF* pQuery)
 {
-    return cache_rules_should_store(m_pRules, zDefaultDb, pQuery);
+    return m_pRules->should_store(zDefaultDb, pQuery);
 }
 
 bool Cache::should_use(const SESSION* pSession)
 {
-    return cache_rules_should_use(m_pRules, pSession);
+    return m_pRules->should_use(pSession);
 }
diff --git a/server/modules/filter/cache/cache.h b/server/modules/filter/cache/cache.h
index d8e857b27..77f22d253 100644
--- a/server/modules/filter/cache/cache.h
+++ b/server/modules/filter/cache/cache.h
@@ -77,14 +77,14 @@ public:
 protected:
     Cache(const std::string&  name,
           const CACHE_CONFIG* pConfig,
-          CACHE_RULES*        pRules,
+          CacheRules*         pRules,
           StorageFactory*     pFactory);
 
     static bool Create(const CACHE_CONFIG& config,
-                       CACHE_RULES**       ppRules);
+                       CacheRules**        ppRules);
 
     static bool Create(const CACHE_CONFIG& config,
-                       CACHE_RULES**       ppRules,
+                       CacheRules**        ppRules,
                        StorageFactory**    ppFactory);
 
 private:
@@ -94,6 +94,6 @@ private:
 protected:
     const std::string   m_name;     // The name of the instance; the section name in the config.
     const CACHE_CONFIG& m_config;   // The configuration of the cache instance.
-    CACHE_RULES*        m_pRules;   // The rules of the cache instance.
+    CacheRules*         m_pRules;   // The rules of the cache instance.
     StorageFactory*     m_pFactory; // The storage factory.
 };
diff --git a/server/modules/filter/cache/cachemt.cc b/server/modules/filter/cache/cachemt.cc
index 784b84b2a..c93390119 100644
--- a/server/modules/filter/cache/cachemt.cc
+++ b/server/modules/filter/cache/cachemt.cc
@@ -16,11 +16,11 @@
 #include "storage.h"
 #include "storagefactory.h"
 
-CacheMT::CacheMT(const std::string& name,
+CacheMT::CacheMT(const std::string&  name,
                  const CACHE_CONFIG* pConfig,
-                 CACHE_RULES* pRules,
-                 StorageFactory* pFactory,
-                 Storage* pStorage)
+                 CacheRules*         pRules,
+                 StorageFactory*     pFactory,
+                 Storage*            pStorage)
     : CacheSimple(name, pConfig, pRules, pFactory, pStorage)
 {
     spinlock_init(&m_lockPending);
@@ -38,7 +38,7 @@ CacheMT* CacheMT::Create(const std::string& name, const CACHE_CONFIG* pConfig)
 
     CacheMT* pCache = NULL;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
     StorageFactory* pFactory = NULL;
 
     if (CacheSimple::Create(*pConfig, &pRules, &pFactory))
@@ -57,7 +57,7 @@ CacheMT* CacheMT::Create(const std::string& name, StorageFactory* pFactory, cons
 
     CacheMT* pCache = NULL;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
 
     if (CacheSimple::Create(*pConfig, &pRules))
     {
@@ -84,7 +84,7 @@ void CacheMT::refreshed(const CACHE_KEY& key,  const SessionCache* pSessionCache
 // static
 CacheMT* CacheMT::Create(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CACHE_RULES*        pRules,
+                         CacheRules*         pRules,
                          StorageFactory*     pFactory)
 {
     CacheMT* pCache = NULL;
@@ -111,7 +111,7 @@ CacheMT* CacheMT::Create(const std::string&  name,
         if (!pCache)
         {
             delete pStorage;
-            cache_rules_free(pRules);
+            delete pRules;
             delete pFactory;
         }
     }
diff --git a/server/modules/filter/cache/cachemt.h b/server/modules/filter/cache/cachemt.h
index edf01d254..78abb2392 100644
--- a/server/modules/filter/cache/cachemt.h
+++ b/server/modules/filter/cache/cachemt.h
@@ -31,13 +31,13 @@ public:
 private:
     CacheMT(const std::string&  name,
             const CACHE_CONFIG* pConfig,
-            CACHE_RULES*        pRules,
+            CacheRules*         pRules,
             StorageFactory*     pFactory,
             Storage*            pStorage);
 
     static CacheMT* Create(const std::string&  name,
                            const CACHE_CONFIG* pConfig,
-                           CACHE_RULES*        pRules,
+                           CacheRules*         pRules,
                            StorageFactory*     pFactory);
 
 private:
diff --git a/server/modules/filter/cache/cachept.cc b/server/modules/filter/cache/cachept.cc
index 38884391a..a0b56ff13 100644
--- a/server/modules/filter/cache/cachept.cc
+++ b/server/modules/filter/cache/cachept.cc
@@ -47,7 +47,7 @@ inline int thread_index()
 
 CachePT::CachePT(const std::string&  name,
                  const CACHE_CONFIG* pConfig,
-                 CACHE_RULES*        pRules,
+                 CacheRules*         pRules,
                  StorageFactory*     pFactory,
                  const Caches&       caches)
     : Cache(name, pConfig, pRules, pFactory)
@@ -67,7 +67,7 @@ CachePT* CachePT::Create(const std::string& name, const CACHE_CONFIG* pConfig)
 
     CachePT* pCache = NULL;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
     StorageFactory* pFactory = NULL;
 
     if (Cache::Create(*pConfig, &pRules, &pFactory))
@@ -87,7 +87,7 @@ CachePT* CachePT::Create(const std::string& name,
 
     CachePT* pCache = NULL;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
 
     if (Cache::Create(*pConfig, &pRules))
     {
@@ -130,7 +130,7 @@ cache_result_t CachePT::del_value(const CACHE_KEY& key)
 // static
 CachePT* CachePT::Create(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CACHE_RULES*        pRules,
+                         CacheRules*         pRules,
                          StorageFactory*     pFactory)
 {
     CachePT* pCache = NULL;
@@ -176,7 +176,7 @@ CachePT* CachePT::Create(const std::string&  name,
     }
     catch (const std::exception&)
     {
-        cache_rules_free(pRules);
+        delete pRules;
         delete pFactory;
     }
 
diff --git a/server/modules/filter/cache/cachept.h b/server/modules/filter/cache/cachept.h
index 108345e9a..214cb46a9 100644
--- a/server/modules/filter/cache/cachept.h
+++ b/server/modules/filter/cache/cachept.h
@@ -43,13 +43,13 @@ private:
 
     CachePT(const std::string&  name,
             const CACHE_CONFIG* pConfig,
-            CACHE_RULES*        pRules,
+            CacheRules*         pRules,
             StorageFactory*     pFactory,
             const Caches&       caches);
 
     static CachePT* Create(const std::string&  name,
                            const CACHE_CONFIG* pConfig,
-                           CACHE_RULES*        pRules,
+                           CacheRules*         pRules,
                            StorageFactory*     pFactory);
 
     Cache& thread_cache();
diff --git a/server/modules/filter/cache/cachesimple.cc b/server/modules/filter/cache/cachesimple.cc
index fbe33a0b9..e8f0e1979 100644
--- a/server/modules/filter/cache/cachesimple.cc
+++ b/server/modules/filter/cache/cachesimple.cc
@@ -18,7 +18,7 @@
 
 CacheSimple::CacheSimple(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CACHE_RULES*        pRules,
+                         CacheRules*         pRules,
                          StorageFactory*     pFactory,
                          Storage*            pStorage)
     : Cache(name, pConfig, pRules, pFactory)
@@ -34,11 +34,11 @@ CacheSimple::~CacheSimple()
 
 // static
 bool CacheSimple::Create(const CACHE_CONFIG& config,
-                         CACHE_RULES**       ppRules)
+                         CacheRules**        ppRules)
 {
     int rv = false;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
 
     if (Cache::Create(config, &pRules))
     {
@@ -50,12 +50,12 @@ bool CacheSimple::Create(const CACHE_CONFIG& config,
 
 // static
 bool CacheSimple::Create(const CACHE_CONFIG& config,
-                         CACHE_RULES**       ppRules,
+                         CacheRules**        ppRules,
                          StorageFactory**    ppFactory)
 {
     int rv = false;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
     StorageFactory* pFactory = NULL;
 
     if (Cache::Create(config, &pRules, &pFactory))
diff --git a/server/modules/filter/cache/cachesimple.h b/server/modules/filter/cache/cachesimple.h
index 67ee58765..4677bf617 100644
--- a/server/modules/filter/cache/cachesimple.h
+++ b/server/modules/filter/cache/cachesimple.h
@@ -35,15 +35,15 @@ public:
 protected:
     CacheSimple(const std::string&  name,
                 const CACHE_CONFIG* pConfig,
-                CACHE_RULES*        pRules,
+                CacheRules*         pRules,
                 StorageFactory*     pFactory,
                 Storage*            pStorage);
 
     static bool Create(const CACHE_CONFIG& config,
-                       CACHE_RULES**       ppRules);
+                       CacheRules**        ppRules);
 
     static bool Create(const CACHE_CONFIG& config,
-                       CACHE_RULES**       ppRules,
+                       CacheRules**        ppRules,
                        StorageFactory**    ppFactory);
 
 
diff --git a/server/modules/filter/cache/cachest.cc b/server/modules/filter/cache/cachest.cc
index 6591c5b50..ce64fa65d 100644
--- a/server/modules/filter/cache/cachest.cc
+++ b/server/modules/filter/cache/cachest.cc
@@ -16,11 +16,11 @@
 #include "storage.h"
 #include "storagefactory.h"
 
-CacheST::CacheST(const std::string& name,
+CacheST::CacheST(const std::string&  name,
                  const CACHE_CONFIG* pConfig,
-                 CACHE_RULES* pRules,
-                 StorageFactory* pFactory,
-                 Storage* pStorage)
+                 CacheRules*         pRules,
+                 StorageFactory*     pFactory,
+                 Storage*            pStorage)
     : CacheSimple(name, pConfig, pRules, pFactory, pStorage)
 {
     MXS_NOTICE("Created single threaded cache.");
@@ -36,7 +36,7 @@ CacheST* CacheST::Create(const std::string& name, const CACHE_CONFIG* pConfig)
 
     CacheST* pCache = NULL;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
     StorageFactory* pFactory = NULL;
 
     if (CacheSimple::Create(*pConfig, &pRules, &pFactory))
@@ -55,7 +55,7 @@ CacheST* CacheST::Create(const std::string& name, StorageFactory* pFactory, cons
 
     CacheST* pCache = NULL;
 
-    CACHE_RULES* pRules = NULL;
+    CacheRules* pRules = NULL;
 
     if (CacheSimple::Create(*pConfig, &pRules))
     {
@@ -78,7 +78,7 @@ void CacheST::refreshed(const CACHE_KEY& key,  const SessionCache* pSessionCache
 // static
 CacheST* CacheST::Create(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CACHE_RULES*        pRules,
+                         CacheRules*         pRules,
                          StorageFactory*     pFactory)
 {
     CacheST* pCache = NULL;
@@ -105,7 +105,7 @@ CacheST* CacheST::Create(const std::string&  name,
         if (!pCache)
         {
             delete pStorage;
-            cache_rules_free(pRules);
+            delete pRules;
             delete pFactory;
         }
     }
diff --git a/server/modules/filter/cache/cachest.h b/server/modules/filter/cache/cachest.h
index 62903d85f..5e149408a 100644
--- a/server/modules/filter/cache/cachest.h
+++ b/server/modules/filter/cache/cachest.h
@@ -30,13 +30,13 @@ public:
 private:
     CacheST(const std::string&  name,
             const CACHE_CONFIG* pConfig,
-            CACHE_RULES*        pRules,
+            CacheRules*         pRules,
             StorageFactory*     pFactory,
             Storage*            pStorage);
 
     static CacheST* Create(const std::string&  name,
                            const CACHE_CONFIG* pConfig,
-                           CACHE_RULES*        pRules,
+                           CacheRules*         pRules,
                            StorageFactory*     pFactory);
 private:
     CacheST(const CacheST&);
diff --git a/server/modules/filter/cache/rules.cc b/server/modules/filter/cache/rules.cc
index 473249043..87db8f0e5 100644
--- a/server/modules/filter/cache/rules.cc
+++ b/server/modules/filter/cache/rules.cc
@@ -351,6 +351,7 @@ CacheRules::~CacheRules()
     cache_rules_free(prules_);
 }
 
+// static
 CacheRules* CacheRules::create(uint32_t debug)
 {
     CacheRules* pthis = NULL;
@@ -365,6 +366,7 @@ CacheRules* CacheRules::create(uint32_t debug)
     return pthis;
 }
 
+// static
 CacheRules* CacheRules::load(const char *zpath, uint32_t debug)
 {
     CacheRules* pthis = NULL;
diff --git a/server/modules/filter/cache/rules.h b/server/modules/filter/cache/rules.h
index 13bcaf55c..085c22f57 100644
--- a/server/modules/filter/cache/rules.h
+++ b/server/modules/filter/cache/rules.h
@@ -161,7 +161,7 @@ public:
      *
      * @return An empty rules object, or NULL in case of error.
      */
-    CacheRules* create(uint32_t debug);
+    static CacheRules* create(uint32_t debug);
 
     /**
      * Loads the caching rules from a file and returns corresponding object.
@@ -171,7 +171,7 @@ public:
      *
      * @return The corresponding rules object, or NULL in case of error.
      */
-    CacheRules* load(const char *zpath, uint32_t debug);
+    static CacheRules* load(const char *zpath, uint32_t debug);
 
    /**
     * Returns boolean indicating whether the result of the query should be stored.

From 93103fd64a60c2fccb4c4715526bc442c5b79b0e Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Wed, 30 Nov 2016 09:47:12 +0200
Subject: [PATCH 12/42] Cache: shared_ptr used with CacheRules and
 StorageFactory

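std::tr1::shared_ptr is now used for managing the lifetime of CacheRules
and StorageFactory instances, so the explicit deletes in the Cache
destructor and in the Create() failure paths are no longer needed.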
---
 server/modules/filter/cache/cache.cc       | 14 ++++-----
 server/modules/filter/cache/cache.h        | 12 +++++---
 server/modules/filter/cache/cachemt.cc     | 33 +++++++++++++---------
 server/modules/filter/cache/cachemt.h      | 10 +++----
 server/modules/filter/cache/cachept.cc     | 29 ++++++++++---------
 server/modules/filter/cache/cachept.h      | 10 +++----
 server/modules/filter/cache/cachesimple.cc |  6 ++--
 server/modules/filter/cache/cachesimple.h  |  4 +--
 server/modules/filter/cache/cachest.cc     | 33 +++++++++++++---------
 server/modules/filter/cache/cachest.h      | 10 +++----
 10 files changed, 88 insertions(+), 73 deletions(-)

diff --git a/server/modules/filter/cache/cache.cc b/server/modules/filter/cache/cache.cc
index 5c6ec8f43..9d865d2c4 100644
--- a/server/modules/filter/cache/cache.cc
+++ b/server/modules/filter/cache/cache.cc
@@ -21,19 +21,17 @@
 
 Cache::Cache(const std::string&  name,
              const CACHE_CONFIG* pConfig,
-             CacheRules*         pRules,
-             StorageFactory*     pFactory)
+             SCacheRules         sRules,
+             SStorageFactory     sFactory)
     : m_name(name)
     , m_config(*pConfig)
-    , m_pRules(pRules)
-    , m_pFactory(pFactory)
+    , m_sRules(sRules)
+    , m_sFactory(sFactory)
 {
 }
 
 Cache::~Cache()
 {
-    delete m_pRules;
-    delete m_pFactory;
 }
 
 //static
@@ -92,10 +90,10 @@ bool Cache::Create(const CACHE_CONFIG& config,
 
 bool Cache::should_store(const char* zDefaultDb, const GWBUF* pQuery)
 {
-    return m_pRules->should_store(zDefaultDb, pQuery);
+    return m_sRules->should_store(zDefaultDb, pQuery);
 }
 
 bool Cache::should_use(const SESSION* pSession)
 {
-    return m_pRules->should_use(pSession);
+    return m_sRules->should_use(pSession);
 }
diff --git a/server/modules/filter/cache/cache.h b/server/modules/filter/cache/cache.h
index 77f22d253..d6950bb1a 100644
--- a/server/modules/filter/cache/cache.h
+++ b/server/modules/filter/cache/cache.h
@@ -14,6 +14,7 @@
 
 #include <maxscale/cdefs.h>
 #include <tr1/functional>
+#include <tr1/memory>
 #include <string>
 #include <maxscale/buffer.h>
 #include <maxscale/session.h>
@@ -25,6 +26,9 @@ class SessionCache;
 class Cache
 {
 public:
+    typedef std::tr1::shared_ptr<CacheRules> SCacheRules;
+    typedef std::tr1::shared_ptr<StorageFactory> SStorageFactory;
+
     virtual ~Cache();
 
     const CACHE_CONFIG& config() const { return m_config; }
@@ -77,8 +81,8 @@ public:
 protected:
     Cache(const std::string&  name,
           const CACHE_CONFIG* pConfig,
-          CacheRules*         pRules,
-          StorageFactory*     pFactory);
+          SCacheRules         sRules,
+          SStorageFactory     sFactory);
 
     static bool Create(const CACHE_CONFIG& config,
                        CacheRules**        ppRules);
@@ -94,6 +98,6 @@ private:
 protected:
     const std::string   m_name;     // The name of the instance; the section name in the config.
     const CACHE_CONFIG& m_config;   // The configuration of the cache instance.
-    CacheRules*         m_pRules;   // The rules of the cache instance.
-    StorageFactory*     m_pFactory; // The storage factory.
+    SCacheRules         m_sRules;   // The rules of the cache instance.
+    SStorageFactory     m_sFactory; // The storage factory.
 };
diff --git a/server/modules/filter/cache/cachemt.cc b/server/modules/filter/cache/cachemt.cc
index c93390119..e399346c4 100644
--- a/server/modules/filter/cache/cachemt.cc
+++ b/server/modules/filter/cache/cachemt.cc
@@ -16,12 +16,14 @@
 #include "storage.h"
 #include "storagefactory.h"
 
+using std::tr1::shared_ptr;
+
 CacheMT::CacheMT(const std::string&  name,
                  const CACHE_CONFIG* pConfig,
-                 CacheRules*         pRules,
-                 StorageFactory*     pFactory,
+                 SCacheRules         sRules,
+                 SStorageFactory     sFactory,
                  Storage*            pStorage)
-    : CacheSimple(name, pConfig, pRules, pFactory, pStorage)
+    : CacheSimple(name, pConfig, sRules, sFactory, pStorage)
 {
     spinlock_init(&m_lockPending);
 
@@ -43,17 +45,20 @@ CacheMT* CacheMT::Create(const std::string& name, const CACHE_CONFIG* pConfig)
 
     if (CacheSimple::Create(*pConfig, &pRules, &pFactory))
     {
-        pCache = Create(name, pConfig, pRules, pFactory);
+        shared_ptr<CacheRules> sRules(pRules);
+        shared_ptr<StorageFactory> sFactory(pFactory);
+
+        pCache = Create(name, pConfig, sRules, sFactory);
     }
 
     return pCache;
 }
 
 // static
-CacheMT* CacheMT::Create(const std::string& name, StorageFactory* pFactory, const CACHE_CONFIG* pConfig)
+CacheMT* CacheMT::Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig)
 {
     ss_dassert(pConfig);
-    ss_dassert(pFactory);
+    ss_dassert(sFactory.get());
 
     CacheMT* pCache = NULL;
 
@@ -61,7 +66,9 @@ CacheMT* CacheMT::Create(const std::string& name, StorageFactory* pFactory, cons
 
     if (CacheSimple::Create(*pConfig, &pRules))
     {
-        pCache = Create(name, pConfig, pRules, pFactory);
+        shared_ptr<CacheRules> sRules(pRules);
+
+        pCache = Create(name, pConfig, sRules, sFactory);
     }
 
     return pCache;
@@ -84,8 +91,8 @@ void CacheMT::refreshed(const CACHE_KEY& key,  const SessionCache* pSessionCache
 // static
 CacheMT* CacheMT::Create(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CacheRules*         pRules,
-                         StorageFactory*     pFactory)
+                         SCacheRules         sRules,
+                         SStorageFactory     sFactory)
 {
     CacheMT* pCache = NULL;
 
@@ -96,7 +103,7 @@ CacheMT* CacheMT::Create(const std::string&  name,
     int argc = pConfig->storage_argc;
     char** argv = pConfig->storage_argv;
 
-    Storage* pStorage = pFactory->createStorage(CACHE_THREAD_MODEL_MT, name.c_str(),
+    Storage* pStorage = sFactory->createStorage(CACHE_THREAD_MODEL_MT, name.c_str(),
                                                 ttl, maxCount, maxSize,
                                                 argc, argv);
 
@@ -104,15 +111,13 @@ CacheMT* CacheMT::Create(const std::string&  name,
     {
         CPP_GUARD(pCache = new CacheMT(name,
                                        pConfig,
-                                       pRules,
-                                       pFactory,
+                                       sRules,
+                                       sFactory,
                                        pStorage));
 
         if (!pCache)
         {
             delete pStorage;
-            delete pRules;
-            delete pFactory;
         }
     }
 
diff --git a/server/modules/filter/cache/cachemt.h b/server/modules/filter/cache/cachemt.h
index 78abb2392..e2d776e8c 100644
--- a/server/modules/filter/cache/cachemt.h
+++ b/server/modules/filter/cache/cachemt.h
@@ -22,7 +22,7 @@ public:
     ~CacheMT();
 
     static CacheMT* Create(const std::string& name, const CACHE_CONFIG* pConfig);
-    static CacheMT* Create(const std::string& name, StorageFactory* pFactory, const CACHE_CONFIG* pConfig);
+    static CacheMT* Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig);
 
     bool must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache);
 
@@ -31,14 +31,14 @@ public:
 private:
     CacheMT(const std::string&  name,
             const CACHE_CONFIG* pConfig,
-            CacheRules*         pRules,
-            StorageFactory*     pFactory,
+            SCacheRules         sRules,
+            SStorageFactory     sFactory,
             Storage*            pStorage);
 
     static CacheMT* Create(const std::string&  name,
                            const CACHE_CONFIG* pConfig,
-                           CacheRules*         pRules,
-                           StorageFactory*     pFactory);
+                           SCacheRules         sRules,
+                           SStorageFactory     sFactory);
 
 private:
     CacheMT(const CacheMT&);
diff --git a/server/modules/filter/cache/cachept.cc b/server/modules/filter/cache/cachept.cc
index a0b56ff13..16a6d2c53 100644
--- a/server/modules/filter/cache/cachept.cc
+++ b/server/modules/filter/cache/cachept.cc
@@ -47,10 +47,10 @@ inline int thread_index()
 
 CachePT::CachePT(const std::string&  name,
                  const CACHE_CONFIG* pConfig,
-                 CacheRules*         pRules,
-                 StorageFactory*     pFactory,
+                 SCacheRules         sRules,
+                 SStorageFactory     sFactory,
                  const Caches&       caches)
-    : Cache(name, pConfig, pRules, pFactory)
+    : Cache(name, pConfig, sRules, sFactory)
     , m_caches(caches)
 {
     MXS_NOTICE("Created cache per thread.");
@@ -72,15 +72,18 @@ CachePT* CachePT::Create(const std::string& name, const CACHE_CONFIG* pConfig)
 
     if (Cache::Create(*pConfig, &pRules, &pFactory))
     {
-        pCache = Create(name, pConfig, pRules, pFactory);
+        shared_ptr<CacheRules> sRules(pRules);
+        shared_ptr<StorageFactory> sFactory(pFactory);
+
+        pCache = Create(name, pConfig, sRules, sFactory);
     }
 
     return pCache;
 }
 
 // static
-CachePT* CachePT::Create(const std::string& name,
-                         StorageFactory* pFactory,
+CachePT* CachePT::Create(const std::string&  name,
+                         SStorageFactory     sFactory,
                          const CACHE_CONFIG* pConfig)
 {
     ss_dassert(pConfig);
@@ -91,7 +94,9 @@ CachePT* CachePT::Create(const std::string& name,
 
     if (Cache::Create(*pConfig, &pRules))
     {
-        pCache = Create(name, pConfig, pRules, pFactory);
+        shared_ptr<CacheRules> sRules(pRules);
+
+        pCache = Create(name, pConfig, sRules, sFactory);
     }
 
     return pCache;
@@ -130,8 +135,8 @@ cache_result_t CachePT::del_value(const CACHE_KEY& key)
 // static
 CachePT* CachePT::Create(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CacheRules*         pRules,
-                         StorageFactory*     pFactory)
+                         SCacheRules         sRules,
+                         SStorageFactory     sFactory)
 {
     CachePT* pCache = NULL;
 
@@ -153,7 +158,7 @@ CachePT* CachePT::Create(const std::string&  name,
 
             CacheST* pCacheST = 0;
 
-            CPP_GUARD(pCacheST = CacheST::Create(namest, pFactory, pConfig));
+            CPP_GUARD(pCacheST = CacheST::Create(namest, sFactory, pConfig));
 
             if (pCacheST)
             {
@@ -171,13 +176,11 @@ CachePT* CachePT::Create(const std::string&  name,
 
         if (!error)
         {
-            pCache = new CachePT(name, pConfig, pRules, pFactory, caches);
+            pCache = new CachePT(name, pConfig, sRules, sFactory, caches);
         }
     }
     catch (const std::exception&)
     {
-        delete pRules;
-        delete pFactory;
     }
 
     return pCache;
diff --git a/server/modules/filter/cache/cachept.h b/server/modules/filter/cache/cachept.h
index 214cb46a9..6f035caa7 100644
--- a/server/modules/filter/cache/cachept.h
+++ b/server/modules/filter/cache/cachept.h
@@ -23,7 +23,7 @@ public:
     ~CachePT();
 
     static CachePT* Create(const std::string& name, const CACHE_CONFIG* pConfig);
-    static CachePT* Create(const std::string& name, StorageFactory* pFactory, const CACHE_CONFIG* pConfig);
+    static CachePT* Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig);
 
     bool must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache);
 
@@ -43,14 +43,14 @@ private:
 
     CachePT(const std::string&  name,
             const CACHE_CONFIG* pConfig,
-            CacheRules*         pRules,
-            StorageFactory*     pFactory,
+            SCacheRules         sRules,
+            SStorageFactory     sFactory,
             const Caches&       caches);
 
     static CachePT* Create(const std::string&  name,
                            const CACHE_CONFIG* pConfig,
-                           CacheRules*         pRules,
-                           StorageFactory*     pFactory);
+                           SCacheRules         sRules,
+                           SStorageFactory     sFactory);
 
     Cache& thread_cache();
 
diff --git a/server/modules/filter/cache/cachesimple.cc b/server/modules/filter/cache/cachesimple.cc
index e8f0e1979..d80ca4e95 100644
--- a/server/modules/filter/cache/cachesimple.cc
+++ b/server/modules/filter/cache/cachesimple.cc
@@ -18,10 +18,10 @@
 
 CacheSimple::CacheSimple(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CacheRules*         pRules,
-                         StorageFactory*     pFactory,
+                         SCacheRules         sRules,
+                         SStorageFactory     sFactory,
                          Storage*            pStorage)
-    : Cache(name, pConfig, pRules, pFactory)
+    : Cache(name, pConfig, sRules, sFactory)
     , m_pStorage(pStorage)
 {
 }
diff --git a/server/modules/filter/cache/cachesimple.h b/server/modules/filter/cache/cachesimple.h
index 4677bf617..03502c5e1 100644
--- a/server/modules/filter/cache/cachesimple.h
+++ b/server/modules/filter/cache/cachesimple.h
@@ -35,8 +35,8 @@ public:
 protected:
     CacheSimple(const std::string&  name,
                 const CACHE_CONFIG* pConfig,
-                CacheRules*         pRules,
-                StorageFactory*     pFactory,
+                SCacheRules         sRules,
+                SStorageFactory     sFactory,
                 Storage*            pStorage);
 
     static bool Create(const CACHE_CONFIG& config,
diff --git a/server/modules/filter/cache/cachest.cc b/server/modules/filter/cache/cachest.cc
index ce64fa65d..c569b41ac 100644
--- a/server/modules/filter/cache/cachest.cc
+++ b/server/modules/filter/cache/cachest.cc
@@ -16,12 +16,14 @@
 #include "storage.h"
 #include "storagefactory.h"
 
+using std::tr1::shared_ptr;
+
 CacheST::CacheST(const std::string&  name,
                  const CACHE_CONFIG* pConfig,
-                 CacheRules*         pRules,
-                 StorageFactory*     pFactory,
+                 SCacheRules         sRules,
+                 SStorageFactory     sFactory,
                  Storage*            pStorage)
-    : CacheSimple(name, pConfig, pRules, pFactory, pStorage)
+    : CacheSimple(name, pConfig, sRules, sFactory, pStorage)
 {
     MXS_NOTICE("Created single threaded cache.");
 }
@@ -41,17 +43,20 @@ CacheST* CacheST::Create(const std::string& name, const CACHE_CONFIG* pConfig)
 
     if (CacheSimple::Create(*pConfig, &pRules, &pFactory))
     {
-        pCache = Create(name, pConfig, pRules, pFactory);
+        shared_ptr<CacheRules> sRules(pRules);
+        shared_ptr<StorageFactory> sFactory(pFactory);
+
+        pCache = Create(name, pConfig, sRules, sFactory);
     }
 
     return pCache;
 }
 
 // static
-CacheST* CacheST::Create(const std::string& name, StorageFactory* pFactory, const CACHE_CONFIG* pConfig)
+CacheST* CacheST::Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig)
 {
     ss_dassert(pConfig);
-    ss_dassert(pFactory);
+    ss_dassert(sFactory.get());
 
     CacheST* pCache = NULL;
 
@@ -59,7 +64,9 @@ CacheST* CacheST::Create(const std::string& name, StorageFactory* pFactory, cons
 
     if (CacheSimple::Create(*pConfig, &pRules))
     {
-        pCache = Create(name, pConfig, pRules, pFactory);
+        shared_ptr<CacheRules> sRules(pRules);
+
+        pCache = Create(name, pConfig, sRules, sFactory);
     }
 
     return pCache;
@@ -78,8 +85,8 @@ void CacheST::refreshed(const CACHE_KEY& key,  const SessionCache* pSessionCache
 // static
 CacheST* CacheST::Create(const std::string&  name,
                          const CACHE_CONFIG* pConfig,
-                         CacheRules*         pRules,
-                         StorageFactory*     pFactory)
+                         SCacheRules         sRules,
+                         SStorageFactory     sFactory)
 {
     CacheST* pCache = NULL;
 
@@ -90,7 +97,7 @@ CacheST* CacheST::Create(const std::string&  name,
     int argc = pConfig->storage_argc;
     char** argv = pConfig->storage_argv;
 
-    Storage* pStorage = pFactory->createStorage(CACHE_THREAD_MODEL_ST, name.c_str(),
+    Storage* pStorage = sFactory->createStorage(CACHE_THREAD_MODEL_ST, name.c_str(),
                                                 ttl, maxCount, maxSize,
                                                 argc, argv);
 
@@ -98,15 +105,13 @@ CacheST* CacheST::Create(const std::string&  name,
     {
         CPP_GUARD(pCache = new CacheST(name,
                                        pConfig,
-                                       pRules,
-                                       pFactory,
+                                       sRules,
+                                       sFactory,
                                        pStorage));
 
         if (!pCache)
         {
             delete pStorage;
-            delete pRules;
-            delete pFactory;
         }
     }
 
diff --git a/server/modules/filter/cache/cachest.h b/server/modules/filter/cache/cachest.h
index 5e149408a..613d06a10 100644
--- a/server/modules/filter/cache/cachest.h
+++ b/server/modules/filter/cache/cachest.h
@@ -21,7 +21,7 @@ public:
     ~CacheST();
 
     static CacheST* Create(const std::string& name, const CACHE_CONFIG* pConfig);
-    static CacheST* Create(const std::string& name, StorageFactory* pFactory, const CACHE_CONFIG* pConfig);
+    static CacheST* Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig);
 
     bool must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache);
 
@@ -30,14 +30,14 @@ public:
 private:
     CacheST(const std::string&  name,
             const CACHE_CONFIG* pConfig,
-            CacheRules*         pRules,
-            StorageFactory*     pFactory,
+            SCacheRules         sRules,
+            SStorageFactory     sFactory,
             Storage*            pStorage);
 
     static CacheST* Create(const std::string&  name,
                            const CACHE_CONFIG* pConfig,
-                           CacheRules*         pRules,
-                           StorageFactory*     pFactory);
+                           SCacheRules         sRules,
+                           SStorageFactory     sFactory);
 private:
     CacheST(const CacheST&);
     CacheST& operator = (const CacheST&);

From 9175295542ba31a65033af7b6472b4d0c6dda3a7 Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Wed, 30 Nov 2016 09:58:01 +0200
Subject: [PATCH 13/42] Cache: Now also CacheRules is shared

Unnecessary methods were also removed from CachePT and CacheMT
as it does not make sense to create more than one instance
of those per filter instance. Consequently there is no need for
them to be able to use an existing StorageFactory (and CacheRules).
---
 server/modules/filter/cache/cache.cc       | 28 ++++++----------------
 server/modules/filter/cache/cache.h        |  3 ---
 server/modules/filter/cache/cachemt.cc     | 20 ----------------
 server/modules/filter/cache/cachemt.h      |  1 -
 server/modules/filter/cache/cachept.cc     | 23 +-----------------
 server/modules/filter/cache/cachept.h      |  1 -
 server/modules/filter/cache/cachesimple.cc | 17 -------------
 server/modules/filter/cache/cachesimple.h  |  3 ---
 server/modules/filter/cache/cachest.cc     | 21 ++++++----------
 server/modules/filter/cache/cachest.h      |  5 +++-
 10 files changed, 19 insertions(+), 103 deletions(-)

diff --git a/server/modules/filter/cache/cache.cc b/server/modules/filter/cache/cache.cc
index 9d865d2c4..27c0a9245 100644
--- a/server/modules/filter/cache/cache.cc
+++ b/server/modules/filter/cache/cache.cc
@@ -36,9 +36,11 @@ Cache::~Cache()
 
 //static
 bool Cache::Create(const CACHE_CONFIG& config,
-                   CacheRules**        ppRules)
+                   CacheRules**        ppRules,
+                   StorageFactory**    ppFactory)
 {
     CacheRules* pRules = NULL;
+    StorageFactory* pFactory = NULL;
 
     if (config.rules)
     {
@@ -50,26 +52,6 @@ bool Cache::Create(const CACHE_CONFIG& config,
     }
 
     if (pRules)
-    {
-        *ppRules = pRules;
-    }
-    else
-    {
-        MXS_ERROR("Could not create rules.");
-    }
-
-    return pRules != NULL;
-}
-
-//static
-bool Cache::Create(const CACHE_CONFIG& config,
-                   CacheRules**        ppRules,
-                   StorageFactory**    ppFactory)
-{
-    CacheRules* pRules = NULL;
-    StorageFactory* pFactory = NULL;
-
-    if (Create(config, &pRules))
     {
         pFactory = StorageFactory::Open(config.storage);
 
@@ -84,6 +66,10 @@ bool Cache::Create(const CACHE_CONFIG& config,
             delete pRules;
         }
     }
+    else
+    {
+        MXS_ERROR("Could not create rules.");
+    }
 
     return pFactory != NULL;
 }
diff --git a/server/modules/filter/cache/cache.h b/server/modules/filter/cache/cache.h
index d6950bb1a..c31465ef7 100644
--- a/server/modules/filter/cache/cache.h
+++ b/server/modules/filter/cache/cache.h
@@ -84,9 +84,6 @@ protected:
           SCacheRules         sRules,
           SStorageFactory     sFactory);
 
-    static bool Create(const CACHE_CONFIG& config,
-                       CacheRules**        ppRules);
-
     static bool Create(const CACHE_CONFIG& config,
                        CacheRules**        ppRules,
                        StorageFactory**    ppFactory);
diff --git a/server/modules/filter/cache/cachemt.cc b/server/modules/filter/cache/cachemt.cc
index e399346c4..158885370 100644
--- a/server/modules/filter/cache/cachemt.cc
+++ b/server/modules/filter/cache/cachemt.cc
@@ -54,26 +54,6 @@ CacheMT* CacheMT::Create(const std::string& name, const CACHE_CONFIG* pConfig)
     return pCache;
 }
 
-// static
-CacheMT* CacheMT::Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig)
-{
-    ss_dassert(pConfig);
-    ss_dassert(sFactory.get());
-
-    CacheMT* pCache = NULL;
-
-    CacheRules* pRules = NULL;
-
-    if (CacheSimple::Create(*pConfig, &pRules))
-    {
-        shared_ptr<CacheRules> sRules(pRules);
-
-        pCache = Create(name, pConfig, sRules, sFactory);
-    }
-
-    return pCache;
-}
-
 bool CacheMT::must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache)
 {
     LockGuard guard(&m_lockPending);
diff --git a/server/modules/filter/cache/cachemt.h b/server/modules/filter/cache/cachemt.h
index e2d776e8c..e8b1dfe72 100644
--- a/server/modules/filter/cache/cachemt.h
+++ b/server/modules/filter/cache/cachemt.h
@@ -22,7 +22,6 @@ public:
     ~CacheMT();
 
     static CacheMT* Create(const std::string& name, const CACHE_CONFIG* pConfig);
-    static CacheMT* Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig);
 
     bool must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache);
 
diff --git a/server/modules/filter/cache/cachept.cc b/server/modules/filter/cache/cachept.cc
index 16a6d2c53..3bfe5d8c5 100644
--- a/server/modules/filter/cache/cachept.cc
+++ b/server/modules/filter/cache/cachept.cc
@@ -81,27 +81,6 @@ CachePT* CachePT::Create(const std::string& name, const CACHE_CONFIG* pConfig)
     return pCache;
 }
 
-// static
-CachePT* CachePT::Create(const std::string&  name,
-                         SStorageFactory     sFactory,
-                         const CACHE_CONFIG* pConfig)
-{
-    ss_dassert(pConfig);
-
-    CachePT* pCache = NULL;
-
-    CacheRules* pRules = NULL;
-
-    if (Cache::Create(*pConfig, &pRules))
-    {
-        shared_ptr<CacheRules> sRules(pRules);
-
-        pCache = Create(name, pConfig, sRules, sFactory);
-    }
-
-    return pCache;
-}
-
 bool CachePT::must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache)
 {
     return thread_cache().must_refresh(key, pSessionCache);
@@ -158,7 +137,7 @@ CachePT* CachePT::Create(const std::string&  name,
 
             CacheST* pCacheST = 0;
 
-            CPP_GUARD(pCacheST = CacheST::Create(namest, sFactory, pConfig));
+            CPP_GUARD(pCacheST = CacheST::Create(namest, sRules, sFactory, pConfig));
 
             if (pCacheST)
             {
diff --git a/server/modules/filter/cache/cachept.h b/server/modules/filter/cache/cachept.h
index 6f035caa7..41d055b5a 100644
--- a/server/modules/filter/cache/cachept.h
+++ b/server/modules/filter/cache/cachept.h
@@ -23,7 +23,6 @@ public:
     ~CachePT();
 
     static CachePT* Create(const std::string& name, const CACHE_CONFIG* pConfig);
-    static CachePT* Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig);
 
     bool must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache);
 
diff --git a/server/modules/filter/cache/cachesimple.cc b/server/modules/filter/cache/cachesimple.cc
index d80ca4e95..f487db87b 100644
--- a/server/modules/filter/cache/cachesimple.cc
+++ b/server/modules/filter/cache/cachesimple.cc
@@ -31,23 +31,6 @@ CacheSimple::~CacheSimple()
     delete m_pStorage;
 }
 
-
-// static
-bool CacheSimple::Create(const CACHE_CONFIG& config,
-                         CacheRules**        ppRules)
-{
-    int rv = false;
-
-    CacheRules* pRules = NULL;
-
-    if (Cache::Create(config, &pRules))
-    {
-        *ppRules = pRules;
-    }
-
-    return pRules != NULL;;
-}
-
 // static
 bool CacheSimple::Create(const CACHE_CONFIG& config,
                          CacheRules**        ppRules,
diff --git a/server/modules/filter/cache/cachesimple.h b/server/modules/filter/cache/cachesimple.h
index 03502c5e1..73a15abf1 100644
--- a/server/modules/filter/cache/cachesimple.h
+++ b/server/modules/filter/cache/cachesimple.h
@@ -39,9 +39,6 @@ protected:
                 SStorageFactory     sFactory,
                 Storage*            pStorage);
 
-    static bool Create(const CACHE_CONFIG& config,
-                       CacheRules**        ppRules);
-
     static bool Create(const CACHE_CONFIG& config,
                        CacheRules**        ppRules,
                        StorageFactory**    ppFactory);
diff --git a/server/modules/filter/cache/cachest.cc b/server/modules/filter/cache/cachest.cc
index c569b41ac..f8f75dbb4 100644
--- a/server/modules/filter/cache/cachest.cc
+++ b/server/modules/filter/cache/cachest.cc
@@ -53,23 +53,16 @@ CacheST* CacheST::Create(const std::string& name, const CACHE_CONFIG* pConfig)
 }
 
 // static
-CacheST* CacheST::Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig)
+CacheST* CacheST::Create(const std::string&  name,
+                         SCacheRules         sRules,
+                         SStorageFactory     sFactory,
+                         const CACHE_CONFIG* pConfig)
 {
-    ss_dassert(pConfig);
+    ss_dassert(sRules.get());
     ss_dassert(sFactory.get());
+    ss_dassert(pConfig);
 
-    CacheST* pCache = NULL;
-
-    CacheRules* pRules = NULL;
-
-    if (CacheSimple::Create(*pConfig, &pRules))
-    {
-        shared_ptr<CacheRules> sRules(pRules);
-
-        pCache = Create(name, pConfig, sRules, sFactory);
-    }
-
-    return pCache;
+    return Create(name, pConfig, sRules, sFactory);
 }
 
 bool CacheST::must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache)
diff --git a/server/modules/filter/cache/cachest.h b/server/modules/filter/cache/cachest.h
index 613d06a10..50f1254ce 100644
--- a/server/modules/filter/cache/cachest.h
+++ b/server/modules/filter/cache/cachest.h
@@ -21,7 +21,10 @@ public:
     ~CacheST();
 
     static CacheST* Create(const std::string& name, const CACHE_CONFIG* pConfig);
-    static CacheST* Create(const std::string& name, SStorageFactory sFactory, const CACHE_CONFIG* pConfig);
+    static CacheST* Create(const std::string& name,
+                           SCacheRules sRules,
+                           SStorageFactory sFactory,
+                           const CACHE_CONFIG* pConfig);
 
     bool must_refresh(const CACHE_KEY& key, const SessionCache* pSessionCache);
 

From 573faff9879c983a5061f48853ee3624a27e8d41 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Tue, 29 Nov 2016 14:41:59 +0200
Subject: [PATCH 14/42] Add module command documentation for avrorouter

The documentation lists the one command it implements.
---
 .../Release-Notes/MaxScale-2.1.0-Release-Notes.md    |  3 +++
 Documentation/Routers/Avrorouter.md                  | 12 ++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/Documentation/Release-Notes/MaxScale-2.1.0-Release-Notes.md b/Documentation/Release-Notes/MaxScale-2.1.0-Release-Notes.md
index 6d77070ed..00db1d6cd 100644
--- a/Documentation/Release-Notes/MaxScale-2.1.0-Release-Notes.md
+++ b/Documentation/Release-Notes/MaxScale-2.1.0-Release-Notes.md
@@ -120,6 +120,9 @@ is the name of the function. _ARGS_ is a function specific list of arguments.
 
 Read [Module Commands](../Reference/Module-Commands.md) documentation for more details.
 
+In the 2.1 release of MaxScale, the [_dbfwfilter_](../Filters/Database-Firewall-Filter.md)
+and [_avrorouter_](../Routers/Avrorouter.md) implement module commands.
+
 ### Amazon RDS Aurora monitor
 
 The new [Aurora Monitor](../Monitors/Aurora-Monitor.md) module allows monitoring
diff --git a/Documentation/Routers/Avrorouter.md b/Documentation/Routers/Avrorouter.md
index f9e376ab7..6894c654d 100644
--- a/Documentation/Routers/Avrorouter.md
+++ b/Documentation/Routers/Avrorouter.md
@@ -142,6 +142,18 @@ data block. The default value is 1 transaction.
 Controls the number of row events that are grouped into a single Avro
 data block. The default value is 1000 row events.
 
+## Module commands
+
+Read [Module Commands](../Reference/Module-Commands.md) documentation for details about module commands.
+
+The avrorouter supports the following module commands.
+
+### `avrorouter::convert SERVICE {start | stop}`
+
+Start or stop the binary log to Avro conversion. The first parameter is the name
+of the service to control and the second parameter tells whether to start the
+conversion process or to stop it.
+
 # Files Created by the Avrorouter
 
 The avrorouter creates two files in the location pointed by _avrodir_:

From b79210c7605a7879d8a7e6acd9c7b6282e8b4ea4 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Thu, 20 Oct 2016 00:25:05 +0300
Subject: [PATCH 15/42] Create thread specific epoll instances

Having a unique epoll instance for each thread allows a lot of the locking
from poll.c to be removed. The downside to this is that each session can
have only one thread processing events for it which might reduce
performance with very low client counts.
---
 include/maxscale/dcb.h |   1 +
 server/core/poll.c     | 215 +++++++++--------------------------------
 2 files changed, 48 insertions(+), 168 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index 98237c685..80ac0b32d 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -227,6 +227,7 @@ typedef struct dcb
     DCBEVENTQ       evq;            /**< The event queue for this DCB */
     int             fd;             /**< The descriptor */
     dcb_state_t     state;          /**< Current descriptor state */
+    int             owner;          /**< Owning thread */
     SSL_STATE       ssl_state;      /**< Current state of SSL if in use */
     int             flags;          /**< DCB flags */
     char            *remote;        /**< Address of remote end */
diff --git a/server/core/poll.c b/server/core/poll.c
index 90295a9bb..7b9b75768 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -81,7 +81,8 @@ int max_poll_sleep;
  */
 #define MUTEX_EPOLL     0
 
-static int epoll_fd = -1;    /*< The epoll file descriptor */
+static int *epoll_fd;    /*< The epoll file descriptor */
+static int next_epoll_fd = 0; /*< Which thread handles the next DCB */
 static int do_shutdown = 0;  /*< Flag the shutdown of the poll subsystem */
 static GWBITMASK poll_mask;
 #if MUTEX_EPOLL
@@ -89,7 +90,7 @@ static simple_mutex_t epoll_wait_mutex; /*< serializes calls to epoll_wait */
 #endif
 static int n_waiting = 0;    /*< No. of threads in epoll_wait */
 
-static int process_pollq(int thread_id);
+static int process_pollq(int thread_id, struct epoll_event *event);
 static void poll_add_event_to_dcb(DCB* dcb, GWBUF* buf, __uint32_t ev);
 static bool poll_dcb_session_check(DCB *dcb, const char *);
 
@@ -206,26 +207,30 @@ static int poll_resolve_error(DCB *, int, bool);
 void
 poll_init()
 {
-    int i;
+    n_threads = config_threadcount();
 
-    if (epoll_fd != -1)
+    if (!(epoll_fd = MXS_MALLOC(sizeof(int) * n_threads)))
     {
         return;
     }
-    if ((epoll_fd = epoll_create(MAX_EVENTS)) == -1)
+
+    for (int i = 0; i < n_threads; i++)
     {
-        char errbuf[MXS_STRERROR_BUFLEN];
-        MXS_ERROR("FATAL: Could not create epoll instance: %s", strerror_r(errno, errbuf, sizeof(errbuf)));
-        exit(-1);
+        if ((epoll_fd[i] = epoll_create(MAX_EVENTS)) == -1)
+        {
+            char errbuf[MXS_STRERROR_BUFLEN];
+            MXS_ERROR("FATAL: Could not create epoll instance: %s", strerror_r(errno, errbuf, sizeof(errbuf)));
+            exit(-1);
+        }
     }
+
     memset(&pollStats, 0, sizeof(pollStats));
     memset(&queueStats, 0, sizeof(queueStats));
     bitmask_init(&poll_mask);
-    n_threads = config_threadcount();
     thread_data = (THREAD_DATA *)MXS_MALLOC(n_threads * sizeof(THREAD_DATA));
     if (thread_data)
     {
-        for (i = 0; i < n_threads; i++)
+        for (int i = 0; i < n_threads; i++)
         {
             thread_data[i].state = THREAD_STOPPED;
         }
@@ -254,13 +259,13 @@ poll_init()
     n_avg_samples = 15 * 60 / POLL_LOAD_FREQ;
     avg_samples = (double *)MXS_MALLOC(sizeof(double) * n_avg_samples);
     MXS_ABORT_IF_NULL(avg_samples);
-    for (i = 0; i < n_avg_samples; i++)
+    for (int i = 0; i < n_avg_samples; i++)
     {
         avg_samples[i] = 0.0;
     }
     evqp_samples = (int *)MXS_MALLOC(sizeof(int) * n_avg_samples);
     MXS_ABORT_IF_NULL(evqp_samples);
-    for (i = 0; i < n_avg_samples; i++)
+    for (int i = 0; i < n_avg_samples; i++)
     {
         evqp_samples[i] = 0.0;
     }
@@ -339,7 +344,20 @@ poll_add_dcb(DCB *dcb)
      * The only possible failure that will not cause a crash is
      * running out of system resources.
      */
-    rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, dcb->fd, &ev);
+    int owner = 0;
+
+    if (dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER)
+    {
+        owner = dcb->session->client_dcb->owner;
+    }
+    else
+    {
+        owner = (unsigned int)atomic_add(&next_epoll_fd, 1) % n_threads;
+    }
+
+    dcb->owner = owner;
+
+    rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev);
     if (rc)
     {
         /* Some errors are actually considered acceptable */
@@ -406,7 +424,7 @@ poll_remove_dcb(DCB *dcb)
     spinlock_release(&dcb->dcb_initlock);
     if (dcbfd > 0)
     {
-        rc = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, dcbfd, &ev);
+        rc = epoll_ctl(epoll_fd[dcb->owner], EPOLL_CTL_DEL, dcbfd, &ev);
         /**
          * The poll_resolve_error function will always
          * return 0 or crash.  So if it returns non-zero result,
@@ -570,11 +588,6 @@ poll_waitevents(void *arg)
 
     while (1)
     {
-        if (pollStats.evq_pending == 0 && timeout_bias < 10)
-        {
-            timeout_bias++;
-        }
-
         atomic_add(&n_waiting, 1);
 #if BLOCKINGPOLL
         nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
@@ -589,7 +602,7 @@ poll_waitevents(void *arg)
         }
 
         ts_stats_increment(pollStats.n_polls, thread_id);
-        if ((nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, 0)) == -1)
+        if ((nfds = epoll_wait(epoll_fd[thread_id], events, MAX_EVENTS, 0)) == -1)
         {
             atomic_add(&n_waiting, -1);
             int eno = errno;
@@ -609,14 +622,18 @@ poll_waitevents(void *arg)
          * We calculate a timeout bias to alter the length of the blocking
          * call based on the time since we last received an event to process
          */
-        else if (nfds == 0 && pollStats.evq_pending == 0 && poll_spins++ > number_poll_spins)
+        else if (nfds == 0 && poll_spins++ > number_poll_spins)
         {
+            if (timeout_bias < 10)
+            {
+                timeout_bias++;
+            }
             ts_stats_increment(pollStats.blockingpolls, thread_id);
-            nfds = epoll_wait(epoll_fd,
+            nfds = epoll_wait(epoll_fd[thread_id],
                               events,
                               MAX_EVENTS,
                               (max_poll_sleep * timeout_bias) / 10);
-            if (nfds == 0 && pollStats.evq_pending)
+            if (nfds == 0)
             {
                 atomic_add(&pollStats.wake_evqpending, 1);
                 poll_spins = 0;
@@ -671,47 +688,6 @@ poll_waitevents(void *arg)
              * idle and is added to the queue to process after
              * setting the event bits.
              */
-            for (i = 0; i < nfds; i++)
-            {
-                DCB *dcb = (DCB *)events[i].data.ptr;
-                __uint32_t ev = events[i].events;
-
-                spinlock_acquire(&pollqlock);
-                if (DCB_POLL_BUSY(dcb))
-                {
-                    if (dcb->evq.pending_events == 0)
-                    {
-                        pollStats.evq_pending++;
-                        dcb->evq.inserted = hkheartbeat;
-                    }
-                    dcb->evq.pending_events |= ev;
-                }
-                else
-                {
-                    dcb->evq.pending_events = ev;
-                    if (eventq)
-                    {
-                        dcb->evq.prev = eventq->evq.prev;
-                        eventq->evq.prev->evq.next = dcb;
-                        eventq->evq.prev = dcb;
-                        dcb->evq.next = eventq;
-                    }
-                    else
-                    {
-                        eventq = dcb;
-                        dcb->evq.prev = dcb;
-                        dcb->evq.next = dcb;
-                    }
-                    pollStats.evq_length++;
-                    pollStats.evq_pending++;
-                    dcb->evq.inserted = hkheartbeat;
-                    if (pollStats.evq_length > pollStats.evq_max)
-                    {
-                        pollStats.evq_max = pollStats.evq_length;
-                    }
-                }
-                spinlock_release(&pollqlock);
-            }
         }
 
         /*
@@ -720,9 +696,10 @@ poll_waitevents(void *arg)
          * precautionary measure to avoid issues if the house keeping
          * of the count goes wrong.
          */
-        if (process_pollq(thread_id))
+
+        for (int i = 0; i < nfds; i++)
         {
-            timeout_bias = 1;
+            process_pollq(thread_id, &events[i]);
         }
 
         if (check_timeouts && hkheartbeat >= next_timeout_check)
@@ -811,61 +788,14 @@ poll_set_maxwait(unsigned int maxwait)
  * @return              0 if no DCB's have been processed
  */
 static int
-process_pollq(int thread_id)
+process_pollq(int thread_id, struct epoll_event *event)
 {
-    DCB *dcb;
     int found = 0;
-    uint32_t ev;
+    uint32_t ev = event->events;
     unsigned long qtime;
 
-    spinlock_acquire(&pollqlock);
-    if (eventq == NULL)
-    {
-        /* Nothing to process */
-        spinlock_release(&pollqlock);
-        return 0;
-    }
-    dcb = eventq;
-    if (dcb->evq.next == dcb->evq.prev && dcb->evq.processing == 0)
-    {
-        found = 1;
-        dcb->evq.processing = 1;
-    }
-    else if (dcb->evq.next == dcb->evq.prev)
-    {
-        /* Only item in queue is being processed */
-        spinlock_release(&pollqlock);
-        return 0;
-    }
-    else
-    {
-        do
-        {
-            dcb = dcb->evq.next;
-        }
-        while (dcb != eventq && dcb->evq.processing == 1);
-
-        if (dcb->evq.processing == 0)
-        {
-            /* Found DCB to process */
-            dcb->evq.processing = 1;
-            found = 1;
-        }
-    }
-    if (found)
-    {
-        ev = dcb->evq.pending_events;
-        dcb->evq.processing_events = ev;
-        dcb->evq.pending_events = 0;
-        pollStats.evq_pending--;
-        ss_dassert(pollStats.evq_pending >= 0);
-    }
-    spinlock_release(&pollqlock);
-
-    if (found == 0)
-    {
-        return 0;
-    }
+    DCB *dcb = event->data.ptr;
+    atomic_add(&pollStats.evq_pending, -1);
 
 #if PROFILE_POLL
     memlog_log(plog, hkheartbeat - dcb->evq.inserted);
@@ -1100,59 +1030,8 @@ process_pollq(int thread_id)
         queueStats.maxexectime = qtime;
     }
 
-    spinlock_acquire(&pollqlock);
-    dcb->evq.processing_events = 0;
-
-    if (dcb->evq.pending_events == 0)
-    {
-        /* No pending events so remove from the queue */
-        if (dcb->evq.prev != dcb)
-        {
-            dcb->evq.prev->evq.next = dcb->evq.next;
-            dcb->evq.next->evq.prev = dcb->evq.prev;
-            if (eventq == dcb)
-            {
-                eventq = dcb->evq.next;
-            }
-        }
-        else
-        {
-            eventq = NULL;
-        }
-        dcb->evq.next = NULL;
-        dcb->evq.prev = NULL;
-        pollStats.evq_length--;
-    }
-    else
-    {
-        /*
-         * We have a pending event, move to the end of the queue
-         * if there are any other DCB's in the queue.
-         *
-         * If we are the first item on the queue this is easy, we
-         * just bump the eventq pointer.
-         */
-        if (dcb->evq.prev != dcb)
-        {
-            if (eventq == dcb)
-            {
-                eventq = dcb->evq.next;
-            }
-            else
-            {
-                dcb->evq.prev->evq.next = dcb->evq.next;
-                dcb->evq.next->evq.prev = dcb->evq.prev;
-                dcb->evq.prev = eventq->evq.prev;
-                dcb->evq.next = eventq;
-                eventq->evq.prev = dcb;
-                dcb->evq.prev->evq.next = dcb;
-            }
-        }
-    }
-    dcb->evq.processing = 0;
     /** Reset session id from thread's local storage */
     mxs_log_tls.li_sesid = 0;
-    spinlock_release(&pollqlock);
 
     return 1;
 }

From 8efdaa1ea68c2001cfb10222bc971fca93338bd5 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Tue, 25 Oct 2016 13:19:25 +0300
Subject: [PATCH 16/42] Move fake events to a thread-specific queue

The fake poll events are now stored in thread specific queues. This
removes the need for the poll event queue.
---
 include/maxscale/dcb.h                        |   1 +
 server/core/dcb.c                             |  18 +-
 server/core/poll.c                            | 205 +++++-------------
 .../MySQL/MySQLBackend/mysql_backend.c        |  16 +-
 .../protocol/MySQL/MySQLClient/mysql_client.c |  34 +--
 server/modules/protocol/MySQL/mysql_common.c  |   7 +-
 server/modules/routing/maxinfo/maxinfo_exec.c |  10 -
 7 files changed, 92 insertions(+), 199 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index 80ac0b32d..93483d227 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -248,6 +248,7 @@ typedef struct dcb
     SPINLOCK        delayqlock;     /**< Delay Backend Write Queue spinlock */
     GWBUF           *delayq;        /**< Delay Backend Write Data Queue */
     GWBUF           *dcb_readqueue; /**< read queue for storing incomplete reads */
+    GWBUF           *dcb_fakequeue; /**< Fake event queue for generated events */
     SPINLOCK        authlock;       /**< Generic Authorization spinlock */
 
     DCBSTATS        stats;          /**< DCB related statistics */
diff --git a/server/core/dcb.c b/server/core/dcb.c
index cc76a3a89..1ada04d25 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -420,7 +420,11 @@ dcb_free_all_memory(DCB *dcb)
         gwbuf_free(dcb->dcb_readqueue);
         dcb->dcb_readqueue = NULL;
     }
-
+    if (dcb->dcb_fakequeue)
+    {
+        gwbuf_free(dcb->dcb_fakequeue);
+        dcb->dcb_fakequeue = NULL;
+    }
     spinlock_acquire(&dcb->cb_lock);
     while ((cb_dcb = dcb->callbacks) != NULL)
     {
@@ -913,11 +917,15 @@ int dcb_read(DCB   *dcb,
 
     if (dcb->dcb_readqueue)
     {
-        spinlock_acquire(&dcb->authlock);
         *head = gwbuf_append(*head, dcb->dcb_readqueue);
         dcb->dcb_readqueue = NULL;
         nreadtotal = gwbuf_length(*head);
-        spinlock_release(&dcb->authlock);
+    }
+    else if (dcb->dcb_fakequeue)
+    {
+        *head = gwbuf_append(*head, dcb->dcb_fakequeue);
+        dcb->dcb_fakequeue = NULL;
+        nreadtotal = gwbuf_length(*head);
     }
 
     if (SSL_HANDSHAKE_DONE == dcb->ssl_state || SSL_ESTABLISHED == dcb->ssl_state)
@@ -1661,7 +1669,7 @@ dcb_grab_writeq(DCB *dcb, bool first_time)
 
     if (first_time && dcb->ssl_read_want_write)
     {
-        poll_fake_event(dcb, EPOLLIN);
+        poll_fake_read_event(dcb);
     }
 
     if (first_time && dcb->draining_flag)
@@ -3554,7 +3562,5 @@ dcb_role_name(DCB *dcb)
  */
 void dcb_append_readqueue(DCB *dcb, GWBUF *buffer)
 {
-    spinlock_acquire(&dcb->authlock);
     dcb->dcb_readqueue = gwbuf_append(dcb->dcb_readqueue, buffer);
-    spinlock_release(&dcb->authlock);
 }
diff --git a/server/core/poll.c b/server/core/poll.c
index 7b9b75768..cfa316a2e 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -81,8 +81,19 @@ int max_poll_sleep;
  */
 #define MUTEX_EPOLL     0
 
+/** Fake epoll event struct */
+typedef struct fake_event
+{
+    DCB               *dcb;   /*< The DCB where this event was generated */
+    GWBUF             *data;  /*< Fake data, placed in the DCB's read queue */
+    uint32_t           event; /*< The EPOLL event type */
+    struct fake_event *tail;  /*< The last event */
+    struct fake_event *next;  /*< The next event */
+} fake_event_t;
+
 static int *epoll_fd;    /*< The epoll file descriptor */
 static int next_epoll_fd = 0; /*< Which thread handles the next DCB */
+static fake_event_t **fake_events; /*< Thread-specific fake event queue */
 static int do_shutdown = 0;  /*< Flag the shutdown of the poll subsystem */
 static GWBITMASK poll_mask;
 #if MUTEX_EPOLL
@@ -91,7 +102,7 @@ static simple_mutex_t epoll_wait_mutex; /*< serializes calls to epoll_wait */
 static int n_waiting = 0;    /*< No. of threads in epoll_wait */
 
 static int process_pollq(int thread_id, struct epoll_event *event);
-static void poll_add_event_to_dcb(DCB* dcb, GWBUF* buf, __uint32_t ev);
+static void poll_add_event_to_dcb(DCB* dcb, GWBUF* buf, uint32_t ev);
 static bool poll_dcb_session_check(DCB *dcb, const char *);
 
 DCB *eventq = NULL;
@@ -224,6 +235,11 @@ poll_init()
         }
     }
 
+    if ((fake_events = MXS_CALLOC(sizeof(fake_event_t*), n_threads)) == NULL)
+    {
+        exit(-1);
+    }
+
     memset(&pollStats, 0, sizeof(pollStats));
     memset(&queueStats, 0, sizeof(queueStats));
     bitmask_init(&poll_mask);
@@ -339,7 +355,7 @@ poll_add_dcb(DCB *dcb)
                   STRDCBSTATE(dcb->state));
     }
     dcb->state = new_state;
-    spinlock_release(&dcb->dcb_initlock);
+
     /*
      * The only possible failure that will not cause a crash is
      * running out of system resources.
@@ -356,6 +372,7 @@ poll_add_dcb(DCB *dcb)
     }
 
     dcb->owner = owner;
+    spinlock_release(&dcb->dcb_initlock);
 
     rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev);
     if (rc)
@@ -702,6 +719,19 @@ poll_waitevents(void *arg)
             process_pollq(thread_id, &events[i]);
         }
 
+        /** Process fake events */
+        while (fake_events[thread_id])
+        {
+            fake_event_t *event = fake_events[thread_id];
+            fake_events[thread_id] = fake_events[thread_id]->next;
+
+            struct epoll_event ev;
+            event->dcb->dcb_fakequeue = event->data;
+            ev.data.ptr = event->dcb;
+            ev.events = event->event;
+            process_pollq(thread_id, &ev);
+        }
+
         if (check_timeouts && hkheartbeat >= next_timeout_check)
         {
             process_idle_sessions();
@@ -795,7 +825,6 @@ process_pollq(int thread_id, struct epoll_event *event)
     unsigned long qtime;
 
     DCB *dcb = event->data.ptr;
-    atomic_add(&pollStats.evq_pending, -1);
 
 #if PROFILE_POLL
     memlog_log(plog, hkheartbeat - dcb->evq.inserted);
@@ -1132,8 +1161,6 @@ dprintPollStats(DCB *dcb)
                pollStats.evq_length);
     dcb_printf(dcb, "Maximum event queue length:                    %" PRId32 "\n",
                pollStats.evq_max);
-    dcb_printf(dcb, "No. of DCBs with pending events:               %" PRId32 "\n",
-               pollStats.evq_pending);
     dcb_printf(dcb, "No. of wakeups with pending queue:             %" PRId32 "\n",
                pollStats.wake_evqpending);
 
@@ -1366,7 +1393,6 @@ poll_loadav(void *data)
         current_avg = 0.0;
     }
     avg_samples[next_sample] = current_avg;
-    evqp_samples[next_sample] = pollStats.evq_pending;
     next_sample++;
     if (next_sample >= n_avg_samples)
     {
@@ -1396,50 +1422,30 @@ void poll_add_epollin_event_to_dcb(DCB*   dcb,
 
 static void poll_add_event_to_dcb(DCB*       dcb,
                                   GWBUF*     buf,
-                                  __uint32_t ev)
+                                  uint32_t ev)
 {
-    /** Add buf to readqueue */
-    spinlock_acquire(&dcb->authlock);
-    dcb->dcb_readqueue = gwbuf_append(dcb->dcb_readqueue, buf);
-    spinlock_release(&dcb->authlock);
+    fake_event_t *event = MXS_MALLOC(sizeof(*event));
 
-    spinlock_acquire(&pollqlock);
+    if (event)
+    {
+        event->data = buf;
+        event->dcb = dcb;
+        event->event = ev;
+        event->next = NULL;
+        event->tail = event;
 
-    /** Set event to DCB */
-    if (DCB_POLL_BUSY(dcb))
-    {
-        if (dcb->evq.pending_events == 0)
+        int thr = dcb->owner;
+
+        if (fake_events[thr])
         {
-            pollStats.evq_pending++;
-        }
-        dcb->evq.pending_events |= ev;
-    }
-    else
-    {
-        dcb->evq.pending_events = ev;
-        /** Add DCB to eventqueue if it isn't already there */
-        if (eventq)
-        {
-            dcb->evq.prev = eventq->evq.prev;
-            eventq->evq.prev->evq.next = dcb;
-            eventq->evq.prev = dcb;
-            dcb->evq.next = eventq;
+            fake_events[thr]->tail->next = event;
+            fake_events[thr]->tail = event;
         }
         else
         {
-            eventq = dcb;
-            dcb->evq.prev = dcb;
-            dcb->evq.next = dcb;
-        }
-        pollStats.evq_length++;
-        pollStats.evq_pending++;
-
-        if (pollStats.evq_length > pollStats.evq_max)
-        {
-            pollStats.evq_max = pollStats.evq_length;
+            fake_events[thr] = event;
         }
     }
-    spinlock_release(&pollqlock);
 }
 
 /*
@@ -1458,7 +1464,7 @@ static void poll_add_event_to_dcb(DCB*       dcb,
 void
 poll_fake_write_event(DCB *dcb)
 {
-    poll_fake_event(dcb, EPOLLOUT);
+    poll_add_event_to_dcb(dcb, NULL, EPOLLOUT);
 }
 
 /*
@@ -1477,79 +1483,7 @@ poll_fake_write_event(DCB *dcb)
 void
 poll_fake_read_event(DCB *dcb)
 {
-    poll_fake_event(dcb, EPOLLIN);
-}
-
-/*
- * Insert a fake completion event for a DCB into the polling queue.
- *
- * This is used to trigger transmission activity on another DCB from
- * within the event processing routine of a DCB. or to allow a DCB
- * to defer some further output processing, to allow for other DCBs
- * to receive a slice of the processing time. Fake events are added
- * to the tail of the event queue, in the same way that real events
- * are, so maintain the "fairness" of processing.
- *
- * @param dcb   DCB to emulate an event for
- * @param ev    Event to emulate
- */
-void
-poll_fake_event(DCB *dcb, enum EPOLL_EVENTS ev)
-{
-
-    spinlock_acquire(&pollqlock);
-    /*
-     * If the DCB is already on the queue, there are no pending events and
-     * there are other events on the queue, then
-     * take it off the queue. This stops the DCB hogging the threads.
-     */
-    if (DCB_POLL_BUSY(dcb) && dcb->evq.pending_events == 0 && dcb->evq.prev != dcb)
-    {
-        dcb->evq.prev->evq.next = dcb->evq.next;
-        dcb->evq.next->evq.prev = dcb->evq.prev;
-        if (eventq == dcb)
-        {
-            eventq = dcb->evq.next;
-        }
-        dcb->evq.next = NULL;
-        dcb->evq.prev = NULL;
-        pollStats.evq_length--;
-    }
-
-    if (DCB_POLL_BUSY(dcb))
-    {
-        if (dcb->evq.pending_events == 0)
-        {
-            pollStats.evq_pending++;
-        }
-        dcb->evq.pending_events |= ev;
-    }
-    else
-    {
-        dcb->evq.pending_events = ev;
-        dcb->evq.inserted = hkheartbeat;
-        if (eventq)
-        {
-            dcb->evq.prev = eventq->evq.prev;
-            eventq->evq.prev->evq.next = dcb;
-            eventq->evq.prev = dcb;
-            dcb->evq.next = eventq;
-        }
-        else
-        {
-            eventq = dcb;
-            dcb->evq.prev = dcb;
-            dcb->evq.next = dcb;
-        }
-        pollStats.evq_length++;
-        pollStats.evq_pending++;
-        dcb->evq.inserted = hkheartbeat;
-        if (pollStats.evq_length > pollStats.evq_max)
-        {
-            pollStats.evq_max = pollStats.evq_length;
-        }
-    }
-    spinlock_release(&pollqlock);
+    poll_add_event_to_dcb(dcb, NULL, EPOLLIN);
 }
 
 /*
@@ -1567,42 +1501,7 @@ poll_fake_hangup_event(DCB *dcb)
 #else
     uint32_t ev = EPOLLHUP;
 #endif
-
-    spinlock_acquire(&pollqlock);
-    if (DCB_POLL_BUSY(dcb))
-    {
-        if (dcb->evq.pending_events == 0)
-        {
-            pollStats.evq_pending++;
-        }
-        dcb->evq.pending_events |= ev;
-    }
-    else
-    {
-        dcb->evq.pending_events = ev;
-        dcb->evq.inserted = hkheartbeat;
-        if (eventq)
-        {
-            dcb->evq.prev = eventq->evq.prev;
-            eventq->evq.prev->evq.next = dcb;
-            eventq->evq.prev = dcb;
-            dcb->evq.next = eventq;
-        }
-        else
-        {
-            eventq = dcb;
-            dcb->evq.prev = dcb;
-            dcb->evq.next = dcb;
-        }
-        pollStats.evq_length++;
-        pollStats.evq_pending++;
-        dcb->evq.inserted = hkheartbeat;
-        if (pollStats.evq_length > pollStats.evq_max)
-        {
-            pollStats.evq_max = pollStats.evq_length;
-        }
-    }
-    spinlock_release(&pollqlock);
+    poll_add_event_to_dcb(dcb, NULL, ev);
 }
 
 /**
@@ -1696,14 +1595,14 @@ poll_get_stat(POLL_STAT stat)
         return ts_stats_sum(pollStats.n_accept);
     case POLL_STAT_EVQ_LEN:
         return pollStats.evq_length;
-    case POLL_STAT_EVQ_PENDING:
-        return pollStats.evq_pending;
     case POLL_STAT_EVQ_MAX:
         return pollStats.evq_max;
     case POLL_STAT_MAX_QTIME:
         return (int)queueStats.maxqtime;
     case POLL_STAT_MAX_EXECTIME:
         return (int)queueStats.maxexectime;
+    default:
+        break;
     }
     return 0;
 }
diff --git a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
index cc943a460..67b602cdc 100644
--- a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
+++ b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
@@ -599,10 +599,8 @@ gw_read_backend_event(DCB *dcb)
             if (proto->protocol_auth_state == MXS_AUTH_STATE_COMPLETE)
             {
                 /** Authentication completed successfully */
-                spinlock_acquire(&dcb->authlock);
                 GWBUF *localq = dcb->delayq;
                 dcb->delayq = NULL;
-                spinlock_release(&dcb->authlock);
 
                 if (localq)
                 {
@@ -746,9 +744,9 @@ gw_read_and_write(DCB *dcb)
     {
         GWBUF *tmp = modutil_get_complete_packets(&read_buffer);
         /* Put any residue into the read queue */
-        spinlock_acquire(&dcb->authlock);
+
         dcb->dcb_readqueue = read_buffer;
-        spinlock_release(&dcb->authlock);
+
         if (tmp == NULL)
         {
             /** No complete packets */
@@ -1012,7 +1010,7 @@ static int gw_MySQLWrite_backend(DCB *dcb, GWBUF *queue)
 
             gwbuf_free(queue);
             rc = 0;
-            spinlock_release(&dcb->authlock);
+
             break;
 
         case MXS_AUTH_STATE_COMPLETE:
@@ -1027,7 +1025,7 @@ static int gw_MySQLWrite_backend(DCB *dcb, GWBUF *queue)
                       dcb->fd,
                       STRPROTOCOLSTATE(backend_protocol->protocol_auth_state));
 
-            spinlock_release(&dcb->authlock);
+
             /**
              * Statement type is used in readwrite split router.
              * Command is *not* set for readconn router.
@@ -1082,7 +1080,7 @@ static int gw_MySQLWrite_backend(DCB *dcb, GWBUF *queue)
              * connected with auth ok
              */
             backend_set_delayqueue(dcb, queue);
-            spinlock_release(&dcb->authlock);
+
             rc = 1;
         }
         break;
@@ -1807,9 +1805,9 @@ static GWBUF* process_response_data(DCB* dcb,
 
                     /** Store the already read data into the readqueue of the DCB
                      * and restore the response status to the initial number of packets */
-                    spinlock_acquire(&dcb->authlock);
+
                     dcb->dcb_readqueue = gwbuf_append(outbuf, dcb->dcb_readqueue);
-                    spinlock_release(&dcb->authlock);
+
                     protocol_set_response_status(p, initial_packets, initial_bytes);
                     return NULL;
                 }
diff --git a/server/modules/protocol/MySQL/MySQLClient/mysql_client.c b/server/modules/protocol/MySQL/MySQLClient/mysql_client.c
index df10a1d1f..94040b3a3 100644
--- a/server/modules/protocol/MySQL/MySQLClient/mysql_client.c
+++ b/server/modules/protocol/MySQL/MySQLClient/mysql_client.c
@@ -433,19 +433,19 @@ int gw_read_client_event(DCB* dcb)
          * will be changed to MYSQL_IDLE (see below).
          *
          */
-        case MXS_AUTH_STATE_MESSAGE_READ:
-            /* After this call read_buffer will point to freed data */
-            if (nbytes_read < 3 || (0 == max_bytes && nbytes_read <
-                                    (MYSQL_GET_PACKET_LEN((uint8_t *) GWBUF_DATA(read_buffer)) + 4)) ||
-                (0 != max_bytes && nbytes_read < max_bytes))
-            {
-                spinlock_acquire(&dcb->authlock);
-                dcb->dcb_readqueue = read_buffer;
-                spinlock_release(&dcb->authlock);
-                return 0;
-            }
-            return_code = gw_read_do_authentication(dcb, read_buffer, nbytes_read);
-            break;
+    case MXS_AUTH_STATE_MESSAGE_READ:
+        /* After this call read_buffer will point to freed data */
+        if (nbytes_read < 3 || (0 == max_bytes && nbytes_read <
+            (MYSQL_GET_PACKET_LEN((uint8_t *) GWBUF_DATA(read_buffer)) + 4)) ||
+            (0 != max_bytes && nbytes_read < max_bytes))
+        {
+
+            dcb->dcb_readqueue = read_buffer;
+
+            return 0;
+        }
+        return_code = gw_read_do_authentication(dcb, read_buffer, nbytes_read);
+        break;
 
         /**
          *
@@ -861,9 +861,9 @@ gw_read_normal_data(DCB *dcb, GWBUF *read_buffer, int nbytes_read)
         if (nbytes_read < 3 || nbytes_read <
             (MYSQL_GET_PACKET_LEN((uint8_t *) GWBUF_DATA(read_buffer)) + 4))
         {
-            spinlock_acquire(&dcb->authlock);
+
             dcb->dcb_readqueue = read_buffer;
-            spinlock_release(&dcb->authlock);
+
             return 0;
         }
         gwbuf_set_type(read_buffer, GWBUF_TYPE_MYSQL);
@@ -904,9 +904,9 @@ gw_read_finish_processing(DCB *dcb, GWBUF *read_buffer, uint64_t capabilities)
         {
             /* Must have been data left over */
             /* Add incomplete mysql packet to read queue */
-            spinlock_acquire(&dcb->authlock);
+
             dcb->dcb_readqueue = gwbuf_append(dcb->dcb_readqueue, read_buffer);
-            spinlock_release(&dcb->authlock);
+
         }
     }
     else if (NULL != session->router_session || (rcap_type_required(capabilities, RCAP_TYPE_NO_RSESSION)))
diff --git a/server/modules/protocol/MySQL/mysql_common.c b/server/modules/protocol/MySQL/mysql_common.c
index 9dad9552a..04618cb68 100644
--- a/server/modules/protocol/MySQL/mysql_common.c
+++ b/server/modules/protocol/MySQL/mysql_common.c
@@ -1039,9 +1039,9 @@ bool read_complete_packet(DCB *dcb, GWBUF **readbuf)
         if (localbuf)
         {
             /** Store any extra data in the DCB's readqueue */
-            spinlock_acquire(&dcb->authlock);
+
             dcb->dcb_readqueue = gwbuf_append(dcb->dcb_readqueue, localbuf);
-            spinlock_release(&dcb->authlock);
+
         }
     }
 
@@ -1061,7 +1061,6 @@ bool gw_get_shared_session_auth_info(DCB* dcb, MYSQL_session* session)
     CHK_DCB(dcb);
     CHK_SESSION(dcb->session);
 
-    spinlock_acquire(&dcb->session->ses_lock);
 
     if (dcb->session->state != SESSION_STATE_ALLOC &&
         dcb->session->state != SESSION_STATE_DUMMY)
@@ -1076,7 +1075,7 @@ bool gw_get_shared_session_auth_info(DCB* dcb, MYSQL_session* session)
                   pthread_self(), dcb->session->state);
         rval = false;
     }
-    spinlock_release(&dcb->session->ses_lock);
+
     return rval;
 }
 
diff --git a/server/modules/routing/maxinfo/maxinfo_exec.c b/server/modules/routing/maxinfo/maxinfo_exec.c
index 3ede0d05b..b77cc148f 100644
--- a/server/modules/routing/maxinfo/maxinfo_exec.c
+++ b/server/modules/routing/maxinfo/maxinfo_exec.c
@@ -1044,15 +1044,6 @@ maxinfo_event_queue_length()
     return poll_get_stat(POLL_STAT_EVQ_LEN);
 }
 
-/**
- * Interface to poll stats for event pending queue length
- */
-static int
-maxinfo_event_pending_queue_length()
-{
-    return poll_get_stat(POLL_STAT_EVQ_PENDING);
-}
-
 /**
  * Interface to poll stats for max event queue length
  */
@@ -1108,7 +1099,6 @@ static struct
     { "Error_events", VT_INT, (STATSFUNC)maxinfo_error_events },
     { "Accept_events", VT_INT, (STATSFUNC)maxinfo_accept_events },
     { "Event_queue_length", VT_INT, (STATSFUNC)maxinfo_event_queue_length },
-    { "Pending_events", VT_INT, (STATSFUNC)maxinfo_event_pending_queue_length },
     { "Max_event_queue_length", VT_INT, (STATSFUNC)maxinfo_max_event_queue_length },
     { "Max_event_queue_time", VT_INT, (STATSFUNC)maxinfo_max_event_queue_time },
     { "Max_event_execution_time", VT_INT, (STATSFUNC)maxinfo_max_event_exec_time },

From f77f78360e08c95e95424782e09ebea8f336c6fd Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Tue, 25 Oct 2016 21:23:39 +0300
Subject: [PATCH 17/42] Insert fake events under a lock

The thread-specific spinlock needs to be acquired before a fake event is
inserted from a non-polling thread. The usual situation is when a monitor
thread inserts a hangup event for a DCB.

Other, less common cases are when session timeouts have been enabled and
the DCB needs to be closed. Here it is better to insert a fake hangup
event instead of directly closing the DCB from an external thread.
---
 server/core/poll.c    | 41 +++++++++++++++++++++++++++++++++++------
 server/core/session.c |  3 ++-
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/server/core/poll.c b/server/core/poll.c
index cfa316a2e..665b39938 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -94,6 +94,7 @@ typedef struct fake_event
 static int *epoll_fd;    /*< The epoll file descriptor */
 static int next_epoll_fd = 0; /*< Which thread handles the next DCB */
 static fake_event_t **fake_events; /*< Thread-specific fake event queue */
+static SPINLOCK      *fake_event_lock;
 static int do_shutdown = 0;  /*< Flag the shutdown of the poll subsystem */
 static GWBITMASK poll_mask;
 #if MUTEX_EPOLL
@@ -235,11 +236,21 @@ poll_init()
         }
     }
 
-    if ((fake_events = MXS_CALLOC(sizeof(fake_event_t*), n_threads)) == NULL)
+    if ((fake_events = MXS_CALLOC(n_threads, sizeof(fake_event_t*))) == NULL)
     {
         exit(-1);
     }
 
+    if ((fake_event_lock = MXS_CALLOC(n_threads, sizeof(SPINLOCK))) == NULL)
+    {
+        exit(-1);
+    }
+
+    for (int i = 0; i < n_threads; i++)
+    {
+        spinlock_init(&fake_event_lock[i]);
+    }
+
     memset(&pollStats, 0, sizeof(pollStats));
     memset(&queueStats, 0, sizeof(queueStats));
     bitmask_init(&poll_mask);
@@ -719,17 +730,28 @@ poll_waitevents(void *arg)
             process_pollq(thread_id, &events[i]);
         }
 
-        /** Process fake events */
-        while (fake_events[thread_id])
-        {
-            fake_event_t *event = fake_events[thread_id];
-            fake_events[thread_id] = fake_events[thread_id]->next;
+        fake_event_t *event = NULL;
 
+        /** It is very likely that the queue is empty so to avoid hitting the
+         * spinlock every time we receive events, we only do a dirty read. Currently,
+         * only the monitors inject fake events from external threads. */
+        if (fake_events[thread_id])
+        {
+            spinlock_acquire(&fake_event_lock[thread_id]);
+            event = fake_events[thread_id];
+            fake_events[thread_id] = NULL;
+            spinlock_release(&fake_event_lock[thread_id]);
+        }
+
+        /** Process fake events */
+        while (event)
+        {
             struct epoll_event ev;
             event->dcb->dcb_fakequeue = event->data;
             ev.data.ptr = event->dcb;
             ev.events = event->event;
             process_pollq(thread_id, &ev);
+            event = event->next;
         }
 
         if (check_timeouts && hkheartbeat >= next_timeout_check)
@@ -1436,6 +1458,11 @@ static void poll_add_event_to_dcb(DCB*       dcb,
 
         int thr = dcb->owner;
 
+        /** It is possible that a housekeeper or a monitor thread inserts a fake
+         * event into the thread's event queue which is why the operation needs
+         * to be protected by a spinlock */
+        spinlock_acquire(&fake_event_lock[thr]);
+
         if (fake_events[thr])
         {
             fake_events[thr]->tail->next = event;
@@ -1445,6 +1472,8 @@ static void poll_add_event_to_dcb(DCB*       dcb,
         {
             fake_events[thr] = event;
         }
+
+        spinlock_release(&fake_event_lock[thr]);
     }
 }
 
diff --git a/server/core/session.c b/server/core/session.c
index ac29c884a..e0bf2a47d 100644
--- a/server/core/session.c
+++ b/server/core/session.c
@@ -42,6 +42,7 @@
 #include <maxscale/atomic.h>
 #include <maxscale/log_manager.h>
 #include <maxscale/housekeeper.h>
+#include <maxscale/poll.h>
 
 /* This list of all sessions */
 LIST_CONFIG SESSIONlist =
@@ -927,7 +928,7 @@ void process_idle_sessions()
                 if (all_session->service && all_session->client_dcb && all_session->client_dcb->state == DCB_STATE_POLLING &&
                     hkheartbeat - all_session->client_dcb->last_read > all_session->service->conn_idle_timeout * 10)
                 {
-                    dcb_close(all_session->client_dcb);
+                    poll_fake_hangup_event(all_session->client_dcb);
                 }
 
                 current = list_iterate(&SESSIONlist, current);

From 076b810c1ee0aaf5b92b3d478d12f1235abd85c0 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Wed, 26 Oct 2016 21:55:24 +0300
Subject: [PATCH 18/42] Create thread specific zombie queues

Because each thread has its own epoll file descriptor and only one
thread can process a DCB, it makes sense to move to a per-thread zombie
queue. This removes one of the last restrictions on scalability.
---
 include/maxscale/dcb.h     |  25 +++-
 server/core/dcb.c          | 264 ++++++++-----------------------------
 server/core/gateway.cc     |   1 +
 server/core/poll.c         |   4 +-
 server/core/session.c      |   2 +
 server/core/test/testdcb.c |   1 -
 6 files changed, 81 insertions(+), 216 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index 93483d227..f687ae0b4 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -249,7 +249,6 @@ typedef struct dcb
     GWBUF           *delayq;        /**< Delay Backend Write Data Queue */
     GWBUF           *dcb_readqueue; /**< read queue for storing incomplete reads */
     GWBUF           *dcb_fakequeue; /**< Fake event queue for generated events */
-    SPINLOCK        authlock;       /**< Generic Authorization spinlock */
 
     DCBSTATS        stats;          /**< DCB related statistics */
     unsigned int    dcb_server_status; /*< the server role indicator from SERVER */
@@ -285,7 +284,7 @@ typedef struct dcb
 #define DCB_INIT {.dcb_chk_top = CHK_NUM_DCB, .dcb_initlock = SPINLOCK_INIT, \
     .evq = DCBEVENTQ_INIT, .ipv4 = {0}, .func = {0}, .authfunc = {0}, \
     .writeqlock = SPINLOCK_INIT, .delayqlock = SPINLOCK_INIT, \
-    .authlock = SPINLOCK_INIT, .stats = {0}, .memdata = DCBMM_INIT, \
+    .stats = {0}, .memdata = DCBMM_INIT, \
     .cb_lock = SPINLOCK_INIT, .pollinlock = SPINLOCK_INIT, \
     .fd = DCBFD_CLOSED, .stats = DCBSTATS_INIT, .ssl_state = SSL_HANDSHAKE_UNKNOWN, \
     .state = DCB_STATE_ALLOC, .polloutlock = SPINLOCK_INIT, .dcb_chk_tail = CHK_NUM_DCB, \
@@ -316,7 +315,13 @@ typedef enum
 
 #define DCB_POLL_BUSY(x)                ((x)->evq.next != NULL)
 
-DCB *dcb_get_zombies(void);
+/**
+ * @brief DCB system initialization function
+ *
+ * This function needs to be the first function call into this system.
+ */
+void dcb_global_init();
+
 int dcb_write(DCB *, GWBUF *);
 DCB *dcb_accept(DCB *listener, GWPROTOCOL *protocol_funcs);
 bool dcb_pre_alloc(int number);
@@ -328,7 +333,17 @@ DCB *dcb_clone(DCB *);
 int dcb_read(DCB *, GWBUF **, int);
 int dcb_drain_writeq(DCB *);
 void dcb_close(DCB *);
-DCB *dcb_process_zombies(int);              /* Process Zombies except the one behind the pointer */
+
+/**
+ * @brief Process zombie DCBs
+ *
+ * This should only be called from a polling thread in poll.c when no events
+ * are being processed.
+ *
+ * @param threadid Thread ID of the poll thread
+ */
+void dcb_process_zombies(int threadid);
+
 void printAllDCBs();                         /* Debug to print all DCB in the system */
 void printDCB(DCB *);                        /* Debug print routine */
 void dprintDCBList(DCB *);                 /* Debug print DCB list statistics */
@@ -345,8 +360,6 @@ int dcb_remove_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON, voi
 int dcb_isvalid(DCB *);                     /* Check the DCB is in the linked list */
 int dcb_count_by_usage(DCB_USAGE);          /* Return counts of DCBs */
 int dcb_persistent_clean_count(DCB *, bool);      /* Clean persistent and return count */
-
-void dcb_call_foreach (struct server* server, DCB_REASON reason);
 void dcb_hangup_foreach (struct server* server);
 size_t dcb_get_session_id(DCB* dcb);
 bool dcb_get_ses_log_info(DCB* dcb, size_t* sesid, int* enabled_logs);
diff --git a/server/core/dcb.c b/server/core/dcb.c
index 1ada04d25..df6068e6f 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -100,18 +100,30 @@ static LIST_CONFIG DCBlist =
 /* A DCB with null values, used for initialization */
 static DCB dcb_initialized = DCB_INIT;
 
-static  DCB             *zombies = NULL;
-static  int             nzombies = 0;
+static  DCB           **zombies;
+static  int            *nzombies;
 static  int             maxzombies = 0;
 static  SPINLOCK        zombiespin = SPINLOCK_INIT;
 
+void dcb_global_init()
+{
+    int nthreads = config_threadcount();
+
+    if ((zombies = MXS_CALLOC(nthreads, sizeof(DCB*))) == NULL ||
+        (nzombies = MXS_CALLOC(nthreads, sizeof(int))) == NULL)
+    {
+        MXS_OOM();
+        raise(SIGABRT);
+    }
+}
+
 static void dcb_initialize(void *dcb);
 static void dcb_final_free(DCB *dcb);
 static void dcb_call_callback(DCB *dcb, DCB_REASON reason);
 static int  dcb_null_write(DCB *dcb, GWBUF *buf);
 static int  dcb_null_auth(DCB *dcb, SERVER *server, SESSION *session, GWBUF *buf);
 static inline DCB * dcb_find_in_list(DCB *dcb);
-static inline void dcb_process_victim_queue(DCB *listofdcb);
+static inline void dcb_process_victim_queue(int threadid);
 static void dcb_stop_polling_and_shutdown (DCB *dcb);
 static bool dcb_maybe_add_persistent(DCB *);
 static inline bool dcb_write_parameter_check(DCB *dcb, GWBUF *queue);
@@ -166,17 +178,6 @@ bool dcb_get_ses_log_info(
     return false;
 }
 
-/**
- * Return the pointer to the list of zombie DCB's
- *
- * @return Zombies DCB list
- */
-DCB *
-dcb_get_zombies(void)
-{
-    return zombies;
-}
-
 /*
  * @brief Pre-allocate memory for a number of DCBs
  *
@@ -455,109 +456,12 @@ dcb_free_all_memory(DCB *dcb)
  *
  * @param       threadid        The thread ID of the caller
  */
-DCB *
-dcb_process_zombies(int threadid)
+void dcb_process_zombies(int threadid)
 {
-    DCB *zombiedcb;
-    DCB *previousdcb = NULL, *nextdcb;
-    DCB *listofdcb = NULL;
-
-    /**
-     * Perform a dirty read to see if there is anything in the queue.
-     * This avoids threads hitting the queue spinlock when the queue
-     * is empty. This will really help when the only entry is being
-     * freed, since the queue is updated before the expensive call to
-     * dcb_final_free.
-     */
-    if (!zombies)
+    if (zombies[threadid])
     {
-        return NULL;
+        dcb_process_victim_queue(threadid);
     }
-
-    /*
-     * Process the zombie queue and create a list of DCB's that can be
-     * finally freed. This processing is down under a spinlock that
-     * will prevent new entries being added to the zombie queue. Therefore
-     * we do not want to do any expensive operations under this spinlock
-     * as it will block other threads. The expensive operations will be
-     * performed on the victim queue within holding the zombie queue
-     * spinlock.
-     */
-    spinlock_acquire(&zombiespin);
-    zombiedcb = zombies;
-    while (zombiedcb)
-    {
-        CHK_DCB(zombiedcb);
-        nextdcb = zombiedcb->memdata.next;
-        /*
-         * Skip processing of DCB's that are
-         * in the event queue waiting to be processed.
-         */
-        if (zombiedcb->evq.next || zombiedcb->evq.prev)
-        {
-            previousdcb = zombiedcb;
-        }
-        else
-        {
-
-            if (bitmask_clear_without_spinlock(&zombiedcb->memdata.bitmask, threadid))
-            {
-                /**
-                 * Remove the DCB from the zombie queue
-                 * and call the final free routine for the
-                 * DCB
-                 *
-                 * zombiedcb is the DCB we are processing
-                 * previousdcb is the previous DCB on the zombie
-                 * queue or NULL if the DCB is at the head of the
-                 * queue.  Remove zombiedcb from the zombies list.
-                 */
-                if (NULL == previousdcb)
-                {
-                    zombies = zombiedcb->memdata.next;
-                }
-                else
-                {
-                    previousdcb->memdata.next = zombiedcb->memdata.next;
-                }
-
-                MXS_DEBUG("%lu [%s] Remove dcb "
-                          "%p fd %d in state %s from the "
-                          "list of zombies.",
-                          pthread_self(),
-                          __func__,
-                          zombiedcb,
-                          zombiedcb->fd,
-                          STRDCBSTATE(zombiedcb->state));
-                /*<
-                 * Move zombie dcb to linked list of victim dcbs.
-                 * The variable dcb is used to hold the last DCB
-                 * to have been added to the linked list, or NULL
-                 * if none has yet been added.  If the list
-                 * (listofdcb) is not NULL, then it follows that
-                 * dcb will also not be null.
-                 */
-                nzombies--;
-                zombiedcb->memdata.next = listofdcb;
-                listofdcb = zombiedcb;
-            }
-            else
-            {
-                /* Since we didn't remove this dcb from the zombies
-                   list, we need to advance the previous pointer */
-                previousdcb = zombiedcb;
-            }
-        }
-        zombiedcb = nextdcb;
-    }
-    spinlock_release(&zombiespin);
-
-    if (listofdcb)
-    {
-        dcb_process_victim_queue(listofdcb);
-    }
-
-    return zombies;
 }
 
 /**
@@ -570,17 +474,17 @@ dcb_process_zombies(int threadid)
  * @param       listofdcb       The first victim DCB
  */
 static inline void
-dcb_process_victim_queue(DCB *listofdcb)
+dcb_process_victim_queue(int threadid)
 {
-    DCB *dcb = listofdcb;
+    /** Grab the zombie queue to a local queue. This allows us to add back DCBs
+     * that should not yet be closed. */
+    DCB *dcblist = zombies[threadid];
+    zombies[threadid] = NULL;
 
-    while (dcb != NULL)
+    while (dcblist)
     {
-        DCB *nextdcb;
-        /*<
-         * Stop dcb's listening and modify state accordingly.
-         */
-        spinlock_acquire(&dcb->dcb_initlock);
+        DCB *dcb = dcblist;
+
         if (dcb->state == DCB_STATE_POLLING  || dcb->state == DCB_STATE_LISTENING)
         {
             if (dcb->state == DCB_STATE_LISTENING)
@@ -595,34 +499,28 @@ dcb_process_victim_queue(DCB *listofdcb)
             }
             else
             {
-                /* Must be DCB_STATE_POLLING */
-                spinlock_release(&dcb->dcb_initlock);
                 if (0 == dcb->persistentstart && dcb_maybe_add_persistent(dcb))
                 {
                     /* Have taken DCB into persistent pool, no further killing */
-                    dcb = dcb->memdata.next;
-                    continue;
+                    dcblist = dcblist->memdata.next;
                 }
                 else
                 {
-                    DCB *next2dcb;
+                    /** The DCB is still polling. Shut it down and process it later. */
                     dcb_stop_polling_and_shutdown(dcb);
-                    spinlock_acquire(&zombiespin);
-                    bitmask_copy(&dcb->memdata.bitmask, poll_bitmask());
-                    next2dcb = dcb->memdata.next;
-                    dcb->memdata.next = zombies;
-                    zombies = dcb;
-                    nzombies++;
-                    if (nzombies > maxzombies)
-                    {
-                        maxzombies = nzombies;
-                    }
-                    spinlock_release(&zombiespin);
-                    dcb = next2dcb;
-                    continue;
+                    DCB *newzombie = dcblist;
+                    dcblist = dcblist->memdata.next;
+                    newzombie->memdata.next = zombies[threadid];
+                    zombies[threadid] = newzombie;
                 }
+
+                /** Nothing to do here but to process the next DCB */
+                continue;
             }
         }
+
+        nzombies[threadid]--;
+
         /*
          * Into the final close logic, so if DCB is for backend server, we
          * must decrement the number of current connections.
@@ -690,11 +588,14 @@ dcb_process_victim_queue(DCB *listofdcb)
                              &mxs_log_tls.li_sesid,
                              &mxs_log_tls.li_enabled_priorities);
 
+        /** Move to the next DCB before freeing the previous one */
+        dcblist = dcblist->memdata.next;
+
+        /** After these calls, the DCB should be treated as if it were freed.
+         * Whether it is actually freed depends on the type of the DCB and how
+         * many DCBs are linked to it via the SESSION object. */
         dcb->state = DCB_STATE_DISCONNECTED;
-        nextdcb = dcb->memdata.next;
-        spinlock_release(&dcb->dcb_initlock);
         dcb_final_free(dcb);
-        dcb = nextdcb;
     }
     /** Reset threads session data */
     mxs_log_tls.li_sesid = 0;
@@ -1748,20 +1649,15 @@ dcb_close(DCB *dcb)
     if (dcb->state == DCB_STATE_ALLOC && dcb->fd == DCBFD_CLOSED)
     {
         dcb_final_free(dcb);
-        return;
     }
-
     /*
      * If DCB is in persistent pool, mark it as an error and exit
      */
-    if (dcb->persistentstart > 0)
+    else if (dcb->persistentstart > 0)
     {
         dcb->dcb_errhandle_called = true;
-        return;
     }
-
-    spinlock_acquire(&zombiespin);
-    if (!dcb->dcb_is_zombie)
+    else if (!dcb->dcb_is_zombie)
     {
         if (DCB_ROLE_BACKEND_HANDLER == dcb->dcb_role && 0 == dcb->persistentstart
             && dcb->server && DCB_STATE_POLLING == dcb->state)
@@ -1777,23 +1673,21 @@ dcb_close(DCB *dcb)
         /*<
          * Add closing dcb to the top of the list, setting zombie marker
          */
+        int owner = dcb->owner;
         dcb->dcb_is_zombie = true;
-        dcb->memdata.next = zombies;
-        zombies = dcb;
-        nzombies++;
-        if (nzombies > maxzombies)
+        dcb->memdata.next = zombies[owner];
+        zombies[owner] = dcb;
+        nzombies[owner]++;
+        if (nzombies[owner] > maxzombies)
         {
-            maxzombies = nzombies;
-        }
-        /*< Set bit for each maxscale thread. This should be done before
-         * the state is changed, so as to protect the DCB from premature
-         * destruction. */
-        if (dcb->server)
-        {
-            bitmask_copy(&dcb->memdata.bitmask, poll_bitmask());
+            maxzombies = nzombies[owner];
         }
     }
-    spinlock_release(&zombiespin);
+    else
+    {
+        /** DCBs in the zombie queue can still receive events which means that
+         * a DCB can be closed multiple times while it's in the zombie queue. */
+    }
 }
 
 /**
@@ -2621,50 +2515,6 @@ dcb_isvalid(DCB *dcb)
     return (int)list_is_entry_in_use(&DCBlist, (list_entry_t *)dcb);
 }
 
-/**
- * Call all the callbacks on all DCB's that match the server and the reason given
- *
- * @param reason        The DCB_REASON that triggers the callback
- */
-void
-dcb_call_foreach(struct server* server, DCB_REASON reason)
-{
-    MXS_DEBUG("%lu [dcb_call_foreach]", pthread_self());
-
-    switch (reason) {
-    case DCB_REASON_DRAINED:
-    case DCB_REASON_HIGH_WATER:
-    case DCB_REASON_LOW_WATER:
-    case DCB_REASON_ERROR:
-    case DCB_REASON_HUP:
-    case DCB_REASON_NOT_RESPONDING:
-    {
-        DCB *dcb;
-        list_entry_t *current;
-
-        current = list_start_iteration(&DCBlist);
-
-        while (current)
-        {
-            dcb = (DCB *)current;
-            spinlock_acquire(&dcb->dcb_initlock);
-            if (dcb->state == DCB_STATE_POLLING && dcb->server &&
-                strcmp(dcb->server->unique_name,server->unique_name) == 0)
-            {
-                dcb_call_callback(dcb, DCB_REASON_NOT_RESPONDING);
-            }
-            spinlock_release(&dcb->dcb_initlock);
-            current = list_iterate(&DCBlist, current);
-        }
-        break;
-    }
-
-    default:
-        break;
-    }
-    return;
-}
-
 /**
  * Call all the callbacks on all DCB's that match the server and the reason given
  *
diff --git a/server/core/gateway.cc b/server/core/gateway.cc
index 7f7a9b10e..f2808c328 100644
--- a/server/core/gateway.cc
+++ b/server/core/gateway.cc
@@ -1941,6 +1941,7 @@ int main(int argc, char **argv)
     /* Init MaxScale poll system */
     poll_init();
 
+    dcb_global_init();
     /**
      * Init mysql thread context for main thread as well. Needed when users
      * are queried from backends.
diff --git a/server/core/poll.c b/server/core/poll.c
index 665b39938..e2cc58d0c 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -450,6 +450,7 @@ poll_remove_dcb(DCB *dcb)
      */
     dcbfd = dcb->fd;
     spinlock_release(&dcb->dcb_initlock);
+
     if (dcbfd > 0)
     {
         rc = epoll_ctl(epoll_fd[dcb->owner], EPOLL_CTL_DEL, dcbfd, &ev);
@@ -842,12 +843,11 @@ poll_set_maxwait(unsigned int maxwait)
 static int
 process_pollq(int thread_id, struct epoll_event *event)
 {
-    int found = 0;
     uint32_t ev = event->events;
     unsigned long qtime;
 
     DCB *dcb = event->data.ptr;
-
+    ss_dassert(dcb->owner == thread_id);
 #if PROFILE_POLL
     memlog_log(plog, hkheartbeat - dcb->evq.inserted);
 #endif
diff --git a/server/core/session.c b/server/core/session.c
index e0bf2a47d..98ab5bdea 100644
--- a/server/core/session.c
+++ b/server/core/session.c
@@ -319,6 +319,8 @@ session_link_dcb(SESSION *session, DCB *dcb)
     }
     atomic_add(&session->refcount, 1);
     dcb->session = session;
+    /** Move this DCB under the same thread */
+    dcb->owner = session->client_dcb->owner;
     spinlock_release(&session->ses_lock);
     return true;
 }
diff --git a/server/core/test/testdcb.c b/server/core/test/testdcb.c
index 756487297..cb0dddefb 100644
--- a/server/core/test/testdcb.c
+++ b/server/core/test/testdcb.c
@@ -67,7 +67,6 @@ test1()
     ss_dfprintf(stderr, "\t..done\nMake clone DCB a zombie");
     clone->state = DCB_STATE_NOPOLLING;
     dcb_close(clone);
-    ss_info_dassert(dcb_get_zombies() == clone, "Clone DCB must be start of zombie list now");
     ss_dfprintf(stderr, "\t..done\nProcess the zombies list");
     dcb_process_zombies(0);
     ss_dfprintf(stderr, "\t..done\nCheck clone no longer valid");

From 68d3fc10925dc7b748130a41b9c440b76600d112 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Wed, 26 Oct 2016 23:42:33 +0300
Subject: [PATCH 19/42] Remove the DCB write queue locking

Since only one thread can append to a DCB's write queue at a time, there
is no need to lock it.
---
 server/core/dcb.c                             | 24 ++++---------------
 .../MySQL/MySQLBackend/mysql_backend.c        |  2 --
 2 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/server/core/dcb.c b/server/core/dcb.c
index df6068e6f..6ac136048 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -1122,19 +1122,14 @@ dcb_basic_read_SSL(DCB *dcb, int *nsingleread)
             *nsingleread = -1;
             return NULL;
         }
-        spinlock_acquire(&dcb->writeqlock);
+
         /* If we were in a retry situation, need to clear flag and attempt write */
         if (dcb->ssl_read_want_write || dcb->ssl_read_want_read)
         {
             dcb->ssl_read_want_write = false;
             dcb->ssl_read_want_read = false;
-            spinlock_release(&dcb->writeqlock);
             dcb_drain_writeq(dcb);
         }
-        else
-        {
-            spinlock_release(&dcb->writeqlock);
-        }
         break;
 
     case SSL_ERROR_ZERO_RETURN:
@@ -1153,10 +1148,8 @@ dcb_basic_read_SSL(DCB *dcb, int *nsingleread)
                   pthread_self(),
                   __func__
                 );
-        spinlock_acquire(&dcb->writeqlock);
         dcb->ssl_read_want_write = false;
         dcb->ssl_read_want_read = true;
-        spinlock_release(&dcb->writeqlock);
         *nsingleread = 0;
         break;
 
@@ -1166,10 +1159,8 @@ dcb_basic_read_SSL(DCB *dcb, int *nsingleread)
                   pthread_self(),
                   __func__
                 );
-        spinlock_acquire(&dcb->writeqlock);
         dcb->ssl_read_want_write = true;
         dcb->ssl_read_want_read = false;
-        spinlock_release(&dcb->writeqlock);
         *nsingleread = 0;
         break;
 
@@ -1253,7 +1244,6 @@ dcb_write(DCB *dcb, GWBUF *queue)
         return 0;
     }
 
-    spinlock_acquire(&dcb->writeqlock);
     empty_queue = (dcb->writeq == NULL);
     /*
      * Add our data to the write queue.  If the queue already had data,
@@ -1261,10 +1251,10 @@ dcb_write(DCB *dcb, GWBUF *queue)
      * If it did not already have data, we call the drain write queue
      * function immediately to attempt to write the data.
      */
-    atomic_add(&dcb->writeqlen, gwbuf_length(queue));
+    dcb->writeqlen += gwbuf_length(queue);
     dcb->writeq = gwbuf_append(dcb->writeq, queue);
-    spinlock_release(&dcb->writeqlock);
     dcb->stats.n_buffered++;
+
     MXS_DEBUG("%lu [dcb_write] Append to writequeue. %d writes "
               "buffered for dcb %p in state %s fd %d",
               pthread_self(),
@@ -1494,7 +1484,6 @@ dcb_drain_writeq(DCB *dcb)
              */
             if (stop_writing)
             {
-                spinlock_acquire(&dcb->writeqlock);
                 dcb->writeq = gwbuf_append(local_writeq, dcb->writeq);
 
                 if (dcb->drain_called_while_busy)
@@ -1502,13 +1491,11 @@ dcb_drain_writeq(DCB *dcb)
                     local_writeq = dcb->writeq;
                     dcb->writeq = NULL;
                     dcb->drain_called_while_busy = false;
-                    spinlock_release(&dcb->writeqlock);
                     continue;
                 }
                 else
                 {
                     dcb->draining_flag = false;
-                    spinlock_release(&dcb->writeqlock);
                     goto wrap_up;
                 }
             }
@@ -1532,7 +1519,7 @@ wrap_up:
      */
     if (total_written)
     {
-        atomic_add(&dcb->writeqlen, -total_written);
+        dcb->writeqlen -= total_written;
 
         /* Check if the draining has taken us from above water to below water */
         if (above_water && dcb->writeqlen < dcb->low_water)
@@ -1566,7 +1553,6 @@ static GWBUF *
 dcb_grab_writeq(DCB *dcb, bool first_time)
 {
     GWBUF *local_writeq = NULL;
-    spinlock_acquire(&dcb->writeqlock);
 
     if (first_time && dcb->ssl_read_want_write)
     {
@@ -1583,7 +1569,7 @@ dcb_grab_writeq(DCB *dcb, bool first_time)
         dcb->draining_flag = local_writeq ? true : false;
         dcb->writeq = NULL;
     }
-    spinlock_release(&dcb->writeqlock);
+
     return local_writeq;
 }
 
diff --git a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
index 67b602cdc..ec4a3c204 100644
--- a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
+++ b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
@@ -884,13 +884,11 @@ static int gw_write_backend_event(DCB *dcb)
         uint8_t* data = NULL;
         bool com_quit = false;
 
-        spinlock_acquire(&dcb->writeqlock);
         if (dcb->writeq)
         {
             data = (uint8_t *) GWBUF_DATA(dcb->writeq);
             com_quit = MYSQL_IS_COM_QUIT(data);
         }
-        spinlock_release(&dcb->writeqlock);
 
         if (data)
         {

From 51842333d7b49a3bfaefaca1a3f78676df9f39f7 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Thu, 27 Oct 2016 11:00:45 +0300
Subject: [PATCH 20/42] Temporarily take out the listmanager from use

The listmanager code uses a global spinlock which might cause problems
with multiple threads.
---
 server/core/dcb.c      |  5 +++--
 server/core/gateway.cc |  5 +++++
 server/core/session.c  | 10 ++++------
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/server/core/dcb.c b/server/core/dcb.c
index 6ac136048..32863a88a 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -229,11 +229,12 @@ dcb_alloc(dcb_role_t role, SERV_LISTENER *listener)
 {
     DCB *newdcb;
 
-    if ((newdcb = (DCB *)list_find_free(&DCBlist, dcb_initialize)) == NULL)
+    if ((newdcb = (DCB *)MXS_MALLOC(sizeof(*newdcb))) == NULL)
     {
         return NULL;
     }
 
+    dcb_initialize(newdcb);
     newdcb->dcb_role = role;
     newdcb->listener = listener;
     newdcb->entry_is_ready = true;
@@ -440,7 +441,7 @@ dcb_free_all_memory(DCB *dcb)
     bitmask_free(&dcb->memdata.bitmask);
 
     /* We never free the actual DCB, it is available for reuse*/
-    list_free_entry(&DCBlist, (list_entry_t *)dcb);
+    MXS_FREE(dcb);
 
 }
 
diff --git a/server/core/gateway.cc b/server/core/gateway.cc
index f2808c328..de8096f98 100644
--- a/server/core/gateway.cc
+++ b/server/core/gateway.cc
@@ -1935,6 +1935,11 @@ int main(int argc, char **argv)
         goto return_main;
     }
 
+    /* Temporary - should use configuration values and test return value (bool)
+     * TODO: Enable the list manager code */
+    dcb_pre_alloc(1);
+    session_pre_alloc(1);
+
     /** Initialize statistics */
     ts_stats_init();
 
diff --git a/server/core/session.c b/server/core/session.c
index 98ab5bdea..87baf1f01 100644
--- a/server/core/session.c
+++ b/server/core/session.c
@@ -116,15 +116,14 @@ session_pre_alloc(int number)
 SESSION *
 session_alloc(SERVICE *service, DCB *client_dcb)
 {
-    SESSION *session;
+    SESSION *session = (SESSION *)(MXS_MALLOC(sizeof(*session)));
 
-    session = (SESSION *)list_find_free(&SESSIONlist, session_initialize);
-    ss_info_dassert(session != NULL, "Allocating memory for session failed.");
     if (NULL == session)
     {
-        MXS_OOM();
         return NULL;
     }
+    session_initialize(session);
+
     /** Assign a session id and increase */
     session->ses_id = (size_t)atomic_add(&session_id, 1) + 1;
     session->ses_is_child = (bool) DCB_IS_CLONE(client_dcb);
@@ -445,8 +444,7 @@ session_free(SESSION *session)
 static void
 session_final_free(SESSION *session)
 {
-    /* We never free the actual session, it is available for reuse*/
-    list_free_entry(&SESSIONlist, (list_entry_t *)session);
+    MXS_FREE(session);
 }
 
 /**

From abc0681248e11f4d90ebcf75817235433c033e0a Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Thu, 27 Oct 2016 15:13:53 +0300
Subject: [PATCH 21/42] Temporarily disable listmanager debug checks

Since the listmanager code isn't used, the debug assertions will always
fail. They should be disabled until the listmanager code can be converted to
the per-thread model.
---
 include/maxscale/debug.h | 2 --
 server/core/dcb.c        | 5 ++++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/maxscale/debug.h b/include/maxscale/debug.h
index 9370def5c..64973e68e 100644
--- a/include/maxscale/debug.h
+++ b/include/maxscale/debug.h
@@ -474,7 +474,6 @@ typedef enum skygw_chk_t
         ss_info_dassert(d->dcb_chk_top == CHK_NUM_DCB &&        \
                 d->dcb_chk_tail == CHK_NUM_DCB,                 \
                         "Dcb under- or overflow");              \
-        CHK_MANAGED_LIST(d)                                     \
         }
 
 #define CHK_PROTOCOL(p) {                                               \
@@ -487,7 +486,6 @@ typedef enum skygw_chk_t
             ss_info_dassert(s->ses_chk_top == CHK_NUM_SESSION &&        \
                             s->ses_chk_tail == CHK_NUM_SESSION,         \
                             "Session under- or overflow");              \
-            CHK_MANAGED_LIST(s)                                         \
     }
 
 #define CHK_SERVER(s) {                                          \
diff --git a/server/core/dcb.c b/server/core/dcb.c
index 32863a88a..87b9bca5a 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -377,7 +377,8 @@ void
 dcb_free_all_memory(DCB *dcb)
 {
     DCB_CALLBACK *cb_dcb;
-    ss_dassert(dcb->entry_is_in_use);
+    // TODO: Uncomment once listmanager code is in use
+    //ss_dassert(dcb->entry_is_in_use);
 
     if (dcb->protocol && (!DCB_IS_CLONE(dcb)))
     {
@@ -1842,10 +1843,12 @@ void printAllDCBs()
 void
 dprintOneDCB(DCB *pdcb, DCB *dcb)
 {
+/* TODO: Uncomment once listmanager code is in use
     if (false == dcb->entry_is_in_use)
     {
         return;
     }
+*/
     dcb_printf(pdcb, "DCB: %p\n", (void *)dcb);
     dcb_printf(pdcb, "\tDCB state:          %s\n",
                gw_dcb_state2string(dcb->state));

From fe56e65903563f584dc11fa4d4729b500c78fbf7 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Mon, 31 Oct 2016 23:55:05 +0200
Subject: [PATCH 22/42] Use service capabilities in response processing

The MySQLBackend protocol now only checks for complete packets if the
service requires statement based routing. This should remove unnecessary
processing when data is only streamed from the backend to the client.
---
 .../MySQL/MySQLBackend/mysql_backend.c        | 135 +++++++++---------
 1 file changed, 69 insertions(+), 66 deletions(-)

diff --git a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
index ec4a3c204..555d8f385 100644
--- a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
+++ b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
@@ -673,6 +673,42 @@ gw_reply_on_error(DCB *dcb, mxs_auth_state_t state)
     gwbuf_free(errbuf);
 }
 
+/**
+ * @brief Check if a reply can be routed to the client
+ *
+ * @param dcb Backend DCB whose session and client DCB are inspected
+ * @return True if the session is router-ready and its polling client DCB is authenticated or internal
+ */
+static inline bool session_ok_to_route(DCB *dcb)
+{
+    bool rval = false;
+
+    if (dcb->session->state == SESSION_STATE_ROUTER_READY &&
+        dcb->session->client_dcb != NULL &&
+        dcb->session->client_dcb->state == DCB_STATE_POLLING &&
+        (dcb->session->router_session ||
+         service_get_capabilities(dcb->session->service) & RCAP_TYPE_NO_RSESSION))
+    {
+        MySQLProtocol *client_protocol = (MySQLProtocol *)dcb->session->client_dcb->protocol;
+
+        if (client_protocol)
+        {
+            CHK_PROTOCOL(client_protocol);
+
+            if (client_protocol->protocol_auth_state == MXS_AUTH_STATE_COMPLETE)
+            {
+                rval = true;
+            }
+        }
+        else if (dcb->session->client_dcb->dcb_role == DCB_ROLE_INTERNAL) /**< No protocol object: only internal DCBs qualify */
+        {
+            rval = true;
+        }
+    }
+
+    return rval;
+}
+
 /**
  * @brief With authentication completed, read new data and write to backend
  *
@@ -686,7 +722,7 @@ gw_read_and_write(DCB *dcb)
     GWBUF *read_buffer = NULL;
     SESSION *session = dcb->session;
     int nbytes_read;
-    int return_code;
+    int return_code = 0;
 
     CHK_SESSION(session);
 
@@ -719,28 +755,24 @@ gw_read_and_write(DCB *dcb)
             session->state = SESSION_STATE_STOPPING;
             spinlock_release(&session->ses_lock);
         }
-        return_code = 0;
-        goto return_rc;
+        return 0;
     }
 
     nbytes_read = gwbuf_length(read_buffer);
     if (nbytes_read == 0)
     {
         ss_dassert(read_buffer == NULL);
-        goto return_rc;
+        return return_code;
     }
     else
     {
         ss_dassert(read_buffer != NULL);
     }
 
-    if (nbytes_read < 3)
-    {
-        dcb->dcb_readqueue = read_buffer;
-        return_code = 0;
-        goto return_rc;
-    }
+    /** Ask what type of input the router/filter chain expects */
+    uint64_t capabilities = service_get_capabilities(session->service);
 
+    if (rcap_type_required(capabilities, RCAP_TYPE_STMT_INPUT))
     {
         GWBUF *tmp = modutil_get_complete_packets(&read_buffer);
         /* Put any residue into the read queue */
@@ -750,19 +782,29 @@ gw_read_and_write(DCB *dcb)
         if (tmp == NULL)
         {
             /** No complete packets */
-            return_code = 0;
-            goto return_rc;
+            return 0;
         }
-        else
+
+        read_buffer = tmp;
+
+        if (rcap_type_required(capabilities, RCAP_TYPE_CONTIGUOUS_INPUT))
         {
-            read_buffer = tmp;
+            if ((tmp = gwbuf_make_contiguous(read_buffer)))
+            {
+                read_buffer = tmp;
+            }
+            else
+            {
+                /** Failed to make the buffer contiguous */
+                gwbuf_free(read_buffer);
+                poll_fake_hangup_event(dcb);
+                return 0;
+            }
         }
     }
 
     MySQLProtocol *proto = (MySQLProtocol *)dcb->protocol;
 
-    spinlock_acquire(&dcb->authlock);
-
     if (proto->ignore_reply)
     {
 
@@ -773,8 +815,6 @@ gw_read_and_write(DCB *dcb)
         proto->ignore_reply = false;
         gwbuf_free(read_buffer);
 
-        spinlock_release(&dcb->authlock);
-
         int rval = 0;
 
         if (result == MYSQL_REPLY_OK)
@@ -792,14 +832,13 @@ gw_read_and_write(DCB *dcb)
         return rval;
     }
 
-    spinlock_release(&dcb->authlock);
-
     /**
      * If protocol has session command set, concatenate whole
      * response into one buffer.
      */
     if (protocol_get_srv_command((MySQLProtocol *)dcb->protocol, false) != MYSQL_COM_UNDEFINED)
     {
+        ss_dassert(rcap_type_required(capabilities, RCAP_TYPE_STMT_INPUT));
         read_buffer = process_response_data(dcb, read_buffer, gwbuf_length(read_buffer));
         /**
          * Received incomplete response to session command.
@@ -807,64 +846,32 @@ gw_read_and_write(DCB *dcb)
          */
         if (!sescmd_response_complete(dcb))
         {
-            return_code = 0;
-            goto return_rc;
+            return 0;
         }
 
         if (!read_buffer)
         {
-            MXS_NOTICE("%lu [gw_read_backend_event] "
+            MXS_ERROR("%lu [gw_read_backend_event] "
                        "Read buffer unexpectedly null, even though response "
                        "not marked as complete. User: %s",
                        pthread_self(), dcb->session->client_dcb->user);
-            return_code = 0;
-            goto return_rc;
+            return 0;
         }
     }
-    /**
-     * Check that session is operable, and that client DCB is
-     * still listening the socket for replies.
-     */
-    if (dcb->session->state == SESSION_STATE_ROUTER_READY &&
-        dcb->session->client_dcb != NULL &&
-        dcb->session->client_dcb->state == DCB_STATE_POLLING &&
-        (session->router_session ||
-         service_get_capabilities(session->service) & RCAP_TYPE_NO_RSESSION))
+
+    if (session_ok_to_route(dcb))
     {
-        MySQLProtocol *client_protocol = (MySQLProtocol *)dcb->session->client_dcb->protocol;
-        if (client_protocol != NULL)
-        {
-            CHK_PROTOCOL(client_protocol);
-
-            if (client_protocol->protocol_auth_state == MXS_AUTH_STATE_COMPLETE)
-            {
-                gwbuf_set_type(read_buffer, GWBUF_TYPE_MYSQL);
-
-                session->service->router->clientReply(
-                    session->service->router_instance,
-                    session->router_session,
-                    read_buffer,
-                    dcb);
-                return_code = 1;
-            }
-        }
-        else if (dcb->session->client_dcb->dcb_role == DCB_ROLE_INTERNAL)
-        {
-            gwbuf_set_type(read_buffer, GWBUF_TYPE_MYSQL);
-            session->service->router->clientReply(
-                session->service->router_instance,
-                session->router_session,
-                read_buffer,
-                dcb);
-            return_code = 1;
-        }
+        gwbuf_set_type(read_buffer, GWBUF_TYPE_MYSQL);
+        session->service->router->clientReply(session->service->router_instance,
+                                              session->router_session,
+                                              read_buffer, dcb);
+        return_code = 1;
     }
     else /*< session is closing; replying to client isn't possible */
     {
         gwbuf_free(read_buffer);
     }
 
-return_rc:
     return return_code;
 }
 
@@ -943,7 +950,6 @@ static int gw_MySQLWrite_backend(DCB *dcb, GWBUF *queue)
     int rc = 0;
 
     CHK_DCB(dcb);
-    spinlock_acquire(&dcb->authlock);
 
     if (dcb->was_persistent && dcb->state == DCB_STATE_POLLING)
     {
@@ -963,8 +969,6 @@ static int gw_MySQLWrite_backend(DCB *dcb, GWBUF *queue)
         backend_protocol->ignore_reply = true;
         backend_protocol->stored_query = queue;
 
-        spinlock_release(&dcb->authlock);
-
         GWBUF *buf = gw_create_change_user_packet(dcb->session->client_dcb->data, dcb->protocol);
         return dcb_write(dcb, buf) ? 1 : 0;
     }
@@ -983,7 +987,6 @@ static int gw_MySQLWrite_backend(DCB *dcb, GWBUF *queue)
              */
             backend_protocol->stored_query = gwbuf_append(backend_protocol->stored_query, queue);
         }
-        spinlock_release(&dcb->authlock);
         return 1;
     }
 

From 30927455efee4aafbd004016eb6064fc55941c57 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sun, 20 Nov 2016 10:17:04 +0200
Subject: [PATCH 23/42] Add each DCB to the owning thread's list

Each DCB needs to be added to the owning thread's list so that the DCBs can
be iterated through. As sessions always have a client DCB, the sessions don't
need to be added to a similar per-thread list.

This change fixes a problem with dcb_hangup_foreach that the removal of
the list manager introduced. Now the hangup events are properly injected
for the DCBs that connect to the server in question.
---
 include/maxscale/dcb.h |  11 ++-
 server/core/dcb.c      | 177 +++++++++++++++++++++++++++++------------
 server/core/poll.c     |  47 ++---------
 3 files changed, 144 insertions(+), 91 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index f687ae0b4..4b39d4b9c 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -278,6 +278,8 @@ typedef struct dcb
     bool            ssl_write_want_write;    /*< Flag */
     int             dcb_port;       /**< port of target server */
     bool            was_persistent;  /**< Whether this DCB was in the persistent pool */
+    struct dcb      *thr_next; /**< Next DCB in owning thread's list */
+    struct dcb      *thr_tail; /**< Last DCB in owning thread's list */
     skygw_chk_t     dcb_chk_tail;
 } DCB;
 
@@ -288,7 +290,7 @@ typedef struct dcb
     .cb_lock = SPINLOCK_INIT, .pollinlock = SPINLOCK_INIT, \
     .fd = DCBFD_CLOSED, .stats = DCBSTATS_INIT, .ssl_state = SSL_HANDSHAKE_UNKNOWN, \
     .state = DCB_STATE_ALLOC, .polloutlock = SPINLOCK_INIT, .dcb_chk_tail = CHK_NUM_DCB, \
-    .authenticator_data = NULL}
+    .authenticator_data = NULL, .thr_next = NULL, .thr_tail = NULL}
 
 /**
  * The DCB usage filer used for returning DCB's in use for a certain reason
@@ -344,6 +346,13 @@ void dcb_close(DCB *);
  */
 void dcb_process_zombies(int threadid);
 
+/**
+ * Add a DCB to the owner's list
+ *
+ * @param dcb DCB to add
+ */
+void dcb_add_to_list(DCB *dcb);
+
 void printAllDCBs();                         /* Debug to print all DCB in the system */
 void printDCB(DCB *);                        /* Debug print routine */
 void dprintDCBList(DCB *);                 /* Debug print DCB list statistics */
diff --git a/server/core/dcb.c b/server/core/dcb.c
index 87b9bca5a..471e9df08 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -100,6 +100,8 @@ static LIST_CONFIG DCBlist =
 /* A DCB with null values, used for initialization */
 static DCB dcb_initialized = DCB_INIT;
 
+static  DCB           **all_dcbs;
+static  SPINLOCK       *all_dcbs_lock;
 static  DCB           **zombies;
 static  int            *nzombies;
 static  int             maxzombies = 0;
@@ -110,11 +112,18 @@ void dcb_global_init()
     int nthreads = config_threadcount();
 
     if ((zombies = MXS_CALLOC(nthreads, sizeof(DCB*))) == NULL ||
+        (all_dcbs = MXS_CALLOC(nthreads, sizeof(DCB*))) == NULL ||
+        (all_dcbs_lock = MXS_CALLOC(nthreads, sizeof(SPINLOCK))) == NULL ||
         (nzombies = MXS_CALLOC(nthreads, sizeof(int))) == NULL)
     {
         MXS_OOM();
         raise(SIGABRT);
     }
+
+    for (int i = 0; i < nthreads; i++)
+    {
+        spinlock_init(&all_dcbs_lock[i]);
+    }
 }
 
 static void dcb_initialize(void *dcb);
@@ -145,6 +154,7 @@ static int dcb_set_socket_option(int sockfd, int level, int optname, void *optva
 static void dcb_add_to_all_list(DCB *dcb);
 static DCB *dcb_find_free();
 static GWBUF *dcb_grab_writeq(DCB *dcb, bool first_time);
+static void dcb_remove_from_list(DCB *dcb);
 
 size_t dcb_get_session_id(
     DCB *dcb)
@@ -597,6 +607,7 @@ dcb_process_victim_queue(int threadid)
          * Whether it is actually freed depends on the type of the DCB and how
          * many DCBs are linked to it via the SESSION object. */
         dcb->state = DCB_STATE_DISCONNECTED;
+        dcb_remove_from_list(dcb);
         dcb_final_free(dcb);
     }
     /** Reset threads session data */
@@ -1940,7 +1951,6 @@ dprintOneDCB(DCB *pdcb, DCB *dcb)
 void
 dprintDCBList(DCB *pdcb)
 {
-    dprintListStats(pdcb, &DCBlist, "All DCBs");
 }
 
 /**
@@ -1951,19 +1961,19 @@ dprintDCBList(DCB *pdcb)
 void
 dprintAllDCBs(DCB *pdcb)
 {
-    list_entry_t *current;
 
-    current = list_start_iteration(&DCBlist);
-#if SPINLOCK_PROFILE
-    dcb_printf(pdcb, "DCB List Spinlock Statistics:\n");
-    spinlock_stats(&DCBlist->list_lock, spin_reporter, pdcb);
-    dcb_printf(pdcb, "Zombie Queue Lock Statistics:\n");
-    spinlock_stats(&zombiespin, spin_reporter, pdcb);
-#endif
-    while (current)
+    int nthr = config_threadcount();
+
+    for (int i = 0; i < nthr; i++)
     {
-        dprintOneDCB(pdcb, (DCB *)current);
-        current = list_iterate(&DCBlist, current);
+        spinlock_acquire(&all_dcbs_lock[i]);
+
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
+        {
+            dprintOneDCB(pdcb, dcb);
+        }
+
+        spinlock_release(&all_dcbs_lock[i]);
     }
 }
 
@@ -1975,24 +1985,28 @@ dprintAllDCBs(DCB *pdcb)
 void
 dListDCBs(DCB *pdcb)
 {
-    DCB *dcb;
-    list_entry_t *current;
-
-    current = list_start_iteration(&DCBlist);
     dcb_printf(pdcb, "Descriptor Control Blocks\n");
     dcb_printf(pdcb, "------------------+----------------------------+--------------------+----------\n");
     dcb_printf(pdcb, " %-16s | %-26s | %-18s | %s\n",
                "DCB", "State", "Service", "Remote");
     dcb_printf(pdcb, "------------------+----------------------------+--------------------+----------\n");
-    while (current)
+
+    int nthr = config_threadcount();
+
+    for (int i = 0; i < nthr; i++)
     {
-        dcb = (DCB *)current;
-        dcb_printf(pdcb, " %-16p | %-26s | %-18s | %s\n",
-            dcb, gw_dcb_state2string(dcb->state),
-            ((dcb->session && dcb->session->service) ? dcb->session->service->name : ""),
-            (dcb->remote ? dcb->remote : ""));
-        current = list_iterate(&DCBlist, current);
+        spinlock_acquire(&all_dcbs_lock[i]);
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
+        {
+            dcb_printf(pdcb, " %-16p | %-26s | %-18s | %s\n",
+                       dcb, gw_dcb_state2string(dcb->state),
+                       ((dcb->session && dcb->session->service) ? dcb->session->service->name : ""),
+                       (dcb->remote ? dcb->remote : ""));
+        }
+
+        spinlock_release(&all_dcbs_lock[i]);
     }
+
     dcb_printf(pdcb, "------------------+----------------------------+--------------------+----------\n\n");
 }
 
@@ -2004,33 +2018,35 @@ dListDCBs(DCB *pdcb)
 void
 dListClients(DCB *pdcb)
 {
-    DCB *dcb;
-    list_entry_t *current;
-
-    current = list_start_iteration(&DCBlist);
-
     dcb_printf(pdcb, "Client Connections\n");
     dcb_printf(pdcb, "-----------------+------------------+----------------------+------------\n");
     dcb_printf(pdcb, " %-15s | %-16s | %-20s | %s\n",
                "Client", "DCB", "Service", "Session");
     dcb_printf(pdcb, "-----------------+------------------+----------------------+------------\n");
-    while (current)
+
+    int nthr = config_threadcount();
+
+    for (int i = 0; i < nthr; i++)
     {
-        dcb = (DCB *)current;
-        if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
+        spinlock_acquire(&all_dcbs_lock[i]);
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
         {
-            dcb_printf(pdcb, " %-15s | %16p | %-20s | %10p\n",
-                       (dcb->remote ? dcb->remote : ""),
-                       dcb, (dcb->session->service ?
-                             dcb->session->service->name : ""),
-                       dcb->session);
+            if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
+            {
+                dcb_printf(pdcb, " %-15s | %16p | %-20s | %10p\n",
+                           (dcb->remote ? dcb->remote : ""),
+                           dcb, (dcb->session->service ?
+                                 dcb->session->service->name : ""),
+                           dcb->session);
+            }
         }
-        current = list_iterate(&DCBlist, current);
+
+        spinlock_release(&all_dcbs_lock[i]);
     }
+
     dcb_printf(pdcb, "-----------------+------------------+----------------------+------------\n\n");
 }
 
-
 /**
  * Diagnostic to print a DCB to another DCB
  *
@@ -2502,7 +2518,7 @@ dcb_call_callback(DCB *dcb, DCB_REASON reason)
 int
 dcb_isvalid(DCB *dcb)
 {
-    return (int)list_is_entry_in_use(&DCBlist, (list_entry_t *)dcb);
+    return !dcb->dcb_is_zombie;
 }
 
 /**
@@ -2513,26 +2529,28 @@ dcb_isvalid(DCB *dcb)
 void
 dcb_hangup_foreach(struct server* server)
 {
-    DCB *dcb;
-    list_entry_t *current;
+    int nthr = config_threadcount();
 
-    current = list_start_iteration(&DCBlist);
 
-    while (current)
+    for (int i = 0; i < nthr; i++)
     {
-        dcb = (DCB *)current;
-        spinlock_acquire(&dcb->dcb_initlock);
-        if (dcb->state == DCB_STATE_POLLING && dcb->server &&
-            dcb->server == server)
+        spinlock_acquire(&all_dcbs_lock[i]);
+
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
         {
-            poll_fake_hangup_event(dcb);
+            spinlock_acquire(&dcb->dcb_initlock);
+            if (dcb->state == DCB_STATE_POLLING && dcb->server &&
+                dcb->server == server)
+            {
+                poll_fake_hangup_event(dcb);
+            }
+            spinlock_release(&dcb->dcb_initlock);
         }
-        spinlock_release(&dcb->dcb_initlock);
-        current = list_iterate(&DCBlist, current);
+
+        spinlock_release(&all_dcbs_lock[i]);
     }
 }
 
-
 /**
  * Null protocol write routine used for cloned dcb's. It merely consumes
  * buffers written on the cloned DCB and sets the DCB_REPLIED flag.
@@ -3404,3 +3422,60 @@ void dcb_append_readqueue(DCB *dcb, GWBUF *buffer)
 {
     dcb->dcb_readqueue = gwbuf_append(dcb->dcb_readqueue, buffer);
 }
+
+void dcb_add_to_list(DCB *dcb)
+{
+    spinlock_acquire(&all_dcbs_lock[dcb->owner]);
+
+    if (all_dcbs[dcb->owner] == NULL)
+    {
+        all_dcbs[dcb->owner] = dcb;
+        all_dcbs[dcb->owner]->thr_tail = dcb;
+    }
+    else
+    {
+        all_dcbs[dcb->owner]->thr_tail->thr_next = dcb;
+        all_dcbs[dcb->owner]->thr_tail = dcb;
+    }
+
+    spinlock_release(&all_dcbs_lock[dcb->owner]);
+}
+
+/**
+ * Remove a DCB from the owner's list
+ *
+ * @param dcb DCB to remove
+ */
+static void dcb_remove_from_list(DCB *dcb)
+{
+    spinlock_acquire(&all_dcbs_lock[dcb->owner]);
+
+    if (dcb == all_dcbs[dcb->owner])
+    {
+        DCB *tail = all_dcbs[dcb->owner]->thr_tail;
+        all_dcbs[dcb->owner] = all_dcbs[dcb->owner]->thr_next;
+        all_dcbs[dcb->owner]->thr_tail = tail;
+    }
+    else
+    {
+        DCB *current = all_dcbs[dcb->owner]->thr_next;
+        DCB *prev = all_dcbs[dcb->owner];
+
+        while (current)
+        {
+            if (current == dcb)
+            {
+                if (current == all_dcbs[dcb->owner]->thr_tail)
+                {
+                    all_dcbs[dcb->owner]->thr_tail = prev;
+                }
+                prev->thr_next = current->thr_next;
+                break;
+            }
+            prev = current;
+            current = current->thr_next;
+        }
+    }
+
+    spinlock_release(&all_dcbs_lock[dcb->owner]);
+}
diff --git a/server/core/poll.c b/server/core/poll.c
index e2cc58d0c..0955ba20f 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -385,6 +385,8 @@ poll_add_dcb(DCB *dcb)
     dcb->owner = owner;
     spinlock_release(&dcb->dcb_initlock);
 
+    dcb_add_to_list(dcb);
+
     rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev);
     if (rc)
     {
@@ -719,13 +721,7 @@ poll_waitevents(void *arg)
              */
         }
 
-        /*
-         * Process of the queue of waiting requests
-         * This is done without checking the evq_pending count as a
-         * precautionary measure to avoid issues if the house keeping
-         * of the count goes wrong.
-         */
-
+        /* Process of the queue of waiting requests */
         for (int i = 0; i < nfds; i++)
         {
             process_pollq(thread_id, &events[i]);
@@ -744,7 +740,6 @@ poll_waitevents(void *arg)
             spinlock_release(&fake_event_lock[thread_id]);
         }
 
-        /** Process fake events */
         while (event)
         {
             struct epoll_event ev;
@@ -752,7 +747,9 @@ poll_waitevents(void *arg)
             ev.data.ptr = event->dcb;
             ev.events = event->event;
             process_pollq(thread_id, &ev);
+            fake_event_t *tmp = event;
             event = event->next;
+            MXS_FREE(tmp);
         }
 
         if (check_timeouts && hkheartbeat >= next_timeout_check)
@@ -764,7 +761,10 @@ poll_waitevents(void *arg)
         {
             thread_data[thread_id].state = THREAD_ZPROCESSING;
         }
+
+        /** Process closed DCBs */
         dcb_process_zombies(thread_id);
+
         if (thread_data)
         {
             thread_data[thread_id].state = THREAD_IDLE;
@@ -1195,10 +1195,6 @@ dprintPollStats(DCB *dcb)
     dcb_printf(dcb, "\t>= %d\t\t\t%" PRId32 "\n", MAXNFDS,
                pollStats.n_fds[MAXNFDS - 1]);
 
-#if SPINLOCK_PROFILE
-    dcb_printf(dcb, "Event queue lock statistics:\n");
-    spinlock_stats(&pollqlock, spin_reporter, dcb);
-#endif
 }
 
 /**
@@ -1541,33 +1537,6 @@ poll_fake_hangup_event(DCB *dcb)
 void
 dShowEventQ(DCB *pdcb)
 {
-    DCB *dcb;
-    char *tmp1, *tmp2;
-
-    spinlock_acquire(&pollqlock);
-    if (eventq == NULL)
-    {
-        /* Nothing to process */
-        spinlock_release(&pollqlock);
-        return;
-    }
-    dcb = eventq;
-    dcb_printf(pdcb, "\nEvent Queue.\n");
-    dcb_printf(pdcb, "%-16s | %-10s | %-18s | %s\n", "DCB", "Status", "Processing Events",
-               "Pending Events");
-    dcb_printf(pdcb, "-----------------+------------+--------------------+-------------------\n");
-    do
-    {
-        dcb_printf(pdcb, "%-16p | %-10s | %-18s | %-18s\n", dcb,
-                   dcb->evq.processing ? "Processing" : "Pending",
-                   (tmp1 = event_to_string(dcb->evq.processing_events)),
-                   (tmp2 = event_to_string(dcb->evq.pending_events)));
-        MXS_FREE(tmp1);
-        MXS_FREE(tmp2);
-        dcb = dcb->evq.next;
-    }
-    while (dcb != eventq);
-    spinlock_release(&pollqlock);
 }
 
 

From 5067f3594a4e95a3d5645ee177046078fb36a3ed Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sun, 20 Nov 2016 10:34:37 +0200
Subject: [PATCH 24/42] Adapt session timeout checks to a per thread model

Each thread will now check its own list of DCBs for timed-out sessions.
---
 include/maxscale/dcb.h     |  2 ++
 include/maxscale/session.h |  9 -------
 server/core/dcb.c          | 43 +++++++++++++++++++++++++++++++
 server/core/poll.c         |  5 +---
 server/core/service.c      |  2 +-
 server/core/session.c      | 52 --------------------------------------
 6 files changed, 47 insertions(+), 66 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index 4b39d4b9c..2f4797de5 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -377,6 +377,8 @@ int dcb_accept_SSL(DCB* dcb);
 int dcb_connect_SSL(DCB* dcb);
 int dcb_listen(DCB *listener, const char *config, const char *protocol_name);
 void dcb_append_readqueue(DCB *dcb, GWBUF *buffer);
+void dcb_enable_session_timeouts();
+void dcb_process_idle_sessions(int thr);
 
 /**
  * DCB flags values
diff --git a/include/maxscale/session.h b/include/maxscale/session.h
index 855e922ca..66e15fed7 100644
--- a/include/maxscale/session.h
+++ b/include/maxscale/session.h
@@ -185,13 +185,6 @@ typedef struct session
     .stats = SESSION_STATS_INIT, .head = DOWNSTREAM_INIT, .tail = UPSTREAM_INIT, \
     .state = SESSION_STATE_ALLOC, .ses_chk_tail = CHK_NUM_SESSION}
 
-/** Whether to do session timeout checks */
-extern bool check_timeouts;
-
-/** When the next timeout check is done. This is compared to hkheartbeat in
- * hk_heartbeat.h */
-extern long next_timeout_check;
-
 #define SESSION_PROTOCOL(x, type)       DCB_PROTOCOL((x)->client_dcb, type)
 
 /**
@@ -231,8 +224,6 @@ SESSION* get_session_by_router_ses(void* rses);
 void session_enable_log_priority(SESSION* ses, int priority);
 void session_disable_log_priority(SESSION* ses, int priority);
 RESULTSET *sessionGetList(SESSIONLISTFILTER);
-void process_idle_sessions();
-void enable_session_timeouts();
 
 /**
  * Get the transaction state of the session.
diff --git a/server/core/dcb.c b/server/core/dcb.c
index 471e9df08..c02cefc8b 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -92,6 +92,7 @@
 #include <sys/un.h>
 #include <maxscale/alloc.h>
 #include <maxscale/utils.h>
+#include <maxscale/platform.h>
 
 /* The list of all DCBs */
 static LIST_CONFIG DCBlist =
@@ -107,6 +108,10 @@ static  int            *nzombies;
 static  int             maxzombies = 0;
 static  SPINLOCK        zombiespin = SPINLOCK_INIT;
 
+/** Variables for session timeout checks */
+bool check_timeouts = false;
+thread_local long next_timeout_check = 0;
+
 void dcb_global_init()
 {
     int nthreads = config_threadcount();
@@ -3479,3 +3484,41 @@ static void dcb_remove_from_list(DCB *dcb)
 
     spinlock_release(&all_dcbs_lock[dcb->owner]);
 }
+
+/**
+ * Enable the timing out of idle connections.
+ */
+void dcb_enable_session_timeouts()
+{
+    check_timeouts = true;
+}
+
+/**
+ * Close sessions that have been idle for too long.
+ *
+ * If the time since a session last sent data is greater than the set value in the
+ * service, it is disconnected. The connection timeout is disabled by default.
+ */
+void dcb_process_idle_sessions(int thr)
+{
+    if (check_timeouts && hkheartbeat >= next_timeout_check)
+    {
+        /** Because the resolution of the timeout is one second, we only need to
+         * check for it once per second. One heartbeat is 100 milliseconds. */
+        next_timeout_check = hkheartbeat + 10;
+
+        for (DCB *dcb = all_dcbs[thr]; dcb; dcb = dcb->memdata.next)
+        {
+            if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
+            {
+                SESSION *session = dcb->session;
+
+                if (session->service && session->client_dcb && session->client_dcb->state == DCB_STATE_POLLING &&
+                    hkheartbeat - session->client_dcb->last_read > session->service->conn_idle_timeout * 10)
+                {
+                    poll_fake_hangup_event(dcb);
+                }
+            }
+        }
+    }
+}
diff --git a/server/core/poll.c b/server/core/poll.c
index 0955ba20f..b77f59304 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -752,10 +752,7 @@ poll_waitevents(void *arg)
             MXS_FREE(tmp);
         }
 
-        if (check_timeouts && hkheartbeat >= next_timeout_check)
-        {
-            process_idle_sessions();
-        }
+        dcb_process_idle_sessions(thread_id);
 
         if (thread_data)
         {
diff --git a/server/core/service.c b/server/core/service.c
index 381f95d79..0c7165e64 100644
--- a/server/core/service.c
+++ b/server/core/service.c
@@ -1052,7 +1052,7 @@ serviceSetTimeout(SERVICE *service, int val)
      * configured with a idle timeout. */
     if ((service->conn_idle_timeout = val))
     {
-        enable_session_timeouts();
+        dcb_enable_session_timeouts();
     }
 
     return 1;
diff --git a/server/core/session.c b/server/core/session.c
index 87baf1f01..38bcbd8b3 100644
--- a/server/core/session.c
+++ b/server/core/session.c
@@ -56,14 +56,6 @@ static int session_id;
 
 static struct session session_dummy_struct;
 
-/**
- * These two are declared in session.h
- */
-bool check_timeouts = false;
-long next_timeout_check = 0;
-
-static SPINLOCK timeout_lock = SPINLOCK_INIT;
-
 static void session_initialize(void *session);
 static int session_setup_filters(SESSION *session);
 static void session_simple_free(SESSION *session, DCB *dcb);
@@ -894,50 +886,6 @@ session_getUser(SESSION *session)
     return (session && session->client_dcb) ? session->client_dcb->user : NULL;
 }
 
-/**
- * Enable the timing out of idle connections.
- *
- * This will prevent unnecessary acquisitions of the session spinlock if no
- * service is configured with a session idle timeout.
- */
-void enable_session_timeouts()
-{
-    check_timeouts = true;
-}
-
-/**
- * Close sessions that have been idle for too long.
- *
- * If the time since a session last sent data is greater than the set value in the
- * service, it is disconnected. The connection timeout is disabled by default.
- */
-void process_idle_sessions()
-{
-    if (spinlock_acquire_nowait(&timeout_lock))
-    {
-        if (hkheartbeat >= next_timeout_check)
-        {
-            list_entry_t *current = list_start_iteration(&SESSIONlist);
-            /** Because the resolution of the timeout is one second, we only need to
-             * check for it once per second. One heartbeat is 100 milliseconds. */
-            next_timeout_check = hkheartbeat + 10;
-            while (current)
-            {
-                SESSION *all_session = (SESSION *)current;
-
-                if (all_session->service && all_session->client_dcb && all_session->client_dcb->state == DCB_STATE_POLLING &&
-                    hkheartbeat - all_session->client_dcb->last_read > all_session->service->conn_idle_timeout * 10)
-                {
-                    poll_fake_hangup_event(all_session->client_dcb);
-                }
-
-                current = list_iterate(&SESSIONlist, current);
-            }
-        }
-        spinlock_release(&timeout_lock);
-    }
-}
-
 /**
  * Callback structure for the session list extraction
  */

From fce87f8c8ec58e96e9b00672c07225594082bdc0 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sun, 20 Nov 2016 21:55:52 +0200
Subject: [PATCH 25/42] Add listeners to all epoll instances

Adding the listener DCBs to all epoll instances allows all threads to
accept new connections. This should increase MaxScale's responsiveness.
---
 server/core/poll.c | 47 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/server/core/poll.c b/server/core/poll.c
index b77f59304..e0acd30b1 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -387,7 +387,29 @@ poll_add_dcb(DCB *dcb)
 
     dcb_add_to_list(dcb);
 
-    rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev);
+    if (dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER)
+    {
+        /** Listeners are added to all epoll instances */
+        int nthr = config_threadcount();
+
+        for (int i = 0; i < nthr; i++)
+        {
+            if ((rc = epoll_ctl(epoll_fd[i], EPOLL_CTL_ADD, dcb->fd, &ev)))
+            {
+                /** Remove the listener from the previous epoll instances */
+                for (int j = 0; j < i; j++)
+                {
+                    epoll_ctl(epoll_fd[j], EPOLL_CTL_DEL, dcb->fd, &ev);
+                }
+                break;
+            }
+        }
+    }
+    else
+    {
+        rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev);
+    }
+
     if (rc)
     {
         /* Some errors are actually considered acceptable */
@@ -455,7 +477,26 @@ poll_remove_dcb(DCB *dcb)
 
     if (dcbfd > 0)
     {
-        rc = epoll_ctl(epoll_fd[dcb->owner], EPOLL_CTL_DEL, dcbfd, &ev);
+        if (dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER)
+        {
+            /** Listeners are added to all epoll instances */
+            int nthr = config_threadcount();
+
+            for (int i = 0; i < nthr; i++)
+            {
+                int tmp_rc = epoll_ctl(epoll_fd[i], EPOLL_CTL_DEL, dcb->fd, &ev);
+                if (tmp_rc)
+                {
+                    /** Even if one of the instances failed to remove it, try
+                     * to remove it from all the others */
+                    rc = tmp_rc;
+                }
+            }
+        }
+        else
+        {
+            rc = epoll_ctl(epoll_fd[dcb->owner], EPOLL_CTL_DEL, dcbfd, &ev);
+        }
         /**
          * The poll_resolve_error function will always
          * return 0 or crash.  So if it returns non-zero result,
@@ -844,7 +885,7 @@ process_pollq(int thread_id, struct epoll_event *event)
     unsigned long qtime;
 
     DCB *dcb = event->data.ptr;
-    ss_dassert(dcb->owner == thread_id);
+    ss_dassert(dcb->owner == thread_id || dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER);
 #if PROFILE_POLL
     memlog_log(plog, hkheartbeat - dcb->evq.inserted);
 #endif

From ed280aa81b05a8b6321f28fd70e587abe4b45d64 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sun, 20 Nov 2016 22:15:42 +0200
Subject: [PATCH 26/42] Use client DCB to route requests

Using internal DCBs for query routing wasn't needed as the client DCB
could be used. This could also be done by simply routing the query again
with routeQuery.
---
 include/maxscale/debug.h                      |  3 +-
 .../routing/schemarouter/schemarouter.c       | 58 +------------------
 2 files changed, 4 insertions(+), 57 deletions(-)

diff --git a/include/maxscale/debug.h b/include/maxscale/debug.h
index 64973e68e..ac3c25e8f 100644
--- a/include/maxscale/debug.h
+++ b/include/maxscale/debug.h
@@ -243,7 +243,8 @@ typedef enum skygw_chk_t
 #define STRDCBROLE(r) ((r) == DCB_ROLE_SERVICE_LISTENER ? "DCB_ROLE_SERVICE_LISTENER" : \
                        ((r) == DCB_ROLE_CLIENT_HANDLER ? "DCB_ROLE_CLIENT_HANDLER" : \
                         ((r) == DCB_ROLE_BACKEND_HANDLER ? "DCB_ROLE_BACKEND_HANDLER" : \
-                         "UNKNOWN DCB ROLE")))
+                         ((r) == DCB_ROLE_INTERNAL ? "DCB_ROLE_INTERNAL" : \
+                          "UNKNOWN DCB ROLE"))))
 
 #define STRBETYPE(t) ((t) == BE_MASTER ? "BE_MASTER" : \
                         ((t) == BE_SLAVE ? "BE_SLAVE" : \
diff --git a/server/modules/routing/schemarouter/schemarouter.c b/server/modules/routing/schemarouter/schemarouter.c
index 6d802fe5a..8aa38ec42 100644
--- a/server/modules/routing/schemarouter/schemarouter.c
+++ b/server/modules/routing/schemarouter/schemarouter.c
@@ -607,41 +607,6 @@ bool check_shard_status(ROUTER_INSTANCE* router, char* shard)
     return false;
 }
 
-/**
- * A fake DCB read function used to forward queued queries.
- * @param dcb Internal DCB used by the router session
- * @return Always 1
- */
-int internalRoute(DCB* dcb)
-{
-    if (dcb->dcb_readqueue && dcb->session)
-    {
-        GWBUF* tmp = dcb->dcb_readqueue;
-        void* rinst = dcb->session->service->router_instance;
-        void *rses = dcb->session->router_session;
-
-        dcb->dcb_readqueue = NULL;
-        return dcb->session->service->router->routeQuery(rinst, rses, tmp);
-    }
-    return 1;
-}
-
-/**
- * A fake DCB read function used to forward replies to the client.
- * @param dcb Internal DCB used by the router session
- * @return Always 1
- */
-int internalReply(DCB* dcb)
-{
-    if (dcb->dcb_readqueue && dcb->session)
-    {
-        GWBUF* tmp = dcb->dcb_readqueue;
-        dcb->dcb_readqueue = NULL;
-        return SESSION_ROUTE_REPLY(dcb->session, tmp);
-    }
-    return 1;
-}
-
 /**
  * Implementation of the mandatory version entry point
  *
@@ -955,16 +920,8 @@ static void* newSession(ROUTER* router_inst, SESSION* session)
     }
 
     client_rses->shardmap = map;
-    client_rses->dcb_reply = dcb_alloc(DCB_ROLE_INTERNAL, NULL);
-    client_rses->dcb_reply->func.read = internalReply;
-    client_rses->dcb_reply->state = DCB_STATE_POLLING;
-    client_rses->dcb_reply->session = session;
     memcpy(&client_rses->rses_config, &router->schemarouter_config, sizeof(schemarouter_config_t));
     client_rses->n_sescmd = 0;
-    client_rses->dcb_route = dcb_alloc(DCB_ROLE_INTERNAL, NULL);
-    client_rses->dcb_route->func.read = internalRoute;
-    client_rses->dcb_route->state = DCB_STATE_POLLING;
-    client_rses->dcb_route->session = session;
     client_rses->rses_config.last_refresh = time(NULL);
 
     if (using_db)
@@ -1147,18 +1104,7 @@ static void closeSession(ROUTER* instance, void* router_session)
             }
         }
 
-        /* Close internal DCBs */
-        router_cli_ses->dcb_reply->session = NULL;
-        router_cli_ses->dcb_route->session = NULL;
-        dcb_close(router_cli_ses->dcb_reply);
-        dcb_close(router_cli_ses->dcb_route);
-
-        while (router_cli_ses->queue &&
-               (router_cli_ses->queue = gwbuf_consume(
-                                            router_cli_ses->queue, gwbuf_length(router_cli_ses->queue))))
-        {
-            ;
-        }
+        gwbuf_free(router_cli_ses->queue);
 
         /** Unlock */
         rses_end_locked_router_action(router_cli_ses);
@@ -4153,7 +4099,7 @@ void route_queued_query(ROUTER_CLIENT_SES *router_cli_ses)
               querystr);
     MXS_FREE(querystr);
 #endif
-    poll_add_epollin_event_to_dcb(router_cli_ses->dcb_route, tmp);
+    poll_add_epollin_event_to_dcb(router_cli_ses->rses_client_dcb, tmp);
 }
 
 /**

From e53b9585dd30d0d2920de8edba23c5d4a11e970f Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Tue, 22 Nov 2016 10:55:03 +0200
Subject: [PATCH 27/42] Add mapping function for DCBs

The dcb_foreach allows a function to be mapped to all DCBs in
MaxScale. This allows the list of DCBs to be iterated in a safe manner
without having to worry about internal locking of the DCB mechanism.
---
 include/maxscale/dcb.h | 11 +++++++++++
 server/core/dcb.c      | 24 ++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index 2f4797de5..42836bddc 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -380,6 +380,17 @@ void dcb_append_readqueue(DCB *dcb, GWBUF *buffer);
 void dcb_enable_session_timeouts();
 void dcb_process_idle_sessions(int thr);
 
+/**
+ * @brief Call a function for each connected DCB
+ *
+ * @param func Function to call. The function should return @c true to continue iteration
+ * and @c false to stop iteration earlier. The first parameter is a DCB and the second
+ * is the value of @c data that the user provided.
+ * @param data User provided data passed as the second parameter to @c func
+ * @return True if all DCBs were iterated, false if the callback returned false
+ */
+bool dcb_foreach(bool (*func)(DCB *, void *), void *data);
+
 /**
  * DCB flags values
  */
diff --git a/server/core/dcb.c b/server/core/dcb.c
index c02cefc8b..c4f4446ef 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -3522,3 +3522,27 @@ void dcb_process_idle_sessions(int thr)
         }
     }
 }
+
+bool dcb_foreach(bool(*func)(DCB *, void *), void *data)
+{
+
+    int nthr = config_threadcount();
+    bool more = true;
+
+    for (int i = 0; i < nthr && more; i++)
+    {
+        spinlock_acquire(&all_dcbs_lock[i]);
+
+        for (DCB *dcb = all_dcbs[i]; dcb && more; dcb = dcb->memdata.next)
+        {
+            if (!func(dcb, data))
+            {
+                more = false;
+            }
+        }
+
+        spinlock_release(&all_dcbs_lock[i]);
+    }
+
+    return more;
+}

From 2efa8629448aa0d202d85fd29ff5df5462dee4f0 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Tue, 22 Nov 2016 11:03:05 +0200
Subject: [PATCH 28/42] Remove listmanager code

The code prevented scaling by imposing global spinlocks for the DCBs and
SESSIONs. Removing this list means that a thread-local list must be taken
into use to replace it.
---
 include/maxscale/dcb.h                     |   3 -
 include/maxscale/session.h                 |   4 -
 server/core/CMakeLists.txt                 |   2 +-
 server/core/config.c                       |   4 -
 server/core/dcb.c                          | 133 ++++-----
 server/core/gateway.cc                     |   5 -
 server/core/session.c                      | 324 +++++++--------------
 server/modules/filter/tee/tee.c            |   4 +-
 server/modules/routing/debugcli/debugcmd.c | 121 ++++----
 9 files changed, 222 insertions(+), 378 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index 42836bddc..ab12153b5 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -48,7 +48,6 @@
 #include <maxscale/cdefs.h>
 #include <maxscale/spinlock.h>
 #include <maxscale/buffer.h>
-#include <maxscale/listmanager.h>
 #include <maxscale/gw_protocol.h>
 #include <maxscale/gw_authenticator.h>
 #include <maxscale/gw_ssl.h>
@@ -216,7 +215,6 @@ typedef enum
  */
 typedef struct dcb
 {
-    LIST_ENTRY_FIELDS
     skygw_chk_t     dcb_chk_top;
     bool            dcb_errhandle_called; /*< this can be called only once */
     bool            dcb_is_zombie;  /**< Whether the DCB is in the zombie list */
@@ -326,7 +324,6 @@ void dcb_global_init();
 
 int dcb_write(DCB *, GWBUF *);
 DCB *dcb_accept(DCB *listener, GWPROTOCOL *protocol_funcs);
-bool dcb_pre_alloc(int number);
 DCB *dcb_alloc(dcb_role_t, struct servlistener *);
 void dcb_free(DCB *);
 void dcb_free_all_memory(DCB *dcb);
diff --git a/include/maxscale/session.h b/include/maxscale/session.h
index 66e15fed7..9dda114cf 100644
--- a/include/maxscale/session.h
+++ b/include/maxscale/session.h
@@ -37,7 +37,6 @@
 #include <time.h>
 #include <maxscale/atomic.h>
 #include <maxscale/buffer.h>
-#include <maxscale/listmanager.h>
 #include <maxscale/spinlock.h>
 #include <maxscale/resultset.h>
 #include <maxscale/log_manager.h>
@@ -160,7 +159,6 @@ typedef enum
  */
 typedef struct session
 {
-    LIST_ENTRY_FIELDS
     skygw_chk_t     ses_chk_top;
     SPINLOCK        ses_lock;
     session_state_t state;            /*< Current descriptor state */
@@ -205,7 +203,6 @@ typedef struct session
                                (sess)->tail.session, (buf))
 
 SESSION *session_alloc(struct service *, struct dcb *);
-bool session_pre_alloc(int number);
 SESSION *session_set_dummy(struct dcb *);
 bool session_free(SESSION *);
 int session_isvalid(SESSION *);
@@ -220,7 +217,6 @@ void dprintSession(struct dcb *, SESSION *);
 void dListSessions(struct dcb *);
 char *session_state(session_state_t);
 bool session_link_dcb(SESSION *, struct dcb *);
-SESSION* get_session_by_router_ses(void* rses);
 void session_enable_log_priority(SESSION* ses, int priority);
 void session_disable_log_priority(SESSION* ses, int priority);
 RESULTSET *sessionGetList(SESSIONLISTFILTER);
diff --git a/server/core/CMakeLists.txt b/server/core/CMakeLists.txt
index 847d2fa42..9414df47e 100644
--- a/server/core/CMakeLists.txt
+++ b/server/core/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_library(maxscale-common SHARED adminusers.c alloc.c authenticator.c atomic.c buffer.c config.c config_runtime.c dcb.c filter.c externcmd.c gwbitmask.c gwdirs.c hashtable.c hint.c housekeeper.c listmanager.c load_utils.c log_manager.cc maxscale_pcre2.c memlog.c misc.c mlist.c modutil.c monitor.c queuemanager.c query_classifier.c poll.c random_jkiss.c resultset.c secrets.c server.c service.c session.c spinlock.c thread.c users.c utils.c skygw_utils.cc statistics.c listener.c gw_ssl.c mysql_utils.c mysql_binlog.c modulecmd.c )
+add_library(maxscale-common SHARED adminusers.c alloc.c authenticator.c atomic.c buffer.c config.c config_runtime.c dcb.c filter.c externcmd.c gwbitmask.c gwdirs.c hashtable.c hint.c housekeeper.c load_utils.c log_manager.cc maxscale_pcre2.c memlog.c misc.c mlist.c modutil.c monitor.c queuemanager.c query_classifier.c poll.c random_jkiss.c resultset.c secrets.c server.c service.c session.c spinlock.c thread.c users.c utils.c skygw_utils.cc statistics.c listener.c gw_ssl.c mysql_utils.c mysql_binlog.c modulecmd.c )
 
 target_link_libraries(maxscale-common ${MARIADB_CONNECTOR_LIBRARIES} ${LZMA_LINK_FLAGS} ${PCRE2_LIBRARIES} ${CURL_LIBRARIES} ssl pthread crypt dl crypto inih z rt m stdc++)
 
diff --git a/server/core/config.c b/server/core/config.c
index 3c60038ae..51e1a82ec 100644
--- a/server/core/config.c
+++ b/server/core/config.c
@@ -702,10 +702,6 @@ config_load(const char *filename)
 {
     ss_dassert(!config_file);
 
-    /* Temporary - should use configuration values and test return value (bool) */
-    dcb_pre_alloc(1000);
-    session_pre_alloc(250);
-
     global_defaults();
     feedback_defaults();
 
diff --git a/server/core/dcb.c b/server/core/dcb.c
index c4f4446ef..83bc874e7 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -70,7 +70,6 @@
 #include <time.h>
 #include <signal.h>
 #include <maxscale/dcb.h>
-#include <maxscale/listmanager.h>
 #include <maxscale/spinlock.h>
 #include <maxscale/server.h>
 #include <maxscale/session.h>
@@ -94,10 +93,6 @@
 #include <maxscale/utils.h>
 #include <maxscale/platform.h>
 
-/* The list of all DCBs */
-static LIST_CONFIG DCBlist =
-{LIST_TYPE_RECYCLABLE, sizeof(DCB), SPINLOCK_INIT};
-
 /* A DCB with null values, used for initialization */
 static DCB dcb_initialized = DCB_INIT;
 
@@ -193,17 +188,6 @@ bool dcb_get_ses_log_info(
     return false;
 }
 
-/*
- * @brief Pre-allocate memory for a number of DCBs
- *
- * @param   The number of DCBs to be pre-allocated
- */
-bool
-dcb_pre_alloc(int number)
-{
-    return list_pre_alloc(&DCBlist, number, dcb_initialize);
-}
-
 /**
  * @brief Initialize a DCB
  *
@@ -252,7 +236,6 @@ dcb_alloc(dcb_role_t role, SERV_LISTENER *listener)
     dcb_initialize(newdcb);
     newdcb->dcb_role = role;
     newdcb->listener = listener;
-    newdcb->entry_is_ready = true;
 
     return newdcb;
 }
@@ -1832,6 +1815,11 @@ spin_reporter(void *dcb, char *desc, int value)
     dcb_printf((DCB *)dcb, "\t\t%-40s  %d\n", desc, value);
 }
 
+bool printAllDCBs_cb(DCB *dcb, void *data)
+{
+    printDCB(dcb);
+    return true;
+}
 
 /**
  * Diagnostic to print all DCB allocated in the system
@@ -1839,15 +1827,7 @@ spin_reporter(void *dcb, char *desc, int value)
  */
 void printAllDCBs()
 {
-    list_entry_t *current;
-
-    current = list_start_iteration(&DCBlist);
-
-    while (current)
-    {
-        printDCB((DCB *)current);
-        current = list_iterate(&DCBlist, current);
-    }
+    dcb_foreach(printAllDCBs_cb, NULL);
 }
 
 /**
@@ -2663,6 +2643,57 @@ dcb_persistent_clean_count(DCB *dcb, bool cleanall)
     return count;
 }
 
+struct dcb_usage_count
+{
+    int count;
+    DCB_USAGE type;
+};
+
+bool count_by_usage_cb(DCB *dcb, void *data)
+{
+    struct dcb_usage_count *d = (struct dcb_usage_count*)data;
+
+    switch (d->type)
+    {
+        case DCB_USAGE_CLIENT:
+            if (DCB_ROLE_CLIENT_HANDLER == dcb->dcb_role)
+            {
+                d->count++;
+            }
+            break;
+        case DCB_USAGE_LISTENER:
+            if (dcb->state == DCB_STATE_LISTENING)
+            {
+                d->count++;
+            }
+            break;
+        case DCB_USAGE_BACKEND:
+            if (dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER)
+            {
+                d->count++;
+            }
+            break;
+        case DCB_USAGE_INTERNAL:
+            if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER ||
+                dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER)
+            {
+                d->count++;
+            }
+            break;
+        case DCB_USAGE_ZOMBIE:
+            if (DCB_ISZOMBIE(dcb))
+            {
+                d->count++;
+            }
+            break;
+        case DCB_USAGE_ALL:
+            d->count++;
+            break;
+    }
+
+    return true;
+}
+
 /**
  * Return DCB counts optionally filtered by usage
  *
@@ -2672,55 +2703,11 @@ dcb_persistent_clean_count(DCB *dcb, bool cleanall)
 int
 dcb_count_by_usage(DCB_USAGE usage)
 {
-    int rval = 0;
-    DCB *dcb;
-    list_entry_t *current;
+    struct dcb_usage_count val = {.count = 0, .type = usage};
 
-    current = list_start_iteration(&DCBlist);
+    dcb_foreach(count_by_usage_cb, &val);
 
-    while (current)
-    {
-        dcb = (DCB *)current;
-        switch (usage)
-        {
-        case DCB_USAGE_CLIENT:
-            if (DCB_ROLE_CLIENT_HANDLER == dcb->dcb_role)
-            {
-                rval++;
-            }
-            break;
-        case DCB_USAGE_LISTENER:
-            if (dcb->state == DCB_STATE_LISTENING)
-            {
-                rval++;
-            }
-            break;
-        case DCB_USAGE_BACKEND:
-            if (dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER)
-            {
-                rval++;
-            }
-            break;
-        case DCB_USAGE_INTERNAL:
-            if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER ||
-                dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER)
-            {
-                rval++;
-            }
-            break;
-        case DCB_USAGE_ZOMBIE:
-            if (DCB_ISZOMBIE(dcb))
-            {
-                rval++;
-            }
-            break;
-        case DCB_USAGE_ALL:
-            rval++;
-            break;
-        }
-        current = list_iterate(&DCBlist, current);
-    }
-    return rval;
+    return val.count;
 }
 
 /**
diff --git a/server/core/gateway.cc b/server/core/gateway.cc
index de8096f98..f2808c328 100644
--- a/server/core/gateway.cc
+++ b/server/core/gateway.cc
@@ -1935,11 +1935,6 @@ int main(int argc, char **argv)
         goto return_main;
     }
 
-    /* Temporary - should use configuration values and test return value (bool)
-     * TODO: Enable the list manager code */
-    dcb_pre_alloc(1);
-    session_pre_alloc(1);
-
     /** Initialize statistics */
     ts_stats_init();
 
diff --git a/server/core/session.c b/server/core/session.c
index 38bcbd8b3..4b01cf781 100644
--- a/server/core/session.c
+++ b/server/core/session.c
@@ -34,7 +34,6 @@
 #include <errno.h>
 #include <maxscale/alloc.h>
 #include <maxscale/session.h>
-#include <maxscale/listmanager.h>
 #include <maxscale/service.h>
 #include <maxscale/router.h>
 #include <maxscale/dcb.h>
@@ -44,10 +43,6 @@
 #include <maxscale/housekeeper.h>
 #include <maxscale/poll.h>
 
-/* This list of all sessions */
-LIST_CONFIG SESSIONlist =
-{LIST_TYPE_RECYCLABLE, sizeof(SESSION), SPINLOCK_INIT};
-
 /* A session with null values, used for initialization */
 static SESSION session_initialized = SESSION_INIT;
 
@@ -62,7 +57,6 @@ static void session_simple_free(SESSION *session, DCB *dcb);
 static void session_add_to_all_list(SESSION *session);
 static SESSION *session_find_free();
 static void session_final_free(SESSION *session);
-static list_entry_t *skip_maybe_to_next_non_listener(list_entry_t *current, SESSIONLISTFILTER filter);
 
 /**
  * @brief Initialize a session
@@ -83,17 +77,6 @@ session_initialize(void *session)
     *(SESSION *)session = session_initialized;
 }
 
-/*
- * @brief Pre-allocate memory for a number of sessions
- *
- * @param   The number of sessions to be pre-allocated
- */
-bool
-session_pre_alloc(int number)
-{
-    return list_pre_alloc(&SESSIONlist, number, session_initialize);
-}
-
 /**
  * Allocate a new session for a new client of the specified service.
  *
@@ -220,7 +203,6 @@ session_alloc(SERVICE *service, DCB *client_dcb)
     CHK_SESSION(session);
 
     client_dcb->session = session;
-    session->entry_is_ready = true;
     return SESSION_STATE_TO_BE_FREED == session->state ? NULL : session;
 }
 
@@ -238,8 +220,6 @@ session_set_dummy(DCB *client_dcb)
     SESSION *session;
 
     session = &session_dummy_struct;
-    session->list_entry_chk_top = CHK_NUM_MANAGED_LIST;
-    session->list_entry_chk_tail = CHK_NUM_MANAGED_LIST;
     session->ses_chk_top = CHK_NUM_SESSION;
     session->ses_chk_tail = CHK_NUM_SESSION;
     session->ses_is_child = false;
@@ -252,7 +232,6 @@ session_set_dummy(DCB *client_dcb)
     session->state = SESSION_STATE_DUMMY;
     session->refcount = 1;
     session->ses_id = 0;
-    session->next = NULL;
 
     client_dcb->session = session;
     return session;
@@ -448,20 +427,7 @@ session_final_free(SESSION *session)
 int
 session_isvalid(SESSION *session)
 {
-    int rval = 0;
-    list_entry_t *current = list_start_iteration(&SESSIONlist);
-    while (current)
-    {
-        if ((SESSION *)current == session)
-        {
-            rval = 1;
-            list_terminate_iteration_early(&SESSIONlist, current);
-            break;
-        }
-        current = list_iterate(&SESSIONlist, current);
-    }
-
-    return rval;
+    return true;
 }
 
 /**
@@ -479,8 +445,19 @@ printSession(SESSION *session)
     printf("\tState:        %s\n", session_state(session->state));
     printf("\tService:      %s (%p)\n", session->service->name, session->service);
     printf("\tClient DCB:   %p\n", session->client_dcb);
-    printf("\tConnected:    %s",
+    printf("\tConnected:    %s\n",
            asctime_r(localtime_r(&session->stats.connect, &result), timebuf));
+    printf("\tRouter Session: %p\n", session->router_session);
+}
+
+bool printAllSessions_cb(DCB *dcb, void *data)
+{
+    if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
+    {
+        printSession(dcb->session);
+    }
+
+    return true;
 }
 
 /**
@@ -492,76 +469,7 @@ printSession(SESSION *session)
 void
 printAllSessions()
 {
-    list_entry_t *current = list_start_iteration(&SESSIONlist);
-    while (current)
-    {
-        printSession((SESSION *)current);
-        current = list_iterate(&SESSIONlist, current);
-    }
-}
-
-
-/**
- * Check sessions
- *
- * Designed to be called within a debugger session in order
- * to display information regarding "interesting" sessions
- */
-void
-CheckSessions()
-{
-    list_entry_t *current;
-    int noclients = 0;
-    int norouter = 0;
-
-    current = list_start_iteration(&SESSIONlist);
-    while (current)
-    {
-        SESSION *list_session = (SESSION *)current;
-        if (list_session->state != SESSION_STATE_LISTENER ||
-            list_session->state != SESSION_STATE_LISTENER_STOPPED)
-        {
-            if (list_session->client_dcb == NULL && list_session->refcount)
-            {
-                if (noclients == 0)
-                {
-                    printf("Sessions without a client DCB.\n");
-                    printf("==============================\n");
-                }
-                printSession(list_session);
-                noclients++;
-            }
-        }
-        current = list_iterate(&SESSIONlist, current);
-    }
-    if (noclients)
-    {
-        printf("%d Sessions have no clients\n", noclients);
-    }
-    current = list_start_iteration(&SESSIONlist);
-    while (current)
-    {
-        SESSION *list_session = (SESSION *)current;
-        if (list_session->state != SESSION_STATE_LISTENER ||
-            list_session->state != SESSION_STATE_LISTENER_STOPPED)
-        {
-            if (list_session->router_session == NULL && list_session->refcount)
-            {
-                if (norouter == 0)
-                {
-                    printf("Sessions without a router session.\n");
-                    printf("==================================\n");
-                }
-                printSession(list_session);
-                norouter++;
-            }
-        }
-        current = list_iterate(&SESSIONlist, current);
-    }
-    if (norouter)
-    {
-        printf("%d Sessions have no router session\n", norouter);
-    }
+    dcb_foreach(printAllSessions_cb, NULL);
 }
 
 /*
@@ -572,7 +480,17 @@ CheckSessions()
 void
 dprintSessionList(DCB *pdcb)
 {
-    dprintListStats(pdcb, &SESSIONlist, "All Sessions");
+}
+
+/** Callback for dprintAllSessions */
+bool dprintAllSessions_cb(DCB *dcb, void *data)
+{
+    if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
+    {
+        DCB *out_dcb = (DCB*)data;
+        dprintSession(out_dcb, dcb->session);
+    }
+    return true;
 }
 
 /**
@@ -586,14 +504,8 @@ dprintSessionList(DCB *pdcb)
 void
 dprintAllSessions(DCB *dcb)
 {
-
-    list_entry_t *current = list_start_iteration(&SESSIONlist);
-    while (current)
-    {
-        dprintSession(dcb, (SESSION *)current);
-        current = list_iterate(&SESSIONlist, current);
-    }
- }
+    dcb_foreach(dprintAllSessions_cb, dcb);
+}
 
 /**
  * Print a particular session to a DCB
@@ -646,6 +558,22 @@ dprintSession(DCB *dcb, SESSION *print_session)
     }
 }
 
+bool dListSessions_cb(DCB *dcb, void *data)
+{
+    if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
+    {
+        DCB *out_dcb = (DCB*)data;
+        SESSION *session = dcb->session;
+        dcb_printf(out_dcb, "%-16p | %-15s | %-14s | %s\n", session,
+                   session->client_dcb && session->client_dcb->remote ?
+                   session->client_dcb->remote : "",
+                   session->service && session->service->name ?
+                   session->service->name : "",
+                   session_state(session->state));
+    }
+
+    return true;
+}
 /**
  * List all sessions in tabular form to a DCB
  *
@@ -657,32 +585,13 @@ dprintSession(DCB *dcb, SESSION *print_session)
 void
 dListSessions(DCB *dcb)
 {
-    bool written_heading = false;
-    list_entry_t *current = list_start_iteration(&SESSIONlist);
-    if (current)
-    {
-        dcb_printf(dcb, "Sessions.\n");
-        dcb_printf(dcb, "-----------------+-----------------+----------------+--------------------------\n");
-        dcb_printf(dcb, "Session          | Client          | Service        | State\n");
-        dcb_printf(dcb, "-----------------+-----------------+----------------+--------------------------\n");
-        written_heading = true;
-    }
-    while (current)
-    {
-        SESSION *list_session = (SESSION *)current;
-        dcb_printf(dcb, "%-16p | %-15s | %-14s | %s\n", list_session,
-                ((list_session->client_dcb && list_session->client_dcb->remote)
-                ? list_session->client_dcb->remote : ""),
-                (list_session->service && list_session->service->name ? list_session->service->name
-                : ""),
-                session_state(list_session->state));
-        current = list_iterate(&SESSIONlist, current);
-    }
-    if (written_heading)
-    {
-        dcb_printf(dcb,
-                   "-----------------+-----------------+----------------+--------------------------\n\n");
-    }
+    dcb_printf(dcb, "-----------------+-----------------+----------------+--------------------------\n");
+    dcb_printf(dcb, "Session          | Client          | Service        | State\n");
+    dcb_printf(dcb, "-----------------+-----------------+----------------+--------------------------\n");
+
+    dcb_foreach(dListSessions_cb, dcb);
+
+    dcb_printf(dcb, "-----------------+-----------------+----------------+--------------------------\n\n");
 }
 
 /**
@@ -719,28 +628,6 @@ session_state(session_state_t state)
     }
 }
 
-/*
- * @brief Find the session that relates to a given router session
- *
- * @param rses      A router session
- * @return      The related session, or NULL if none
- */
-SESSION* get_session_by_router_ses(void* rses)
-{
-    list_entry_t *current = list_start_iteration(&SESSIONlist);
-    while (current)
-    {
-        if (((SESSION *)current)->router_session == rses)
-        {
-            list_terminate_iteration_early(&SESSIONlist, current);
-            return (SESSION *)current;
-        }
-        current = list_iterate(&SESSIONlist, current);
-    }
-    return NULL;
-}
-
-
 /**
  * Create the filter chain for this session.
  *
@@ -892,9 +779,61 @@ session_getUser(SESSION *session)
 typedef struct
 {
     int index;
+    int current;
     SESSIONLISTFILTER filter;
+    RESULT_ROW *row;
+    RESULTSET *set;
 } SESSIONFILTER;
 
+/**
+ * Callback for dcb_foreach that builds the next row of the session list
+ *
+ * Sessions excluded by the filter are ignored. Of the remaining ones, the
+ * first `index` are skipped (counted in `current`) because earlier calls
+ * already reported them, and the next one is stored as a row in `row`.
+ *
+ * NOTE(review): every DCB is visited, so a session referenced by several
+ * DCBs is presumably listed once per DCB; confirm that this is intended.
+ *
+ * @param dcb  DCB whose session is inspected
+ * @param data Pointer to the SESSIONFILTER holding the iteration state
+ * @return False when a row was created, true to continue with the next DCB
+ */
+bool dcb_iter_cb(DCB *dcb, void *data)
+{
+    SESSIONFILTER *cbdata = (SESSIONFILTER*)data;
+    SESSION *list_session = dcb->session;
+
+    /** Listener sessions are neither counted nor shown when only connections are listed */
+    if (cbdata->filter == SESSION_LIST_CONNECTION &&
+        list_session->state == SESSION_STATE_LISTENER)
+    {
+        return true;
+    }
+
+    if (cbdata->current < cbdata->index)
+    {
+        /** This session was returned by an earlier call */
+        cbdata->current++;
+    }
+    else
+    {
+        char buf[20];
+
+        cbdata->index++;
+        cbdata->row = resultset_make_row(cbdata->set);
+        snprintf(buf, sizeof(buf), "%p", list_session);
+        resultset_row_set(cbdata->row, 0, buf);
+        resultset_row_set(cbdata->row, 1, ((list_session->client_dcb && list_session->client_dcb->remote)
+                                           ? list_session->client_dcb->remote : ""));
+        resultset_row_set(cbdata->row, 2, (list_session->service && list_session->service->name
+                                           ? list_session->service->name : ""));
+        resultset_row_set(cbdata->row, 3, session_state(list_session->state));
+        return false;
+    }
+    return true;
+}
+
 /**
  * Provide a row to the result set that defines the set of sessions
  *
@@ -905,74 +827,25 @@ typedef struct
 static RESULT_ROW *
 sessionRowCallback(RESULTSET *set, void *data)
 {
-    SESSIONFILTER *cbdata = (SESSIONFILTER *)data;
-    int i = 0;
-    list_entry_t *current = list_start_iteration(&SESSIONlist);
+    SESSIONFILTER *cbdata = (SESSIONFILTER*)data;
+    RESULT_ROW *row = NULL;
 
-    /* Skip to the first non-listener if not showing listeners */
-    current = skip_maybe_to_next_non_listener(current, cbdata->filter);
+    /** Every call walks the DCBs from the start, so the skip count must restart too */
+    cbdata->current = 0;
+    dcb_foreach(dcb_iter_cb, cbdata);
 
-    while (i < cbdata->index && current)
+    if (cbdata->row)
     {
-        if (cbdata->filter == SESSION_LIST_ALL ||
-            (cbdata->filter == SESSION_LIST_CONNECTION &&
-            ((SESSION *)current)->state !=  SESSION_STATE_LISTENER))
-        {
-            i++;
-        }
-        current = list_iterate(&SESSIONlist, current);
+        row = cbdata->row;
+        cbdata->row = NULL;
     }
+    else
+    {
+        /** No row was produced: this is the final call, so the iteration state is freed here */
+        MXS_FREE(cbdata);
+    }
 
-    /* Skip to the next non-listener if not showing listeners */
-    current = skip_maybe_to_next_non_listener(current, cbdata->filter);
-
-    if (NULL == current)
-    {
-        MXS_FREE(data);
-        return NULL;
-    }
-    else
-    {
-        char buf[20];
-        RESULT_ROW *row;
-        SESSION *list_session = (SESSION *)current;
-
-        cbdata->index++;
-        row = resultset_make_row(set);
-        snprintf(buf,19, "%p", list_session);
-        buf[19] = '\0';
-        resultset_row_set(row, 0, buf);
-        resultset_row_set(row, 1, ((list_session->client_dcb && list_session->client_dcb->remote)
-                               ? list_session->client_dcb->remote : ""));
-        resultset_row_set(row, 2, (list_session->service && list_session->service->name
-                               ? list_session->service->name : ""));
-        resultset_row_set(row, 3, session_state(list_session->state));
-        list_terminate_iteration_early(&SESSIONlist, current);
-        return row;
-    }
-}
-
-/*
- * @brief   Skip to the next non-listener session, if not showing listeners
- *
- * Based on a test of the filter that is the second parameter, along with the
- * state of the sessions.
- *
- * @param       current The session to start the possible skipping
- * @param       filter  The filter the defines the operation
- *
- * @result      The first session beyond those skipped, or the starting session;
- *              NULL if the list of sessions is exhausted.
- */
-static list_entry_t *skip_maybe_to_next_non_listener(list_entry_t *current, SESSIONLISTFILTER filter)
-{
-    /* Skip to the first non-listener if not showing listeners */
-    while (current && filter == SESSION_LIST_CONNECTION &&
-        ((SESSION *)current)->state == SESSION_STATE_LISTENER)
-    {
-        current = list_iterate(&SESSIONlist, current);
-    }
-    return current;
+    return row;
 }
 
 /**
@@ -985,6 +851,7 @@ static list_entry_t *skip_maybe_to_next_non_listener(list_entry_t *current, SESS
  * so we suppress the warning. In fact, the function call results in return
  * of the set structure which includes a pointer to data
  */
+
 /*lint -e429 */
 RESULTSET *
 sessionGetList(SESSIONLISTFILTER filter)
@@ -998,11 +865,16 @@ sessionGetList(SESSIONLISTFILTER filter)
     }
     data->index = 0;
     data->filter = filter;
+    data->current = 0;
+    data->row = NULL;
+
     if ((set = resultset_create(sessionRowCallback, data)) == NULL)
     {
         MXS_FREE(data);
         return NULL;
     }
+
+    data->set = set;
     resultset_add_column(set, "Session", 16, COL_TYPE_VARCHAR);
     resultset_add_column(set, "Client", 15, COL_TYPE_VARCHAR);
     resultset_add_column(set, "Service", 15, COL_TYPE_VARCHAR);
diff --git a/server/modules/filter/tee/tee.c b/server/modules/filter/tee/tee.c
index d979f2e47..ab91bfd3a 100644
--- a/server/modules/filter/tee/tee.c
+++ b/server/modules/filter/tee/tee.c
@@ -213,8 +213,6 @@ int route_single_query(TEE_INSTANCE* my_instance,
 int reset_session_state(TEE_SESSION* my_session, GWBUF* buffer);
 void create_orphan(SESSION* ses);
 
-extern LIST_CONFIG SESSIONlist;
-
 static void
 orphan_free(void* data)
 {
@@ -299,7 +297,7 @@ orphan_free(void* data)
             tmp->session->router_session);
 
         tmp->session->state = SESSION_STATE_FREE;
-        list_free_entry(&SESSIONlist, (list_entry_t*)tmp->session);
+        MXS_FREE(tmp->session);
         MXS_FREE(tmp);
     }
 
diff --git a/server/modules/routing/debugcli/debugcmd.c b/server/modules/routing/debugcli/debugcmd.c
index 9505346e3..2f15c5e6c 100644
--- a/server/modules/routing/debugcli/debugcmd.c
+++ b/server/modules/routing/debugcli/debugcmd.c
@@ -92,8 +92,6 @@
 #define ARG_TYPE_FILTER         9
 #define ARG_TYPE_NUMERIC        10
 
-extern LIST_CONFIG SESSIONlist;
-
 /**
  * The subcommand structure
  *
@@ -2115,6 +2113,31 @@ static bool get_log_action(const char* name, struct log_action_entry* entryp)
     return found;
 }
 
+
+bool seslog_cb(DCB *dcb, void *data)
+{
+    bool rval = true;
+    struct log_action_entry *entry = ((void**)data)[0];
+    size_t *id = ((void**)data)[1];
+    bool enable = (bool)((void**)data)[2];
+    SESSION *session = dcb->session;
+
+    if (session->ses_id == *id)
+    {
+        if (enable)
+        {
+            session_enable_log_priority(session, entry->priority);
+        }
+        else
+        {
+            session_disable_log_priority(session, entry->priority);
+        }
+        rval = false;
+    }
+
+    return rval;
+}
+
 /**
  * Enables a log for a single session
  * @param session The session in question
@@ -2127,21 +2150,10 @@ static void enable_sess_log_action(DCB *dcb, char *arg1, char *arg2)
 
     if (get_log_action(arg1, &entry))
     {
-        size_t id = (size_t) strtol(arg2, 0, 0);
-        list_entry_t *current = list_start_iteration(&SESSIONlist);
-        while (current)
-        {
-            SESSION *session = (SESSION *)current;
-            if (session->ses_id == id)
-            {
-                session_enable_log_priority(session, entry.priority);
-                list_terminate_iteration_early(&SESSIONlist, current);
-                break;
-            }
-            current = list_iterate(&SESSIONlist, current);
-        }
+        size_t id = (size_t)strtol(arg2, NULL, 10);
+        void *data[] = {&entry, &id, (void*)true};
 
-        if (!current)
+        if (dcb_foreach(seslog_cb, data))
         {
             dcb_printf(dcb, "Session not found: %s.\n", arg2);
         }
@@ -2164,28 +2176,17 @@ static void disable_sess_log_action(DCB *dcb, char *arg1, char *arg2)
 
     if (get_log_action(arg1, &entry))
     {
-        size_t id = (size_t) strtol(arg2, 0, 0);
-        list_entry_t *current = list_start_iteration(&SESSIONlist);
-        while (current)
-        {
-            SESSION *session = (SESSION *)current;
-            if (session->ses_id == id)
-            {
-                session_disable_log_priority(session, entry.priority);
-                list_terminate_iteration_early(&SESSIONlist, current);
-                break;
-            }
-            current = list_iterate(&SESSIONlist, current);
-        }
+        size_t id = (size_t)strtol(arg2, NULL, 10);
+        void *data[] = {&entry, &id, (void*)false};
 
-        if (!current)
+        if (dcb_foreach(seslog_cb, data))
         {
             dcb_printf(dcb, "Session not found: %s.\n", arg2);
         }
     }
     else
     {
-        dcb_printf(dcb, "%s is not supported for disable log.\n", arg1);
+        dcb_printf(dcb, "%s is not supported for enable log.\n", arg1);
     }
 }
 
@@ -2226,6 +2227,30 @@ static int string_to_priority(const char* name)
     return result ? result->priority : -1;
 }
 
+bool sesprio_cb(DCB *dcb, void *data)
+{
+    bool rval = true;
+    int *priority = ((void**)data)[0];
+    size_t *id = ((void**)data)[1];
+    bool enable = (bool)((void**)data)[2];
+    SESSION *session = dcb->session;
+
+    if (session->ses_id == *id)
+    {
+        if (enable)
+        {
+            session_enable_log_priority(session, *priority);
+        }
+        else
+        {
+            session_disable_log_priority(session, *priority);
+        }
+        rval = false;
+    }
+
+    return rval;
+}
+
 /**
  * Enables a log priority for a single session
  * @param session The session in question
@@ -2238,21 +2263,10 @@ static void enable_sess_log_priority(DCB *dcb, char *arg1, char *arg2)
 
     if (priority != -1)
     {
-        size_t id = (size_t) strtol(arg2, 0, 0);
-        list_entry_t *current = list_start_iteration(&SESSIONlist);
-        while (current)
-        {
-            SESSION *session = (SESSION *)current;
-            if (session->ses_id == id)
-            {
-                session_enable_log_priority(session, priority);
-                list_terminate_iteration_early(&SESSIONlist, current);
-                break;
-            }
-            current = list_iterate(&SESSIONlist, current);
-        }
+        size_t id = (size_t) strtol(arg2, NULL, 10);
+        void *data[] = {&priority, &id, (void*)true};
 
-        if (!current)
+        if (dcb_foreach(sesprio_cb, data))
         {
             dcb_printf(dcb, "Session not found: %s.\n", arg2);
         }
@@ -2275,21 +2289,10 @@ static void disable_sess_log_priority(DCB *dcb, char *arg1, char *arg2)
 
     if (priority != -1)
     {
-        size_t id = (size_t) strtol(arg2, 0, 0);
-        list_entry_t *current = list_start_iteration(&SESSIONlist);
-        while (current)
-        {
-            SESSION *session = (SESSION *)current;
-            if (session->ses_id == id)
-            {
-                session_disable_log_priority(session, priority);
-                list_terminate_iteration_early(&SESSIONlist, current);
-                break;
-            }
-            current = list_iterate(&SESSIONlist, current);
-        }
+        size_t id = (size_t) strtol(arg2, NULL, 10);
+        void *data[] = {&priority, &id, (void*)false};
 
-        if (!current)
+        if (dcb_foreach(sesprio_cb, data)) /* data[0] is an int*: needs sesprio_cb */
         {
             dcb_printf(dcb, "Session not found: %s.\n", arg2);
         }

From 43f248927e2da98316c64bc94f5d9cf4995fdb0b Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Wed, 23 Nov 2016 09:03:50 +0200
Subject: [PATCH 29/42] Move thread related members of DCB into a substructure

The `thread` structure of a DCB now contains all the members that relate
to thread ownership of the DCB.
---
 include/maxscale/dcb.h | 11 +++++++----
 server/core/dcb.c      | 40 ++++++++++++++++++++--------------------
 server/core/poll.c     | 10 +++++-----
 server/core/session.c  |  2 +-
 4 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index ab12153b5..eec018c04 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -225,7 +225,6 @@ typedef struct dcb
     DCBEVENTQ       evq;            /**< The event queue for this DCB */
     int             fd;             /**< The descriptor */
     dcb_state_t     state;          /**< Current descriptor state */
-    int             owner;          /**< Owning thread */
     SSL_STATE       ssl_state;      /**< Current state of SSL if in use */
     int             flags;          /**< DCB flags */
     char            *remote;        /**< Address of remote end */
@@ -276,8 +275,12 @@ typedef struct dcb
     bool            ssl_write_want_write;    /*< Flag */
     int             dcb_port;       /**< port of target server */
     bool            was_persistent;  /**< Whether this DCB was in the persistent pool */
-    struct dcb      *thr_next; /**< Next DCB in owning thread's list */
-    struct dcb      *thr_tail; /**< Last DCB in owning thread's list */
+    struct
+    {
+        int id; /**< The owning thread's ID */
+        struct dcb *next; /**< Next DCB in owning thread's list */
+        struct dcb *tail; /**< Last DCB in owning thread's list */
+    } thread;
     skygw_chk_t     dcb_chk_tail;
 } DCB;
 
@@ -288,7 +291,7 @@ typedef struct dcb
     .cb_lock = SPINLOCK_INIT, .pollinlock = SPINLOCK_INIT, \
     .fd = DCBFD_CLOSED, .stats = DCBSTATS_INIT, .ssl_state = SSL_HANDSHAKE_UNKNOWN, \
     .state = DCB_STATE_ALLOC, .polloutlock = SPINLOCK_INIT, .dcb_chk_tail = CHK_NUM_DCB, \
-    .authenticator_data = NULL, .thr_next = NULL, .thr_tail = NULL}
+    .authenticator_data = NULL, .thread = {0}}
 
 /**
  * The DCB usage filer used for returning DCB's in use for a certain reason
diff --git a/server/core/dcb.c b/server/core/dcb.c
index 83bc874e7..e0b587ed7 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -1660,7 +1660,7 @@ dcb_close(DCB *dcb)
         /*<
          * Add closing dcb to the top of the list, setting zombie marker
          */
-        int owner = dcb->owner;
+        int owner = dcb->thread.id;
         dcb->dcb_is_zombie = true;
         dcb->memdata.next = zombies[owner];
         zombies[owner] = dcb;
@@ -3417,20 +3417,20 @@ void dcb_append_readqueue(DCB *dcb, GWBUF *buffer)
 
 void dcb_add_to_list(DCB *dcb)
 {
-    spinlock_acquire(&all_dcbs_lock[dcb->owner]);
+    spinlock_acquire(&all_dcbs_lock[dcb->thread.id]);
 
-    if (all_dcbs[dcb->owner] == NULL)
+    if (all_dcbs[dcb->thread.id] == NULL)
     {
-        all_dcbs[dcb->owner] = dcb;
-        all_dcbs[dcb->owner]->thr_tail = dcb;
+        all_dcbs[dcb->thread.id] = dcb;
+        all_dcbs[dcb->thread.id]->thread.tail = dcb;
     }
     else
     {
-        all_dcbs[dcb->owner]->thr_tail->thr_next = dcb;
-        all_dcbs[dcb->owner]->thr_tail = dcb;
+        all_dcbs[dcb->thread.id]->thread.tail->thread.next = dcb;
+        all_dcbs[dcb->thread.id]->thread.tail = dcb;
     }
 
-    spinlock_release(&all_dcbs_lock[dcb->owner]);
+    spinlock_release(&all_dcbs_lock[dcb->thread.id]);
 }
 
 /**
@@ -3440,36 +3440,36 @@ void dcb_add_to_list(DCB *dcb)
  */
 static void dcb_remove_from_list(DCB *dcb)
 {
-    spinlock_acquire(&all_dcbs_lock[dcb->owner]);
+    spinlock_acquire(&all_dcbs_lock[dcb->thread.id]);
 
-    if (dcb == all_dcbs[dcb->owner])
+    if (dcb == all_dcbs[dcb->thread.id])
     {
-        DCB *tail = all_dcbs[dcb->owner]->thr_tail;
-        all_dcbs[dcb->owner] = all_dcbs[dcb->owner]->thr_next;
-        all_dcbs[dcb->owner]->thr_tail = tail;
+        DCB *tail = all_dcbs[dcb->thread.id]->thread.tail;
+        all_dcbs[dcb->thread.id] = all_dcbs[dcb->thread.id]->thread.next;
+        all_dcbs[dcb->thread.id]->thread.tail = tail;
     }
     else
     {
-        DCB *current = all_dcbs[dcb->owner]->thr_next;
-        DCB *prev = all_dcbs[dcb->owner];
+        DCB *current = all_dcbs[dcb->thread.id]->thread.next;
+        DCB *prev = all_dcbs[dcb->thread.id];
 
         while (current)
         {
             if (current == dcb)
             {
-                if (current == all_dcbs[dcb->owner]->thr_tail)
+                if (current == all_dcbs[dcb->thread.id]->thread.tail)
                 {
-                    all_dcbs[dcb->owner]->thr_tail = prev;
+                    all_dcbs[dcb->thread.id]->thread.tail = prev;
                 }
-                prev->thr_next = current->thr_next;
+                prev->thread.next = current->thread.next;
                 break;
             }
             prev = current;
-            current = current->thr_next;
+            current = current->thread.next;
         }
     }
 
-    spinlock_release(&all_dcbs_lock[dcb->owner]);
+    spinlock_release(&all_dcbs_lock[dcb->thread.id]);
 }
 
 /**
diff --git a/server/core/poll.c b/server/core/poll.c
index e0acd30b1..60602ac18 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -375,14 +375,14 @@ poll_add_dcb(DCB *dcb)
 
     if (dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER)
     {
-        owner = dcb->session->client_dcb->owner;
+        owner = dcb->session->client_dcb->thread.id;
     }
     else
     {
         owner = (unsigned int)atomic_add(&next_epoll_fd, 1) % n_threads;
     }
 
-    dcb->owner = owner;
+    dcb->thread.id = owner;
     spinlock_release(&dcb->dcb_initlock);
 
     dcb_add_to_list(dcb);
@@ -495,7 +495,7 @@ poll_remove_dcb(DCB *dcb)
         }
         else
         {
-            rc = epoll_ctl(epoll_fd[dcb->owner], EPOLL_CTL_DEL, dcbfd, &ev);
+            rc = epoll_ctl(epoll_fd[dcb->thread.id], EPOLL_CTL_DEL, dcbfd, &ev);
         }
         /**
          * The poll_resolve_error function will always
@@ -885,7 +885,7 @@ process_pollq(int thread_id, struct epoll_event *event)
     unsigned long qtime;
 
     DCB *dcb = event->data.ptr;
-    ss_dassert(dcb->owner == thread_id || dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER);
+    ss_dassert(dcb->thread.id == thread_id || dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER);
 #if PROFILE_POLL
     memlog_log(plog, hkheartbeat - dcb->evq.inserted);
 #endif
@@ -1490,7 +1490,7 @@ static void poll_add_event_to_dcb(DCB*       dcb,
         event->next = NULL;
         event->tail = event;
 
-        int thr = dcb->owner;
+        int thr = dcb->thread.id;
 
         /** It is possible that a housekeeper or a monitor thread inserts a fake
          * event into the thread's event queue which is why the operation needs
diff --git a/server/core/session.c b/server/core/session.c
index 4b01cf781..cb556e215 100644
--- a/server/core/session.c
+++ b/server/core/session.c
@@ -290,7 +290,7 @@ session_link_dcb(SESSION *session, DCB *dcb)
     atomic_add(&session->refcount, 1);
     dcb->session = session;
     /** Move this DCB under the same thread */
-    dcb->owner = session->client_dcb->owner;
+    dcb->thread.id = session->client_dcb->thread.id;
     spinlock_release(&session->ses_lock);
     return true;
 }

From b32af705e6a663f0026d32782d17036e8cd7a530 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Wed, 23 Nov 2016 10:36:11 +0200
Subject: [PATCH 30/42] Extend routing capabilities

The routing capabilities now define the type of output the reply
processing chain expects. Currently, this only consists of two
capabilities; complete packet output and contiguous buffer output. The
latter implies the former.
---
 include/maxscale/routing.h                                 | 5 +++++
 server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/maxscale/routing.h b/include/maxscale/routing.h
index 1544414a2..c16c2eb81 100644
--- a/include/maxscale/routing.h
+++ b/include/maxscale/routing.h
@@ -36,6 +36,11 @@ typedef enum routing_capability
     /**< The transaction state and autocommit mode of the session are tracked;
          implies RCAP_TYPE_CONTIGUOUS_INPUT and RCAP_TYPE_STMT_INPUT. */
     RCAP_TYPE_TRANSACTION_TRACKING = 0x0007, /* 0b0000000000000111 */
+    /**< Responses are delivered one per buffer. */
+    RCAP_TYPE_STMT_OUTPUT           = 0x0010, /* 0b0000000000010000 */
+    /**< Each delivered buffer is contiguous; implies RCAP_TYPE_STMT_OUTPUT. */
+    RCAP_TYPE_CONTIGUOUS_OUTPUT     = 0x0030, /* 0b0000000000110000 */
+
 } routing_capability_t;
 
 #define RCAP_TYPE_NONE 0
diff --git a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
index 555d8f385..b3c3feb85 100644
--- a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
+++ b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
@@ -769,10 +769,10 @@ gw_read_and_write(DCB *dcb)
         ss_dassert(read_buffer != NULL);
     }
 
-    /** Ask what type of input the router/filter chain expects */
+    /** Ask what type of output the router/filter chain expects */
     uint64_t capabilities = service_get_capabilities(session->service);
 
-    if (rcap_type_required(capabilities, RCAP_TYPE_STMT_INPUT))
+    if (rcap_type_required(capabilities, RCAP_TYPE_STMT_OUTPUT))
     {
         GWBUF *tmp = modutil_get_complete_packets(&read_buffer);
         /* Put any residue into the read queue */
@@ -787,7 +787,7 @@ gw_read_and_write(DCB *dcb)
 
         read_buffer = tmp;
 
-        if (rcap_type_required(capabilities, RCAP_TYPE_CONTIGUOUS_INPUT))
+        if (rcap_type_required(capabilities, RCAP_TYPE_CONTIGUOUS_OUTPUT))
         {
             if ((tmp = gwbuf_make_contiguous(read_buffer)))
             {

From b09bf5612746dfd25ccfed15eacb560e5d8d7534 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Thu, 24 Nov 2016 09:36:37 +0200
Subject: [PATCH 31/42] Add missing NULL pointer check to DCB list handling

The thread-specific list removal function didn't check whether the new
head of the list was NULL before updating its tail pointer. The new head
is NULL when removing the old head leaves the list empty.
---
 server/core/dcb.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/server/core/dcb.c b/server/core/dcb.c
index e0b587ed7..7436bcf96 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -3446,7 +3446,11 @@ static void dcb_remove_from_list(DCB *dcb)
     {
         DCB *tail = all_dcbs[dcb->thread.id]->thread.tail;
         all_dcbs[dcb->thread.id] = all_dcbs[dcb->thread.id]->thread.next;
-        all_dcbs[dcb->thread.id]->thread.tail = tail;
+
+        if (all_dcbs[dcb->thread.id])
+        {
+            all_dcbs[dcb->thread.id]->thread.tail = tail;
+        }
     }
     else
     {

From 5411180a50f73404a84329085fde8f2d4471b758 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Thu, 24 Nov 2016 23:05:23 +0200
Subject: [PATCH 32/42] Fix DCB validity checks

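dcb_isvalid() assumed that the DCB was always non-NULL and
session_isvalid() unconditionally returned true. Both now treat a NULL
pointer as invalid.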
---
 server/core/dcb.c     | 2 +-
 server/core/session.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/core/dcb.c b/server/core/dcb.c
index 7436bcf96..fbfbdc21c 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -2503,7 +2503,7 @@ dcb_call_callback(DCB *dcb, DCB_REASON reason)
 int
 dcb_isvalid(DCB *dcb)
 {
-    return !dcb->dcb_is_zombie;
+    return dcb && !dcb->dcb_is_zombie;
 }
 
 /**
diff --git a/server/core/session.c b/server/core/session.c
index cb556e215..7648fe45e 100644
--- a/server/core/session.c
+++ b/server/core/session.c
@@ -427,7 +427,7 @@ session_final_free(SESSION *session)
 int
 session_isvalid(SESSION *session)
 {
-    return true;
+    return session != NULL;
 }
 
 /**

From 32b0f6a60782eab2c09d34e4826a393bd4657158 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Fri, 25 Nov 2016 11:01:03 +0200
Subject: [PATCH 33/42] Iterate using the correct list

The `thread.next` pointer refers to active DCBs in the current thread's
list and the `memdata.next` pointer refers to DCBs about to be freed. The
latter was mixed up with the former due to some changes in the naming.
---
 server/core/dcb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/server/core/dcb.c b/server/core/dcb.c
index fbfbdc21c..02f7bf4c0 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -1953,7 +1953,7 @@ dprintAllDCBs(DCB *pdcb)
     {
         spinlock_acquire(&all_dcbs_lock[i]);
 
-        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->thread.next)
         {
             dprintOneDCB(pdcb, dcb);
         }
@@ -1981,7 +1981,7 @@ dListDCBs(DCB *pdcb)
     for (int i = 0; i < nthr; i++)
     {
         spinlock_acquire(&all_dcbs_lock[i]);
-        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->thread.next)
         {
             dcb_printf(pdcb, " %-16p | %-26s | %-18s | %s\n",
                        dcb, gw_dcb_state2string(dcb->state),
@@ -2014,7 +2014,7 @@ dListClients(DCB *pdcb)
     for (int i = 0; i < nthr; i++)
     {
         spinlock_acquire(&all_dcbs_lock[i]);
-        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->thread.next)
         {
             if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
             {
@@ -2521,7 +2521,7 @@ dcb_hangup_foreach(struct server* server)
     {
         spinlock_acquire(&all_dcbs_lock[i]);
 
-        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->memdata.next)
+        for (DCB *dcb = all_dcbs[i]; dcb; dcb = dcb->thread.next)
         {
             spinlock_acquire(&dcb->dcb_initlock);
             if (dcb->state == DCB_STATE_POLLING && dcb->server &&
@@ -3498,7 +3498,7 @@ void dcb_process_idle_sessions(int thr)
          * check for it once per second. One heartbeat is 100 milliseconds. */
         next_timeout_check = hkheartbeat + 10;
 
-        for (DCB *dcb = all_dcbs[thr]; dcb; dcb = dcb->memdata.next)
+        for (DCB *dcb = all_dcbs[thr]; dcb; dcb = dcb->thread.next)
         {
             if (dcb->dcb_role == DCB_ROLE_CLIENT_HANDLER)
             {
@@ -3524,7 +3524,7 @@ bool dcb_foreach(bool(*func)(DCB *, void *), void *data)
     {
         spinlock_acquire(&all_dcbs_lock[i]);
 
-        for (DCB *dcb = all_dcbs[i]; dcb && more; dcb = dcb->memdata.next)
+        for (DCB *dcb = all_dcbs[i]; dcb && more; dcb = dcb->thread.next)
         {
             if (!func(dcb, data))
             {

From bfc60ae9407d4664094abc74f985e3ddb8beed02 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Fri, 25 Nov 2016 17:56:19 +0200
Subject: [PATCH 34/42] Remove false debug assertion

Not all routers require statement input.
---
 server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
index b3c3feb85..477f3a196 100644
--- a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
+++ b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c
@@ -838,7 +838,6 @@ gw_read_and_write(DCB *dcb)
      */
     if (protocol_get_srv_command((MySQLProtocol *)dcb->protocol, false) != MYSQL_COM_UNDEFINED)
     {
-        ss_dassert(rcap_type_required(capabilities, RCAP_TYPE_STMT_INPUT));
         read_buffer = process_response_data(dcb, read_buffer, gwbuf_length(read_buffer));
         /**
          * Received incomplete response to session command.

From bcbff604b034088a6673d3a0932a264fefc513c5 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Fri, 25 Nov 2016 18:28:06 +0200
Subject: [PATCH 35/42] Properly store errno on listener epoll errors

When an epoll error occurs for a listener, the errno variable must be
stored in another variable while the listener is removed from all of the
epoll instances.
---
 server/core/poll.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/server/core/poll.c b/server/core/poll.c
index 60602ac18..5620ea06e 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -387,6 +387,8 @@ poll_add_dcb(DCB *dcb)
 
     dcb_add_to_list(dcb);
 
+    int error_num = 0;
+
     if (dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER)
     {
         /** Listeners are added to all epoll instances */
@@ -396,6 +398,7 @@ poll_add_dcb(DCB *dcb)
         {
             if ((rc = epoll_ctl(epoll_fd[i], EPOLL_CTL_ADD, dcb->fd, &ev)))
             {
+                error_num = errno;
                 /** Remove the listener from the previous epoll instances */
                 for (int j = 0; j < i; j++)
                 {
@@ -407,13 +410,16 @@ poll_add_dcb(DCB *dcb)
     }
     else
     {
-        rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev);
+        if ((rc = epoll_ctl(epoll_fd[owner], EPOLL_CTL_ADD, dcb->fd, &ev)))
+        {
+            error_num = errno;
+        }
     }
 
     if (rc)
     {
         /* Some errors are actually considered acceptable */
-        rc = poll_resolve_error(dcb, errno, true);
+        rc = poll_resolve_error(dcb, error_num, true);
     }
     if (0 == rc)
     {
@@ -477,6 +483,8 @@ poll_remove_dcb(DCB *dcb)
 
     if (dcbfd > 0)
     {
+        int error_num = 0;
+
         if (dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER)
         {
             /** Listeners are added to all epoll instances */
@@ -490,12 +498,16 @@ poll_remove_dcb(DCB *dcb)
                     /** Even if one of the instances failed to remove it, try
                      * to remove it from all the others */
                     rc = tmp_rc;
+                    error_num = errno;
                 }
             }
         }
         else
         {
-            rc = epoll_ctl(epoll_fd[dcb->thread.id], EPOLL_CTL_DEL, dcbfd, &ev);
+            if ((rc = epoll_ctl(epoll_fd[dcb->thread.id], EPOLL_CTL_DEL, dcbfd, &ev)))
+            {
+                error_num = errno;
+            }
         }
         /**
          * The poll_resolve_error function will always
@@ -504,7 +516,7 @@ poll_remove_dcb(DCB *dcb)
          */
         if (rc)
         {
-            rc = poll_resolve_error(dcb, errno, false);
+            rc = poll_resolve_error(dcb, error_num, false);
         }
         if (rc)
         {

From 5aa791d16ec902ae6a0c09885911825abee5c553 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sat, 26 Nov 2016 05:52:26 +0200
Subject: [PATCH 36/42] Move persistent connections to thread specific lists

Making the lists of persistent DCBs thread specific is both a bug fix and
a performance enhancement. There was a small window where a non-owner
thread could receive events for a DCB. By partitioning the DCBs into
thread specific lists, this is avoided by removing the possibility of DCBs
moving between threads.
---
 include/maxscale/dcb.h    |  2 +-
 include/maxscale/server.h |  5 ++--
 server/core/dcb.c         | 19 ++++++--------
 server/core/poll.c        |  5 ++--
 server/core/server.c      | 55 +++++++++++++++++++++------------------
 5 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h
index eec018c04..4fe12ee8b 100644
--- a/include/maxscale/dcb.h
+++ b/include/maxscale/dcb.h
@@ -368,7 +368,7 @@ int dcb_add_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON, void *
 int dcb_remove_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON, void *), void *);
 int dcb_isvalid(DCB *);                     /* Check the DCB is in the linked list */
 int dcb_count_by_usage(DCB_USAGE);          /* Return counts of DCBs */
-int dcb_persistent_clean_count(DCB *, bool);      /* Clean persistent and return count */
+int dcb_persistent_clean_count(DCB *, int, bool);      /* Clean persistent and return count */
 void dcb_hangup_foreach (struct server* server);
 size_t dcb_get_session_id(DCB* dcb);
 bool dcb_get_ses_log_info(DCB* dcb, size_t* sesid, int* enabled_logs);
diff --git a/include/maxscale/server.h b/include/maxscale/server.h
index dbbd9d7e4..93b6aadd7 100644
--- a/include/maxscale/server.h
+++ b/include/maxscale/server.h
@@ -110,8 +110,7 @@ typedef struct server
     int            depth;          /**< Replication level in the tree */
     long           slaves[MAX_NUM_SLAVES]; /**< Slaves of this node */
     bool           master_err_is_logged; /*< If node failed, this indicates whether it is logged */
-    DCB            *persistent;    /**< List of unused persistent connections to the server */
-    SPINLOCK       persistlock;    /**< Lock for adjusting the persistent connections list */
+    DCB            **persistent;    /**< List of unused persistent connections to the server */
     long           persistpoolmax; /**< Maximum size of persistent connections pool */
     long           persistmaxtime; /**< Maximum number of seconds connection can live */
     int            persistmax;     /**< Maximum pool size actually achieved since startup */
@@ -272,7 +271,7 @@ extern void serverAddMonUser(SERVER *, char *, char *);
 extern void serverAddParameter(SERVER *, char *, char *);
 extern char *serverGetParameter(SERVER *, char *);
 extern void server_update_credentials(SERVER *, char *, char *);
-extern DCB  *server_get_persistent(SERVER *, char *, const char *);
+extern DCB  *server_get_persistent(SERVER *, char *, const char *, int);
 extern void server_update_address(SERVER *, char *);
 extern void server_update_port(SERVER *,  unsigned short);
 extern RESULTSET *serverGetList();
diff --git a/server/core/dcb.c b/server/core/dcb.c
index 02f7bf4c0..7f1274ea5 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -646,7 +646,7 @@ dcb_connect(SERVER *server, SESSION *session, const char *protocol)
     {
         MXS_DEBUG("%lu [dcb_connect] Looking for persistent connection DCB "
                   "user %s protocol %s\n", pthread_self(), user, protocol);
-        dcb = server_get_persistent(server, user, protocol);
+        dcb = server_get_persistent(server, user, protocol, session->client_dcb->thread.id);
         if (dcb)
         {
             /**
@@ -1695,7 +1695,7 @@ dcb_maybe_add_persistent(DCB *dcb)
         && (dcb->server->status & SERVER_RUNNING)
         && !dcb->dcb_errhandle_called
         && !(dcb->flags & DCBF_HUNG)
-        && (poolcount = dcb_persistent_clean_count(dcb, false)) < dcb->server->persistpoolmax)
+        && (poolcount = dcb_persistent_clean_count(dcb, dcb->thread.id, false)) < dcb->server->persistpoolmax)
     {
         DCB_CALLBACK *loopcallback;
         MXS_DEBUG("%lu [dcb_maybe_add_persistent] Adding DCB to persistent pool, user %s.\n",
@@ -1724,10 +1724,8 @@ dcb_maybe_add_persistent(DCB *dcb)
             MXS_FREE(loopcallback);
         }
         spinlock_release(&dcb->cb_lock);
-        spinlock_acquire(&dcb->server->persistlock);
-        dcb->nextpersistent = dcb->server->persistent;
-        dcb->server->persistent = dcb;
-        spinlock_release(&dcb->server->persistlock);
+        dcb->nextpersistent = dcb->server->persistent[dcb->thread.id];
+        dcb->server->persistent[dcb->thread.id] = dcb;
         atomic_add(&dcb->server->stats.n_persistent, 1);
         atomic_add(&dcb->server->stats.n_current, -1);
         return true;
@@ -2580,7 +2578,7 @@ dcb_null_auth(DCB *dcb, SERVER *server, SESSION *session, GWBUF *buf)
  * @return              A count of the DCBs remaining in the pool
  */
 int
-dcb_persistent_clean_count(DCB *dcb, bool cleanall)
+dcb_persistent_clean_count(DCB *dcb, int id, bool cleanall)
 {
     int count = 0;
     if (dcb && dcb->server)
@@ -2591,8 +2589,7 @@ dcb_persistent_clean_count(DCB *dcb, bool cleanall)
         DCB *disposals = NULL;
 
         CHK_SERVER(server);
-        spinlock_acquire(&server->persistlock);
-        persistentdcb = server->persistent;
+        persistentdcb = server->persistent[id];
         while (persistentdcb)
         {
             CHK_DCB(persistentdcb);
@@ -2611,7 +2608,7 @@ dcb_persistent_clean_count(DCB *dcb, bool cleanall)
                 }
                 else
                 {
-                    server->persistent = nextdcb;
+                    server->persistent[id] = nextdcb;
                 }
                 /* Add removed DCBs to disposal list for processing outside spinlock */
                 persistentdcb->nextpersistent = disposals;
@@ -2626,7 +2623,7 @@ dcb_persistent_clean_count(DCB *dcb, bool cleanall)
             persistentdcb = nextdcb;
         }
         server->persistmax = MXS_MAX(server->persistmax, count);
-        spinlock_release(&server->persistlock);
+
         /** Call possible callback for this DCB in case of close */
         while (disposals)
         {
diff --git a/server/core/poll.c b/server/core/poll.c
index 5620ea06e..9a5e65772 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -445,7 +445,7 @@ poll_add_dcb(DCB *dcb)
 int
 poll_remove_dcb(DCB *dcb)
 {
-    int dcbfd, rc = -1;
+    int dcbfd, rc = 0;
     struct  epoll_event ev;
     CHK_DCB(dcb);
 
@@ -493,12 +493,13 @@ poll_remove_dcb(DCB *dcb)
             for (int i = 0; i < nthr; i++)
             {
                 int tmp_rc = epoll_ctl(epoll_fd[i], EPOLL_CTL_DEL, dcb->fd, &ev);
-                if (tmp_rc)
+                if (tmp_rc && rc == 0)
                 {
                     /** Even if one of the instances failed to remove it, try
                      * to remove it from all the others */
                     rc = tmp_rc;
                     error_num = errno;
+                    ss_dassert(error_num);
                 }
             }
         }
diff --git a/server/core/server.c b/server/core/server.c
index a06088baf..664b54d67 100644
--- a/server/core/server.c
+++ b/server/core/server.c
@@ -87,15 +87,18 @@ SERVER* server_alloc(const char *name, const char *address, unsigned short port,
         return NULL;
     }
 
+    int nthr = config_threadcount();
     SERVER *server = (SERVER *)MXS_CALLOC(1, sizeof(SERVER));
     char *my_name = MXS_STRDUP(name);
     char *my_protocol = MXS_STRDUP(protocol);
     char *my_authenticator = MXS_STRDUP(authenticator);
+    DCB **persistent = MXS_CALLOC(nthr, sizeof(*persistent));
 
-    if (!server || !my_name || !my_protocol || !my_authenticator)
+    if (!server || !my_name || !my_protocol || !my_authenticator || !persistent)
     {
         MXS_FREE(server);
         MXS_FREE(my_name);
+        MXS_FREE(persistent);
         MXS_FREE(my_protocol);
         MXS_FREE(my_authenticator);
         return NULL;
@@ -125,7 +128,7 @@ SERVER* server_alloc(const char *name, const char *address, unsigned short port,
     server->parameters = NULL;
     server->server_string = NULL;
     spinlock_init(&server->lock);
-    server->persistent = NULL;
+    server->persistent = persistent;
     server->persistmax = 0;
     server->persistmaxtime = 0;
     server->persistpoolmax = 0;
@@ -133,7 +136,6 @@ SERVER* server_alloc(const char *name, const char *address, unsigned short port,
     server->monpw[0] = '\0';
     server->is_active = true;
     server->charset = SERVER_DEFAULT_CHARSET;
-    spinlock_init(&server->persistlock);
 
     spinlock_acquire(&server_spin);
     server->next = allServers;
@@ -183,7 +185,12 @@ server_free(SERVER *tofreeserver)
 
     if (tofreeserver->persistent)
     {
-        dcb_persistent_clean_count(tofreeserver->persistent, true);
+        int nthr = config_threadcount();
+
+        for (int i = 0; i < nthr; i++)
+        {
+            dcb_persistent_clean_count(tofreeserver->persistent[i], i, true);
+        }
     }
     MXS_FREE(tofreeserver);
     return 1;
@@ -197,17 +204,16 @@ server_free(SERVER *tofreeserver)
  * @param       protocol    The name of the protocol needed for the connection
  */
 DCB *
-server_get_persistent(SERVER *server, char *user, const char *protocol)
+server_get_persistent(SERVER *server, char *user, const char *protocol, int id)
 {
     DCB *dcb, *previous = NULL;
 
-    if (server->persistent
-        && dcb_persistent_clean_count(server->persistent, false)
-        && server->persistent
+    if (server->persistent[id]
+        && dcb_persistent_clean_count(server->persistent[id], id, false)
+        && server->persistent[id] // Check after cleaning
         && (server->status & SERVER_RUNNING))
     {
-        spinlock_acquire(&server->persistlock);
-        dcb = server->persistent;
+        dcb = server->persistent[id];
         while (dcb)
         {
             if (dcb->user
@@ -219,7 +225,7 @@ server_get_persistent(SERVER *server, char *user, const char *protocol)
             {
                 if (NULL == previous)
                 {
-                    server->persistent = dcb->nextpersistent;
+                    server->persistent[id] = dcb->nextpersistent;
                 }
                 else
                 {
@@ -227,7 +233,6 @@ server_get_persistent(SERVER *server, char *user, const char *protocol)
                 }
                 MXS_FREE(dcb->user);
                 dcb->user = NULL;
-                spinlock_release(&server->persistlock);
                 atomic_add(&server->stats.n_persistent, -1);
                 atomic_add(&server->stats.n_current, 1);
                 return dcb;
@@ -249,7 +254,6 @@ server_get_persistent(SERVER *server, char *user, const char *protocol)
             previous = dcb;
             dcb = dcb->nextpersistent;
         }
-        spinlock_release(&server->persistlock);
     }
     return NULL;
 }
@@ -549,8 +553,7 @@ dprintServer(DCB *dcb, SERVER *server)
     if (server->persistpoolmax)
     {
         dcb_printf(dcb, "\tPersistent pool size:                %d\n", server->stats.n_persistent);
-        dcb_printf(dcb, "\tPersistent measured pool size:       %d\n",
-                   dcb_persistent_clean_count(server->persistent, false));
+        dcb_printf(dcb, "\tPersistent measured pool size:       %d\n", server->stats.n_persistent);
         dcb_printf(dcb, "\tPersistent actual size max:          %d\n", server->persistmax);
         dcb_printf(dcb, "\tPersistent pool size limit:          %ld\n", server->persistpoolmax);
         dcb_printf(dcb, "\tPersistent max time (secs):          %ld\n", server->persistmaxtime);
@@ -595,19 +598,21 @@ void
 dprintPersistentDCBs(DCB *pdcb, SERVER *server)
 {
     DCB *dcb;
+    int nthr = config_threadcount();
 
-    spinlock_acquire(&server->persistlock);
-#if SPINLOCK_PROFILE
-    dcb_printf(pdcb, "DCB List Spinlock Statistics:\n");
-    spinlock_stats(&server->persistlock, spin_reporter, pdcb);
-#endif
-    dcb = server->persistent;
-    while (dcb)
+    for (int i = 0; i < nthr; i++)
     {
-        dprintOneDCB(pdcb, dcb);
-        dcb = dcb->nextpersistent;
+#if SPINLOCK_PROFILE
+        dcb_printf(pdcb, "DCB List Spinlock Statistics:\n");
+        spinlock_stats(&server->persistlock, spin_reporter, pdcb);
+#endif
+        dcb = server->persistent[i];
+        while (dcb)
+        {
+            dprintOneDCB(pdcb, dcb);
+            dcb = dcb->nextpersistent;
+        }
     }
-    spinlock_release(&server->persistlock);
 }
 
 /**

From 8573df6ee7654c182bbd6a1f04d4d6f472379bd7 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sat, 26 Nov 2016 06:04:34 +0200
Subject: [PATCH 37/42] Disable concurrent maxadmin access

The administrative operations in debugcmd.c now share a global lock.
---
 server/modules/routing/debugcli/debugcmd.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/server/modules/routing/debugcli/debugcmd.c b/server/modules/routing/debugcli/debugcmd.c
index 2f15c5e6c..cad322c67 100644
--- a/server/modules/routing/debugcli/debugcmd.c
+++ b/server/modules/routing/debugcli/debugcmd.c
@@ -1520,6 +1520,8 @@ convert_arg(int mode, char *arg, int arg_type)
     return 0;
 }
 
+static SPINLOCK debugcmd_lock = SPINLOCK_INIT;
+
 /**
  * We have a complete line from the user, lookup the commands and execute them
  *
@@ -1611,6 +1613,8 @@ execute_cmd(CLI_SESSION *cli)
 
     argc = i - 2;   /* The number of extra arguments to commands */
 
+    spinlock_acquire(&debugcmd_lock);
+
     if (!strcasecmp(args[0], "help"))
     {
         if (args[1] == NULL || *args[1] == 0)
@@ -1662,11 +1666,7 @@ execute_cmd(CLI_SESSION *cli)
         }
         found = 1;
     }
-    else if (!strcasecmp(args[0], "quit"))
-    {
-        return 0;
-    }
-    else if (argc >= 0)
+    else if (strcasecmp(args[0], "quit") && argc >= 0)
     {
         for (i = 0; cmds[i].cmd; i++)
         {
@@ -1710,7 +1710,7 @@ execute_cmd(CLI_SESSION *cli)
                                 if (arg_list[k] == 0)
                                 {
                                     dcb_printf(dcb, "Invalid argument: %s\n", args[k + 2]);
-                                    return 0;
+                                    break;
                                 }
                             }
 
@@ -1807,6 +1807,8 @@ execute_cmd(CLI_SESSION *cli)
                    "Command '%s' not known, type help for a list of available commands\n", args[0]);
     }
 
+    spinlock_release(&debugcmd_lock);
+
     memset(cli->cmdbuf, 0, CMDBUFLEN);
 
     return 1;

From 7cbbc6f8f7ae535a3ab3b1015155a934b2938823 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sat, 26 Nov 2016 08:19:36 +0200
Subject: [PATCH 38/42] Add a mechanism to synchronize persistent pool counts

The polling system now has a concept of messages. This can be used to send
a synchronous message to the polling system which waits for all threads to
process the message before returning.

Currently this is used to flush unused DCBs when server persistent
statistics are reported.
---
 include/maxscale/poll.h |  6 ++++
 server/core/dcb.c       |  7 +++--
 server/core/poll.c      | 65 ++++++++++++++++++++++++++++++++++++++++-
 server/core/server.c    |  1 +
 4 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/include/maxscale/poll.h b/include/maxscale/poll.h
index 315423cb2..28e9451ba 100644
--- a/include/maxscale/poll.h
+++ b/include/maxscale/poll.h
@@ -52,6 +52,11 @@ typedef enum
     POLL_STAT_MAX_EXECTIME
 } POLL_STAT;
 
+enum poll_message
+{
+    POLL_MSG_CLEAN_PERSISTENT = 0x01
+};
+
 extern  void            poll_init();
 extern  int             poll_add_dcb(DCB *);
 extern  int             poll_remove_dcb(DCB *);
@@ -71,5 +76,6 @@ extern  void            poll_fake_event(DCB *dcb, enum EPOLL_EVENTS ev);
 extern  void            poll_fake_hangup_event(DCB *dcb);
 extern  void            poll_fake_write_event(DCB *dcb);
 extern  void            poll_fake_read_event(DCB *dcb);
+extern  void            poll_send_message(enum poll_message msg, void *data);
 
 MXS_END_DECLS
diff --git a/server/core/dcb.c b/server/core/dcb.c
index 7f1274ea5..f7f307f67 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -1687,7 +1687,6 @@ dcb_close(DCB *dcb)
 static bool
 dcb_maybe_add_persistent(DCB *dcb)
 {
-    int  poolcount = -1;
     if (dcb->user != NULL
         && strlen(dcb->user)
         && dcb->server
@@ -1695,7 +1694,8 @@ dcb_maybe_add_persistent(DCB *dcb)
         && (dcb->server->status & SERVER_RUNNING)
         && !dcb->dcb_errhandle_called
         && !(dcb->flags & DCBF_HUNG)
-        && (poolcount = dcb_persistent_clean_count(dcb, dcb->thread.id, false)) < dcb->server->persistpoolmax)
+        && dcb_persistent_clean_count(dcb, dcb->thread.id, false) < dcb->server->persistpoolmax
+        && dcb->server->stats.n_persistent < dcb->server->persistpoolmax)
     {
         DCB_CALLBACK *loopcallback;
         MXS_DEBUG("%lu [dcb_maybe_add_persistent] Adding DCB to persistent pool, user %s.\n",
@@ -1742,7 +1742,7 @@ dcb_maybe_add_persistent(DCB *dcb)
                   dcb->dcb_errhandle_called ? "true" : "false",
                   (dcb->flags & DCBF_HUNG) ? "true" : "false",
                   dcb->server ? dcb->server->status : 0,
-                  poolcount);
+                  dcb->server->stats.n_persistent);
     }
     return false;
 }
@@ -2573,6 +2573,7 @@ dcb_null_auth(DCB *dcb, SERVER *server, SESSION *session, GWBUF *buf)
  * Check persistent pool for expiry or excess size and count
  *
  * @param dcb           The DCB being closed.
+ * @param id            Thread ID
  * @param cleanall      Boolean, if true the whole pool is cleared for the
  *                      server related to the given DCB
  * @return              A count of the DCBs remaining in the pool
diff --git a/server/core/poll.c b/server/core/poll.c
index 9a5e65772..764b47a47 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -33,6 +33,9 @@
 #include <maxscale/statistics.h>
 #include <maxscale/query_classifier.h>
 #include <maxscale/utils.h>
+#include <maxscale/server.h>
+#include <maxscale/thread.h>
+#include <maxscale/platform.h>
 
 #define         PROFILE_POLL    0
 
@@ -91,12 +94,19 @@ typedef struct fake_event
     struct fake_event *next;  /*< The next event */
 } fake_event_t;
 
+thread_local int thread_id; /**< This thread's ID */
 static int *epoll_fd;    /*< The epoll file descriptor */
 static int next_epoll_fd = 0; /*< Which thread handles the next DCB */
 static fake_event_t **fake_events; /*< Thread-specific fake event queue */
 static SPINLOCK      *fake_event_lock;
 static int do_shutdown = 0;  /*< Flag the shutdown of the poll subsystem */
 static GWBITMASK poll_mask;
+
+/** Poll cross-thread messaging variables */
+static int     *poll_msg;
+static void    *poll_msg_data = NULL;
+static SPINLOCK poll_msg_lock = SPINLOCK_INIT;
+
 #if MUTEX_EPOLL
 static simple_mutex_t epoll_wait_mutex; /*< serializes calls to epoll_wait */
 #endif
@@ -105,6 +115,7 @@ static int n_waiting = 0;    /*< No. of threads in epoll_wait */
 static int process_pollq(int thread_id, struct epoll_event *event);
 static void poll_add_event_to_dcb(DCB* dcb, GWBUF* buf, uint32_t ev);
 static bool poll_dcb_session_check(DCB *dcb, const char *);
+static void poll_check_message(void);
 
 DCB *eventq = NULL;
 SPINLOCK pollqlock = SPINLOCK_INIT;
@@ -246,6 +257,11 @@ poll_init()
         exit(-1);
     }
 
+    if ((poll_msg = MXS_CALLOC(n_threads, sizeof(int))) == NULL)
+    {
+        exit(-1);
+    }
+
     for (int i = 0; i < n_threads; i++)
     {
         spinlock_init(&fake_event_lock[i]);
@@ -661,7 +677,7 @@ poll_waitevents(void *arg)
 {
     struct epoll_event events[MAX_EVENTS];
     int i, nfds, timeout_bias = 1;
-    intptr_t thread_id = (intptr_t)arg;
+    thread_id = (intptr_t)arg;
     int poll_spins = 0;
 
     /** Add this thread to the bitmask of running polling threads */
@@ -816,6 +832,8 @@ poll_waitevents(void *arg)
         /** Process closed DCBs */
         dcb_process_zombies(thread_id);
 
+        poll_check_message();
+
         if (thread_data)
         {
             thread_data[thread_id].state = THREAD_IDLE;
@@ -1729,3 +1747,48 @@ eventTimesGetList()
 
     return set;
 }
+
+void poll_send_message(enum poll_message msg, void *data)
+{
+    spinlock_acquire(&poll_msg_lock);
+    int nthr = config_threadcount();
+    poll_msg_data = data;
+
+    for (int i = 0; i < nthr; i++)
+    {
+        if (i != thread_id)
+        {
+            /** Synchronize writes to poll_msg */
+            atomic_synchronize();
+        }
+        poll_msg[i] |= msg;
+    }
+
+    /** Handle this thread's message */
+    poll_check_message();
+
+    for (int i = 0; i < nthr; i++)
+    {
+        if (i != thread_id)
+        {
+            while (poll_msg[i] & msg)
+            {
+                thread_millisleep(1);
+            }
+        }
+    }
+
+    poll_msg_data = NULL;
+    spinlock_release(&poll_msg_lock);
+}
+
+static void poll_check_message()
+{
+    if (poll_msg[thread_id] & POLL_MSG_CLEAN_PERSISTENT)
+    {
+        SERVER *server = (SERVER*)poll_msg_data;
+        dcb_persistent_clean_count(server->persistent[thread_id], thread_id, false);
+        atomic_synchronize();
+        poll_msg[thread_id] &= ~POLL_MSG_CLEAN_PERSISTENT;
+    }
+}
diff --git a/server/core/server.c b/server/core/server.c
index 664b54d67..3daf47179 100644
--- a/server/core/server.c
+++ b/server/core/server.c
@@ -553,6 +553,7 @@ dprintServer(DCB *dcb, SERVER *server)
     if (server->persistpoolmax)
     {
         dcb_printf(dcb, "\tPersistent pool size:                %d\n", server->stats.n_persistent);
+        poll_send_message(POLL_MSG_CLEAN_PERSISTENT, server);
         dcb_printf(dcb, "\tPersistent measured pool size:       %d\n", server->stats.n_persistent);
         dcb_printf(dcb, "\tPersistent actual size max:          %d\n", server->persistmax);
         dcb_printf(dcb, "\tPersistent pool size limit:          %ld\n", server->persistpoolmax);

From 1b09faf2e4a4d019a7f8a133e1a454f3cb7ed5ac Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Sun, 27 Nov 2016 10:02:47 +0200
Subject: [PATCH 39/42] Fix crash on debug logging

The debug logging for persistent connections caused a crash if a
non-backend DCB was processed.
---
 server/core/dcb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/server/core/dcb.c b/server/core/dcb.c
index f7f307f67..b0f3ceb6a 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -1730,7 +1730,7 @@ dcb_maybe_add_persistent(DCB *dcb)
         atomic_add(&dcb->server->stats.n_current, -1);
         return true;
     }
-    else
+    else if (dcb->dcb_role == DCB_ROLE_BACKEND_HANDLER && dcb->server)
     {
         MXS_DEBUG("%lu [dcb_maybe_add_persistent] Not adding DCB %p to persistent pool, "
                   "user %s, max for pool %ld, error handle called %s, hung flag %s, "
@@ -1738,10 +1738,10 @@ dcb_maybe_add_persistent(DCB *dcb)
                   pthread_self(),
                   dcb,
                   dcb->user ? dcb->user : "",
-                  (dcb->server && dcb->server->persistpoolmax) ? dcb->server->persistpoolmax : 0,
+                  dcb->server->persistpoolmax,
                   dcb->dcb_errhandle_called ? "true" : "false",
                   (dcb->flags & DCBF_HUNG) ? "true" : "false",
-                  dcb->server ? dcb->server->status : 0,
+                  dcb->server->status,
                   dcb->server->stats.n_persistent);
     }
     return false;

From 0689b3df39d2536da9c1a13ddbff114b85426734 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Tue, 29 Nov 2016 11:12:53 +0200
Subject: [PATCH 40/42] Improve the polling statistics

The statistics of the polling system no longer match the implementation it
measures. Modified the statistics to better represent the new system by
calculating the number of epoll events each thread receives.
---
 include/maxscale/poll.h       |   1 -
 include/maxscale/statistics.h |  63 +++++++++++++++++++-
 server/core/poll.c            | 105 ++++++++++++++++++----------------
 server/core/statistics.c      |  44 ++++++++++++++
 4 files changed, 160 insertions(+), 53 deletions(-)

diff --git a/include/maxscale/poll.h b/include/maxscale/poll.h
index 28e9451ba..7eb0aeb63 100644
--- a/include/maxscale/poll.h
+++ b/include/maxscale/poll.h
@@ -46,7 +46,6 @@ typedef enum
     POLL_STAT_HANGUP,
     POLL_STAT_ACCEPT,
     POLL_STAT_EVQ_LEN,
-    POLL_STAT_EVQ_PENDING,
     POLL_STAT_EVQ_MAX,
     POLL_STAT_MAX_QTIME,
     POLL_STAT_MAX_EXECTIME
diff --git a/include/maxscale/statistics.h b/include/maxscale/statistics.h
index 851eb1c46..798df286f 100644
--- a/include/maxscale/statistics.h
+++ b/include/maxscale/statistics.h
@@ -31,6 +31,15 @@ MXS_BEGIN_DECLS
 
 typedef void* ts_stats_t;
 
+/** Aggregation types accepted by ts_stats_get */
+enum ts_stats_type
+{
+    TS_STATS_MAX, /**< Maximum value */
+    TS_STATS_MIX, /**< Minimum value (note: the enumerator is spelled MIX) */
+    TS_STATS_SUM, /**< Sum of all values */
+    TS_STATS_AVG  /**< Average of all values */
+};
+
 /** stats_init should be called only once */
 void ts_stats_init();
 
@@ -39,7 +48,17 @@ void ts_stats_end();
 
 ts_stats_t ts_stats_alloc();
 void ts_stats_free(ts_stats_t stats);
-int64_t ts_stats_sum(ts_stats_t stats);
+
+/**
+ * @brief Get an aggregate of the per-thread statistics values
+ *
+ * @param stats Statistics to read
+ * @param type  Aggregate to calculate: maximum, minimum, sum or average
+ * @return The aggregate selected by @c type
+ *
+ * @see enum ts_stats_type
+ */
+int64_t ts_stats_get(ts_stats_t stats, enum ts_stats_type type);
 
 /**
  * @brief Increment thread statistics by one
@@ -61,8 +80,6 @@ ts_stats_increment(ts_stats_t stats, int thread_id)
  * @param stats     Statistics to set
  * @param value     Value to set to
  * @param thread_id ID of thread
- *
- * @note Appears to be unused
  */
 static void inline
 ts_stats_set(ts_stats_t stats, int value, int thread_id)
@@ -70,4 +87,44 @@ ts_stats_set(ts_stats_t stats, int value, int thread_id)
     ((int64_t*)stats)[thread_id] = value;
 }
 
+/**
+ * @brief Raise a thread's statistics value to a new maximum
+ *
+ * The slot of the given thread is overwritten only when @c value exceeds it.
+ *
+ * @param stats     Statistics object to update
+ * @param value     Candidate maximum
+ * @param thread_id ID of the thread whose slot is updated
+ */
+static void inline
+ts_stats_set_max(ts_stats_t stats, int value, int thread_id)
+{
+    int64_t *slot = &((int64_t*)stats)[thread_id];
+
+    if (*slot < value)
+    {
+        *slot = value;
+    }
+}
+
+/**
+ * @brief Lower a thread's statistics value to a new minimum
+ *
+ * The slot of the given thread is overwritten only when @c value is below it.
+ *
+ * @param stats     Statistics object to update
+ * @param value     Candidate minimum
+ * @param thread_id ID of the thread whose slot is updated
+ */
+static void inline
+ts_stats_set_min(ts_stats_t stats, int value, int thread_id)
+{
+    int64_t *slot = &((int64_t*)stats)[thread_id];
+
+    if (*slot > value)
+    {
+        *slot = value;
+    }
+}
+
 MXS_END_DECLS
diff --git a/server/core/poll.c b/server/core/poll.c
index 764b47a47..853bc602a 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -34,6 +34,7 @@
 #include <maxscale/query_classifier.h>
 #include <maxscale/utils.h>
 #include <maxscale/server.h>
+#include <maxscale/statistics.h>
 #include <maxscale/thread.h>
 #include <maxscale/platform.h>
 
@@ -158,6 +159,7 @@ typedef struct
     int n_fds;          /*< No. of descriptors thread is processing */
     DCB *cur_dcb;       /*< Current DCB being processed */
     uint32_t event;     /*< Current event being processed */
+    uint64_t cycle_start; /*< The time when the poll loop was started */
 } THREAD_DATA;
 
 static THREAD_DATA *thread_data = NULL;    /*< Status of each thread */
@@ -188,10 +190,8 @@ static struct
     ts_stats_t *n_nbpollev;     /*< Number of polls returning events */
     ts_stats_t *n_nothreads;    /*< Number of times no threads are polling */
     int32_t n_fds[MAXNFDS];     /*< Number of wakeups with particular n_fds value */
-    int32_t evq_length;         /*< Event queue length */
-    int32_t evq_pending;        /*< Number of pending descriptors in event queue */
-    int32_t evq_max;            /*< Maximum event queue length */
-    int32_t wake_evqpending;    /*< Woken from epoll_wait with pending events in queue */
+    ts_stats_t *evq_length;     /*< Event queue length */
+    ts_stats_t *evq_max;        /*< Maximum event queue length */
     ts_stats_t *blockingpolls;  /*< Number of epoll_waits with a timeout specified */
 } pollStats;
 
@@ -203,8 +203,8 @@ static struct
 {
     uint32_t qtimes[N_QUEUE_TIMES + 1];
     uint32_t exectimes[N_QUEUE_TIMES + 1];
-    uint64_t maxqtime;
-    uint64_t maxexectime;
+    ts_stats_t *maxqtime;
+    ts_stats_t *maxexectime;
 } queueStats;
 
 /**
@@ -288,6 +288,10 @@ poll_init()
         (pollStats.n_pollev = ts_stats_alloc()) == NULL ||
         (pollStats.n_nbpollev = ts_stats_alloc()) == NULL ||
         (pollStats.n_nothreads = ts_stats_alloc()) == NULL ||
+        (pollStats.evq_length = ts_stats_alloc()) == NULL ||
+        (pollStats.evq_max = ts_stats_alloc()) == NULL ||
+        (queueStats.maxqtime = ts_stats_alloc()) == NULL ||
+        (queueStats.maxexectime = ts_stats_alloc()) == NULL ||
         (pollStats.blockingpolls = ts_stats_alloc()) == NULL)
     {
         MXS_OOM_MESSAGE("FATAL: Could not allocate statistics data.");
@@ -736,7 +740,6 @@ poll_waitevents(void *arg)
                               (max_poll_sleep * timeout_bias) / 10);
             if (nfds == 0)
             {
-                atomic_add(&pollStats.wake_evqpending, 1);
                 poll_spins = 0;
             }
         }
@@ -755,6 +758,9 @@ poll_waitevents(void *arg)
 #endif /* BLOCKINGPOLL */
         if (nfds > 0)
         {
+            ts_stats_set(pollStats.evq_length, nfds, thread_id);
+            ts_stats_set_max(pollStats.evq_max, nfds, thread_id);
+
             timeout_bias = 1;
             if (poll_spins <= number_poll_spins + 1)
             {
@@ -791,6 +797,8 @@ poll_waitevents(void *arg)
              */
         }
 
+        thread_data[thread_id].cycle_start = hkheartbeat;
+
         /* Process of the queue of waiting requests */
         for (int i = 0; i < nfds; i++)
         {
@@ -913,15 +921,15 @@ static int
 process_pollq(int thread_id, struct epoll_event *event)
 {
     uint32_t ev = event->events;
-    unsigned long qtime;
-
     DCB *dcb = event->data.ptr;
     ss_dassert(dcb->thread.id == thread_id || dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER);
 #if PROFILE_POLL
     memlog_log(plog, hkheartbeat - dcb->evq.inserted);
 #endif
-    qtime = hkheartbeat - dcb->evq.inserted;
-    dcb->evq.started = hkheartbeat;
+
+    /** Calculate event queue statistics */
+    uint64_t started = hkheartbeat;
+    uint64_t qtime = started - thread_data[thread_id].cycle_start;
 
     if (qtime > N_QUEUE_TIMES)
     {
@@ -931,11 +939,8 @@ process_pollq(int thread_id, struct epoll_event *event)
     {
         queueStats.qtimes[qtime]++;
     }
-    if (qtime > queueStats.maxqtime)
-    {
-        queueStats.maxqtime = qtime;
-    }
 
+    ts_stats_set_max(queueStats.maxqtime, qtime, thread_id);
 
     CHK_DCB(dcb);
     if (thread_data)
@@ -1135,7 +1140,9 @@ process_pollq(int thread_id, struct epoll_event *event)
         }
     }
 #endif
-    qtime = hkheartbeat - dcb->evq.started;
+
+    /** Calculate event execution statistics */
+    qtime = hkheartbeat - started;
 
     if (qtime > N_QUEUE_TIMES)
     {
@@ -1145,10 +1152,8 @@ process_pollq(int thread_id, struct epoll_event *event)
     {
         queueStats.exectimes[qtime % N_QUEUE_TIMES]++;
     }
-    if (qtime > queueStats.maxexectime)
-    {
-        queueStats.maxexectime = qtime;
-    }
+
+    ts_stats_set_max(queueStats.maxexectime, qtime, thread_id);
 
     /** Reset session id from thread's local storage */
     mxs_log_tls.li_sesid = 0;
@@ -1229,31 +1234,31 @@ dprintPollStats(DCB *dcb)
 
     dcb_printf(dcb, "\nPoll Statistics.\n\n");
     dcb_printf(dcb, "No. of epoll cycles:                           %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_polls));
+               ts_stats_get(pollStats.n_polls, TS_STATS_SUM));
     dcb_printf(dcb, "No. of epoll cycles with wait:                 %" PRId64 "\n",
-               ts_stats_sum(pollStats.blockingpolls));
+               ts_stats_get(pollStats.blockingpolls, TS_STATS_SUM));
     dcb_printf(dcb, "No. of epoll calls returning events:           %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_pollev));
+               ts_stats_get(pollStats.n_pollev, TS_STATS_SUM));
     dcb_printf(dcb, "No. of non-blocking calls returning events:    %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_nbpollev));
+               ts_stats_get(pollStats.n_nbpollev, TS_STATS_SUM));
     dcb_printf(dcb, "No. of read events:                            %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_read));
+               ts_stats_get(pollStats.n_read, TS_STATS_SUM));
     dcb_printf(dcb, "No. of write events:                           %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_write));
+               ts_stats_get(pollStats.n_write, TS_STATS_SUM));
     dcb_printf(dcb, "No. of error events:                           %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_error));
+               ts_stats_get(pollStats.n_error, TS_STATS_SUM));
     dcb_printf(dcb, "No. of hangup events:                          %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_hup));
+               ts_stats_get(pollStats.n_hup, TS_STATS_SUM));
     dcb_printf(dcb, "No. of accept events:                          %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_accept));
+               ts_stats_get(pollStats.n_accept, TS_STATS_SUM));
     dcb_printf(dcb, "No. of times no threads polling:               %" PRId64 "\n",
-               ts_stats_sum(pollStats.n_nothreads));
-    dcb_printf(dcb, "Current event queue length:                    %" PRId32 "\n",
-               pollStats.evq_length);
-    dcb_printf(dcb, "Maximum event queue length:                    %" PRId32 "\n",
-               pollStats.evq_max);
-    dcb_printf(dcb, "No. of wakeups with pending queue:             %" PRId32 "\n",
-               pollStats.wake_evqpending);
+               ts_stats_get(pollStats.n_nothreads, TS_STATS_SUM));
+    dcb_printf(dcb, "Total event queue length:                      %" PRId64 "\n",
+               ts_stats_get(pollStats.evq_length, TS_STATS_SUM));
+    dcb_printf(dcb, "Average event queue length:                    %" PRId64 "\n",
+               ts_stats_get(pollStats.evq_length, TS_STATS_AVG));
+    dcb_printf(dcb, "Maximum event queue length:                    %" PRId64 "\n",
+               ts_stats_get(pollStats.evq_max, TS_STATS_MAX));
 
     dcb_printf(dcb, "No of poll completions with descriptors\n");
     dcb_printf(dcb, "\tNo. of descriptors\tNo. of poll completions.\n");
@@ -1620,10 +1625,11 @@ dShowEventStats(DCB *pdcb)
     int i;
 
     dcb_printf(pdcb, "\nEvent statistics.\n");
-    dcb_printf(pdcb, "Maximum queue time:           %3lu00ms\n", queueStats.maxqtime);
-    dcb_printf(pdcb, "Maximum execution time:       %3lu00ms\n", queueStats.maxexectime);
-    dcb_printf(pdcb, "Maximum event queue length:   %3d\n", pollStats.evq_max);
-    dcb_printf(pdcb, "Current event queue length:   %3d\n", pollStats.evq_length);
+    dcb_printf(pdcb, "Maximum queue time:           %3" PRId64 "00ms\n", ts_stats_get(queueStats.maxqtime, TS_STATS_MAX));
+    dcb_printf(pdcb, "Maximum execution time:       %3" PRId64 "00ms\n", ts_stats_get(queueStats.maxexectime, TS_STATS_MAX));
+    dcb_printf(pdcb, "Maximum event queue length:   %3" PRId64 "\n", ts_stats_get(pollStats.evq_max, TS_STATS_MAX));
+    dcb_printf(pdcb, "Total event queue length:     %3" PRId64 "\n", ts_stats_get(pollStats.evq_length, TS_STATS_SUM));
+    dcb_printf(pdcb, "Average event queue length:   %3" PRId64 "\n", ts_stats_get(pollStats.evq_length, TS_STATS_AVG));
     dcb_printf(pdcb, "\n");
     dcb_printf(pdcb, "               |    Number of events\n");
     dcb_printf(pdcb, "Duration       | Queued     | Executed\n");
@@ -1651,24 +1657,25 @@ poll_get_stat(POLL_STAT stat)
     switch (stat)
     {
     case POLL_STAT_READ:
-        return ts_stats_sum(pollStats.n_read);
+        return ts_stats_get(pollStats.n_read, TS_STATS_SUM);
     case POLL_STAT_WRITE:
-        return ts_stats_sum(pollStats.n_write);
+        return ts_stats_get(pollStats.n_write, TS_STATS_SUM);
     case POLL_STAT_ERROR:
-        return ts_stats_sum(pollStats.n_error);
+        return ts_stats_get(pollStats.n_error, TS_STATS_SUM);
     case POLL_STAT_HANGUP:
-        return ts_stats_sum(pollStats.n_hup);
+        return ts_stats_get(pollStats.n_hup, TS_STATS_SUM);
     case POLL_STAT_ACCEPT:
-        return ts_stats_sum(pollStats.n_accept);
+        return ts_stats_get(pollStats.n_accept, TS_STATS_SUM);
     case POLL_STAT_EVQ_LEN:
-        return pollStats.evq_length;
+        return ts_stats_get(pollStats.evq_length, TS_STATS_AVG);
     case POLL_STAT_EVQ_MAX:
-        return pollStats.evq_max;
+        return ts_stats_get(pollStats.evq_max, TS_STATS_MAX);
     case POLL_STAT_MAX_QTIME:
-        return (int)queueStats.maxqtime;
+        return ts_stats_get(queueStats.maxqtime, TS_STATS_MAX);
     case POLL_STAT_MAX_EXECTIME:
-        return (int)queueStats.maxexectime;
+        return ts_stats_get(queueStats.maxexectime, TS_STATS_MAX);
     default:
+        ss_dassert(false);
         break;
     }
     return 0;
diff --git a/server/core/statistics.c b/server/core/statistics.c
index 1730629a2..97f491518 100644
--- a/server/core/statistics.c
+++ b/server/core/statistics.c
@@ -91,3 +91,47 @@ int64_t ts_stats_sum(ts_stats_t stats)
     }
     return sum;
 }
+
+/**
+ * @brief Read the value of the statistics object
+ *
+ * Calculates the maximum, minimum, sum or average over the values of all threads.
+ *
+ * @param stats Statistics to read
+ * @param type  The statistics type
+ * @return The aggregate selected by @c type; the average is the sum divided by the thread count
+ */
+int64_t ts_stats_get(ts_stats_t stats, enum ts_stats_type type)
+{
+    ss_dassert(stats_initialized);
+    int64_t best = type == TS_STATS_MAX ? INT64_MIN : (type == TS_STATS_MIX ? INT64_MAX : 0);
+
+    for (int i = 0; i < thread_count; i++)
+    {
+        int64_t value = ((int64_t*)stats)[i];
+
+        switch (type)
+        {
+            case TS_STATS_MAX:
+                if (value > best)
+                {
+                    best = value;
+                }
+                break;
+
+            case TS_STATS_MIX:
+                if (value < best)
+                {
+                    best = value;
+                }
+                break;
+
+            case TS_STATS_AVG:
+            case TS_STATS_SUM:
+                best += value;
+                break;
+        }
+    }
+
+    return type == TS_STATS_AVG ? best / thread_count : best;
+}

From 42eb8add5d814c5dadebe26228d3e461f589c785 Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Wed, 30 Nov 2016 08:26:45 +0200
Subject: [PATCH 41/42] Lock listeners when adding or removing them from epoll

Locking the listener DCB when adding or removing it from epoll will
prevent cases where only a part of the threads have the listener in epoll.
---
 server/core/poll.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/server/core/poll.c b/server/core/poll.c
index 853bc602a..c0afb76dc 100644
--- a/server/core/poll.c
+++ b/server/core/poll.c
@@ -411,6 +411,7 @@ poll_add_dcb(DCB *dcb)
 
     if (dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER)
     {
+        spinlock_acquire(&dcb->dcb_initlock);
         /** Listeners are added to all epoll instances */
         int nthr = config_threadcount();
 
@@ -427,6 +428,7 @@ poll_add_dcb(DCB *dcb)
                 break;
             }
         }
+        spinlock_release(&dcb->dcb_initlock);
     }
     else
     {
@@ -507,6 +509,7 @@ poll_remove_dcb(DCB *dcb)
 
         if (dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER)
         {
+            spinlock_acquire(&dcb->dcb_initlock);
             /** Listeners are added to all epoll instances */
             int nthr = config_threadcount();
 
@@ -522,6 +525,7 @@ poll_remove_dcb(DCB *dcb)
                     ss_dassert(error_num);
                 }
             }
+            spinlock_release(&dcb->dcb_initlock);
         }
         else
         {

From 7b8497df7aa46d4536b2529ba1233dd1711f588a Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Wed, 30 Nov 2016 09:00:31 +0200
Subject: [PATCH 42/42] Make sure listener DCBs are removed cleanly from the
 list

As listener DCBs can be added and removed from the polling system multiple
times, the DCBs need to be reset to a clean state when they are removed.
---
 server/core/dcb.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/server/core/dcb.c b/server/core/dcb.c
index b0f3ceb6a..8589315fb 100644
--- a/server/core/dcb.c
+++ b/server/core/dcb.c
@@ -3471,6 +3471,11 @@ static void dcb_remove_from_list(DCB *dcb)
         }
     }
 
+    /** Reset the next and tail pointers so that if this DCB is added to the list
+     * again, it will be in a clean state. */
+    dcb->thread.next = NULL;
+    dcb->thread.tail = NULL;
+
     spinlock_release(&all_dcbs_lock[dcb->thread.id]);
 }