MXS-3059: Don't use STL containers concurrently

The diagnostics_json call could access the std::unordered_map at the same time as it was being updated by the monitoring thread. This leads to undefined behavior, which in the case of MXS-3059 manifested as a segfault.
2020-07-03 07:26:53 +03:00
parent ad4bd26ff0
commit 7520efd218
2 changed files with 10 additions and 3 deletions
--- a/server/modules/monitor/galeramon/galeramon.cc
+++ b/server/modules/monitor/galeramon/galeramon.cc
@ -103,12 +103,13 @@ json_t* GaleraMonitor::diagnostics_json() const
    }

    json_t* arr = json_array();
+    std::lock_guard<std::mutex> guard(m_lock);

    for (auto ptr : servers())
    {
-        auto it = m_info.find(ptr);
+        auto it = m_prev_info.find(ptr);

-        if (it != m_info.end())
+        if (it != m_prev_info.end())
        {
            json_t* obj = json_object();
            json_object_set_new(obj, "name", json_string(it->first->server->name()));
@ -357,7 +358,9 @@ void GaleraMonitor::calculate_cluster()

 void GaleraMonitor::pre_tick()
 {
-    // Clear the info before monitoring to make sure it's up to date
+    // Hand the previous tick's info over to m_prev_info, which is what diagnostics_json() reads
+    std::lock_guard<std::mutex> guard(m_lock); // same mutex diagnostics_json() holds while reading m_prev_info
+    m_prev_info = std::move(m_info);
    m_info.clear();
 }

--- a/server/modules/monitor/galeramon/galeramon.hh
+++ b/server/modules/monitor/galeramon/galeramon.hh
@ -69,8 +69,12 @@ private:
    std::string m_cluster_uuid;         /**< The Cluster UUID */
    bool        m_log_no_members;       /**< Should we log if no member are found. */
    NodeMap     m_info;                 /**< Contains Galera Cluster variables of all nodes */
+    NodeMap     m_prev_info;            /**< Contains the info from the previous tick */
    int         m_cluster_size;         /**< How many nodes in the cluster */

+    // Prevents concurrent use that might occur during the diagnostics_json call
+    mutable std::mutex m_lock;
+
    GaleraMonitor(const std::string& name, const std::string& module);

    bool detect_cluster_size(const int n_nodes,