From 7520efd2189bde0934833ba7d45390991464cc82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Fri, 3 Jul 2020 07:26:53 +0300 Subject: [PATCH] MXS-3059: Don't use STL containers concurrently The diagnostics_json call could access the std::unordered_map at the same time it was being updated by the monitoring thread. This leads to undefined behavior which in the case of MXS-3059 manifested as a segfault. --- server/modules/monitor/galeramon/galeramon.cc | 9 ++++++--- server/modules/monitor/galeramon/galeramon.hh | 4 ++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/server/modules/monitor/galeramon/galeramon.cc b/server/modules/monitor/galeramon/galeramon.cc index 7f85fadf3..10de4b259 100644 --- a/server/modules/monitor/galeramon/galeramon.cc +++ b/server/modules/monitor/galeramon/galeramon.cc @@ -103,12 +103,13 @@ json_t* GaleraMonitor::diagnostics_json() const } json_t* arr = json_array(); + std::lock_guard guard(m_lock); for (auto ptr : servers()) { - auto it = m_info.find(ptr); + auto it = m_prev_info.find(ptr); - if (it != m_info.end()) + if (it != m_prev_info.end()) { json_t* obj = json_object(); json_object_set_new(obj, "name", json_string(it->first->server->name())); @@ -357,7 +358,9 @@ void GaleraMonitor::calculate_cluster() void GaleraMonitor::pre_tick() { - // Clear the info before monitoring to make sure it's up to date + // Store the info of the previous tick in case it's used for diagnostics + std::lock_guard guard(m_lock); + m_prev_info = std::move(m_info); m_info.clear(); } diff --git a/server/modules/monitor/galeramon/galeramon.hh b/server/modules/monitor/galeramon/galeramon.hh index 263bc6c20..ce9180d59 100644 --- a/server/modules/monitor/galeramon/galeramon.hh +++ b/server/modules/monitor/galeramon/galeramon.hh @@ -69,8 +69,12 @@ private: std::string m_cluster_uuid; /**< The Cluster UUID */ bool m_log_no_members; /**< Should we log if no member are found. */ NodeMap m_info; /**< Contains Galera Cluster variables of all nodes */ + NodeMap m_prev_info; /**< Contains the info from the previous tick */ int m_cluster_size; /**< How many nodes in the cluster */ + // Prevents concurrent use that might occur during the diagnostics_json call + mutable std::mutex m_lock; + GaleraMonitor(const std::string& name, const std::string& module); bool detect_cluster_size(const int n_nodes,