From b4c5500fa1c658d89d6cf3e1f504f63956892c0b Mon Sep 17 00:00:00 2001
From: Esa Korhonen <esa.korhonen@mariadb.com>
Date: Thu, 7 Mar 2019 13:33:03 +0200
Subject: [PATCH 1/2] MXS-2362 Document SchemaRouter table-sharding limitations

---
 Documentation/Routers/SchemaRouter.md | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/Documentation/Routers/SchemaRouter.md b/Documentation/Routers/SchemaRouter.md
index 45da5b2d3..3e5ac3ca1 100644
--- a/Documentation/Routers/SchemaRouter.md
+++ b/Documentation/Routers/SchemaRouter.md
@@ -139,24 +139,20 @@ refresh_interval=60
 
 This functionality was introduced in 2.3.0.
 
-If the same database exists on multiple servers, but the database contains
-different tables in each server, the SchemaRouter is capable of
-transparently routing queries to the right server, depending on which table
-is being addressed.
+If the same database exists on multiple servers, but the database contains different
+tables in each server, SchemaRouter is capable of routing queries to the right server,
+depending on which table is being addressed.
 
-For instance, suppose the database `db` exists on servers _server1_ and
-_server2_, but that the database on _server1_ contains the table `tbl1` and
-on _server2_ contains the table `tbl2`.
-
-In that case, the query
+As an example, suppose the database `db` exists on servers _server1_ and _server2_, but
+that the database on _server1_ contains the table `tbl1` and on _server2_ contains the
+table `tbl2`. The query `SELECT * FROM db.tbl1` will be routed to _server1_ and the query
+`SELECT * FROM db.tbl2` will be routed to _server2_. As in the example queries, the table
+names must be qualified with the database names for table-level sharding to work.
+Specifically, the query series below is not supported.
 ```
-SELECT * FROM db.tbl1
+USE db;
+SELECT * FROM tbl1; // May be routed to an incorrect backend if using table sharding.
 ```
-will be routed to _server1_ and the query
-```
-SELECT * FROM db.tbl2
-```
-will be routed to _server2_.
 
 ## Router Options
 

From 040562f7186220fd23d394f656f19407cad5c429 Mon Sep 17 00:00:00 2001
From: Esa Korhonen <esa.korhonen@mariadb.com>
Date: Mon, 25 Feb 2019 16:54:47 +0200
Subject: [PATCH 2/2] MXS-2342 Run MariaDBMonitor diagnostics concurrent with
 the monitor loop

This fixes some situations where MaxAdmin/MaxCtrl would block and wait
until a monitor operation or tick is complete. This also fixes a deadlock
caused by calling monitor diagnostics inside a monitor script.

Concurrency is enabled by adding one mutex per server object to protect
array-like fields from concurrent reading/writing.
---
 .../modules/monitor/mariadbmon/mariadbmon.cc  | 48 +++++--------------
 .../monitor/mariadbmon/mariadbserver.cc       |  8 ++++
 .../monitor/mariadbmon/mariadbserver.hh       |  8 ++++
 3 files changed, 28 insertions(+), 36 deletions(-)

diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc
index 469286ada..0298c2500 100644
--- a/server/modules/monitor/mariadbmon/mariadbmon.cc
+++ b/server/modules/monitor/mariadbmon/mariadbmon.cc
@@ -253,34 +253,20 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params)
 void MariaDBMonitor::diagnostics(DCB* dcb) const
 {
     /* The problem with diagnostic printing is that some of the printed elements are array-like and their
-     * length could change during a monitor loop. Thus, the variables should only be read by the monitor
-     * thread and not the admin thread. Because the diagnostic must be printable even when the monitor is
-     * not running, the printing must be done outside the normal loop. */
+     * length could change during a monitor loop. Such variables are protected by mutexes. Locking is
+     * only required when the monitor thread writes to such a variable and when the admin thread is
+     * reading it. */
 
     mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN));
-    /* The 'dcb' is owned by the admin thread (the thread executing this function), and probably
-     * should not be written to by any other thread. To prevent this, have the monitor thread
-     * print the diagnostics to a string. */
-    string diag_str;
-
-    // 'execute' is not a const method, although the task we are sending is.
-    MariaDBMonitor* mutable_ptr = const_cast<MariaDBMonitor*>(this);
-    auto func = [this, &diag_str] {
-            diag_str = diagnostics_to_string();
-        };
-
-    if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO))
-    {
-        diag_str = DIAG_ERROR;
-    }
-
-    dcb_printf(dcb, "%s", diag_str.c_str());
+    dcb_printf(dcb, "%s", diagnostics_to_string().c_str());
 }
 
 string MariaDBMonitor::diagnostics_to_string() const
 {
     using maxscale::string_printf;
     string rval;
+    rval.reserve(1000); // Enough for basic output.
+
     rval += string_printf("Automatic failover:     %s\n", m_auto_failover ? "Enabled" : "Disabled");
     rval += string_printf("Failcount:              %d\n", m_failcount);
     rval += string_printf("Failover timeout:       %u\n", m_failover_timeout);
@@ -306,24 +292,16 @@ string MariaDBMonitor::diagnostics_to_string() const
 json_t* MariaDBMonitor::diagnostics_json() const
 {
     mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN));
-    json_t* rval = NULL;
-    MariaDBMonitor* mutable_ptr = const_cast<MariaDBMonitor*>(this);
-    auto func = [this, &rval] {
-            rval = to_json();
-        };
-
-    if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO))
-    {
-        rval = mxs_json_error_append(rval, "%s", DIAG_ERROR);
-    }
-
-    return rval;
+    return to_json();
 }
 
 json_t* MariaDBMonitor::to_json() const
 {
     json_t* rval = MonitorInstance::diagnostics_json();
-    json_object_set_new(rval, "master", m_master == NULL ? json_null() : json_string(m_master->name()));
+
+    // The m_master-pointer can be modified during a tick, but the pointed object cannot be deleted.
+    auto master = mxb::atomic::load(&m_master, mxb::atomic::RELAXED);
+    json_object_set_new(rval, "master", master == nullptr ? json_null() : json_string(master->name()));
     json_object_set_new(rval,
                         "master_gtid_domain_id",
                         m_master_gtid_domain == GTID_DOMAIN_UNKNOWN ? json_null() :
@@ -678,7 +656,7 @@ void MariaDBMonitor::log_master_changes()
 
 void MariaDBMonitor::assign_new_master(MariaDBServer* new_master)
 {
-    m_master = new_master;
+    mxb::atomic::store(&m_master, new_master, mxb::atomic::RELAXED);
     update_master_cycle_info();
     m_warn_current_master_invalid = true;
     m_warn_have_better_master = true;
@@ -820,8 +798,6 @@ bool MariaDBMonitor::run_manual_reset_replication(SERVER* master_server, json_t*
     return send_ok && rval;
 }
 
-
-
 /**
  * Command handler for 'switchover'
  *
diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc
index da384ca7b..83fae9708 100644
--- a/server/modules/monitor/mariadbmon/mariadbserver.cc
+++ b/server/modules/monitor/mariadbmon/mariadbserver.cc
@@ -25,6 +25,7 @@ using std::string;
 using maxscale::string_printf;
 using maxbase::Duration;
 using maxbase::StopWatch;
+using Guard = std::lock_guard<std::mutex>;
 
 MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index,
                              bool assume_unique_hostnames, bool query_events)
@@ -386,6 +387,7 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
 
     // Always write to m_slave_status. Even if the new status is equal by topology,
     // gtid:s etc may have changed.
+    Guard guard(m_arraylock);
     m_slave_status = std::move(slave_status_new);
     return true;
 }
@@ -400,6 +402,8 @@ bool MariaDBServer::update_gtids(string* errmsg_out)
     auto result = execute_query(query, errmsg_out);
     if (result.get() != NULL)
     {
+        Guard guard(m_arraylock);
+
         rval = true;
         if (result->next_row())
         {
@@ -655,9 +659,12 @@ string MariaDBServer::diagnostics() const
     const char fmt_int64[] = "%-23s %" PRIi64 "\n";
 
     string rval;
+    rval.reserve(300); // Enough for most common ouput.
+
     rval += string_printf(fmt_string, "Server:", name());
     rval += string_printf(fmt_int64, "Server ID:", m_server_id);
     rval += string_printf(fmt_string, "Read only:", (m_read_only ? "Yes" : "No"));
+    Guard guard(m_arraylock);
     if (!m_gtid_current_pos.empty())
     {
         rval += string_printf(fmt_string, "Gtid current position:", m_gtid_current_pos.to_string().c_str());
@@ -686,6 +693,7 @@ json_t* MariaDBServer::to_json() const
     json_object_set_new(result, "server_id", json_integer(m_server_id));
     json_object_set_new(result, "read_only", json_boolean(m_read_only));
 
+    Guard guard(m_arraylock);
     json_object_set_new(result,
                         "gtid_current_pos",
                         m_gtid_current_pos.empty() ? json_null() :
diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh
index ff78ec810..7ca24b5e0 100644
--- a/server/modules/monitor/mariadbmon/mariadbserver.hh
+++ b/server/modules/monitor/mariadbmon/mariadbserver.hh
@@ -15,6 +15,7 @@
 #include <functional>
 #include <string>
 #include <memory>
+#include <mutex>
 #include <maxscale/monitor.h>
 #include <maxbase/stopwatch.hh>
 #include "server_utils.hh"
@@ -114,6 +115,7 @@ public:
         bool log_slave_updates = false; /* Does the slave write replicated events to binlog? */
     };
 
+
     /* Monitored server base class/struct. MariaDBServer does not own the struct, it is not freed
      * (or connection closed) when a MariaDBServer is destroyed. */
     MXS_MONITORED_SERVER* m_server_base = NULL;
@@ -548,6 +550,12 @@ private:
         DISABLE
     };
 
+    /* Protects array-like fields from concurrent access. This is only required for fields which can be
+     * read from another thread while the monitor is running. In practice, these are fields read during
+     * diagnostics-methods. Reading inside monitor thread does not need to be mutexed, as outside threads
+     * only read the values. */
+    mutable std::mutex m_arraylock;
+
     bool               update_slave_status(std::string* errmsg_out = NULL);
     bool               sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status);
     const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess);