From b4c5500fa1c658d89d6cf3e1f504f63956892c0b Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Thu, 7 Mar 2019 13:33:03 +0200 Subject: [PATCH 1/2] MXS-2362 Document SchemaRouter table-sharding limitations --- Documentation/Routers/SchemaRouter.md | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/Documentation/Routers/SchemaRouter.md b/Documentation/Routers/SchemaRouter.md index 45da5b2d3..3e5ac3ca1 100644 --- a/Documentation/Routers/SchemaRouter.md +++ b/Documentation/Routers/SchemaRouter.md @@ -139,24 +139,20 @@ refresh_interval=60 This functionality was introduced in 2.3.0. -If the same database exists on multiple servers, but the database contains -different tables in each server, the SchemaRouter is capable of -transparently routing queries to the right server, depending on which table -is being addressed. +If the same database exists on multiple servers, but the database contains different +tables in each server, SchemaRouter is capable of routing queries to the right server, +depending on which table is being addressed. -For instance, suppose the database `db` exists on servers _server1_ and -_server2_, but that the database on _server1_ contains the table `tbl1` and -on _server2_ contains the table `tbl2`. - -In that case, the query +As an example, suppose the database `db` exists on servers _server1_ and _server2_, but +that the database on _server1_ contains the table `tbl1` and on _server2_ contains the +table `tbl2`. The query `SELECT * FROM db.tbl1` will be routed to _server1_ and the query +`SELECT * FROM db.tbl2` will be routed to _server2_. As in the example queries, the table +names must be qualified with the database names for table-level sharding to work. +Specifically, the query series below is not supported. ``` -SELECT * FROM db.tbl1 +USE db; +SELECT * FROM tbl1; // May be routed to an incorrect backend if using table sharding. ``` -will be routed to _server1_ and the query -``` -SELECT * FROM db.tbl2 -``` -will be routed to _server2_. ## Router Options From 040562f7186220fd23d394f656f19407cad5c429 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Mon, 25 Feb 2019 16:54:47 +0200 Subject: [PATCH 2/2] MXS-2342 Run MariaDBMonitor diagnostics concurrent with the monitor loop This fixes some situations where MaxAdmin/MaxCtrl would block and wait until a monitor operation or tick is complete. This also fixes a deadlock caused by calling monitor diagnostics inside a monitor script. Concurrency is enabled by adding one mutex per server object to protect array-like fields from concurrent reading/writing. --- .../modules/monitor/mariadbmon/mariadbmon.cc | 48 +++++-------------- .../monitor/mariadbmon/mariadbserver.cc | 8 ++++ .../monitor/mariadbmon/mariadbserver.hh | 8 ++++ 3 files changed, 28 insertions(+), 36 deletions(-) diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 469286ada..0298c2500 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -253,34 +253,20 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params) void MariaDBMonitor::diagnostics(DCB* dcb) const { /* The problem with diagnostic printing is that some of the printed elements are array-like and their - * length could change during a monitor loop. Thus, the variables should only be read by the monitor - * thread and not the admin thread. Because the diagnostic must be printable even when the monitor is - * not running, the printing must be done outside the normal loop. */ + * length could change during a monitor loop. Such variables are protected by mutexes. Locking is + * only required when the monitor thread writes to such a variable and when the admin thread is + * reading it. */ mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN)); - /* The 'dcb' is owned by the admin thread (the thread executing this function), and probably - * should not be written to by any other thread. To prevent this, have the monitor thread - * print the diagnostics to a string. */ - string diag_str; - - // 'execute' is not a const method, although the task we are sending is. - MariaDBMonitor* mutable_ptr = const_cast(this); - auto func = [this, &diag_str] { - diag_str = diagnostics_to_string(); - }; - - if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO)) - { - diag_str = DIAG_ERROR; - } - - dcb_printf(dcb, "%s", diag_str.c_str()); + dcb_printf(dcb, "%s", diagnostics_to_string().c_str()); } string MariaDBMonitor::diagnostics_to_string() const { using maxscale::string_printf; string rval; + rval.reserve(1000); // Enough for basic output. + rval += string_printf("Automatic failover: %s\n", m_auto_failover ? "Enabled" : "Disabled"); rval += string_printf("Failcount: %d\n", m_failcount); rval += string_printf("Failover timeout: %u\n", m_failover_timeout); @@ -306,24 +292,16 @@ string MariaDBMonitor::diagnostics_to_string() const json_t* MariaDBMonitor::diagnostics_json() const { mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN)); - json_t* rval = NULL; - MariaDBMonitor* mutable_ptr = const_cast(this); - auto func = [this, &rval] { - rval = to_json(); - }; - - if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO)) - { - rval = mxs_json_error_append(rval, "%s", DIAG_ERROR); - } - - return rval; + return to_json(); } json_t* MariaDBMonitor::to_json() const { json_t* rval = MonitorInstance::diagnostics_json(); - json_object_set_new(rval, "master", m_master == NULL ? json_null() : json_string(m_master->name())); + + // The m_master-pointer can be modified during a tick, but the pointed object cannot be deleted. + auto master = mxb::atomic::load(&m_master, mxb::atomic::RELAXED); + json_object_set_new(rval, "master", master == nullptr ? json_null() : json_string(master->name())); json_object_set_new(rval, "master_gtid_domain_id", m_master_gtid_domain == GTID_DOMAIN_UNKNOWN ? json_null() : @@ -678,7 +656,7 @@ void MariaDBMonitor::log_master_changes() void MariaDBMonitor::assign_new_master(MariaDBServer* new_master) { - m_master = new_master; + mxb::atomic::store(&m_master, new_master, mxb::atomic::RELAXED); update_master_cycle_info(); m_warn_current_master_invalid = true; m_warn_have_better_master = true; @@ -820,8 +798,6 @@ bool MariaDBMonitor::run_manual_reset_replication(SERVER* master_server, json_t* return send_ok && rval; } - - /** * Command handler for 'switchover' * diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index da384ca7b..83fae9708 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -25,6 +25,7 @@ using std::string; using maxscale::string_printf; using maxbase::Duration; using maxbase::StopWatch; +using Guard = std::lock_guard; MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index, bool assume_unique_hostnames, bool query_events) @@ -386,6 +387,7 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out) // Always write to m_slave_status. Even if the new status is equal by topology, // gtid:s etc may have changed. + Guard guard(m_arraylock); m_slave_status = std::move(slave_status_new); return true; } @@ -400,6 +402,8 @@ bool MariaDBServer::update_gtids(string* errmsg_out) auto result = execute_query(query, errmsg_out); if (result.get() != NULL) { + Guard guard(m_arraylock); + rval = true; if (result->next_row()) { @@ -655,9 +659,12 @@ string MariaDBServer::diagnostics() const const char fmt_int64[] = "%-23s %" PRIi64 "\n"; string rval; + rval.reserve(300); // Enough for most common ouput. + rval += string_printf(fmt_string, "Server:", name()); rval += string_printf(fmt_int64, "Server ID:", m_server_id); rval += string_printf(fmt_string, "Read only:", (m_read_only ? "Yes" : "No")); + Guard guard(m_arraylock); if (!m_gtid_current_pos.empty()) { rval += string_printf(fmt_string, "Gtid current position:", m_gtid_current_pos.to_string().c_str()); @@ -686,6 +693,7 @@ json_t* MariaDBServer::to_json() const json_object_set_new(result, "server_id", json_integer(m_server_id)); json_object_set_new(result, "read_only", json_boolean(m_read_only)); + Guard guard(m_arraylock); json_object_set_new(result, "gtid_current_pos", m_gtid_current_pos.empty() ? json_null() : diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index ff78ec810..7ca24b5e0 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include "server_utils.hh" @@ -114,6 +115,7 @@ public: bool log_slave_updates = false; /* Does the slave write replicated events to binlog? */ }; + /* Monitored server base class/struct. MariaDBServer does not own the struct, it is not freed * (or connection closed) when a MariaDBServer is destroyed. */ MXS_MONITORED_SERVER* m_server_base = NULL; @@ -548,6 +550,12 @@ private: DISABLE }; + /* Protects array-like fields from concurrent access. This is only required for fields which can be + * read from another thread while the monitor is running. In practice, these are fields read during + * diagnostics-methods. Reading inside monitor thread does not need to be mutexed, as outside threads + * only read the values. */ + mutable std::mutex m_arraylock; + bool update_slave_status(std::string* errmsg_out = NULL); bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status); const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess);