MXS-2342 Run MariaDBMonitor diagnostics concurrent with the monitor loop

This fixes some situations where MaxAdmin/MaxCtrl would block and wait
until a monitor operation or tick is complete. This also fixes a deadlock
caused by calling monitor diagnostics inside a monitor script.

Concurrency is enabled by adding one mutex per server object to protect
array-like fields from concurrent reading/writing.
This commit is contained in:
Esa Korhonen 2019-02-25 16:54:47 +02:00
parent b4c5500fa1
commit 040562f718
3 changed files with 28 additions and 36 deletions

View File

@ -253,34 +253,20 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params)
void MariaDBMonitor::diagnostics(DCB* dcb) const
{
/* The problem with diagnostic printing is that some of the printed elements are array-like and their
* length could change during a monitor loop. Thus, the variables should only be read by the monitor
* thread and not the admin thread. Because the diagnostic must be printable even when the monitor is
* not running, the printing must be done outside the normal loop. */
* length could change during a monitor loop. Such variables are protected by mutexes. Locking is
* only required when the monitor thread writes to such a variable and when the admin thread is
* reading it. */
mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN));
/* The 'dcb' is owned by the admin thread (the thread executing this function), and probably
* should not be written to by any other thread. To prevent this, have the monitor thread
* print the diagnostics to a string. */
string diag_str;
// 'execute' is not a const method, although the task we are sending is.
MariaDBMonitor* mutable_ptr = const_cast<MariaDBMonitor*>(this);
auto func = [this, &diag_str] {
diag_str = diagnostics_to_string();
};
if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO))
{
diag_str = DIAG_ERROR;
}
dcb_printf(dcb, "%s", diag_str.c_str());
dcb_printf(dcb, "%s", diagnostics_to_string().c_str());
}
string MariaDBMonitor::diagnostics_to_string() const
{
using maxscale::string_printf;
string rval;
rval.reserve(1000); // Enough for basic output.
rval += string_printf("Automatic failover: %s\n", m_auto_failover ? "Enabled" : "Disabled");
rval += string_printf("Failcount: %d\n", m_failcount);
rval += string_printf("Failover timeout: %u\n", m_failover_timeout);
@ -306,24 +292,16 @@ string MariaDBMonitor::diagnostics_to_string() const
json_t* MariaDBMonitor::diagnostics_json() const
{
mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN));
json_t* rval = NULL;
MariaDBMonitor* mutable_ptr = const_cast<MariaDBMonitor*>(this);
auto func = [this, &rval] {
rval = to_json();
};
if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO))
{
rval = mxs_json_error_append(rval, "%s", DIAG_ERROR);
}
return rval;
return to_json();
}
json_t* MariaDBMonitor::to_json() const
{
json_t* rval = MonitorInstance::diagnostics_json();
json_object_set_new(rval, "master", m_master == NULL ? json_null() : json_string(m_master->name()));
// The m_master-pointer can be modified during a tick, but the pointed object cannot be deleted.
auto master = mxb::atomic::load(&m_master, mxb::atomic::RELAXED);
json_object_set_new(rval, "master", master == nullptr ? json_null() : json_string(master->name()));
json_object_set_new(rval,
"master_gtid_domain_id",
m_master_gtid_domain == GTID_DOMAIN_UNKNOWN ? json_null() :
@ -678,7 +656,7 @@ void MariaDBMonitor::log_master_changes()
void MariaDBMonitor::assign_new_master(MariaDBServer* new_master)
{
m_master = new_master;
mxb::atomic::store(&m_master, new_master, mxb::atomic::RELAXED);
update_master_cycle_info();
m_warn_current_master_invalid = true;
m_warn_have_better_master = true;
@ -820,8 +798,6 @@ bool MariaDBMonitor::run_manual_reset_replication(SERVER* master_server, json_t*
return send_ok && rval;
}
/**
* Command handler for 'switchover'
*

View File

@ -25,6 +25,7 @@ using std::string;
using maxscale::string_printf;
using maxbase::Duration;
using maxbase::StopWatch;
using Guard = std::lock_guard<std::mutex>;
MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index,
bool assume_unique_hostnames, bool query_events)
@ -386,6 +387,7 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
// Always write to m_slave_status. Even if the new status is equal by topology,
// gtid:s etc may have changed.
Guard guard(m_arraylock);
m_slave_status = std::move(slave_status_new);
return true;
}
@ -400,6 +402,8 @@ bool MariaDBServer::update_gtids(string* errmsg_out)
auto result = execute_query(query, errmsg_out);
if (result.get() != NULL)
{
Guard guard(m_arraylock);
rval = true;
if (result->next_row())
{
@ -655,9 +659,12 @@ string MariaDBServer::diagnostics() const
const char fmt_int64[] = "%-23s %" PRIi64 "\n";
string rval;
rval.reserve(300); // Enough for most common ouput.
rval += string_printf(fmt_string, "Server:", name());
rval += string_printf(fmt_int64, "Server ID:", m_server_id);
rval += string_printf(fmt_string, "Read only:", (m_read_only ? "Yes" : "No"));
Guard guard(m_arraylock);
if (!m_gtid_current_pos.empty())
{
rval += string_printf(fmt_string, "Gtid current position:", m_gtid_current_pos.to_string().c_str());
@ -686,6 +693,7 @@ json_t* MariaDBServer::to_json() const
json_object_set_new(result, "server_id", json_integer(m_server_id));
json_object_set_new(result, "read_only", json_boolean(m_read_only));
Guard guard(m_arraylock);
json_object_set_new(result,
"gtid_current_pos",
m_gtid_current_pos.empty() ? json_null() :

View File

@ -15,6 +15,7 @@
#include <functional>
#include <string>
#include <memory>
#include <mutex>
#include <maxscale/monitor.h>
#include <maxbase/stopwatch.hh>
#include "server_utils.hh"
@ -114,6 +115,7 @@ public:
bool log_slave_updates = false; /* Does the slave write replicated events to binlog? */
};
/* Monitored server base class/struct. MariaDBServer does not own the struct, it is not freed
* (or connection closed) when a MariaDBServer is destroyed. */
MXS_MONITORED_SERVER* m_server_base = NULL;
@ -548,6 +550,12 @@ private:
DISABLE
};
/* Protects array-like fields from concurrent access. This is only required for fields which can be
* read from another thread while the monitor is running. In practice, these are fields read during
* diagnostics-methods. Reading inside monitor thread does not need to be mutexed, as outside threads
* only read the values. */
mutable std::mutex m_arraylock;
bool update_slave_status(std::string* errmsg_out = NULL);
bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status);
const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess);